def videolist(params, url, category):
    """Add one playable entry per video found on the page at ``url``.

    The page is downloaded with ``scrapertools.cachePage``, every
    ``<div class="texto">`` block is parsed into title / link / thumbnail /
    plot, and each result is handed to ``xbmctools.addnewvideo``. The
    directory is then closed through ``xbmcplugin``.

    Args:
        params: Plugin parameters; not read by this function.
        url: Page to scrape; also the base used to resolve thumbnail paths.
        category: Label passed to ``xbmcplugin.setPluginCategory``.
    """
    xbmc.output("[rtvv.py] videolist")

    # Download the page
    data = scrapertools.cachePage(url)

    # Extract the videos: (href, link text, rest of the text block, image src)
    patron = '<div class="texto">.*?<a href="([^"]+)">([^<]+)(.*?)</div>.*?<img src="([^"]+)"'
    matches = re.compile(patron, re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)

    # Loop-invariant: compiled once instead of once per video.
    patronfechas = re.compile("<p>Emissió: ([^<]+)<", re.DOTALL)

    for match in matches:
        scrapedtitle = scrapertools.entityunescape(match[1])

        # Append the broadcast date, when the description carries one.
        matchesfechas = patronfechas.findall(match[2])
        if matchesfechas:
            scrapedtitle = scrapedtitle + " (" + matchesfechas[0] + ")"

        # The href comes straight from HTML, so its query separators are
        # written as "&amp;"; turn them back into plain "&" for the request.
        scrapedurl = "http://www.rtvv.es/alacarta/secciones.asp" + match[0].replace("&amp;", "&")
        scrapedthumbnail = urlparse.urljoin(url, match[3]).replace(" ", "%20")

        # Plot: the remaining text of the block with its paragraph/link tags removed.
        scrapedplot = "%s" % match[2]
        scrapedplot = scrapedplot.strip()
        scrapedplot = scrapedplot.replace("</a>", "")
        scrapedplot = scrapedplot.replace("</p>", "")
        scrapedplot = scrapedplot.replace("<p>", "")
        scrapedplot = scrapertools.entityunescape(scrapedplot)

        if DEBUG:
            xbmc.output("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")

        # Add to the XBMC listing
        xbmctools.addnewvideo(
            CHANNELCODE, "play", CHANNELNAME, "", scrapedtitle, scrapedurl, scrapedthumbnail, scrapedplot
        )

    # Label (top-right)...
    xbmcplugin.setPluginCategory(handle=pluginhandle, category=category)

    # End of directory (no sort method is registered here)...
    xbmcplugin.endOfDirectory(handle=pluginhandle, succeeded=True)
def novedades(params, url, category):
    """Add one folder entry per cover found on the page at ``url``.

    Each table cell matching the pattern yields a link, a thumbnail and an
    ``alt`` text; the ``alt`` text becomes the title and the entry is added
    with ``xbmctools.addnewfolder`` using the "mirrors" action.

    Args:
        params: Plugin parameters; not read by this function.
        url: Page to scrape; also the base used to resolve relative links.
        category: Passed to ``addnewfolder`` and to
            ``xbmcplugin.setPluginCategory``.
    """
    logger.info("[capitancinema.py] novedades")

    # Download the page
    data = scrapertools.cachePage(url)

    # Extract the entries (folders): (href, image src, image alt).
    # Raw string so that "\%" reaches the regex engine unchanged.
    patronvideos = r'<td width="23\%"><a href="([^"]+)"[^>]+><img style="[^"]+" src="([^"]+)" border="0" alt="([^"]+)"[^>]+></a></td>'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for match in matches:
        # The alt text cannot hold a literal double quote, so quotes arrive
        # as the "&quot;" entity; drop them before unescaping the rest.
        scrapedtitle = match[2].replace("&quot;", "")
        scrapedtitle = scrapertools.entityunescape(scrapedtitle)
        scrapedurl = urlparse.urljoin(url, match[0])
        scrapedthumbnail = urlparse.urljoin(url, match[1])
        scrapedplot = ""
        if DEBUG:
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")

        # Add to the XBMC listing
        xbmctools.addnewfolder(
            CHANNELNAME, "mirrors", category, scrapedtitle, scrapedurl, scrapedthumbnail, scrapedplot
        )

    # Label (top-right)...
    xbmcplugin.setPluginCategory(handle=pluginhandle, category=category)
    xbmcplugin.addSortMethod(handle=pluginhandle, sortMethod=xbmcplugin.SORT_METHOD_NONE)
    xbmcplugin.endOfDirectory(handle=pluginhandle, succeeded=True)
def mainlist(item):
    """Build the top-level menu from the language selector of the videos page.

    Downloads http://www.totlol.com/videos, isolates the
    ``<select name="vdolanguage">`` element and turns each ``<option>`` into
    an ``Item`` whose action is "categoria".

    Args:
        item: Calling item; not read by this function.

    Returns:
        A list of ``Item`` objects, one per option. The list is empty when
        the selector is not present in the downloaded page.
    """
    logger.info("[totlol.py] mainlist")
    itemlist = []

    # Download the page
    data = scrapertools.cachePage("http://www.totlol.com/videos")

    # Isolate the body of the language <select>
    patron = '<select class="subnavbarform" name="vdolanguage"[^>]+>(.*?)</select>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    if not matches:
        # Without the selector there is nothing to list; indexing matches[0]
        # here would raise IndexError.
        logger.info("[totlol.py] mainlist: language selector not found")
        return itemlist
    data = matches[0]

    # Each option looks like one of:
    #   <option selected="selected" value="http://www.totlol.com/videos?&flang=all&...">- All Languages -</option>
    #   <option value='http://www.totlol.com/videos?&flang=en&u=&c=&search_id='>English</option>
    # The value group is captured together with its surrounding quotes.
    patron = "<option.*?value=([^>]+)>(.*?)</option>"
    matches = re.compile(patron, re.DOTALL).findall(data)

    for match in matches:
        scrapedtitle = match[1]
        # [1:-1] strips the quote characters captured around the value.
        scrapedurl = scrapertools.entityunescape(match[0][1:-1])
        scrapedthumbnail = ""
        scrapedplot = ""

        # Add to the listing
        itemlist.append(
            Item(
                channel=CHANNELNAME,
                title=scrapedtitle,
                action="categoria",
                url=scrapedurl,
                thumbnail=scrapedthumbnail,
                plot=scrapedplot,
                folder=True,
            )
        )

    return itemlist
def novedades(item):
    """Return one folder ``Item`` per cover found on the page at ``item.url``.

    Each table cell matching the pattern yields a link, a thumbnail and an
    ``alt`` text; the ``alt`` text becomes the title and the resulting item
    uses the "mirrors" action.

    Args:
        item: Calling item; its ``url`` attribute is the page to scrape and
            the base used to resolve relative links.

    Returns:
        A list of ``Item`` objects, possibly empty.
    """
    logger.info("[capitancinema.py] novedades")

    # Download the page
    data = scrapertools.cachePage(item.url)

    # Extract the entries (folders): (href, image src, image alt).
    # Raw string so that "\%" reaches the regex engine unchanged.
    patronvideos = r'<td width="23\%"><a href="([^"]+)"[^>]+><img style="[^"]+" src="([^"]+)" border="0" alt="([^"]+)"[^>]+></a></td>'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    itemlist = []
    for match in matches:
        # The alt text cannot hold a literal double quote, so quotes arrive
        # as the "&quot;" entity; drop them before unescaping the rest.
        scrapedtitle = match[2].replace("&quot;", "")
        scrapedtitle = scrapertools.entityunescape(scrapedtitle)
        scrapedurl = urlparse.urljoin(item.url, match[0])
        scrapedthumbnail = urlparse.urljoin(item.url, match[1])
        scrapedplot = ""
        if DEBUG:
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")

        itemlist.append(
            Item(
                channel=CHANNELNAME,
                action="mirrors",
                title=scrapedtitle,
                url=scrapedurl,
                thumbnail=scrapedthumbnail,
                plot=scrapedplot,
                folder=True,
            )
        )

    return itemlist
def novedades(item):
    """Scrape ``item.url`` and return its covers as "mirrors" folder items.

    Args:
        item: Calling item; ``item.url`` is downloaded and is also the base
            against which scraped links and thumbnails are resolved.

    Returns:
        A list of ``Item`` objects (empty when the pattern finds nothing).
    """
    logger.info("[capitancinema.py] novedades")

    # Download the page
    data = scrapertools.cachePage(item.url)

    # Extract the entries (folders). Groups: href, image src, image alt.
    # Raw string keeps the "\%" escape intact for the regex engine.
    patronvideos = r'<td width="23\%"><a href="([^"]+)"[^>]+><img style="[^"]+" src="([^"]+)" border="0" alt="([^"]+)"[^>]+></a></td>'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    itemlist = []
    for match in matches:
        # Attributes. Quotes inside an alt attribute are written as the
        # "&quot;" entity; remove them, then unescape any other entity.
        scrapedtitle = match[2]
        scrapedtitle = scrapedtitle.replace("&quot;", "")
        scrapedtitle = scrapertools.entityunescape(scrapedtitle)

        scrapedurl = urlparse.urljoin(item.url, match[0])
        scrapedthumbnail = urlparse.urljoin(item.url, match[1])
        scrapedplot = ""
        if DEBUG:
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")

        itemlist.append(
            Item(
                channel=CHANNELNAME,
                action="mirrors",
                title=scrapedtitle,
                url=scrapedurl,
                thumbnail=scrapedthumbnail,
                plot=scrapedplot,
                folder=True,
            )
        )

    return itemlist
def listtemporadacaratula(item):
    """Return one "findvideos" item per episode cover listed at ``item.url``.

    The title is built as "<series> - <season/episode text>", followed by the
    ``alt`` texts of the icons in the entry joined with "/", in parentheses.

    Args:
        item: Calling item; ``item.url`` is the page to scrape and the base
            for resolving episode links.

    Returns:
        A list of ``Item`` objects, possibly empty.
    """
    logger.info("[cinetube.py] listtemporadacaratula")
    resultado = []

    # Download the page
    data = scrapertools.cachePage(item.url)

    # Each entry is an <li> shaped like:
    #   <a href="/series/.../capitulo-12/"><img src="http://.../series/8912.jpg" alt="peli" /></a>
    #   <div class="icos_lg"><img ... alt="espanol" /> <img ... alt="megavideo.png" /> ...</div>
    #   <p class="tit_ficha"><a class="tit_ficha" ...>Tierra de lobos </a></p>
    #   <p class="tem_fich">1a Temporada - Cap 12</p>
    # Groups: href, cover src, icon block, series name, season/episode text.
    patronvideos = (
        "<li>[^<]+"
        '<a href="([^"]+)"><img src="([^"]+)"[^>]+></a>[^>]+'
        '<div class="icos_lg">(.*?)</div>.*?'
        '<p class="tit_ficha"><a[^>]+>([^<]+)</a></p>[^<]+'
        '<p class="tem_fich">([^<]+)</p>'
    )
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)

    patron_iconos = re.compile('<img.*?alt="([^"]*)"', re.DOTALL)

    for enlace, caratula, iconos, serie, capitulo in matches:
        # Title
        scrapedtitle = serie.strip() + " - " + capitulo.strip()

        alts = patron_iconos.findall(iconos)
        nombres = []
        for alt in alts:
            logger.info("matchconector=" + alt)
            # An icon with an empty alt text is labelled "megavideo".
            if alt == "":
                alt = "megavideo"
            nombres.append(alt)
        if alts:
            scrapedtitle = scrapedtitle + " (" + "/".join(nombres) + ")"

        scrapedtitle = scrapertools.entityunescape(scrapedtitle)

        scrapedurl = urlparse.urljoin(item.url, enlace)
        scrapedthumbnail = caratula
        scrapedplot = ""
        if DEBUG:
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")

        resultado.append(
            Item(
                channel=CHANNELNAME,
                action="findvideos",
                title=scrapedtitle,
                url=scrapedurl,
                thumbnail=scrapedthumbnail,
                plot=scrapedplot,
                folder=True,
            )
        )

    return resultado
def videolist(params, url, category):
    """Add one playable entry per ``<video>`` element found at ``url``.

    The document is fetched with ``scrapertools.cachePage2`` using fixed
    Referer and User-Agent headers, written in full to the XBMC log, parsed,
    and each video is handed to ``xbmctools.addnewvideo``.

    Args:
        params: Plugin parameters; not read by this function.
        url: Document to download; also the base for relative links.
        category: Label passed to ``xbmcplugin.setPluginCategory``.
    """
    xbmc.output("[clantv.py] videolist")

    # Download the page
    cabeceras = [
        ["Referer", "http://www.rtve.es/infantil/videos-juegos/#/videos/edebits/todos/"],
        [
            "User-Agent",
            "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; SLCC1; .NET CLR 2.0.50727; .NET CLR 3.0.04506; InfoPath.2)",
        ],
    ]
    data = scrapertools.cachePage2(url, cabeceras)
    xbmc.output(data)

    # Extract the episodes.
    # Groups: thumbnail, url, publication date (up to the "T"), title, synopsis text.
    patron = '<video id="[^"]+" thumbnail="([^"]+)" url="([^"]+)" publication_date="([^T]+)T[^>]+>[^<]+<title>([^<]+)</title>[^<]+<sinopsis([^<]+)<'
    matches = re.compile(patron, re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)

    for miniatura, enlace, fecha, titulo, sinopsis in matches:
        # Re-encode the title from UTF-8 to ISO-8859-1; on any failure the
        # scraped text is used untouched.
        try:
            scrapedtitle = unicode(titulo, "utf-8").encode("iso-8859-1")
        except:
            scrapedtitle = titulo
        scrapedtitle = scrapertools.entityunescape(scrapedtitle + " (" + fecha + ")")

        scrapedurl = urlparse.urljoin(url, enlace)
        scrapedthumbnail = urlparse.urljoin(url, miniatura)
        scrapedplot = sinopsis

        # Debugging
        if DEBUG:
            xbmc.output("scrapedtitle=" + scrapedtitle)
            xbmc.output("scrapedurl=" + scrapedurl)
            xbmc.output("scrapedthumbnail=" + scrapedthumbnail)

        # Add to the XBMC listing
        xbmctools.addnewvideo(
            CHANNELCODE, "play", CHANNELNAME, "", scrapedtitle, scrapedurl, scrapedthumbnail, scrapedplot
        )

    # Label (top-right)...
    xbmcplugin.setPluginCategory(handle=pluginhandle, category=category)

    # End of directory (no sort method is registered here)...
    xbmcplugin.endOfDirectory(handle=pluginhandle, succeeded=True)
def episodios(params, url, category):
    """Add one playable entry per (episode, language) pair found at ``url``.

    Every episode row carries a language ``<select>`` plus two hidden form
    fields. For each language option a video entry is added whose URL is the
    form body "idiomesCombo=..&enviar=%3E&idCapitol=..&idIdiomaInterficie=..".

    Args:
        params: Plugin parameters; not read by this function.
        url: Page to scrape; also the base for resolving thumbnails.
        category: Passed to ``addnewvideo`` and to
            ``xbmcplugin.setPluginCategory``.
    """
    xbmc.output("[tresmellizas.py] episodios")

    # Download the page
    data = scrapertools.cachePage(url)

    # Extract the entries. Each one is a table row shaped like:
    #   <td ...><img src="./img_captures/3bb_weare.jpg" alt="Som Les Tres Bebes" ... /></td>
    #   ... <select name="idiomesCombo" id="idiomesCombo"><option value="3">English</option>...</select>
    #   ... <input name="idCapitol" type="hidden" id="idCapitol" value="4">
    #       <input name="idIdiomaInterficie" type="hidden" id="idIdiomaInterficie" value="1">
    # Groups: image src, image alt, option block, idCapitol, idIdiomaInterficie.
    patronvideos = (
        '<tr>[^<]+'
        '<td[^>]+><img src="([^"]+)" alt="([^"]+)"[^>]+>.*?'
        '<select name="idiomesCombo" id="idiomesCombo">(.*?)</select>.*?'
        '<input name="idCapitol" type="hidden" id="idCapitol" value="([^"]+)">[^<]+'
        '<input name="idIdiomaInterficie" type="hidden" id="idIdiomaInterficie" value="([^"]+)">'
    )
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)

    patronidiomas = re.compile('<option.*?value="([^"]+)">([^<]+)</option>', re.DOTALL)

    for imagen, nombre, opciones, id_capitol, id_interficie in matches:
        scrapedthumbnail = urlparse.urljoin(url, imagen)
        scrapedplot = ""
        if DEBUG:
            # Logged once per episode, before the per-language URL exists.
            xbmc.output("title=[" + nombre + "], url=[" + "" + "], thumbnail=[" + scrapedthumbnail + "]")

        for id_idioma, idioma in patronidiomas.findall(opciones):
            scrapedtitle = scrapertools.entityunescape(nombre + " (" + idioma + ")")
            scrapedurl = (
                "idiomesCombo=" + id_idioma
                + "&enviar=%3E&idCapitol=" + id_capitol
                + "&idIdiomaInterficie=" + id_interficie
            )
            xbmctools.addnewvideo(
                CHANNELCODE, "play", category, "Directo", scrapedtitle, scrapedurl, scrapedthumbnail, scrapedplot
            )

    # Label (top-right)...
    xbmcplugin.setPluginCategory(handle=pluginhandle, category=category)
    xbmcplugin.addSortMethod(handle=pluginhandle, sortMethod=xbmcplugin.SORT_METHOD_LABEL)
    xbmcplugin.endOfDirectory(handle=pluginhandle, succeeded=True)
def videolist(params, url, category):
    """Add the videos found at ``url``, plus a next-page folder when present.

    Each ``video-overview`` block becomes a playable entry titled
    "<title> (<second info line>) (<first info line>)". The plot is the image
    ``title`` attribute, with everything up to and including the first "--"
    removed when that separator is present.

    Args:
        params: Plugin parameters; not read by this function.
        url: Page to scrape; also the base for resolving relative links.
        category: Label passed to ``xbmcplugin.setPluginCategory``.
    """
    xbmc.output("[publicotv.py] videolist")

    # Download the page
    data = scrapertools.cachePage(url)

    # Extract the videos.
    # Groups: href, image src, image title, title, info line 1, info line 2.
    # The last piece is a raw string so that "\W" reaches the regex engine as written.
    patron = '<div class="video-overview a1">[^<]+'
    patron += '<a href="([^"]+)" title="Play">'
    patron += r'<img.*?src="(.*?)".*?title="([^"]+)"[^>]+></a>\W*<h4></h4>\W*<p class="title">(.*?)</p>\W*<div class="video-info-line">\W*<p>(.*?)</p>\W*<p>(.*?)</p>\W*</div>\W*</div>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)

    for match in matches:
        scrapedtitle = match[3] + " (" + match[5] + ") (" + match[4] + ")"
        scrapedurl = urlparse.urljoin(url, match[0])
        scrapedthumbnail = urlparse.urljoin(url, match[1])

        scrapedplot = scrapertools.entityunescape(match[2])
        # Keep only the text after the "--" separator. find() returns -1 when
        # the separator is missing; slicing from -1 + 2 would then silently
        # drop the first character, so the plot is left whole in that case.
        seppos = scrapedplot.find("--")
        if seppos != -1:
            scrapedplot = scrapedplot[seppos + 2:]

        if DEBUG:
            xbmc.output("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")

        # Add to the XBMC listing
        xbmctools.addnewvideo(
            CHANNELCODE, "play", CHANNELNAME, "", scrapedtitle, scrapedurl, scrapedthumbnail, scrapedplot
        )

    # Next page
    # NOTE(review): this expects a literal "»;" after "Siguiente "; if the
    # site writes the arrow as an HTML entity the link is never found - confirm.
    patron = r'<a href="([^"]+)" title="Ir a la siguiente[^"]+">Siguiente \»\;</a></div>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)

    if matches:
        scrapedtitle = "Página siguiente"
        scrapedurl = urlparse.urljoin(url, matches[0])
        scrapedthumbnail = ""
        scrapedplot = ""
        if DEBUG:
            xbmc.output("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")

        # Add to the XBMC listing
        xbmctools.addnewfolder(
            CHANNELCODE, "videolist", CHANNELNAME, scrapedtitle, scrapedurl, scrapedthumbnail, scrapedplot
        )

    # Label (top-right)...
    xbmcplugin.setPluginCategory(handle=pluginhandle, category=category)

    # Disable sorting...
    xbmcplugin.addSortMethod(handle=pluginhandle, sortMethod=xbmcplugin.SORT_METHOD_NONE)

    # End of directory...
    xbmcplugin.endOfDirectory(handle=pluginhandle, succeeded=True)
def folderlist(params, url, category):
    """Add the first-level folders of the ``category`` media list.

    The list is read from
    "http://www.rtve.es/mediateca/video/<category>/medialist.inc". Entries
    with class "node-end" become "videolist" folders; entries with class
    "expandable" become "subfolderlist" folders. Other classes are ignored.

    Args:
        params: Plugin parameters; its "title" value is URL-unquoted.
        url: Ignored; it is replaced by the media-list address.
        category: Section name used to build the address; also passed to
            ``xbmcplugin.setPluginCategory``.
    """
    xbmc.output("[rtvemediateca.py] folderlist")

    # Decoded but not used further in this function.
    title = urllib.unquote_plus(params.get("title"))
    url = "http://www.rtve.es/mediateca/video/" + category + "/medialist.inc"

    # Download the page
    data = scrapertools.cachePage(url)

    # Extract the folders (level 1). Sample markup:
    #   first level
    #     <li id="la-2-noticias" class="node-end"><a rel="nofollow" href="javascript://" onclick="loadVideos('noticias/la-2-noticias');" class="sup">La 2 Noticias</a></li>
    #     <li id="informativos-territoriales" class="expandable"><span onclick="loadVideos('noticias/informativos-territoriales');" class="closed">Informativos territoriales</span><ul>
    #   second level (no class attribute, so not matched here)
    #     <li id="informatiu-balear"><a rel="nofollow" href="javascript://" onclick="loadVideos('noticias/informativos-territoriales/informatiu-balear');" class="inf">Informatiu Balear</a></li>
    # Groups: id, class, onclick, visible text.
    patron = '<li id="([^"]+)" class="([^"]+)"><.*?onclick="([^"]+)"[^>]*>([^<]+)<'
    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for nodo_id, clase, onclick, texto in matches:
        # Re-encode the label from UTF-8 to ISO-8859-1; on any failure the
        # scraped text is used untouched.
        try:
            scrapedtitle = unicode(texto, "utf-8").encode("iso-8859-1")
        except:
            scrapedtitle = texto
        scrapedtitle = scrapertools.entityunescape(scrapedtitle)

        # Drop the leading "loadVideos('" (12 chars) and trailing "');" (3 chars).
        urlrelativa = onclick[12:-3]
        scrapedurl = nodo_id
        scrapedthumbnail = ""
        scrapedplot = ""
        if DEBUG:
            xbmc.output("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")

        # Add to the XBMC listing
        if clase == "node-end":
            xbmctools.addnewfolder(
                CHANNELCODE, "videolist", urlrelativa, scrapedtitle, "", scrapedthumbnail, scrapedplot
            )
        elif clase == "expandable":
            xbmctools.addnewfolder(
                CHANNELCODE, "subfolderlist", category, scrapedtitle, scrapedurl, scrapedthumbnail, scrapedplot
            )

    xbmcplugin.setPluginCategory(handle=pluginhandle, category=category)
    xbmcplugin.addSortMethod(handle=pluginhandle, sortMethod=xbmcplugin.SORT_METHOD_NONE)
    xbmcplugin.endOfDirectory(handle=pluginhandle, succeeded=True)
def videolist2(params, url, category):
    """Add the single ``urlToPlay`` target found at ``url`` as a playable entry.

    The first ``urlToPlay=...`` value in the page is used both as the entry
    URL and (entity-unescaped) as its title. When the page has no such value
    nothing is added and the directory is closed empty.

    Args:
        params: Plugin parameters; not read by this function.
        url: Page to scrape.
        category: Label passed to ``xbmcplugin.setPluginCategory``.
    """
    xbmc.output("[tva.py] videolist2")

    # Download the page
    data = scrapertools.cachePage(url)

    # Extract the video target
    patron = 'urlToPlay=([^"]+)\'>'
    match = re.compile(patron, re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(match)

    if match:
        xbmc.output(match[0])
        scrapedtitle = scrapertools.entityunescape(match[0])
        scrapedurl = match[0]
        scrapedthumbnail = ""
        scrapedplot = ""
        if DEBUG:
            xbmc.output("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")

        # Add to the XBMC listing
        xbmctools.addnewvideo(
            CHANNELCODE, "play", CHANNELNAME, "", scrapedtitle, scrapedurl, scrapedthumbnail, scrapedplot
        )
    else:
        # Indexing match[0] on an empty result would raise IndexError and
        # leave the directory unfinished; log and fall through instead.
        xbmc.output("[tva.py] videolist2: urlToPlay not found")

    # Label (top-right)...
    xbmcplugin.setPluginCategory(handle=pluginhandle, category=category)
    xbmcplugin.endOfDirectory(handle=pluginhandle, succeeded=True)
def mainlist(params, url, category):
    """Add one "videolist" folder per programme on the Clan videos page.

    Each folder points at
    "http://www.rtve.es/infantil/components/<rel>/videos.xml.inc", where
    ``<rel>`` is the ``rel`` attribute of the programme link.

    Args:
        params: Plugin parameters; not read by this function.
        url: Ignored; it is replaced by the fixed Clan videos address.
        category: Label passed to ``xbmcplugin.setPluginCategory``.
    """
    xbmc.output("[clantv.py] mainlist")

    url = "http://www.rtve.es/infantil/videos-juegos/#/videos/clan/todos/"

    # Download the page
    data = scrapertools.cachePage(url)

    # Extract the categories (folders).
    # Groups: rel, href, name, image src, text of the trailing <span>.
    patron = '<li.*?><a rel="([^"]+)" title="[^"]+" href="([^"]+)"><strong>([^<]+)</strong><img src="([^"]+)".*?><span>([^<]+)</span>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)

    for rel, enlace, nombre, imagen, detalle in matches:
        # Re-encode the name from UTF-8 to ISO-8859-1; on any failure the
        # scraped text is used untouched.
        try:
            scrapedtitle = unicode(nombre, "utf-8").encode("iso-8859-1")
        except:
            scrapedtitle = nombre
        scrapedtitle = scrapedtitle + " (" + detalle.replace("í", "i") + ")"
        scrapedtitle = scrapertools.entityunescape(scrapedtitle)

        scrapedurl = "http://www.rtve.es/infantil/components/" + rel + "/videos.xml.inc"
        scrapedthumbnail = urlparse.urljoin(url, imagen)
        scrapedplot = ""

        # Debugging
        if DEBUG:
            xbmc.output("scrapedtitle=" + scrapedtitle)
            xbmc.output("scrapedurl=" + scrapedurl)
            xbmc.output("scrapedthumbnail=" + scrapedthumbnail)

        # Add to the XBMC listing
        xbmctools.addnewfolder(
            CHANNELCODE, "videolist", CHANNELNAME, scrapedtitle, scrapedurl, scrapedthumbnail, scrapedplot
        )

    # Label (top-right)...
    xbmcplugin.setPluginCategory(handle=pluginhandle, category=category)

    # Disable sorting...
    xbmcplugin.addSortMethod(handle=pluginhandle, sortMethod=xbmcplugin.SORT_METHOD_NONE)

    # End of directory...
    xbmcplugin.endOfDirectory(handle=pluginhandle, succeeded=True)
def anyadevideos(matches):
    """Add one playable entry per already-scraped video tuple.

    Each element of ``matches`` is indexed as follows (positions taken from
    the pattern that produced them):
        [0] numeric video id            [1] link href (.shtml)
        [2] thumbnail src (.jpg)        [3] thumbnail alt text
        [4] heading link text           [5] paragraph text
        [6] span text, e.g. "Emitido: 10/04/2010 / 18:00h"

    When [6] carries an "Emitido:" stamp, the title gets the date and the two
    numbers of the time appended as (date) (HH'MM").

    Args:
        matches: Sequence of 7-field tuples as described above.
    """
    if DEBUG:
        scrapertools.printMatches(matches)

    # Same pattern text as before, written raw so the backslashes are explicit.
    patron_emitido = re.compile(r"Emitido:\s+([^\s]+)\s+\/\s+(\d+)\:(\d+)h", re.DOTALL)

    for match in matches:
        fechahora = patron_emitido.findall(match[6])
        if DEBUG:
            scrapertools.printMatches(fechahora)

        titulo = match[4]
        if len(fechahora) > 0:
            fecha, horas, minutos = fechahora[0]
            titulo = titulo + " (" + fecha + ") (" + horas + "'" + minutos + '")'
        scrapedtitle = scrapertools.entityunescape(titulo)

        scrapedurl = urlparse.urljoin("http://www.rtve.es", match[1])
        scrapedthumbnail = "http://www.rtve.es%s" % match[2]
        scrapedplot = scrapertools.entityunescape(match[3].strip() + "\n" + match[5].strip())

        # Debugging
        if DEBUG:
            xbmc.output("scrapedtitle=" + scrapedtitle)
            xbmc.output("scrapedurl=" + scrapedurl)
            xbmc.output("scrapedthumbnail=" + scrapedthumbnail)

        # Add to the XBMC listing
        xbmctools.addnewvideo(
            CHANNELCODE, "play", CHANNELNAME, "", scrapedtitle, scrapedurl, scrapedthumbnail, scrapedplot
        )
def listado(item):
    """Return a search entry followed by one item per category option.

    Downloads ``item.url``, isolates the ``<select name="vdoorderby">``
    element and turns each ``<option>`` into an ``Item`` whose action is
    "listado". A "search" item is always placed first.

    Args:
        item: Calling item; ``item.url`` is the page to scrape.

    Returns:
        A list of ``Item`` objects. When the selector is not present in the
        page the list holds only the search entry.
    """
    logger.info("[totlol.py] listado")

    # Download the page
    data = scrapertools.cachePage(item.url)

    # Isolate the body of the category <select>
    patron = '<select class="subnavbarform" name="vdoorderby"[^>]+>(.*?)</select>'
    matches = re.compile(patron, re.DOTALL).findall(data)

    if matches:
        # Each option looks like one of:
        #   <option value="http://www.totlol.com/videos?&orderby=df&u=&c=&search_id=&catid=0">- All Categories -</option>
        #   <option value='http://www.totlol.com/videos?&catid=1&u=&c=&search_id='>Animals</option>
        # The value group is captured together with its surrounding quotes.
        patron = "<option.*?value=([^>]+)>(.*?)</option>"
        matches = re.compile(patron, re.DOTALL).findall(matches[0])
    else:
        # Indexing matches[0] on an empty result would raise IndexError;
        # treat a missing selector as "no categories".
        logger.info("[totlol.py] listado: category selector not found")

    itemlist = []

    import config
    itemlist.append(
        Item(channel=CHANNELNAME, title=config.getLocalizedString(30103), action="search", folder=True)
    )

    for match in matches:
        scrapedtitle = match[1]
        # [1:-1] strips the quote characters captured around the value.
        scrapedurl = scrapertools.entityunescape(match[0][1:-1])
        scrapedthumbnail = ""
        scrapedplot = ""

        # Add to the listing
        itemlist.append(
            Item(
                channel=CHANNELNAME,
                title=scrapedtitle,
                action="listado",
                url=scrapedurl,
                thumbnail=scrapedthumbnail,
                plot=scrapedplot,
                folder=True,
            )
        )

    return itemlist
def getlistpeliconcaratula(params, url, category):
    """Return one "listmirrors" item per film cover at ``url``, plus a pager.

    Titles are built as "<name> [<quality>]" followed by the ``alt`` texts of
    the icons in the entry joined with "/", in parentheses. When the page has
    a "pag_next" link, a final "listpeliconcaratula" item pointing at it is
    appended.

    Args:
        params: Plugin parameters; not read by this function.
        url: Page to scrape; also the base for resolving the pager link.
        category: Not read by this function.

    Returns:
        A list of ``Item`` objects, possibly empty.
    """
    logger.info("[cinetube.py] getlistpeliconcaratula")

    # Download the page
    data = scrapertools.cachePage(url)

    # Extract the entries. Both the "new releases" section and the
    # alphabetical listing use blocks shaped like:
    #   <!--PELICULA-->
    #   <div class="peli_item textcenter">
    #   <div class="pelicula_img"><a href="/peliculas/musical/ver-pelicula-a-chorus-line.html">
    #   <img src="http://caratulas.cinetube.es/pelis/246.jpg" alt="A Chorus Line" /></a>
    #   </div>
    #   <a href="/peliculas/musical/ver-pelicula-a-chorus-line.html"><p class="white">A Chorus Line</p></a>
    #   <p><span class="rosa">DVD-RIP</span></p><div class="icos_lg"><img ... alt="espanol" /><img ... alt="" />...</div>
    #   <!--FIN PELICULA-->
    # Groups: cover src, href, name, quality, icon block.
    patronvideos = (
        '<!--PELICULA-->[^<]+'
        '<div class="peli_item textcenter">[^<]+'
        '<div class="pelicula_img"><a[^<]+'
        '<img src="([^"]+)"[^<]+</a>[^<]+'
        '</div[^<]+<a href="([^"]+)".*?<p class="white">([^<]+)</p>.*?<p><span class="rosa">([^>]+)</span></p><div class="icos_lg">(.*?)</div>'
    )
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)

    patron_iconos = re.compile('<img.*?alt="([^"]*)"', re.DOTALL)

    itemlist = []
    for caratula, enlace, nombre, calidad, iconos in matches:
        # Title
        scrapedtitle = nombre + " [" + calidad + "]"

        alts = patron_iconos.findall(iconos)
        nombres = []
        for alt in alts:
            logger.info("matchconector=" + alt)
            # An icon with an empty alt text is labelled "megavideo".
            if alt == "":
                alt = "megavideo"
            nombres.append(alt)
        if alts:
            scrapedtitle = scrapedtitle + " (" + "/".join(nombres) + ")"

        # Convert from UTF-8 and strip HTML entities; a failed conversion
        # leaves the title as scraped.
        try:
            scrapedtitle = unicode(scrapedtitle, "utf-8").encode("iso-8859-1")
        except:
            pass
        scrapedtitle = scrapertools.entityunescape(scrapedtitle)

        # Remaining fields
        scrapedurl = urlparse.urljoin("http://www.cinetube.es/", enlace)
        scrapedthumbnail = caratula
        scrapedplot = ""
        if DEBUG:
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")

        # Add to the listing
        itemlist.append(
            Item(
                channel=CHANNELNAME,
                action="listmirrors",
                title=scrapedtitle,
                url=scrapedurl,
                thumbnail=scrapedthumbnail,
                plot=scrapedplot,
                folder=True,
            )
        )

    # Extract the pager, e.g.
    #   <li class="navs"><a class="pag_next" href="/peliculas-todas/2.html"></a></li>
    patron_pagina = '<li class="navs"><a class="pag_next" href="([^"]+)"></a></li>'
    siguientes = re.compile(patron_pagina, re.DOTALL).findall(data)
    scrapertools.printMatches(siguientes)

    if len(siguientes) > 0:
        itemlist.append(
            Item(
                channel=CHANNELNAME,
                action="listpeliconcaratula",
                title="!Página siguiente",
                url=urlparse.urljoin(url, siguientes[0]),
                folder=True,
            )
        )

    return itemlist
def listseriesincaratula(params, url, category):
    """List the series of a cinetube page as XBMC folders, without covers.

    Downloads ``url``, adds one "findvideos" folder for every
    ``<!--SERIE-->`` entry matched in the page, adds a "next page" folder
    when the paginator link is present, then registers the title sort
    method and ends the directory.

    Args:
        params: Plugin parameters; not used by this function.
        url: Address of the listing page; also the base for relative links.
        category: Category label forwarded to each folder and to the plugin.
    """
    logger.info("[cinetube.py] listseriesincaratula")

    # Download the page
    data = scrapertools.cachePage(url)

    # Extract the entries. Sample of the markup being matched:
    #   <!--SERIE-->
    #   <div class="pelicula_bar_border">
    #   <div class="pelicula_bar series iframe3">
    #   <ul class="tabs-nav" id="video-1687">
    #   <li><span style="cursor:pointer;" class="peli_ico_1 bold"
    #       onclick="location.href='/peliculas/drama/ver-pelicula-belleza-robada.html'">Belleza robada </a></span></li>
    #   <li><a class="peli_ico_3" style="margin-left:10px;"
    #       href="/peliculas/drama/ver-pelicula-belleza-robada.html"><span> Ver ficha</span></a></li>
    #   </ul> </div> ... </div>
    patronvideos = "<!--SERIE-->[^<]+"
    patronvideos += '<div class="pelicula_bar_border">[^<]+'
    patronvideos += '<div class="pelicula_bar series iframe3">[^<]+'
    patronvideos += '<ul class="tabs-nav" id="([^"]+)">[^<]+'
    patronvideos += "<li><span[^>]+>([^<]+)</a></span></li>[^<]+"
    patronvideos += '<li><a.*?href="([^"]+)"'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for match in matches:
        # Title: convert from UTF-8 to ISO-8859-1; keep the raw text when the
        # conversion is not possible. Only conversion errors are caught so
        # that interrupts and unrelated bugs are not silently swallowed.
        try:
            scrapedtitle = unicode(match[1], "utf-8").encode("iso-8859-1")
        except (UnicodeError, TypeError):
            scrapedtitle = match[1]
        scrapedtitle = scrapertools.entityunescape(scrapedtitle)

        scrapedplot = ""
        scrapedurl = urlparse.urljoin(url, match[2])
        scrapedthumbnail = ""
        if DEBUG:
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")

        # Add to the XBMC listing
        xbmctools.addnewfolder(
            CHANNELNAME, "findvideos", category, scrapedtitle, scrapedurl, scrapedthumbnail, scrapedplot
        )

    # Extract the paginator, e.g.
    #   <li class="navs"><a class="pag_next" href="/peliculas-todas/2.html"></a></li>
    patronvideos = '<li class="navs"><a class="pag_next" href="([^"]+)"></a></li>'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    if len(matches) > 0:
        xbmctools.addnewfolder(
            CHANNELNAME,
            "listseriesincaratula",
            category,
            "!Página siguiente",
            urlparse.urljoin(url, matches[0]),
            "",
            "",
        )

    # Label (top-right), sort method and end of directory
    handle = int(sys.argv[1])
    xbmcplugin.setPluginCategory(handle=handle, category=category)
    xbmcplugin.addSortMethod(handle=handle, sortMethod=xbmcplugin.SORT_METHOD_TITLE)
    xbmcplugin.endOfDirectory(handle=handle, succeeded=True)
def listserieconcaratula(params, url, category):
    """List the series entries of a cinetube page as XBMC folders with covers.

    Downloads ``url``, adds one "findvideos" folder for every ``<li>`` entry
    matched in the page (title followed by the available connectors in
    parentheses, cover as thumbnail), adds a "next page" folder when the
    paginator link is present, then ends the directory unsorted.

    Args:
        params: Plugin parameters; not used by this function.
        url: Address of the listing page; also the base for relative links.
        category: Category label forwarded to each folder and to the plugin.
    """
    logger.info("[cinetube.py] listserieconcaratula")

    # Download the page
    data = scrapertools.cachePage(url)

    # Extract the entries. Sample of the markup found in the page:
    #   <li> <a href="/series/las-reglas-del-juego-leverage/temporada-3/capitulo-15/">
    #        <img src="http://caratulas.cinetube.es/series/166.jpg" alt="peli" /></a>
    #   <div class="icos_lg"><img src=".../sub.png" alt="sub" />
    #        <img src=".../megavideo.png" alt="megavideo.png" />
    #        <img src=".../ddirecta.png" alt="descarga directa" />
    #        <p><span class="rosa"></span></p></div>
    #   <p class="tit_ficha"><a class="tit_ficha" title="Ver serie ..." href="...">Las reglas del juego (Leverage) </a></p>
    #   <p class="tem_fich">3a Temporada - Cap 15</p> </li>
    # NOTE(review): the pattern below expects class "tem_ficha" right after
    # the icons </div>, while the sample above shows class "tem_fich" preceded
    # by a "tit_ficha" paragraph -- confirm against the live page.
    patronvideos = "<li>[^<]+"
    patronvideos += '<a href="([^"]+)"><img src="([^"]+)"[^>]+></a>.*?'
    patronvideos += '<div class="icos_lg">(.*?)</div>[^<]+'
    patronvideos += '<p class="tem_ficha">([^<]+)</p>[^<]+'
    patronvideos += "</li>"
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)

    patronconectores = re.compile('<img.*?alt="([^"]*)"', re.DOTALL)

    for match in matches:
        # Title, followed by the connectors taken from the icons' alt text
        scrapedtitle = match[3].strip()
        conectores = []
        for matchconector in patronconectores.findall(match[2]):
            logger.info("matchconector=" + matchconector)
            # An icon with an empty alt text is reported as megavideo
            if matchconector == "":
                matchconector = "megavideo"
            conectores.append(matchconector)
        if conectores:
            scrapedtitle = scrapedtitle + " (" + "/".join(conectores) + ")"

        # Convert from UTF-8 and remove HTML entities; the title is left
        # untouched when the charset conversion is not possible.
        try:
            scrapedtitle = unicode(scrapedtitle, "utf-8").encode("iso-8859-1")
        except (UnicodeError, TypeError):
            pass
        scrapedtitle = scrapertools.entityunescape(scrapedtitle)

        # Remaining fields
        scrapedplot = ""
        scrapedurl = urlparse.urljoin(url, match[0])
        scrapedthumbnail = match[1]
        if DEBUG:
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")

        # Add to the XBMC listing
        xbmctools.addnewfolder(
            CHANNELNAME, "findvideos", category, scrapedtitle, scrapedurl, scrapedthumbnail, scrapedplot
        )

    # Extract the paginator, e.g.
    #   <li class="navs"><a class="pag_next" href="/peliculas-todas/2.html"></a></li>
    patronvideos = '<li class="navs"><a class="pag_next" href="([^"]+)"></a></li>'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    if len(matches) > 0:
        xbmctools.addnewfolder(
            CHANNELNAME,
            "listserieconcaratula",
            category,
            "!Página siguiente",
            urlparse.urljoin(url, matches[0]),
            "",
            "",
        )

    # Label (top-right), sort method and end of directory
    handle = int(sys.argv[1])
    xbmcplugin.setPluginCategory(handle=handle, category=category)
    xbmcplugin.addSortMethod(handle=handle, sortMethod=xbmcplugin.SORT_METHOD_NONE)
    xbmcplugin.endOfDirectory(handle=handle, succeeded=True)
def listpelisincaratula(item):
    """Return the movies of a cinetube listing page as Items, without covers.

    Downloads ``item.url`` and builds one "findvideos" folder Item for every
    ``<!--SERIE-->`` entry matched in the page, plus a "next page" Item
    (action "listpelisincaratula") when the paginator link is present.

    Args:
        item: Object whose ``url`` attribute is the listing page to scrape.

    Returns:
        list: The Item objects built from the page, in page order, with the
        pagination Item (if any) last.
    """
    logger.info("[cinetube.py] listpelisincaratula")

    url = item.url

    # Download the page
    data = scrapertools.cachePage(url)

    # Extract the entries. Sample of the markup found in the page:
    #   <!--SERIE-->
    #   <div class="pelicula_bar_border">
    #   <div class="pelicula_bar series iframe3">
    #   <ul class="tabs-nav" id="video-1687">
    #   <li><span style="cursor:pointer;" class="peli_ico_1 bold"
    #       onclick="location.href='/peliculas/drama/ver-pelicula-belleza-robada.html'">Belleza robada </a></span></li>
    #   <li><a class="peli_ico_3" style="margin-left:10px;"
    #       href="/peliculas/drama/ver-pelicula-belleza-robada.html"><span> Ver ficha</span></a></li>
    #   </ul> </div> ... </div>
    patronvideos = "<!--SERIE-->[^<]+"
    patronvideos += '<div class="pelicula_bar_border">[^<]+'
    patronvideos += '<div class="pelicula_bar series iframe3">[^<]+'
    patronvideos += '<ul class="tabs-nav" id="([^"]+)">[^<]+'
    patronvideos += "<li><span[^>]+?>.+?<a[^>]+?>([^<]+)</a></span></li>[^<]+"
    patronvideos += '<li><a.*?href="([^"]+)"'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    itemlist = []
    for match in matches:
        # Title: convert from UTF-8 to ISO-8859-1; keep the raw text when the
        # conversion is not possible. Only conversion errors are caught so
        # that interrupts and unrelated bugs are not silently swallowed.
        try:
            scrapedtitle = unicode(match[1], "utf-8").encode("iso-8859-1")
        except (UnicodeError, TypeError):
            scrapedtitle = match[1]
        scrapedtitle = scrapertools.entityunescape(scrapedtitle)

        scrapedplot = ""
        # Entry links are resolved against the site root, not the page URL
        scrapedurl = urlparse.urljoin("http://www.cinetube.es/", match[2])
        scrapedthumbnail = ""
        if DEBUG:
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")

        itemlist.append(
            Item(
                channel=CHANNELNAME,
                action="findvideos",
                title=scrapedtitle,
                url=scrapedurl,
                thumbnail=scrapedthumbnail,
                plot=scrapedplot,
                folder=True,
            )
        )

    # Extract the paginator, e.g.
    #   <li class="navs"><a class="pag_next" href="/peliculas-todas/2.html"></a></li>
    patronvideos = '<li class="navs"><a class="pag_next" href="([^"]+)"></a></li>'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    if len(matches) > 0:
        scrapedurl = urlparse.urljoin(url, matches[0])
        itemlist.append(
            Item(
                channel=CHANNELNAME,
                action="listpelisincaratula",
                title="!Página siguiente",
                url=scrapedurl,
                folder=True,
            )
        )

    return itemlist
def listserieconcaratula(params, url, category):
    """List the series of a programastv page as XBMC folders with covers.

    Downloads ``url``, adds one "listmirrors" folder for every ``<li>`` entry
    matched in the page (title followed by the available connectors in
    parentheses, cover as thumbnail), adds a "next page" folder when the
    paginator link is present, then ends the directory unsorted.

    Args:
        params: Plugin parameters; not used by this function.
        url: Address of the listing page; also the base for relative links.
        category: Category label forwarded to each folder and to the plugin.
    """
    logger.info("[programastv.py] listserieconcaratula")

    # Download the page
    data = scrapertools.cachePage(url)

    # Extract the entries. Samples of the markup found in the page:
    #   <li> <a href="/series/the-middleman/temporada-1/">
    #        <img src="http://caratulas.programastv.es/series/233.jpg" alt="peli" /></a>
    #   <p><span class="rosa"></span></p><div class="icos_lg">
    #        <img src=".../sub.png" alt="sub" /><img src=".../megavideo.png" alt="" /> </div>
    #   <p class="tit_ficha">The middleman </p> <p class="tem_fich">1a Temporada</p> </li>
    #
    #   <li> <a href="/series/cranford-de-e-gaskell/">
    #        <img src="http://caratulas.programastv.es/series/64.jpg" alt="peli" /></a>
    #   <p><span class="rosa"></span></p><div class="icos_lg"> ...icons... </div>
    #   <p class="tit_ficha">Cranford, de E. Gaskell </p> </li>
    # NOTE(review): the pattern requires </li> right after the "tit_ficha"
    # paragraph, so entries shaped like the first sample (with a "tem_fich"
    # paragraph) do not appear to match -- confirm whether that is intended.
    patronvideos = '<li>[^<]+'
    patronvideos += '<a href="([^"]+)"><img src="([^"]+)"[^>]+></a>[^<]+'
    patronvideos += '<p><span class="rosa"></span></p><div class="icos_lg">(.*?)</div>[^<]+'
    patronvideos += '<p class="tit_ficha">([^<]+)</p>[^<]+'
    patronvideos += '</li>'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)

    patronconectores = re.compile('<img.*?alt="([^"]*)"', re.DOTALL)

    for match in matches:
        # Title, followed by the connectors taken from the icons' alt text
        scrapedtitle = match[3].strip()
        conectores = []
        for matchconector in patronconectores.findall(match[2]):
            logger.info("matchconector=" + matchconector)
            # An icon with an empty alt text is reported as megavideo
            if matchconector == "":
                matchconector = "megavideo"
            conectores.append(matchconector)
        if conectores:
            scrapedtitle = scrapedtitle + " (" + "/".join(conectores) + ")"

        # Convert from UTF-8 and remove HTML entities; the title is left
        # untouched when the charset conversion is not possible.
        try:
            scrapedtitle = unicode(scrapedtitle, "utf-8").encode("iso-8859-1")
        except (UnicodeError, TypeError):
            pass
        scrapedtitle = scrapertools.entityunescape(scrapedtitle)

        # Remaining fields
        scrapedplot = ""
        scrapedurl = urlparse.urljoin(url, match[0])
        scrapedthumbnail = match[1]
        if (DEBUG):
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")

        # Add to the XBMC listing
        xbmctools.addnewfolder(CHANNELNAME, "listmirrors", category,
                               scrapedtitle, scrapedurl, scrapedthumbnail,
                               scrapedplot)

    # Extract the paginator, e.g.
    #   <li class="navs"><a class="pag_next" href="/peliculas-todas/2.html"></a></li>
    patronvideos = '<li class="navs"><a class="pag_next" href="([^"]+)"></a></li>'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    if len(matches) > 0:
        xbmctools.addnewfolder(CHANNELNAME, "listserieconcaratula", category,
                               "!Página siguiente",
                               urlparse.urljoin(url, matches[0]), "", "")

    # Label (top-right), sort method and end of directory
    handle = int(sys.argv[1])
    xbmcplugin.setPluginCategory(handle=handle, category=category)
    xbmcplugin.addSortMethod(handle=handle, sortMethod=xbmcplugin.SORT_METHOD_NONE)
    xbmcplugin.endOfDirectory(handle=handle, succeeded=True)
def ListarVideos(params, url, category):
    """List every playable source found on a series21 episode page.

    Downloads the page and adds, in this order: direct megavideo/megaupload
    links with their dubbing label (or, when there are none, whatever
    ``servertools.findvideos`` detects), base64-encoded ``goTo`` links
    (optionally split in two parts), "direct" videos taken from a flashvars
    playlist, YouTube spoiler links, a folder with the other chapters of the
    season and a folder to change season. Finally ends the directory unsorted.

    Args:
        params: Plugin parameters; must provide URL-quoted "title",
            "thumbnail" and "plot" entries.
        url: Page address, resolved against http://www.series21.com.
        category: Category label forwarded to each entry and to the plugin.
    """
    url1 = urlparse.urljoin("http://www.series21.com", url)
    title = urllib.unquote_plus(params.get("title"))
    thumbnail = urllib.unquote_plus(params.get("thumbnail"))
    plot = urllib.unquote_plus(params.get("plot"))
    data = scrapertools.cachePage(url1)

    # Locate the area holding the videos and the description. When the page
    # has no such area, continue with an empty one instead of failing with
    # IndexError, so the generic detection below still gets a chance.
    patronvideos = '<div id="content">(.*?)<!-- FIN #content-->'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    if matches:
        matchesBK = matches[0]
    else:
        matchesBK = ""

    # Extract the megavideo/megaupload entries together with their dubbing
    patronvideos = '<span style="font-size:12px;"><strong>(.*?)</strong></span><br/>.*?'
    patronvideos += '<span.*?>.*?<a href="http\:\/\/www.megavideo.com\/([\?v=|v/|\?d=]+)([A-Z0-9]{8}).*?" target'
    matches = re.compile(patronvideos, re.DOTALL).findall(matchesBK)
    scrapertools.printMatches(matches)
    encontrados = set()
    for match in matches:
        # Each video code is listed only once
        if match[2] in encontrados:
            continue
        encontrados.add(match[2])

        # A "v" in the URL prefix selects Megavideo, anything else Megaupload
        if 'v' in match[1]:
            server = "Megavideo"
        else:
            server = "Megaupload"
        doblaje = scrapertools.entityunescape(match[0])

        scrapedtitle = title + " - [" + doblaje + "]" + " (" + server + ")"
        scrapedurl = match[2]
        scrapedthumbnail = thumbnail

        # Plot: make it mention the dubbing of this particular link
        scrapedplot = plot
        if ("Español" in plot) and not (doblaje in plot):
            scrapedplot = scrapedplot.replace("Español", doblaje)
        elif "subtitulado" in plot and not (doblaje in plot):
            scrapedplot = scrapedplot.replace(
                "Versión original (subtitulado)", doblaje)
        elif not doblaje in plot:
            scrapedplot += "\n" + "Doblaje : " + doblaje

        if (DEBUG):
            logger.info("scrapedtitle=" + scrapedtitle)
            logger.info("scrapedurl=" + scrapedurl)
            logger.info("scrapedthumbnail=" + scrapedthumbnail)

        # Add to the XBMC listing
        xbmctools.addnewvideo(CHANNELNAME, "play", category, server,
                              scrapedtitle, scrapedurl, scrapedthumbnail,
                              scrapedplot)

    # No explicit links: fall back to the generic video detection
    if len(matches) == 0:
        listavideos = servertools.findvideos(data)
        encontrados = set()
        for titulo, scrapedurl, servidor in listavideos:
            if scrapedurl.strip() not in encontrados:
                encontrados.add(scrapedurl.strip())
                xbmctools.addnewvideo(CHANNELNAME, "play", category, servidor,
                                      title + " - %s" % titulo, scrapedurl,
                                      thumbnail, plot)

    # Links published as javascript:goTo('<base64 code>', '<host>')
    patronvideos = '<span class="bloque-doblaje">(.+?)</span>[^<]+'
    patronvideos += '<span class="bloque-link">[^<]+<a href="javascript\:goTo\(\'([^\']+)\'\, \'([^\']+)\'\)"(.+?)</span>'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    patrongoto = re.compile(
        "javascript\:goTo\(\'([^\']+)\'\, \'([^\']+)\'\)")
    for match in matches:
        # Server: links to any other host are skipped. Previously "server"
        # was left undefined (NameError) or kept the value of an earlier link.
        if "megavideo" in match[2]:
            server = "Megavideo"
        elif "megaupload" in match[2]:
            server = "Megaupload"
        else:
            logger.info("[series21.py] ListarVideos unsupported server=" + match[2])
            continue

        if "esp.gif" in match[0]:
            doblaje = "Español"
        else:
            doblaje = match[0].strip()

        base64 = decrypt21.Base64()
        scrapedurl = base64._extract_code(base64.decode(match[1]))

        # A second goTo() inside the same block means the video has two parts
        part1 = ""
        part2 = ""
        segundas = patrongoto.findall(match[3])
        if segundas:
            try:
                scrapedurl2 = base64._extract_code(base64.decode(segundas[0][0]))
                part1 = " Parte 1 "
                part2 = " Parte 2 "
            except Exception:
                # Best effort, as in the original code: when the second link
                # cannot be decoded the entry is listed as a single part.
                pass

        scrapedtitle = title + part1 + " - [" + doblaje + "]" + " (" + server + ")"
        scrapedthumbnail = thumbnail
        scrapedplot = plot

        if (DEBUG):
            logger.info("scrapedtitle=" + scrapedtitle)
            logger.info("scrapedurl=" + scrapedurl)
            logger.info("scrapedthumbnail=" + scrapedthumbnail)

        # Add to the XBMC listing
        xbmctools.addnewvideo(CHANNELNAME, "play", category, server,
                              scrapedtitle, scrapedurl, scrapedthumbnail,
                              scrapedplot)
        if part2:
            scrapedtitle2 = title + part2 + " - [" + doblaje + "]" + " (" + server + ")"
            xbmctools.addnewvideo(CHANNELNAME, "play", category, server,
                                  scrapedtitle2, scrapedurl2,
                                  scrapedthumbnail, scrapedplot)

    # Extract the direct videos from the playlist referenced by flashvars
    patronvideos = 'flashvars="file=([^\&]+)\&controlbar=over'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    if len(matches) > 0:
        data1 = scrapertools.cachePage(matches[0])
        patron = 'author">(.*?)</media:credit>.*?<media\:content url="([^"]+)"'
        matches = re.compile(patron, re.DOTALL).findall(data1)
        scrapertools.printMatches(matches)
        for match in matches:
            xbmctools.addnewvideo(
                CHANNELNAME, "play", category, "Directo",
                title + " - [" + match[0] + "]" + " (Directo)", match[1],
                thumbnail, plot)

    # Look for the spoiler (YouTube links), each address listed once
    patronvideos = '(http://www.youtube.com[^"]+)"'
    matchSpoiler = re.compile(patronvideos, re.DOTALL).findall(data)
    encontrados = set()
    for match in matchSpoiler:
        if match not in encontrados:
            encontrados.add(match)
            xbmctools.addnewvideo(CHANNELNAME, "youtubeplay", category,
                                  "Directo",
                                  "Ver El Spoiler de : " + title, match,
                                  thumbnail, "Ver Video Spoiler")

    # Folder with the other chapters of this season
    titulo = "Ver otros capitulos de esta temporada"
    matches = buscarelacionados(matchesBK)
    plot2 = "CAPITULOS DE ESTA TEMPORADA :\n\n"
    for match in matches:
        plot2 = plot2 + "-" + match[2] + "\n"
    xbmctools.addnewfolderextra(CHANNELNAME, "listarelacionados", category,
                                titulo, url, thumbnail, plot2, matchesBK)

    # Folder to change season, offered only when more than one season is
    # linked; it is built from the first link, e.g.
    #   <div class="film"><a href="/house/#t_57"><img src="/thumbs/temporadas/95/120/57.jpg"
    patron = 'div class="film"><a href="([^"]+)"><img src="([^"]+)" style'
    matchSerie = re.compile(patron, re.DOTALL).findall(matchesBK)
    if len(matchSerie) > 1:
        temp = matchSerie[0]
        url2 = urlparse.urljoin(url1, temp[0])
        thumbnail = urlparse.urljoin(url1, temp[1])
        titulo = "Cambiar a otras temporadas"
        # The series name is the first path segment of the link
        titulo_serie = temp[0].split("/")
        titulo2 = titulo_serie[1].replace("-", " ")
        xbmctools.addnewfolderextra(CHANNELNAME, "listarTemporada",
                                    category, titulo, url2, thumbnail,
                                    plot, titulo2)

    # Label (top-right)...
    xbmcplugin.setPluginCategory(handle=pluginhandle, category=category)
    # Disable sorting...
    xbmcplugin.addSortMethod(handle=pluginhandle, sortMethod=xbmcplugin.SORT_METHOD_NONE)
    # End of directory...
    xbmcplugin.endOfDirectory(handle=pluginhandle, succeeded=True)
def listvideos(params,url,category):
    """List every playable source found on a peliculas21 movie page.

    Downloads the page, builds the plot (duration, genre, actors, synopsis)
    and adds, in this order: the trailer (or a trailer-search folder),
    megavideo links with their audio label (or, when there are none, whatever
    ``servertools.findvideos`` detects), base64-encoded ``goTo`` links
    (optionally split in two parts), "direct" videos taken from a flashvars
    playlist, a folder to browse the actors and a folder with related movies.
    Finally ends the directory unsorted.

    Args:
        params: Plugin parameters; not used by this function.
        url: Movie page address; http://www.peliculas21.com when empty.
        category: Category label forwarded to each entry and to the plugin.
    """
    logger.info("[peliculas21.py] listvideos")

    if url=="":
        url = "http://www.peliculas21.com"

    # Download the page
    data = scrapertools.cachePage(url)

    # Locate the area holding the videos and the description. When the page
    # has no such area, continue with an empty one instead of failing with
    # IndexError, so the generic detection below still gets a chance.
    patronvideos = '<div class="peliculadoblaje">(.*?)<!-- FIN #content-->'
    matches = re.compile(patronvideos,re.DOTALL).findall(data)
    if matches:
        matchesBK = matches[0]
    else:
        matchesBK = ""

    # Title and thumbnail (the last match wins); both default to "" so the
    # rest of the function does not hit an unbound name when nothing matches.
    title = ""
    thumbnail = ""
    patronvideos = '<img src="([^"]+)"[^>]+>[^<]+<[^>]+>([^<]+)</div>'
    matches2 = re.compile(patronvideos,re.DOTALL).findall(matchesBK)
    for match in matches2:
        title = match[1]
        thumbnail = urlparse.urljoin(url,match[0])

    # Plot: duration, genre, actors and synopsis
    plot = ""
    patronvideos = '<b>Duración:</b>(.*?)<br />'
    duracion = re.compile(patronvideos,re.DOTALL).findall(matchesBK)
    if len(duracion)>0:
        plot = "Duracion:"+duracion[0] + "\n"

    patronvideos = '<b>Género:</b>(.*?)<br />'
    genero = re.compile(patronvideos,re.DOTALL).findall(matchesBK)
    if len(genero)>0:
        plot = plot + "Genero: "+genero[0] +"\n"

    patronvideos = '<b>Sinopsis:</b>(.*?)</div>'
    sinopsis = re.compile(patronvideos,re.DOTALL).findall(matchesBK)

    # Actors: "actores" is the full list shown in the actors folder, while
    # the plot gets them comma separated, with a line break after the third
    # and a "* " prefix on the fourth.
    matchesactores = buscactores(matchesBK)
    if len(matchesactores)>0:
        plot = plot + "Actores: "
        c = 0
        actores = "ACTORES DE ESTA PELICULA :\n\n"
        for match in matchesactores:
            c = c + 1
            actores = actores + "-"+match[1] + "\n"
            if c == 3:
                plot = plot + match[1] + "\n"
            elif c == 4:
                plot = plot + "* " + match[1]+" "
            else:
                plot = plot + match[1]+ " , "

    # A page without synopsis used to raise IndexError here
    if len(sinopsis)>0:
        plot = plot + "\nSinopsis: " + sinopsis[0]
    else:
        plot = plot + "\nSinopsis: "
    plot = re.sub("<[^>]+>"," ",plot)

    # Trailer: embedded player if present, otherwise a trailer-search folder
    patronvideos = '<param name="movie" value="([^"]+)"></param>'
    matchtrailer = re.compile(patronvideos,re.DOTALL).findall(matchesBK)
    if len(matchtrailer)>0:
        for match in matchtrailer:
            xbmctools.addnewvideo( CHANNELNAME , "youtubeplay" , category ,"Directo", "Ver El Trailer de : "+title , match , thumbnail, plot )
    else:
        # NOTE(review): import kept from the original code although the name
        # is not used here; presumably it checks the module is available.
        import trailertools
        xbmctools.addnewfolder( "trailertools" , "buscartrailer" , category , config.getLocalizedString(30110)+" "+title , url , os.path.join(IMAGES_PATH, 'trailertools.png'), plot )

    # Extract the megavideo entries together with their audio type
    patronvideos = '<span style="font-size:12px;"><strong>(.*?)</strong></span><br/>.*?'
    patronvideos += '<span.*?>.*?<a href="http\:\/\/www.megavideo.com\/[\?v=|v/]+([A-Z0-9]{8}).*?" target="_blank">1</a>.</span><br />'
    matches = re.compile(patronvideos,re.DOTALL).findall(matchesBK)
    scrapertools.printMatches(matches)
    encontrados = set()
    for match in matches:
        # Each video code is listed only once
        if match[1] in encontrados:
            continue
        encontrados.add(match[1])

        scrapedtitle = title + " - [" +scrapertools.entityunescape(match[0])+ "]" + " (Megavideo)"
        scrapedurl = match[1]
        scrapedthumbnail = thumbnail
        scrapedplot = plot

        if (DEBUG):
            logger.info("scrapedtitle="+scrapedtitle)
            logger.info("scrapedurl="+scrapedurl)
            logger.info("scrapedthumbnail="+scrapedthumbnail)

        # Add to the XBMC listing
        xbmctools.addnewvideo( CHANNELNAME , "play" , category ,"Megavideo", scrapedtitle , scrapedurl , scrapedthumbnail, scrapedplot )

    # No explicit links: fall back to the generic video detection
    if len(matches)==0:
        listavideos = servertools.findvideos(data)
        encontrados = set()
        for titulo,scrapedurl,servidor in listavideos:
            if scrapedurl.strip() not in encontrados:
                encontrados.add(scrapedurl.strip())
                xbmctools.addnewvideo( CHANNELNAME , "play" , category ,servidor, title+ " - %s" % titulo , scrapedurl , thumbnail, plot )

    # Links published as javascript:goTo('<base64 code>', '<host>'), e.g.
    #   <span class="bloque-doblaje"><img src="../images/esp.gif" class="bandera" /></span>
    #   <span class="bloque-link">Opcion 8: <a href="javascript:goTo('aHR0cDov...', 'megavideo.com')"
    #       rel="nofollow">Ver pelicula</a></span>
    patronvideos = '<span class="bloque-doblaje">(.+?)</span>[^<]+'
    patronvideos +='<span class="bloque-link">[^<]+<a href="javascript\:goTo\(\'([^\']+)\'\, \'([^\']+)\'\)"(.+?)</span>'
    matches = re.compile(patronvideos,re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    patrongoto = re.compile("javascript\:goTo\(\'([^\']+)\'\, \'([^\']+)\'\)")
    for match in matches:
        # Server: links to any other host are skipped. Previously "server"
        # was left undefined (NameError) or kept the value of an earlier link.
        if "megavideo" in match[2]:
            server = "Megavideo"
        elif "megaupload" in match[2]:
            server = "Megaupload"
        else:
            logger.info("[peliculas21.py] listvideos unsupported server="+match[2])
            continue

        if "esp.gif" in match[0]:
            doblaje = "Español"
        else:
            doblaje = match[0].strip()

        base64 = decrypt21.Base64()
        scrapedurl = base64._extract_code(base64.decode(match[1]))

        # A second goTo() inside the same block means the video has two parts
        part1 = ""
        part2 = ""
        segundas = patrongoto.findall(match[3])
        if segundas:
            try:
                scrapedurl2 = base64._extract_code(base64.decode(segundas[0][0]))
                part1 = " Parte 1 "
                part2 = " Parte 2 "
            except Exception:
                # Best effort, as in the original code: when the second link
                # cannot be decoded the entry is listed as a single part.
                pass

        scrapedtitle = title + part1+ " - [" +doblaje+ "]" + " ("+server+")"
        scrapedthumbnail = thumbnail
        scrapedplot = plot

        if (DEBUG):
            logger.info("scrapedtitle="+scrapedtitle)
            logger.info("scrapedurl="+scrapedurl)
            logger.info("scrapedthumbnail="+scrapedthumbnail)

        # Add to the XBMC listing
        xbmctools.addnewvideo( CHANNELNAME , "play" , category ,server, scrapedtitle , scrapedurl , scrapedthumbnail, scrapedplot )
        if part2:
            scrapedtitle2 = title + part2+ " - [" +doblaje+ "]" + " ("+server+")"
            xbmctools.addnewvideo( CHANNELNAME , "play" , category ,server, scrapedtitle2 , scrapedurl2 , scrapedthumbnail, scrapedplot )

    # Extract the direct videos from the playlist referenced by flashvars
    patronvideos = 'flashvars="file=([^\&]+)\&controlbar=over'
    matches = re.compile(patronvideos,re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    if len(matches)>0:
        data1 = scrapertools.cachePage(matches[0])
        patron = 'author">(.*?)</media:credit>.*?<media\:content url="([^"]+)"'
        matches = re.compile(patron,re.DOTALL).findall(data1)
        scrapertools.printMatches(matches)
        for match in matches:
            xbmctools.addnewvideo( CHANNELNAME , "play" , category , "Directo" , title +" - ["+match[0]+"]"+ " (Directo)" , match[1] , thumbnail , plot )

    # Folder to look for other movies of the same actors
    if len(matchesactores)>0:
        titulo = "Busca otros Films de los actores de esta pelicula"
        xbmctools.addnewfolder( CHANNELNAME , "listaractores" , category , titulo , matchesBK , thumbnail, actores )

    # Folder with the related movies
    titulo = "Ver Peliculas Relacionadas"
    matches = buscarelacionados(matchesBK)
    plot2 = "PELICULAS RELACIONADAS :\n\n"
    for match in matches:
        plot2 = plot2 + "-"+match[1]+"\n"
    xbmctools.addnewfolder( CHANNELNAME , "listarelacionados" , category , titulo , matchesBK , thumbnail, plot2 )

    # Label (top-right)...
    xbmcplugin.setPluginCategory( handle=pluginhandle, category=category )
    # Disable sorting...
    xbmcplugin.addSortMethod( handle=pluginhandle, sortMethod=xbmcplugin.SORT_METHOD_NONE )
    # End of directory...
    xbmcplugin.endOfDirectory( handle=pluginhandle, succeeded=True )
def videolist(params,url,category):
    """List the sub-categories and all the videos of an rtve "a la carta" page.

    Downloads ``url``, adds one "videolist" folder per category link (except
    the navigation entries), hands the videos of the page to
    ``anyadevideos`` and then follows the "Adelante" link page after page,
    handing over the videos of each one. Finally ends the directory.

    Args:
        params: Plugin parameters; not used by this function.
        url: Address of the first page; also the base for pagination links.
        category: Category label reported to the plugin.
    """
    xbmc.output("[rtve.py] videolist")

    # Download the main page
    xbmc.output("[rtve.py] videolist descarga página principal "+url)
    data = scrapertools.cachePage(url)

    # Extract the categories (folders)
    patron = '<a href="(/alacarta/todos/[^"]+)".*?>([^<]+)</a>'
    matches = re.compile(patron,re.DOTALL).findall(data)
    if DEBUG: scrapertools.printMatches(matches)

    # Links that are navigation entries rather than real categories
    omitidos = ("Recomendados", "Temas", "Todos A-Z", "Archivo TVE", "Ultimos 7 dias", "Adelante")

    for match in matches:
        scrapedtitle = scrapertools.entityunescape(match[1].strip())
        scrapedurl = urlparse.urljoin("http://www.rtve.es", match[0])
        scrapedthumbnail = ""
        scrapedplot = ""
        if (DEBUG): xbmc.output("title=["+scrapedtitle+"], url=["+scrapedurl+"], thumbnail=["+scrapedthumbnail+"]")

        # Add to the XBMC listing
        if scrapedtitle not in omitidos:
            xbmctools.addnewfolder( CHANNELCODE , "videolist" , CHANNELNAME , scrapedtitle , scrapedurl , scrapedthumbnail, scrapedplot )

    # Extract the videos of the current page
    patron = '<li id="video-(\d+)" class="videoThumbnail">[^<]+'
    patron += '<div>[^<]+'
    patron += "<a.*?href='([^']+)'[^>]+>[^<]+"
    patron += '<img src="([^"]+)" alt="([^"]+)"[^>]+>[^<]+'
    patron += '<img alt="Reproducir"[^<]+'
    patron += '</a>[^<]+'
    patron += '<h3>[^<]+'
    patron += '<a[^>]+>([^<]+)</a>[^<]+'
    patron += '</h3>[^<]+'
    patron += '<p>([^<]+)</p>[^<]+'
    patron += '<span>([^<]+)<'
    patronvideos = re.compile(patron,re.DOTALL)
    anyadevideos(patronvideos.findall(data))

    # Extract the videos of the remaining pages, following links such as
    #   <a href="/alacarta/todos/recomendados/2.html" class=""> Adelante </a>
    xbmc.output("Paginación...")
    patronpaginas = re.compile('<a href="([^"]+)" class="">\s+Adelante\s+</a>',re.DOTALL)
    paginas = patronpaginas.findall(data)
    if DEBUG: scrapertools.printMatches(paginas)

    # Pages already fetched: stops the loop if a page links back to one of
    # them, which would otherwise keep downloading forever.
    visitadas = set([url])
    while len(paginas)>0:
        urlpagina = urlparse.urljoin(url,paginas[0])
        if urlpagina in visitadas:
            break
        visitadas.add(urlpagina)
        xbmc.output("urlpagina="+urlpagina)
        datapagina = scrapertools.cachePage(urlpagina)
        anyadevideos(patronvideos.findall(datapagina))
        paginas = patronpaginas.findall(datapagina)

    # Label (top-right) and end of directory
    xbmcplugin.setPluginCategory( handle=pluginhandle, category=category )
    xbmcplugin.endOfDirectory( handle=pluginhandle, succeeded=True )
def listseriesincaratula(params, url, category):
    """List the series of a programastv page as XBMC folders, without covers.

    Downloads ``url``, adds one "listmirrors" folder for every
    ``<!--SERIE-->`` entry matched in the page, adds a "next page" folder
    when the paginator link is present, then registers the title sort
    method and ends the directory.

    Args:
        params: Plugin parameters; not used by this function.
        url: Address of the listing page; also the base for relative links.
        category: Category label forwarded to each folder and to the plugin.
    """
    logger.info("[programastv.py] listseriesincaratula")

    # Download the page
    data = scrapertools.cachePage(url)

    # Extract the entries. Sample of the markup being matched:
    #   <!--SERIE-->
    #   <div class="pelicula_bar_border">
    #   <div class="pelicula_bar series iframe3">
    #   <ul class="tabs-nav" id="video-1687">
    #   <li><span style="cursor:pointer;" class="peli_ico_1 bold"
    #       onclick="location.href='/peliculas/drama/ver-pelicula-belleza-robada.html'">Belleza robada </a></span></li>
    #   <li><a class="peli_ico_3" style="margin-left:10px;"
    #       href="/peliculas/drama/ver-pelicula-belleza-robada.html"><span> Ver ficha</span></a></li>
    #   </ul> </div> ... </div>
    patronvideos = '<!--SERIE-->[^<]+'
    patronvideos += '<div class="pelicula_bar_border">[^<]+'
    patronvideos += '<div class="pelicula_bar series iframe3">[^<]+'
    patronvideos += '<ul class="tabs-nav" id="([^"]+)">[^<]+'
    patronvideos += '<li><span[^>]+>([^<]+)</a></span></li>[^<]+'
    patronvideos += '<li><a.*?href="([^"]+)"'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for match in matches:
        # Title: convert from UTF-8 to ISO-8859-1; keep the raw text when the
        # conversion is not possible. Only conversion errors are caught so
        # that interrupts and unrelated bugs are not silently swallowed.
        try:
            scrapedtitle = unicode(match[1], "utf-8").encode("iso-8859-1")
        except (UnicodeError, TypeError):
            scrapedtitle = match[1]
        scrapedtitle = scrapertools.entityunescape(scrapedtitle)

        scrapedplot = ""
        scrapedurl = urlparse.urljoin(url, match[2])
        scrapedthumbnail = ""
        if (DEBUG):
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")

        # Add to the XBMC listing
        xbmctools.addnewfolder(CHANNELNAME, "listmirrors", category,
                               scrapedtitle, scrapedurl, scrapedthumbnail,
                               scrapedplot)

    # Extract the paginator, e.g.
    #   <li class="navs"><a class="pag_next" href="/peliculas-todas/2.html"></a></li>
    patronvideos = '<li class="navs"><a class="pag_next" href="([^"]+)"></a></li>'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    if len(matches) > 0:
        xbmctools.addnewfolder(CHANNELNAME, "listseriesincaratula", category,
                               "!Página siguiente",
                               urlparse.urljoin(url, matches[0]), "", "")

    # Label (top-right), sort method and end of directory
    handle = int(sys.argv[1])
    xbmcplugin.setPluginCategory(handle=handle, category=category)
    xbmcplugin.addSortMethod(handle=handle, sortMethod=xbmcplugin.SORT_METHOD_TITLE)
    xbmcplugin.endOfDirectory(handle=handle, succeeded=True)
def videolist(params, url, category):
    """List the videos of an RTVE media-library page as playable entries.

    When ``url`` is empty, the first AJAX page for ``category`` is used. Every
    thumbnail block found becomes a "play" video entry; if the page carries a
    "Siguiente" link, a folder pointing back to this action is added for the
    next page. The directory is then closed.

    params   -- plugin call parameters (not used here)
    url      -- page to scrape, or "" to start at page 1 of ``category``
    category -- media-library path; also used as the directory category
    """
    xbmc.output("[rtvemediateca.py] videolist")

    def to_latin1(text):
        # Best-effort UTF-8 -> ISO-8859-1 conversion; text that cannot be
        # converted is passed through unchanged.
        try:
            return unicode(text, "utf-8").encode("iso-8859-1")
        except (UnicodeError, TypeError):
            return text

    # --------------------------------------------------------
    # Download the page
    # --------------------------------------------------------
    if url == "":
        url = "http://www.rtve.es/mediateca/video/" + category + "/pagines_ajax/pagina1.html"
    data = scrapertools.cachePage(url)
    # xbmc.output(data)

    # --------------------------------------------------------
    # Extract the videos
    # --------------------------------------------------------
    patron = '<div class="vthumb">.*?<a.*?href="([^"]+)"><img src="[^>]+><img src="([^"]+)[^>]+>.*?<a.*?href=[^>]+>([^<]+)</a></h2><span class="hour">([^<]+)</span>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)

    for match in matches:
        # match = (video href, thumbnail src, title text, time text)
        scrapedtitle = to_latin1(match[2] + " (" + match[3] + ")")
        scrapedtitle = scrapertools.entityunescape(scrapedtitle)
        scrapedurl = urlparse.urljoin(url, match[0])
        scrapedplot = to_latin1(match[2])
        scrapedplot = scrapertools.entityunescape(scrapedplot)
        scrapedthumbnail = urlparse.urljoin(url, match[1])
        if DEBUG:
            xbmc.output("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")

        # Add to the XBMC listing
        xbmctools.addnewvideo("rtve", "play", CHANNELNAME, "", scrapedtitle, scrapedurl, scrapedthumbnail, scrapedplot)

    # --------------------------------------------------------
    # Extract the "next page" link
    # --------------------------------------------------------
    patron = r'<a onclick="([^"]+)" href="javascript\:\/\/" rel="nofollow">Siguiente<'
    matches = re.compile(patron, re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)

    if len(matches) > 0:
        # The onclick value looks like:
        #   pagina('content_videos','2', '/mediateca/video/programas/series/aguila-roja')
        # and maps to:
        #   http://www.rtve.es/mediateca/video/programas/series/aguila-roja/pagines_ajax/pagina2.html
        onclick = matches[0]
        pager = re.search(r"pagina\(\s*'[^']*'\s*,\s*'(\d+)'\s*,\s*'([^']*)'\s*\)", onclick)
        if pager:
            # Parsing the arguments keeps working for page numbers with more
            # than one digit, which the fixed offsets below cannot handle.
            nextpage = pager.group(1)
            basepath = pager.group(2)
        else:
            # Unknown call shape: fall back to the historical fixed offsets.
            nextpage = onclick[25:26]
            basepath = onclick[30:-2]

        scrapedtitle = "Página siguiente"
        scrapedurl = "http://www.rtve.es" + basepath + "/pagines_ajax/pagina" + nextpage + ".html"
        scrapedplot = ""
        scrapedthumbnail = ""
        if DEBUG:
            xbmc.output("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")

        # Add to the XBMC listing
        xbmctools.addnewfolder(CHANNELCODE, "videolist", category, scrapedtitle, scrapedurl, scrapedthumbnail, scrapedplot)

    xbmcplugin.setPluginCategory(handle=pluginhandle, category=category)
    xbmcplugin.addSortMethod(handle=pluginhandle, sortMethod=xbmcplugin.SORT_METHOD_NONE)
    xbmcplugin.endOfDirectory(handle=pluginhandle, succeeded=True)
def espanoles(params,url,category):
    """List the "Espanoles en el mundo" destinations as folders.

    Two pages are scraped in turn: the one given by ``url`` and the fixed
    "programas-anteriores" page. Every destination found on either page is
    added as a "generico" folder, then the directory is closed.

    params   -- plugin call parameters (not used here)
    url      -- first page to scrape; base for its relative links
    category -- label used as plugin category
    """
    xbmc.output("[rtveprogramas.py] espanoles")

    def add_folders(base_url, rows, thumb_index):
        # Emit one folder per regex hit. Both patterns put the link in group
        # 0, the visible title in group 3 and the blurb in group 4; only the
        # position of the thumbnail group differs.
        for row in rows:
            title = scrapertools.entityunescape(row[3])
            link = urlparse.urljoin(base_url, row[0])
            thumb = urlparse.urljoin(base_url, row[thumb_index])
            blurb = row[4]
            if DEBUG:
                xbmc.output("title=["+title+"], url=["+link+"], thumbnail=["+thumb+"]")
            # Add to the XBMC listing
            xbmctools.addnewfolder( CHANNELCODE , "generico" , CHANNELNAME , title , link , thumb , blurb )

    # --------------------------------------------------------
    # First page: current destinations
    # --------------------------------------------------------
    page = scrapertools.cachePage(url)
    #xbmc.output(page)

    # Sample entry:
    #<div class="mark"><div class="news bg01 comp"><span class="imgL"><a href="http://www.rtve.es/television/espanoles-en-el-mundo/camerun/" title="Destino#46 ..."><img src="/imagenes/destino46-camerun/1267557505140.jpg" alt="..." title="..."/></a></span><h3 class="M "><a href="..." title="...">Destino#46 ...</a></h3><div class="chapeaux">...<strong>Vuelve a verlo</strong>.</div></div></div>
    current_pattern = (
        '<div class="mark"><div class="news bg01 comp">'
        '<span class="img."><a href="([^"]+)" title="([^"]+)">'
        '<img src="([^"]+)" alt="[^"]+" title="[^"]+"/></a></span>'
        '<h. class=". "><a href="[^"]+" title="[^"]+">([^<]+)</a></h.>'
        '<div class="chapeaux">([^<]+)<'
    )
    current_rows = re.compile(current_pattern,re.DOTALL).findall(page)
    if DEBUG:
        scrapertools.printMatches(current_rows)
    add_folders(url, current_rows, 2)

    # --------------------------------------------------------
    # Second page: previous programmes
    # --------------------------------------------------------
    url = "http://www.rtve.es/television/espanoles-en-el-mundo/programas-anteriores/"
    page = scrapertools.cachePage(url)
    #xbmc.output(page)

    # Sample entry:
    #<div class="news bg01 comp"><span class="imgT"><a href="http://www.rtve.es/television/espanoles-en-el-mundo/seul/" title="Destino#28 ...">
    #<img src="/imagenes/destino28-seul/1264701360856.jpg" alt="..." title="..."/></a></span>
    #<h3 class="M "><a href="..." title="...">Destino#28 ...</a></h3><div class="chapeaux"> ...<strong> Vuelve a verlo</strong></div>
    previous_pattern = (
        '<div class="news bg01 comp">[^<]*'
        '<span class="img."><a href="([^"]+)" title="[^"]+"><img src="([^"]+)" alt="[^"]+" title="[^"]+"/></a></span>[^<]*'
        '<h. class=". ">[^<]*'
        '<a href="[^"]+" title="([^"]+)">([^<]+)</a>[^<]*'
        '</h.>[^<]*'
        '<div class="chapeaux">(.*?)</div>'
    )
    previous_rows = re.compile(previous_pattern,re.DOTALL).findall(page)
    if DEBUG:
        scrapertools.printMatches(previous_rows)
    add_folders(url, previous_rows, 1)

    # Close the directory
    xbmcplugin.setPluginCategory( handle=pluginhandle, category=category )
    xbmcplugin.addSortMethod( handle=pluginhandle, sortMethod=xbmcplugin.SORT_METHOD_NONE )
    xbmcplugin.endOfDirectory( handle=pluginhandle, succeeded=True )
def series(item):
    """Scrape a cinetube series listing and return it as a list of Items.

    Each ``<li>`` entry of the page at ``item.url`` becomes an Item with
    action "temporadas"; its title is the series name, the optional season /
    chapter text and the list of connectors taken from the icon ``alt``
    texts. When the page has a "next page" link, a final Item with action
    "series" is appended.

    item -- Item whose ``url`` is the listing page to download

    Returns the list of Item objects built from the page.
    """
    logger.info("[cinetube.py] series")
    itemlist = []

    # Download the page
    data = scrapertools.cachePage(item.url)
    logger.info("Pagina de %d caracteres" % len(data))

    # Extract the entries. Sample of the markup being matched:
    #
    #   <li>
    #   <a href="/series/en-tierra-de-lobos/temporada-1/capitulo-12/"><img src="http://caratulas.cinetube.es/series/8912.jpg" alt="peli" /></a>
    #   <div class="icos_lg"><img src=".../espanol.png" alt="espanol" /> <img src=".../megavideo.png" alt="megavideo.png" /> ... </div>
    #   <p class="tit_ficha"><a class="tit_ficha" title="..." href="...">Tierra de lobos </a></p>
    #   <p class="tem_fich">1a Temporada - Cap 12</p>      (this line is optional)
    #   </li>
    patronvideos = '<li>[^<]+'
    patronvideos += '<a href="([^"]+)"><img src="([^"]+)"[^>]*></a>[^<]+'
    patronvideos += '<div class="icos_lg">(.*?)</div>[^<]+'
    patronvideos += '<p class="tit_ficha">(.*?)</p>[^<]+'
    patronvideos += '(?:<p class="tem_fich">([^<]+)</p>)?'
    matches = re.compile(patronvideos,re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)

    for match in matches:
        # Title. findall always yields 5-tuples here, with "" for the optional
        # season group when it did not take part in the match, so test the
        # group's content rather than the tuple length.
        scrapedtitle = match[3].strip()
        if match[4]:
            scrapedtitle = scrapedtitle + " " + match[4]

        # Connectors come from the alt text of the icons; an empty alt is
        # reported as "megavideo".
        matchesconectores = re.compile('<img.*?alt="([^"]*)"',re.DOTALL).findall(match[2])
        conectores = []
        for matchconector in matchesconectores:
            logger.info("matchconector="+matchconector)
            conectores.append(matchconector or "megavideo")
        if conectores:
            scrapedtitle = scrapedtitle + " (" + "/".join(conectores) + ")"

        # Collapse repeated "megavideo" connectors (three passes, as before)
        for _ in range(3):
            scrapedtitle = scrapedtitle.replace("megavideo/megavideo","megavideo")
        scrapedtitle = scrapedtitle.replace("descarga directa","DD")
        scrapedtitle = scrapertools.htmlclean(scrapedtitle)
        scrapedtitle = scrapertools.entityunescape(scrapedtitle)

        scrapedplot = ""
        scrapedurl = urlparse.urljoin(item.url,match[0])
        scrapedthumbnail = match[1]
        if DEBUG:
            logger.info("title=["+scrapedtitle+"], url=["+scrapedurl+"], thumbnail=["+scrapedthumbnail+"]")

        itemlist.append( Item(channel=CHANNELNAME, action="temporadas", title=scrapedtitle , url=scrapedurl , thumbnail=scrapedthumbnail , plot=scrapedplot , folder=True) )

    # Pager
    #<li class="navs"><a class="pag_next" href="/peliculas-todas/2.html"></a></li>
    patronvideos = '<li class="navs"><a class="pag_next" href="([^"]+)"></a></li>'
    matches = re.compile(patronvideos,re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    if len(matches)>0:
        scrapedurl = urlparse.urljoin(item.url,matches[0])
        itemlist.append( Item(channel=CHANNELNAME, action="series", title="!Página siguiente" , url=scrapedurl , folder=True) )

    return itemlist
def listvideos(params,url,category):
    """Build the XBMC listing for one peliculas21 film page.

    Downloads ``url`` (the site home page when ``url`` is empty), extracts
    title, thumbnail and a plot text (duration, genre, actors, synopsis) and
    then adds, in this order: trailer entries (or a "search trailer" folder),
    Megavideo entries (or whatever ``servertools.findvideos`` detects when
    there are none), direct videos from a ``flashvars`` playlist, an "other
    films of these actors" folder and a "related films" folder. Finally the
    directory is closed.

    params   -- plugin call parameters (not used; see commented-out lines)
    url      -- film page address, or "" for the site home page
    category -- label forwarded to every entry and used as plugin category
    """
    logger.info("[peliculas21.py] listvideos")

    if url=="":
        url = "http://www.peliculas21.com"

    # Download the page
    data = scrapertools.cachePage(url)
    #logger.info(data)
    #title = urllib.unquote_plus(params.get("title"))
    #thumbnail = urllib.unquote_plus(params.get("thumbnail"))
    #plot = urllib.unquote_plus(params.get("plot"))

    # Find the area that holds the videos and the description
    # NOTE(review): matches[0] is used below without checking that the area
    # was found; an empty result raises IndexError.
    patronvideos = '<div class="peliculadoblaje">(.*?)<!-- FIN #content-->'
    matches = re.compile(patronvideos,re.DOTALL).findall(data)

    # Find the title and the thumbnail
    # NOTE(review): title/thumbnail are only bound inside this loop; if the
    # pattern finds nothing, later uses raise NameError. With several hits the
    # last one wins.
    patronvideos = '<img src="([^"]+)"[^>]+>[^<]+<[^>]+>([^<]+)</div>'
    matches2 = re.compile(patronvideos,re.DOTALL).findall(matches[0])
    for match in matches2:
        title = match[1]
        thumbnail = urlparse.urljoin(url,match[0])

    # Build the plot text: duration, genre, actors, synopsis
    plot = ""
    patronvideos = '<b>Duración:</b>(.*?)<br />'
    duracion = re.compile(patronvideos,re.DOTALL).findall(matches[0])
    if len(duracion)>0:plot = "Duracion:"+duracion[0] + "\n"

    patronvideos = '<b>Género:</b>(.*?)<br />'
    genero = re.compile(patronvideos,re.DOTALL).findall(matches[0])
    if len(genero)>0:plot = plot + "Genero: "+genero[0] +"\n"

    patronvideos = '<b>Sinopsis:</b>(.*?)</div>'
    sinopsis = re.compile(patronvideos,re.DOTALL).findall(matches[0])

    # Find the actors
    matchesactores = buscactores(matches[0])
    if len(matchesactores)>0:
        plot = plot + "Actores: "
        c = 0
        # "actores" is the plot text of the actors folder added further down
        actores = "ACTORES DE ESTA PELICULA :\n\n"
        for match in matchesactores:
            c = c + 1
            actores = actores + "-"+match[1] + "\n"
            # The third name ends the first line, the fourth starts a new one
            if c == 3:
                plot = plot + match[1] + "\n"
            elif c == 4:
                plot = plot + "* " + match[1]+" "
            else:
                plot = plot + match[1]+ " , "

    # NOTE(review): sinopsis[0] raises IndexError when no synopsis was found.
    plot = plot + "\nSinopsis: " + sinopsis[0]
    # Replace any HTML tag left in the plot with a space
    plot = re.sub("<[^>]+>"," ",plot)

    # Find the trailer
    patronvideos = '<param name="movie" value="([^"]+)"></param>'
    matchtrailer = re.compile(patronvideos,re.DOTALL).findall(matches[0])
    if len(matchtrailer)>0:
        for match in matchtrailer:
            # Add to the XBMC listing
            xbmctools.addnewvideo( CHANNELNAME , "youtubeplay" , category ,"Directo", "Ver El Trailer de : "+title , match , thumbnail, plot )
    else:
        import trailertools
        # Add to the XBMC listing
        xbmctools.addnewfolder( "trailertools" , "buscartrailer" , category , "Buscar trailer para : "+title , url , os.path.join(IMAGES_PATH, 'trailertools.png'), plot )

    # Keep the content area: "matches" is reused for other results below
    matchesBK = matches[0]

    # Extract the entries (videos) for megavideo, with their audio type
    patronvideos = '<span style="font-size:12px;"><strong>(.*?)</strong></span><br/>.*?'
    patronvideos += '<span.*?>.*?<a href="http\:\/\/www.megavideo.com\/[\?v=|v/]+([A-Z0-9]{8}).*?" target="_blank">1</a>.</span><br />'
    matches = re.compile(patronvideos,re.DOTALL).findall(matches[0])
    scrapertools.printMatches(matches)
    encontrados = set()
    for match in matches:
        # Skip video codes that were already listed
        if match[1] not in encontrados:
            encontrados.add(match[1])

            # Title
            scrapedtitle = title + " - [" +scrapertools.entityunescape(match[0])+ "]" + " (Megavideo)"

            # URL
            scrapedurl = match[1]

            # Thumbnail
            scrapedthumbnail = thumbnail

            # Plot
            scrapedplot = plot

            # Debugging
            if (DEBUG):
                logger.info("scrapedtitle="+scrapedtitle)
                logger.info("scrapedurl="+scrapedurl)
                logger.info("scrapedthumbnail="+scrapedthumbnail)

            # Add to the XBMC listing
            xbmctools.addnewvideo( CHANNELNAME , "play" , category ,"Megavideo", scrapedtitle , scrapedurl , scrapedthumbnail, scrapedplot )

    # No megavideo entry: fall back to generic detection over the whole page
    if len(matches)==0:
        listavideos = servertools.findvideos(data)
        encontrados = set()
        for titulo,scrapedurl,servidor in listavideos:
            if scrapedurl.strip() not in encontrados:
                encontrados.add(scrapedurl.strip())
                xbmctools.addnewvideo( CHANNELNAME , "play" , category ,servidor, title+ " - %s" % titulo , scrapedurl , thumbnail, plot )

    # Extract the direct entries (videos)
    patronvideos = 'flashvars="file=([^\&]+)\&controlbar=over'
    matches = re.compile(patronvideos,re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    if len(matches)>0:
        # The flashvars "file" value is downloaded and scanned for media entries
        data1 = scrapertools.cachePage(matches[0])
        #logger.info(data)
        patron = 'author">(.*?)</media:credit>.*?<media\:content url="([^"]+)"'
        matches = re.compile(patron,re.DOTALL).findall(data1)
        scrapertools.printMatches(matches)
        for match in matches:
            # Add to the XBMC listing
            xbmctools.addnewvideo( CHANNELNAME , "play" , category , "Directo" , title +" - ["+match[0]+"]"+ " (Directo)" , match[1] , thumbnail , plot )

    # Offer other films related to the actors
    if len(matchesactores)>0:
        titulo = "Busca otros Films de los actores de esta pelicula"
        # The content area itself is passed in the url position
        xbmctools.addnewfolder( CHANNELNAME , "listaractores" , category , titulo , matchesBK , thumbnail, actores )

    # List related films
    titulo = "Ver Peliculas Relacionadas"
    matches = buscarelacionados(matchesBK)
    plot2 = "PELICULAS RELACIONADAS :\n\n"
    for match in matches:
        plot2 = plot2 + "-"+match[1]+"\n"
    xbmctools.addnewfolder( CHANNELNAME , "listarelacionados" , category , titulo , matchesBK , thumbnail, plot2 )

    # Label (top-right)...
    xbmcplugin.setPluginCategory( handle=pluginhandle, category=category )

    # Disable sorting...
    xbmcplugin.addSortMethod( handle=pluginhandle, sortMethod=xbmcplugin.SORT_METHOD_NONE )

    # End of directory...
    xbmcplugin.endOfDirectory( handle=pluginhandle, succeeded=True )
def ListarVideos(params,url,category):
    """Build the XBMC listing for one series21 chapter page.

    Title, thumbnail and plot are read from ``params``. The page at ``url``
    (resolved against http://www.series21.com) is scanned for Megavideo /
    Megaupload links, encoded ``goTo(...)`` links, direct ``flashvars``
    videos and YouTube links; afterwards a "other chapters of this season"
    folder and, when more than one season cover is found, a "change season"
    folder are added. Finally the directory is closed.

    params   -- plugin call parameters; "title", "thumbnail" and "plot" are read
    url      -- chapter page address, absolute or relative to the site
    category -- label forwarded to every entry and used as plugin category
    """
    url1 = "http://www.series21.com"
    url1 = urlparse.urljoin(url1,url)

    title = urllib.unquote_plus(params.get("title"))
    thumbnail = urllib.unquote_plus(params.get("thumbnail"))
    plot = urllib.unquote_plus(params.get("plot"))

    data = scrapertools.cachePage(url1)

    # Find the area that holds the videos and the description
    # NOTE(review): matches[0] raises IndexError when the area is not found.
    patronvideos = '<div id="content">(.*?)<!-- FIN #content-->'
    matches = re.compile(patronvideos,re.DOTALL).findall(data)
    matchesBK = matches[0]

    # Extract the entries (videos) for megavideo
    patronvideos = '<span style="font-size:12px;"><strong>(.*?)</strong></span><br/>.*?'
    patronvideos += '<span.*?>.*?<a href="http\:\/\/www.megavideo.com\/([\?v=|v/|\?d=]+)([A-Z0-9]{8}).*?" target'
    matches = re.compile(patronvideos,re.DOTALL).findall(matches[0])
    scrapertools.printMatches(matches)
    encontrados = set()
    for match in matches:
        # Skip codes that were already listed
        if match[2] not in encontrados:
            encontrados.add(match[2])
            # A "v" in the link prefix selects Megavideo, otherwise Megaupload
            if 'v' in match[1]:
                server = "Megavideo"
            else:
                server = "Megaupload"
            doblaje = scrapertools.entityunescape(match[0])

            # Title
            scrapedtitle = title + " - [" +doblaje+ "]" + " ("+server+")"

            # URL
            scrapedurl = match[2]

            # Thumbnail
            scrapedthumbnail = thumbnail

            # Plot: make it mention this entry's dubbing
            #print 'este es el plot %s ' %plot
            #print ' doblaje %s ' %doblaje
            scrapedplot = plot
            if ("Español" in plot) and not (doblaje in plot):
                scrapedplot = scrapedplot.replace("Español",doblaje)
            elif "subtitulado" in plot and not (doblaje in plot):
                scrapedplot = scrapedplot.replace("Versión original (subtitulado)",doblaje)
            elif not doblaje in plot:
                scrapedplot += "\n" + "Doblaje : " + doblaje

            # Debugging
            if (DEBUG):
                logger.info("scrapedtitle="+scrapedtitle)
                logger.info("scrapedurl="+scrapedurl)
                logger.info("scrapedthumbnail="+scrapedthumbnail)

            # Add to the XBMC listing
            xbmctools.addnewvideo( CHANNELNAME , "play" , category ,server, scrapedtitle , scrapedurl , scrapedthumbnail, scrapedplot )

    # No megavideo entry: fall back to generic detection over the whole page
    if len(matches)==0:
        listavideos = servertools.findvideos(data)
        encontrados = set()
        for titulo,scrapedurl,servidor in listavideos:
            if scrapedurl.strip() not in encontrados:
                encontrados.add(scrapedurl.strip())
                xbmctools.addnewvideo( CHANNELNAME , "play" , category ,servidor, title+ " - %s" %titulo , scrapedurl , thumbnail, plot )

    # Links published as javascript:goTo('<encoded>', '<server>') calls
    patronvideos = '<span class="bloque-doblaje">(.+?)</span>[^<]+'
    patronvideos +='<span class="bloque-link">[^<]+<a href="javascript\:goTo\(\'([^\']+)\'\, \'([^\']+)\'\)"(.+?)</span>'
    #patronvideos +='(?:\| <a href="javascript\:goTo\(\'([^\']+)\'\, \'([^\']+)\'\)".*?)</span>'
    matches = re.compile(patronvideos,re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for match in matches:
        # URL
        # NOTE(review): when match[2] names neither server, "server" keeps its
        # previous value, or is unbound on the first pass.
        if "megavideo" in match[2]:
            server = "Megavideo"
        elif "megaupload" in match[2]:
            server = "Megaupload"
        if "esp.gif" in match[0]:
            doblaje = "Español"
        else:
            doblaje = match[0].strip()

        # Local decoder object from decrypt21 (the name is only a local variable)
        base64 = decrypt21.Base64()
        try:
            # A second goTo(...) call in the rest of the span means the video
            # comes in two parts
            url2 = re.compile("javascript\:goTo\(\'([^\']+)\'\, \'([^\']+)\'\)").findall(match[3])[0]
            scrapedurl2 = base64._extract_code(base64.decode(url2[0]))
            scrapedurl = base64._extract_code(base64.decode(match[1]))
            part1 = " Parte 1 "
            part2 = " Parte 2 "
            scrapedtitle2 = title + part2+ " - [" +doblaje+ "]" + " ("+server+")"
            #print match[3]
        except:
            # Any failure above (typically no second link) is treated as a
            # single-part video.
            # NOTE(review): the bare except also hides decoding errors.
            scrapedurl = base64._extract_code(base64.decode(match[1]))
            part1 = ""
            part2 = ""

        scrapedtitle = title + part1+ " - [" +doblaje+ "]" + " ("+server+")"

        # Thumbnail
        scrapedthumbnail = thumbnail

        # Plot
        scrapedplot = plot

        # Debugging
        if (DEBUG):
            logger.info("scrapedtitle="+scrapedtitle)
            logger.info("scrapedurl="+scrapedurl)
            logger.info("scrapedthumbnail="+scrapedthumbnail)

        # Add to the XBMC listing
        xbmctools.addnewvideo( CHANNELNAME , "play" , category ,server, scrapedtitle , scrapedurl , scrapedthumbnail, scrapedplot )
        # part2 is non-empty only when a second part was decoded above
        if part2:
            xbmctools.addnewvideo( CHANNELNAME , "play" , category ,server, scrapedtitle2 , scrapedurl2 , scrapedthumbnail, scrapedplot )

    # Extract the direct entries (videos)
    patronvideos = 'flashvars="file=([^\&]+)\&controlbar=over'
    matches = re.compile(patronvideos,re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    if len(matches)>0:
        # The flashvars "file" value is downloaded and scanned for media entries
        data1 = scrapertools.cachePage(matches[0])
        #logger.info(data)
        patron = 'author">(.*?)</media:credit>.*?<media\:content url="([^"]+)"'
        matches = re.compile(patron,re.DOTALL).findall(data1)
        scrapertools.printMatches(matches)
        for match in matches:
            # Add to the XBMC listing
            xbmctools.addnewvideo( CHANNELNAME , "play" , category , "Directo" , title +" - ["+match[0]+"]"+ " (Directo)" , match[1] , thumbnail , plot )

    # Find the spoiler (YouTube links), each distinct link once
    patronvideos = '(http://www.youtube.com[^"]+)"'
    matchSpoiler = re.compile(patronvideos,re.DOTALL).findall(data)
    if len(matchSpoiler)>0:
        encontrados = set()
        for match in matchSpoiler:
            if match not in encontrados:
                encontrados.add(match)
                # Add to the XBMC listing
                xbmctools.addnewvideo( CHANNELNAME , "youtubeplay" , category ,"Directo", "Ver El Spoiler de : "+title , match , thumbnail, "Ver Video Spoiler" )

    # List related chapters
    titulo = "Ver otros capitulos de esta temporada"
    matches = buscarelacionados(matchesBK)
    plot2 = "CAPITULOS DE ESTA TEMPORADA :\n\n"
    for match in matches:
        plot2 = plot2 + "-"+match[2]+"\n"
    xbmctools.addnewfolderextra( CHANNELNAME , "listarelacionados" , category , titulo , url , thumbnail, plot2,matchesBK )

    #<div class="film"><a href="/house/#t_57"><img src="/thumbs/temporadas/95/120/57.jpg"
    # Change season
    patron = 'div class="film"><a href="([^"]+)"><img src="([^"]+)" style'
    matchSerie= re.compile(patron,re.DOTALL).findall(matchesBK)
    if len(matchSerie)>1:
        for temp in matchSerie:
            url2 = urlparse.urljoin(url1,temp[0])
            thumbnail = urlparse.urljoin(url1,temp[1])
            titulo = "Cambiar a otras temporadas"
            # The series name is the first path segment of the link
            titulo_serie = temp[0].split("/")
            titulo2 = titulo_serie[1].replace("-"," ")
            #print ' titulo%s ' %titulo2
            xbmctools.addnewfolderextra( CHANNELNAME , "listarTemporada" , category , titulo , url2 , thumbnail, plot,titulo2 )
            # Only the first season cover is used
            break

    # Label (top-right)...
    xbmcplugin.setPluginCategory( handle=pluginhandle, category=category )

    # Disable sorting...
    xbmcplugin.addSortMethod( handle=pluginhandle, sortMethod=xbmcplugin.SORT_METHOD_NONE )

    # End of directory...
    xbmcplugin.endOfDirectory( handle=pluginhandle, succeeded=True )
def peliculas(item):
    """Scrape a cinetube film listing and return it as a list of Items.

    Every ``<!--PELICULA-->`` entry of the page at ``item.url`` becomes an
    Item with action "findvideos", titled "<name> [<quality>] (<connectors>)".
    When the page has a "next page" link, a final Item with action
    "peliculas" is appended.

    item -- Item whose ``url`` is the listing page to download

    Returns the list of Item objects built from the page.
    """
    logger.info("[cinetube.py] peliculas")
    url = item.url
    itemlist = []

    # Download the page
    data = scrapertools.cachePage(url)

    # Extract the entries. Sample of the markup being matched:
    #
    #   <!--PELICULA-->
    #   <div class="peli_item textcenter">
    #   <div class="pelicula_img"><a href='/peliculas/thriller/ver-pelicula-x.html' >
    #   <img src="http://caratulas.cinetube.es/pelis/7058.jpg" alt="..." /></a>
    #   </div><a href="/peliculas/thriller/ver-pelicula-x.html" >...<p class="white">Title</p></a>
    #   <p><span class="rosa">DVD-RIP</span></p><div class="icos_lg"><img ... alt="espanol" /><img ... alt="" />... </div>
    #   </div>
    #   <!--FIN PELICULA-->
    patronvideos = '<!--PELICULA-->[^<]+'
    patronvideos += '<div class="peli_item textcenter">[^<]+'
    patronvideos += '<div class="pelicula_img"><a[^<]+'
    patronvideos += '<img src=["|\']([^"]+?)["|\'][^<]+</a>[^<]+'
    patronvideos += '</div[^<]+<a href=["|\']([^"]+?)["|\'].*?<p class="white">([^<]+)</p>.*?<p><span class="rosa">([^>]+)</span></p><div class="icos_lg">(.*?)</div>'
    entries = re.compile(patronvideos, re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(entries)

    alt_pattern = '<img.*?alt="([^"]*)"'
    for cover, href, name, quality, icons in entries:
        scrapedtitle = name + " [" + quality + "]"

        # Connectors are the alt texts of the icons; an empty alt is reported
        # as "megavideo"
        alts = re.compile(alt_pattern, re.DOTALL).findall(icons)
        connectors = []
        for alt in alts:
            logger.info("matchconector=" + alt)
            if alt == "":
                alt = "megavideo"
            connectors.append(alt)
        if len(alts) > 0:
            scrapedtitle = scrapedtitle + " (" + "/".join(connectors) + ")"

        # Collapse repeated "megavideo" connectors (three passes)
        for _ in range(3):
            scrapedtitle = scrapedtitle.replace("megavideo/megavideo", "megavideo")
        scrapedtitle = scrapedtitle.replace("descarga directa", "DD")

        # Remove HTML entities
        scrapedtitle = scrapertools.entityunescape(scrapedtitle)

        # Remaining fields
        scrapedplot = ""
        scrapedurl = urlparse.urljoin("http://www.cinetube.es/", href)
        scrapedthumbnail = cover
        if DEBUG:
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")

        # Add to the result list
        itemlist.append(
            Item(channel=CHANNELNAME, action="findvideos", title=scrapedtitle, url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot, folder=True))

    # Extract the pager
    #<li class="navs"><a class="pag_next" href="/peliculas-todas/2.html"></a></li>
    patronvideos = '<li class="navs"><a class="pag_next" href="([^"]+)"></a></li>'
    next_links = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(next_links)

    if len(next_links) > 0:
        scrapedurl = urlparse.urljoin(url, next_links[0])
        itemlist.append(
            Item(channel=CHANNELNAME, action="peliculas", title="!Página siguiente", url=scrapedurl, folder=True))

    return itemlist
def listtemporadacaratula(params,url,category):
    """Add one folder per season found on a cinetube cover listing page.

    Downloads ``url``, adds a "listmirrors" folder for every ``<li>`` entry
    that carries both a title and a season line, titled
    "<series> - <season> (<connectors>)", and, when the page has a "next
    page" link, a folder that calls this same action for the following page.
    The directory is then closed.

    params   -- plugin call parameters (not used here)
    url      -- address of the listing page; also the base for relative links
    category -- label forwarded to every entry and used as plugin category
    """
    xbmc.output("[cinetube.py] listtemporadacaratula")

    # ------------------------------------------------------
    # Download the page
    # ------------------------------------------------------
    data = scrapertools.cachePage(url)
    #xbmc.output(data)

    # ------------------------------------------------------
    # Extract the entries. Sample of the markup being matched:
    #
    #   <li>
    #   <a href="/series/the-middleman/temporada-1/"><img src="http://caratulas.cinetube.es/series/233.jpg" alt="peli" /></a>
    #   <p><span class="rosa"></span></p><div class="icos_lg"><img ... alt="sub" /><img ... alt="" /> </div>
    #   <p class="tit_ficha">The middleman </p>
    #   <p class="tem_fich">1a Temporada</p>
    #   </li>
    # ------------------------------------------------------
    patronvideos = '<li>[^<]+'
    patronvideos += '<a href="([^"]+)"><img src="([^"]+)"[^>]+></a>[^<]+'
    patronvideos += '<p><span class="rosa"></span></p><div class="icos_lg">(.*?)</div>[^<]+'
    patronvideos += '<p class="tit_ficha">([^<]+)</p>[^<]+'
    patronvideos += '<p class="tem_fich">([^<]+)</p>[^<]+'
    patronvideos += '</li>'
    matches = re.compile(patronvideos,re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)

    for match in matches:
        # Title
        scrapedtitle = match[3].strip() + " - " + match[4].strip()

        # Connectors come from the alt text of the icons; an empty alt is
        # reported as "megavideo"
        matchesconectores = re.compile('<img.*?alt="([^"]*)"',re.DOTALL).findall(match[2])
        conectores = []
        for matchconector in matchesconectores:
            xbmc.output("matchconector="+matchconector)
            conectores.append(matchconector or "megavideo")
        if conectores:
            scrapedtitle = scrapedtitle + " (" + "/".join(conectores) + ")"

        # Convert from UTF-8 (best effort) and remove HTML entities. Only
        # conversion failures are ignored; other errors are no longer hidden.
        try:
            scrapedtitle = unicode( scrapedtitle, "utf-8" ).encode("iso-8859-1")
        except (UnicodeError, TypeError):
            pass
        scrapedtitle = scrapertools.entityunescape(scrapedtitle)

        # Remaining fields
        scrapedplot = ""
        scrapedurl = urlparse.urljoin(url,match[0])
        scrapedthumbnail = match[1]
        if DEBUG:
            xbmc.output("title=["+scrapedtitle+"], url=["+scrapedurl+"], thumbnail=["+scrapedthumbnail+"]")

        # Add to the XBMC listing
        xbmctools.addnewfolder( CHANNELNAME , "listmirrors" , category , scrapedtitle , scrapedurl , scrapedthumbnail, scrapedplot )

    # ------------------------------------------------------
    # Extract the pager
    # ------------------------------------------------------
    #<li class="navs"><a class="pag_next" href="/peliculas-todas/2.html"></a></li>
    patronvideos = '<li class="navs"><a class="pag_next" href="([^"]+)"></a></li>'
    matches = re.compile(patronvideos,re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    if len(matches)>0:
        xbmctools.addnewfolder( CHANNELNAME , "listtemporadacaratula" , category , "!Página siguiente" , urlparse.urljoin(url,matches[0]) , "", "" )

    # Label (top-right), sort method and end of directory
    handle = int( sys.argv[ 1 ] )
    xbmcplugin.setPluginCategory( handle=handle, category=category )
    xbmcplugin.addSortMethod( handle=handle, sortMethod=xbmcplugin.SORT_METHOD_NONE )
    xbmcplugin.endOfDirectory( handle=handle, succeeded=True )
def series(item):
    """Build the list of series found on a cinetube listing page.

    Downloads ``item.url`` and extracts one entry per ``<li>`` card: link,
    cover image, connector icons, title and the optional season/episode
    caption. When the page contains a "next page" link, an extra entry
    pointing at it is appended.

    Returns a list of ``Item`` objects: one per card with action
    ``"temporadas"`` and, if a paginator link exists, a final one with
    action ``"series"``.
    """
    logger.info("[cinetube.py] series")
    itemlist = []

    # Download the page
    data = scrapertools.cachePage(item.url)
    logger.info("Pagina de %d caracteres" % len(data))

    # Extract the entries. Two card shapes are handled:
    #
    #   <li>
    #     <a href="/series/.../capitulo-12/"><img src="http://.../8912.jpg" alt="peli" /></a>
    #     <div class="icos_lg"><img ... alt="espanol" /> <img ... alt="megavideo.png" /> ...</div>
    #     <p class="tit_ficha"><a class="tit_ficha" ...>Tierra de lobos </a></p>
    #     <p class="tem_fich">1a Temporada - Cap 12</p>
    #   </li>
    #
    #   <li>
    #     <a href="/series/gabriel-un-amor-inmortal/"><img src="http://.../7952.jpg" alt="peli" /></a>
    #     <div class="icos_lg"><img ... alt="" /><img ... alt="" />...</div>
    #     <p class="tit_ficha">Gabriel, un amor inmortal </p>
    #   </li>
    patronvideos = '<li>[^<]+'
    patronvideos += '<a href="([^"]+)"><img src="([^"]+)"[^>]*></a>[^<]+'
    patronvideos += '<div class="icos_lg">(.*?)</div>[^<]+'
    patronvideos += '<p class="tit_ficha">(.*?)</p>[^<]+'
    patronvideos += '(?:<p class="tem_fich">([^<]+)</p>)?'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)

    for match in matches:
        # Title
        scrapedtitle = match[3].strip()
        # findall() always returns every group; an optional group that did
        # not take part in the match comes back as "". Checking the tuple
        # length is therefore always true, so test the captured text itself
        # to avoid appending a stray blank to captionless entries.
        if match[4]:
            scrapedtitle = scrapedtitle + " " + match[4]

        # Connector names come from the alt text of the icons; an empty alt
        # is reported as "megavideo".
        matchesconectores = re.compile('<img.*?alt="([^"]*)"', re.DOTALL).findall(match[2])
        conectores = ""
        for matchconector in matchesconectores:
            logger.info("matchconector=" + matchconector)
            if matchconector == "":
                matchconector = "megavideo"
            conectores = conectores + matchconector + "/"
        if len(matchesconectores) > 0:
            scrapedtitle = scrapedtitle + " (" + conectores[:-1] + ")"

        # Collapse repeated "megavideo" entries (one pass per possible
        # duplicate) and shorten the direct-download label.
        scrapedtitle = scrapedtitle.replace("megavideo/megavideo", "megavideo")
        scrapedtitle = scrapedtitle.replace("megavideo/megavideo", "megavideo")
        scrapedtitle = scrapedtitle.replace("megavideo/megavideo", "megavideo")
        scrapedtitle = scrapedtitle.replace("descarga directa", "DD")

        # The title group may contain an <a> wrapper: drop tags and entities
        scrapedtitle = scrapertools.htmlclean(scrapedtitle)
        scrapedtitle = scrapertools.entityunescape(scrapedtitle)

        scrapedplot = ""
        scrapedurl = urlparse.urljoin(item.url, match[0])
        scrapedthumbnail = match[1]
        if (DEBUG):
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")

        itemlist.append(
            Item(channel=CHANNELNAME,
                 action="temporadas",
                 title=scrapedtitle,
                 url=scrapedurl,
                 thumbnail=scrapedthumbnail,
                 plot=scrapedplot,
                 folder=True))

    # Paginator
    # <li class="navs"><a class="pag_next" href="/peliculas-todas/2.html"></a></li>
    patronvideos = '<li class="navs"><a class="pag_next" href="([^"]+)"></a></li>'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    if len(matches) > 0:
        scrapedurl = urlparse.urljoin(item.url, matches[0])
        itemlist.append(
            Item(channel=CHANNELNAME,
                 action="series",
                 title="!Página siguiente",
                 url=scrapedurl,
                 folder=True))

    return itemlist
def peliculas(item):
    """Return the movies listed on a cinetube page as a list of ``Item``.

    The page at ``item.url`` is downloaded and every ``<!--PELICULA-->``
    card is turned into an entry with action ``"findvideos"``. The title is
    built as ``name [quality] (connector/connector/...)``. A trailing entry
    with action ``"peliculas"`` is added when the page has a "next" link.
    """
    logger.info("[cinetube.py] peliculas")

    page_url = item.url

    # Download the page
    html = scrapertools.cachePage(page_url)

    # Card markup the expression below is written for:
    #
    #   <!--PELICULA-->
    #   <div class="peli_item textcenter">
    #     <div class="pelicula_img"><a href='/peliculas/.../ver-pelicula-x.html' >
    #       <img src="http://.../7058.jpg" alt="Un segundo después 2" /></a>
    #     </div><a href="/peliculas/.../ver-pelicula-x.html" ><div class="dvdrip"></div></a>
    #     <a href='...'><p class="white">Un segundo después 2</p></a>
    #     <p><span class="rosa">DVD-RIP</span></p>
    #     <div class="icos_lg"><img ... alt="espanol" /><img ... alt="" />...</div>
    #   </div>
    #   <!--FIN PELICULA-->
    entry_regex = (
        '<!--PELICULA-->[^<]+'
        '<div class="peli_item textcenter">[^<]+'
        '<div class="pelicula_img"><a[^<]+'
        '<img src=["|\']([^"]+?)["|\'][^<]+</a>[^<]+'
        '</div[^<]+<a href=["|\']([^"]+?)["|\'].*?<p class="white">([^<]+)</p>.*?<p><span class="rosa">([^>]+)</span></p><div class="icos_lg">(.*?)</div>'
    )
    entries = re.compile(entry_regex, re.DOTALL).findall(html)
    if DEBUG:
        scrapertools.printMatches(entries)

    results = []
    for thumbnail, href, name, quality, icons_html in entries:
        # One connector label per icon; icons without alt text count as
        # "megavideo".
        alts = re.compile('<img.*?alt="([^"]*)"', re.DOTALL).findall(icons_html)
        labels = []
        for alt in alts:
            logger.info("matchconector=" + alt)
            labels.append(alt or "megavideo")

        title = name + " [" + quality + "]"
        if alts:
            title = title + " (" + "/".join(labels) + ")"

        # Three passes collapse runs of repeated "megavideo" labels
        for _ in range(3):
            title = title.replace("megavideo/megavideo", "megavideo")
        title = title.replace("descarga directa", "DD")

        # Remove HTML entities
        title = scrapertools.entityunescape(title)

        target = urlparse.urljoin("http://www.cinetube.es/", href)
        if DEBUG:
            logger.info("title=[" + title + "], url=[" + target + "], thumbnail=[" + thumbnail + "]")

        # Add the entry to the listing
        results.append(Item(channel=CHANNELNAME,
                            action="findvideos",
                            title=title,
                            url=target,
                            thumbnail=thumbnail,
                            plot="",
                            folder=True))

    # Paginator
    # <li class="navs"><a class="pag_next" href="/peliculas-todas/2.html"></a></li>
    next_regex = '<li class="navs"><a class="pag_next" href="([^"]+)"></a></li>'
    next_links = re.compile(next_regex, re.DOTALL).findall(html)
    scrapertools.printMatches(next_links)

    if next_links:
        results.append(Item(channel=CHANNELNAME,
                            action="peliculas",
                            title="!Página siguiente",
                            url=urlparse.urljoin(page_url, next_links[0]),
                            folder=True))

    return results
def listpeliconcaratula(params,url,category):
    """List the movies/documentaries of a cinetube page as XBMC folders.

    Downloads ``url``, adds one folder (action ``"listmirrors"``) per
    ``<!--PELICULA-->`` card, adds a "next page" folder when the page has a
    paginator link, and finally closes the XBMC directory.

    ``params`` is not used by this function; ``category`` is passed through
    to the created folders and used as the plugin category label.
    """
    xbmc.output("[cinetube.py] listpeliconcaratula")

    # ------------------------------------------------------
    # Download the page
    # ------------------------------------------------------
    data = scrapertools.cachePage(url)
    #xbmc.output(data)

    # ------------------------------------------------------
    # Extract the entries
    # ------------------------------------------------------
    # Movie card:
    #
    #   <!--PELICULA-->
    #   <div class="peli_item textcenter" style="cursor:pointer;" onclick="location.href='/peliculas/intriga/ver-pelicula-a-espaldas-de-la-ley.html'">
    #     <div class="pelicula_img">
    #       <img src="http://caratulas.cinetube.es/pelis/251.jpg" alt="A espaldas de la ley" />
    #     </div>
    #     <p class="white">A espaldas de la ley</p>
    #     <p><span class="rosa">DVD-RIP</span></p><div class="icos_lg"><img ... alt="espanol" />...</div>
    #   </div>
    #
    # Documentary card: same layout, but the quality span is empty:
    #
    #     <p class="white">Planeta Azul</p>
    #     <p><span class="rosa"></span></p><div class="icos_lg">...</div>
    patronvideos  = '<!--PELICULA-->[^<]+'
    patronvideos += '<div class="peli_item textcenter" style="cursor:pointer;" onclick="location.href=\'/([^\']+)\'">[^<]+'
    patronvideos += '<div class="pelicula_img">[^<]+'
    patronvideos += '<img src="([^"]+)" alt="[^"]+" />[^<]+'
    # The quality group is allowed to be empty so that documentary cards
    # (empty "rosa" span) are matched too instead of being skipped.
    patronvideos += '</div>[^<]*<p class="white">([^<]+)</p>[^<]*<p><span class="rosa">([^<]*)</span></p>'
    patronvideos += '<div class="icos_lg">(.*?)</div>'
    matches = re.compile(patronvideos,re.DOTALL).findall(data)
    if DEBUG: scrapertools.printMatches(matches)

    for match in matches:
        # Title: only show the quality tag when the page provides one
        scrapedtitle = match[2]
        if match[3].strip():
            scrapedtitle = scrapedtitle + " [" + match[3] + "]"

        # Connector names come from the alt text of the icons; an empty alt
        # is reported as "megavideo".
        matchesconectores = re.compile('<img.*?alt="([^"]*)"',re.DOTALL).findall(match[4])
        conectores = ""
        for matchconector in matchesconectores:
            xbmc.output("matchconector="+matchconector)
            if matchconector=="":
                matchconector = "megavideo"
            conectores = conectores + matchconector + "/"
        if len(matchesconectores)>0:
            scrapedtitle = scrapedtitle + " (" + conectores[:-1] + ")"

        # Convert from UTF-8 to ISO-8859-1 (best effort) and drop entities
        try:
            scrapedtitle = unicode( scrapedtitle, "utf-8" ).encode("iso-8859-1")
        except (UnicodeError, TypeError):
            # Not valid UTF-8, not representable in ISO-8859-1, or already a
            # unicode object: keep the title as scraped.
            pass
        scrapedtitle = scrapertools.entityunescape(scrapedtitle)

        # Remaining fields
        scrapedplot = ""
        scrapedurl = urlparse.urljoin("http://www.cinetube.es/",match[0])
        scrapedthumbnail = match[1]
        if (DEBUG): xbmc.output("title=["+scrapedtitle+"], url=["+scrapedurl+"], thumbnail=["+scrapedthumbnail+"]")

        # Add to the XBMC listing
        xbmctools.addnewfolder( CHANNELNAME , "listmirrors" , category , scrapedtitle , scrapedurl , scrapedthumbnail, scrapedplot )

    # ------------------------------------------------------
    # Extract the paginator
    # ------------------------------------------------------
    #<li class="navs"><a class="pag_next" href="/peliculas-todas/2.html"></a></li>
    patronvideos  = '<li class="navs"><a class="pag_next" href="([^"]+)"></a></li>'
    matches = re.compile(patronvideos,re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    if len(matches)>0:
        xbmctools.addnewfolder( CHANNELNAME , "listpeliconcaratula" , category , "!Página siguiente" , urlparse.urljoin(url,matches[0]) , "", "" )

    # Label (top-right)...
    xbmcplugin.setPluginCategory( handle=int( sys.argv[ 1 ] ), category=category )
    xbmcplugin.addSortMethod( handle=int( sys.argv[ 1 ] ), sortMethod=xbmcplugin.SORT_METHOD_NONE )
    xbmcplugin.endOfDirectory( handle=int( sys.argv[ 1 ] ), succeeded=True )
def videolist(params,url,category):
    """List every video of a skai folder, following its "next page" links.

    Starting at ``url``, each page is downloaded and one video entry is
    added per match. The plot of each video is read from the
    ``<meta name="description">`` tag of the video's own page; when that tag
    is missing the plot is left empty. Pages are flattened into a single
    listing by following the "next page" link until there is none, and the
    XBMC directory is then closed once.

    ``params`` is not used by this function; ``category`` is used as the
    plugin category label.
    """
    xbmc.output("[skai_folders.py] videolist")

    # Video entries of a listing page
    patron = '<p><a href=\'(.*?)\' title=.*?<img alt="([^<]+)" src="(.*?)" /></a></p>.*?<dd> <p>(.*?)</dd>'
    # Description of a single video page
    plot_pattern = '<meta name="description" content="(.*?)">'

    # Pages are walked iteratively (instead of recursing) so the directory
    # is finished exactly once; the visited set stops a paginator that
    # links back to an already processed page from looping forever.
    visited = set()
    page_url = url
    while page_url and page_url not in visited:
        visited.add(page_url)

        # --------------------------------------------------------
        # Download page
        # --------------------------------------------------------
        data = scrapertools.cachePage(page_url)

        # Extract the videos
        matches = re.compile(patron,re.DOTALL).findall(data)
        if DEBUG: scrapertools.printMatches(matches)

        for match in matches:
            scrapedtitle = match[1]
            scrapedurl = urlparse.urljoin(page_url,unescape(match[0]))
            scrapedthumbnail = urlparse.urljoin(page_url,match[2])

            # Reset per video: without this a page lacking the description
            # tag would reuse the previous video's plot (or fail on the
            # first video because the name was never bound).
            scrapedplot = ""
            data1 = scrapertools.cachePage(scrapedurl)
            matches1 = re.compile(plot_pattern,re.DOTALL).findall(data1)
            if len(matches1)>0:
                scrapedplot = scrapertools.entityunescape(matches1[0])
                scrapedplot = unescape(HTMLParser.HTMLParser().unescape(scrapedplot))

            if (DEBUG): xbmc.output("title=["+scrapedtitle+"], url=["+scrapedurl+"], thumbnail=["+scrapedthumbnail+"]")

            # Add to the list of XBMC
            xbmctools.addnewvideo( CHANNELCODE , "play" , CHANNELNAME , "" , scrapedtitle , scrapedurl , scrapedthumbnail, scrapedplot )

        # Next page: the link text depends on the configured locale index
        pattern = 'class="next_page" rel="next" href="(.*?)">'+("Next","Επόμενη",)[int(config.getSetting("locale"))]+'</a></p>'
        matches = re.compile(pattern,re.DOTALL).findall(data)
        if DEBUG: scrapertools.printMatches(matches)

        next_url = ""
        if len(matches)>0:
            scrapedtitle = ">>> Next Page >>>"
            next_url = urlparse.urljoin(page_url,matches[0])
            scrapedthumbnail = ""
            if (DEBUG): xbmc.output("title=["+scrapedtitle+"], url=["+next_url+"], thumbnail=["+scrapedthumbnail+"]")
        page_url = next_url

    # Label (top-right)...
    xbmcplugin.setPluginCategory( handle=pluginhandle, category=category )

    # Disable sorting...
    xbmcplugin.addSortMethod( handle=pluginhandle, sortMethod=xbmcplugin.SORT_METHOD_NONE )

    # End of directory...
    xbmcplugin.endOfDirectory( handle=pluginhandle, succeeded=True )
def generico(params,url,category):
    """Add the videos found in the ``news`` blocks of an RTVE page to XBMC.

    Each ``<div class="news...">`` block of the page at ``url`` yields at
    most one video: its link and title, thumbnail and plot (the
    ``chapeaux`` div). Passing ``"allowblanktitles"`` as ``category``
    switches on a mode where the title is taken from the next-to-last
    segment of the video URL, and the category then becomes
    ``CHANNELNAME``. ``params`` is not used by this function.
    """
    xbmc.output("[rtveprogramas.py] generico")

    def _find_all(expression, text):
        # Run a DOTALL search and dump the result when debugging
        found = re.compile(expression, re.DOTALL).findall(text)
        if DEBUG:
            scrapertools.printMatches(found)
        return found

    # The special category value allows videos without a title
    allowblanktitles = (category == "allowblanktitles")
    if allowblanktitles:
        category = CHANNELNAME

    # --------------------------------------------------------
    # Download the page
    # --------------------------------------------------------
    data = scrapertools.cachePage(url)
    #xbmc.output(data)

    # --------------------------------------------------------
    # Extract the blocks, one per video
    # --------------------------------------------------------
    # Shape of a block:
    #
    #   <span class="imgL"><a href="/mediateca/videos/20100218/aguila-roja-cap20/698541.shtml" title="Capítulo 20">
    #     <img src="/imagenes/capitulo-20/1267703445964.jpg" alt="Capítulo 20" title="Capítulo 20"/></a></span>
    #   <h3 class="M ">
    #     <a href="/mediateca/videos/20100218/aguila-roja-cap20/698541.shtml" title="Capítulo 20">Capítulo 20</a>
    #   </h3>
    #   <div class="chapeaux">Emitido el 18/02/10</div>
    for bloque in _find_all('<div class="news[^"]+">(.*?</div>)', data):
        scrapedtitle = ""
        scrapedurl = ""
        scrapedthumbnail = ""
        scrapedplot = ""

        # Link to the page together with its title
        titled_links = _find_all('<a href="([^"]+)"[^>]+>([^<]+)<', bloque)
        if titled_links:
            href, caption = titled_links[0]
            scrapedurl = urlparse.urljoin(url, href)
            scrapedtitle = scrapertools.entityunescape(caption)

        # No titled link: fall back to the first link of any kind
        if scrapedurl == "":
            plain_links = _find_all('<a href="([^"]+)"', bloque)
            if plain_links:
                scrapedurl = urlparse.urljoin(url, plain_links[0])

        # Thumbnail
        images = _find_all('<img src="([^"]+)"', bloque)
        if images:
            scrapedthumbnail = urlparse.urljoin(url, images[0])

        # Plot
        summaries = _find_all('<div class="chapeaux">(.*?)</div>', bloque)
        if summaries:
            scrapedplot = scrapertools.htmlclean(summaries[0])

        if DEBUG:
            xbmc.output("title=["+scrapedtitle+"], url=["+scrapedurl+"], thumbnail=["+scrapedthumbnail+"]")

        if allowblanktitles:
            # Use the next-to-last URL segment as the title
            segments = scrapedurl.split("/")
            scrapedtitle = segments[len(segments) - 2]

        # Entries still lacking a title or a URL are skipped
        if scrapedtitle == "" or scrapedurl == "":
            continue

        # Add to the XBMC listing
        xbmctools.addnewvideo( "rtve" , "play" , category , "Directo" , scrapedtitle , scrapedurl , scrapedthumbnail , scrapedplot )

    # Close the directory
    xbmcplugin.setPluginCategory( handle=pluginhandle, category=category )
    xbmcplugin.addSortMethod( handle=pluginhandle, sortMethod=xbmcplugin.SORT_METHOD_NONE )
    xbmcplugin.endOfDirectory( handle=pluginhandle, succeeded=True )