def listmirrors(item):
    """Build the list of mirrors for one yotix.tv title.

    item -- Item whose url points at the detail page; its title, thumbnail
            and plot are reused for every mirror found.

    Returns a list of Items: a "Directo" entry when the video is embedded
    in the page itself, one playable entry per Megavideo link, and one
    "findvideos" folder per known yotix.tv mirror section.
    """
    logger.info("[yotix.py] listmirrors")
    title = item.title
    url = item.url
    thumbnail = item.thumbnail
    plot = item.plot
    itemlist = []

    # Download the detail page
    data = scrapertools.cachePage(url)
    #logger.info(data)

    # Extract the synopsis (overrides the plot passed in, when present)
    patronvideos = '<div class="texto-sinopsis">(.*?)<div'
    matches = re.compile(patronvideos,re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    if len(matches)>0:
        plot = scrapertools.htmlclean(matches[0].strip())

    # Direct link: the video is embedded in the page itself (flashvars file=...)
    patron = 'so.addParam\(\'flashvars\',\'.*?file\=([^\&]+)\&'
    matches = re.compile(patron,re.DOTALL).findall(data)
    if len(matches)>0:
        url = matches[0]
        # The host sometimes relocates the file; prefer the new location
        newurl = findnewlocation(url)
        if newurl!="":
            url = newurl
        itemlist.append( Item(channel=CHANNELNAME, action="play" , title=title , url=url, thumbnail=thumbnail, plot=plot, server="Directo", folder=False))

    # Megavideo mirrors
    patronvideos = '<a.*?href="(http://yotix.tv/flash/[^"]+)"[^>]*>(.*?)</a>'
    matches = re.compile(patronvideos,re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    for match in matches:
        scrapedtitle = scrapertools.htmlclean(match[1].replace("&#8211;","-")).strip()
        scrapedurl = match[0]
        # FIX: the original appended title/url (the detail page itself)
        # instead of the values scraped just above, so every Megavideo
        # mirror shared the same label and pointed back at the page.
        itemlist.append( Item(channel=CHANNELNAME, action="play" , title=scrapedtitle , url=scrapedurl, thumbnail=thumbnail, plot=plot, server="Megavideo", folder=False))

    # "Directo" mirrors hosted under the known yotix.tv sections
    buscamirrors(itemlist,'<a.*?href="(http://yotix.tv/sitio/[^"]+)"[^>]*>(.*?)</a>',data,thumbnail,plot)
    buscamirrors(itemlist,'<a.*?href="(http://yotix.tv/media/[^"]+)"[^>]*>(.*?)</a>',data,thumbnail,plot)
    buscamirrors(itemlist,'<a.*?href="(http://yotix.tv/video/[^"]+)"[^>]*>(.*?)</a>',data,thumbnail,plot)
    buscamirrors(itemlist,'<a.*?href="(http://yotix.tv/ver/[^"]+)"[^>]*>(.*?)</a>',data,thumbnail,plot)
    buscamirrors(itemlist,'<a.*?href="(http://yotix.tv/rt/[^"]+)"[^>]*>(.*?)</a>',data,thumbnail,plot)
    buscamirrors(itemlist,'<a.*?href="(http://yotix.tv/anime/[^"]+)"[^>]*>(.*?)</a>',data,thumbnail,plot)
    buscamirrors(itemlist,'<a.*?href="(http://yotix.tv/gb/[^"]+)"[^>]*>(.*?)</a>',data,thumbnail,plot)
    buscamirrors(itemlist,'<a.*?href="(http://yotix.tv/online/[^"]+)"[^>]*>(.*?)</a>',data,thumbnail,plot)
    buscamirrors(itemlist,'<a.*?href="(http://yotix.tv/4s/[^"]+)"[^>]*>(.*?)</a>',data,thumbnail,plot)

    return itemlist
def episodios(params,url,category):
    """List the episodes of one series from the boing.es XML feed.

    params   -- request dict; "title" is the url-quoted series title.
    url      -- NOT a URL: the series id matched against the feed's
                <video series="..."> attribute (e.g. "ben10af").
    category -- XBMC category label passed through to every entry.

    Adds one playable "Directo" entry per episode and closes the
    directory sorted by label.
    """
    xbmc.output("[boing.py] episodios")  # FIX: was "videolist", copied from another function

    # On-screen title is the (quoted) series title
    title = urllib.unquote_plus( params.get("title") )

    # Download the feed. Keep its URL in a variable: relative <imagen>
    # paths must be resolved against it.
    feedurl = "http://www.boing.es/videos/FFFFFF.xml"
    data = scrapertools.cachePage(feedurl)
    #xbmc.output(data)

    # Each feed entry looks like:
    #   <video id="..." series="ben10af" extras="" novedad="0">
    #     <titulo>...</titulo><imagen>/videos/...jpg</imagen>
    #     <url>http://...flv</url><stats>...</stats>
    #     <descripcion><![CDATA[...]]></descripcion>
    #   </video>
    # Capture groups: 0 = title, 1 = image, 2 = video url, 3 = description
    patronvideos = '<video id="[^"]+" series="'+url+'"[^>]+>[^<]+'
    patronvideos += '<titulo>([^<]+)</titulo>[^<]+'
    patronvideos += '<imagen>([^<]+)</imagen>[^<]+'
    patronvideos += '<url>([^<]+)</url>[^<]+'
    patronvideos += '<stats>[^<]+</stats>[^<]+'
    patronvideos += '<descripcion>(.*?)</descripcion>[^<]+'
    patronvideos += '</video>'
    matches = re.compile(patronvideos,re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for match in matches:
        # The feed is utf-8; XBMC strings here are handled as iso-8859-1
        try:
            scrapedtitle = unicode( match[0], "utf-8" ).encode("iso-8859-1")
        except:
            scrapedtitle = match[0]
        # FIX: resolve relative paths against the feed URL. The old code
        # joined against `url`, which is a series id, not a URL, so
        # relative thumbnails like "/videos/..." never resolved to boing.es.
        scrapedurl = urlparse.urljoin(feedurl,match[2])
        scrapedthumbnail = urlparse.urljoin(feedurl,match[1])
        try:
            scrapedplot = scrapertools.htmlclean(unicode( match[3], "utf-8" ).encode("iso-8859-1"))
        except:
            scrapedplot = scrapertools.htmlclean(match[3])
        if (DEBUG):
            xbmc.output("title=["+scrapedtitle+"], url=["+scrapedurl+"], thumbnail=["+scrapedthumbnail+"]")
        xbmctools.addnewvideo( CHANNELCODE , "play" , category , "Directo" , title + " - " + scrapedtitle , scrapedurl , scrapedthumbnail , scrapedplot )

    # Label (top-right) and directory close
    xbmcplugin.setPluginCategory( handle=pluginhandle, category=category )
    xbmcplugin.addSortMethod( handle=pluginhandle, sortMethod=xbmcplugin.SORT_METHOD_LABEL )
    xbmcplugin.endOfDirectory( handle=pluginhandle, succeeded=True )
def listmirrors(params,url,category):
    """Paint the mirror list for one yotix.tv title into the XBMC
    directory: a direct "play2" entry when the video is embedded in the
    page, one "Megavideo" entry per flash link, and the known yotix.tv
    mirror sections via extraevideos()."""
    logger.info("[yotix.py] listmirrors")

    title = urllib.unquote_plus(params.get("title"))
    thumbnail = urllib.unquote_plus(params.get("thumbnail"))
    plot = urllib.unquote_plus(params.get("plot"))

    # Fetch the detail page
    page = scrapertools.cachePage(url)
    #logger.info(page)

    # Synopsis, when present, replaces the plot passed in
    sinopsis = re.findall('<div class="texto-sinopsis">(.*?)<div', page, re.DOTALL)
    scrapertools.printMatches(sinopsis)
    if sinopsis:
        plot = scrapertools.htmlclean(sinopsis[0].strip())

    # Video embedded in the page itself (flashvars file=...)
    embedded = re.findall('so.addParam\(\'flashvars\',\'.*?file\=([^\&]+)\&', page, re.DOTALL)
    if embedded:
        url = embedded[0]
        xbmctools.addnewvideo( CHANNELNAME , "play2" , category , "Directo" , title , url , thumbnail , plot )

    # Megavideo mirrors
    megavideo = re.findall('<a.*?href="(http://yotix.tv/flash/[^"]+)"[^>]*>(.*?)</a>', page, re.DOTALL)
    scrapertools.printMatches(megavideo)
    for link, label in megavideo:
        cleantitle = scrapertools.htmlclean(label.replace("&#8211;","-")).strip()
        xbmctools.addnewvideo( CHANNELNAME , "play" , category , "Megavideo" , cleantitle , link , thumbnail , plot )

    # "Directo" mirrors under each known yotix.tv section
    for section in ("sitio", "media", "video", "ver", "rt", "anime", "gb", "online"):
        extraevideos('<a.*?href="(http://yotix.tv/' + section + '/[^"]+)"[^>]*>(.*?)</a>', page, category, thumbnail, plot)

    # Label (top-right) and directory close
    xbmcplugin.setPluginCategory( handle=pluginhandle, category=category )
    xbmcplugin.addSortMethod( handle=pluginhandle, sortMethod=xbmcplugin.SORT_METHOD_NONE )
    xbmcplugin.endOfDirectory( handle=pluginhandle, succeeded=True )
def novedades(params,url,category):
    """List the "new" videos from the boing.es feed at `url`: each <video>
    entry becomes one playable "Directo" item; the directory is closed
    sorted by label."""
    xbmc.output("[boing.py] videolist")

    # Fetch the feed
    data = scrapertools.cachePage(url)
    #xbmc.output(data)

    # Feed entry shape:
    #   <video id="..." extras="naruto" novedad="1" promo="">
    #     <titulo>...</titulo><imagen>...</imagen><url>...flv</url>
    #     <stats>...</stats><descripcion><![CDATA[...]]></descripcion>
    #   </video>
    entry_pattern = ('<video id="[^"]+" extras="([^"]+)"[^>]+>[^<]+'
                     '<titulo>([^<]+)</titulo>[^<]+'
                     '<imagen>([^<]+)</imagen>[^<]+'
                     '<url>([^<]+)</url>[^<]+'
                     '<stats>[^<]+</stats>[^<]+'
                     '<descripcion>(.*?)</descripcion>[^<]+'
                     '</video>')
    entries = re.findall(entry_pattern, data, re.DOTALL)
    scrapertools.printMatches(entries)

    for extras, name, image, flv, desc in entries:
        # The feed is utf-8; XBMC strings here are handled as iso-8859-1
        try:
            entry_title = extras + " - " + unicode( name, "utf-8" ).encode("iso-8859-1")
        except:
            entry_title = name
        entry_url = urlparse.urljoin(url, flv)
        entry_thumb = urlparse.urljoin(url, image)
        try:
            entry_plot = scrapertools.htmlclean(unicode( desc, "utf-8" ).encode("iso-8859-1"))
        except:
            entry_plot = scrapertools.htmlclean(desc)
        if (DEBUG):
            xbmc.output("title=["+entry_title+"], url=["+entry_url+"], thumbnail=["+entry_thumb+"]")
        xbmctools.addnewvideo( CHANNELCODE , "play" , category , "Directo" , entry_title , entry_url , entry_thumb , entry_plot )

    # Label (top-right) and directory close
    xbmcplugin.setPluginCategory( handle=pluginhandle, category=category )
    xbmcplugin.addSortMethod( handle=pluginhandle, sortMethod=xbmcplugin.SORT_METHOD_LABEL )
    xbmcplugin.endOfDirectory( handle=pluginhandle, succeeded=True )
def detail(params, url, category):
    """Paint the detail view for one dibujosanimadosgratis.net post:
    narrows the page down to the post body, derives plot and thumbnail
    from it, and adds one playable entry per recognized video link."""
    logger.info("[dibujosanimadosgratis.py] detail")

    title = urllib.unquote_plus(params.get("title"))
    thumbnail = urllib.unquote_plus(params.get("thumbnail"))
    plot = urllib.unquote_plus(params.get("plot"))

    # Fetch the page and keep only the post markup when it matches
    data = scrapertools.cachePage(url)
    post_re = re.compile('<div class="post">(.*?<div class="postMeta">.*?<div class="postContent">.*?)</div>', re.DOTALL)
    fragments = post_re.findall(data)
    if len(fragments) > 0:
        data = fragments[0]
        logger.info(data)

    # Plot: cleaned post text, flattened onto a single line
    scrapedplot = scrapertools.htmlclean(data)
    for ch in ("\n", "\r"):
        scrapedplot = scrapedplot.replace(ch, " ")

    # Thumbnail: first image in the post, if any
    scrapedthumbnail = ""
    thumbs = re.compile('<img src="([^"]+)"', re.DOTALL).findall(data)
    if len(thumbs) > 0:
        scrapedthumbnail = thumbs[0]

    # ------------------------------------------------------------------
    # Known video links inside the post body
    # ------------------------------------------------------------------
    listavideos = servertools.findvideos(data)
    for video in listavideos:
        xbmctools.addnewvideo(CHANNELNAME, "play", category, video[2], title.strip() + " - " + video[0], video[1], scrapedthumbnail, scrapedplot)

    # Label (top-right) and directory close
    xbmcplugin.setPluginCategory(handle=pluginhandle, category=category)
    xbmcplugin.addSortMethod(handle=pluginhandle, sortMethod=xbmcplugin.SORT_METHOD_NONE)
    xbmcplugin.endOfDirectory(handle=pluginhandle, succeeded=True)
def episodelist(item): logger.info("[seriespepito.py] list") # Descarga la página data = scrapertools.cachePage(item.url) #logger.info(data) # Extrae el argumento patron = '<div class="contenido" id="noticia">.*?<span[^>]+>(.*?)</div>' matches = re.compile(patron, re.DOTALL).findall(data) if len(matches) > 0: scrapedplot = matches[0] scrapedplot = scrapertools.htmlclean(scrapedplot).strip() # Unas series están en utf-8, otras en iso-8859-1. Esta doble conversión funciona :) try: intermedia = unicode(scrapedplot, "utf-8", errors="replace").encode("iso-8859-1") intermedia = unicode(intermedia, "iso-8859-1", errors="replace").encode("utf-8") #print item.title+" encoding 1" scrapedplot = intermedia except: #print item.title+" encoding 2" scrapedplot = unicode(scrapedplot, "iso-8859-1", errors="replace").encode("utf-8") item.plot = scrapedplot else: scrapedplot = "" # Extrae los capítulos patron = "<li class='li_capitulo'><a class='capitulo1' href='([^']+)' title='[^']+'>([^<]+)</a> <img src='([^']+)'[^>]+></li>" matches = re.compile(patron, re.DOTALL).findall(data) scrapertools.printMatches(matches) itemlist = [] for match in matches: scrapedtitle = match[1] + " [" + match[2][49:-4] + "]" scrapedurl = match[0] scrapedthumbnail = item.thumbnail #scrapedplot = "" if (DEBUG): logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]") # Ajusta el encoding a UTF-8 scrapedtitle = unicode(scrapedtitle, "iso-8859-1", errors="replace").encode("utf-8") itemlist.append( Item(channel=CHANNELNAME, action="findvideos", title=scrapedtitle, url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot, show=item.show)) return itemlist
def estrenos(params, url, category):
    """List the season premieres from the mcanime encyclopedia page.

    params   -- request dict (XBMC protocol).
    url      -- page to scrape; also the base for relative links.
    category -- XBMC category label for every entry added.

    Adds one "ddseriedetail" folder per premiere and closes the directory.
    """
    logger.info("[mcanime.py] estrenos")

    # Download the page
    data = scrapertools.cachePage(url)
    # logger.info(data)

    # Extract the entries (folders)
    """
    <dl id="addRow9203" class="min row1">
    <dd class="thumb">
    <img src="/images/anime/th_9203.jpg" width="75" height="100" alt="" />
    </dd>
    <dt><a href="/enciclopedia/anime/cobra_the_animation_rokunin_no_yuushi/9203">Cobra The Animation: Rokunin no Yuushi</a> <i>(Serie)</i></dt>
    <dd>Cobra es un conocido pirata espacial, pero decide cambiar su cara y borrar todas sus memorias. El ahora es un hombre normal, con un trabajo normal y una vida aburrida, pero comienza a recordar su verdadera identidad y sus aventuras comienzan de nuevo. <a href="/enciclopedia/anime/cobra_the_animation_rokunin_no_yuushi/9203">leer más.</a></dd>
    <dd class="small mgn"><a href="/descarga_directa/anime/cobra_the_animation_rokunin_no_yuushi/9203" class="srch_dd">Descargar <img width="14" height="14" src="/images/dds/download_icon.gif" alt="[DD]" /></a></dd>
    </dl>
    """
    # One <dl> block per premiere
    patron = '<dl id="[^"]+" class="min row.">(.*?)</dl>'
    matches = re.compile(patron, re.DOTALL).findall(data)

    for match in matches:
        # NOTE(review): `data` is rebound to the current <dl> fragment
        # here; safe only because the outer findall has already run.
        data = match

        # Thumbnail, title and kind ("(Serie)", ...)
        patron = '<dd class="thumb">[^<]+'
        patron += '<img src="([^"]+)"[^>]+>[^<]+'
        patron += "</dd>[^<]+"
        patron += '<dt><a href="[^"]+">([^<]+)</a>\s*<i>([^<]+)</i>\s*</dt>[^<]+'
        matches2 = re.compile(patron, re.DOTALL).findall(data)
        if len(matches2) > 0:
            scrapedtitle = matches2[0][1].strip() + " " + matches2[0][2].strip()
            scrapedthumbnail = urlparse.urljoin(url, matches2[0][0])
            scrapedplot = ""
            scrapedurl = ""
            if DEBUG:
                logger.info(
                    "title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]"
                )

            # Plot text and direct-download link for this entry
            patron = '</dt>(.*?)<dd class="small mgn"><a href="([^"]+)"'
            matches2 = re.compile(patron, re.DOTALL).findall(data)
            if len(matches2) > 0:
                # Pages are utf-8; XBMC strings here are iso-8859-1
                try:
                    scrapedplot = unicode(matches2[0][0].strip(), "utf-8").encode("iso-8859-1")
                except:
                    scrapedplot = matches2[0][0].strip()
                scrapedplot = scrapertools.htmlclean(scrapedplot)
                scrapedplot = scrapedplot.replace("\n", " ")
                scrapedplot = scrapedplot.replace("\r", " ")
                scrapedplot = scrapedplot.replace("\r\n", " ")
                scrapedurl = urlparse.urljoin(url, matches2[0][1])

            # Add to the XBMC listing
            xbmctools.addnewfolder(
                CHANNELNAME, "ddseriedetail", category, scrapedtitle, scrapedurl, scrapedthumbnail, scrapedplot
            )

    # Properties
    xbmcplugin.setPluginCategory(handle=int(sys.argv[1]), category=category)
    xbmcplugin.addSortMethod(handle=int(sys.argv[1]), sortMethod=xbmcplugin.SORT_METHOD_NONE)
    xbmcplugin.endOfDirectory(handle=int(sys.argv[1]), succeeded=True)
def ddpostdetail(params,url,category):
    """Show the videos of one mcanime direct-download post.

    params   -- request dict with url-quoted "title" and "thumbnail".
    url      -- URL of the post page.
    category -- XBMC category label.

    Adds one playable entry per video link found in the post, plus an
    entry that queues every video for download, then closes the directory.
    """
    logger.info("[mcanime.py] ddpostdetail")

    title = urllib.unquote_plus( params.get("title") )
    thumbnail = urllib.unquote_plus( params.get("thumbnail") )
    plot = unicode( xbmc.getInfoLabel( "ListItem.Plot" ), "utf-8" )

    # Download the page
    data = scrapertools.cachePage(url)
    #logger.info(data)

    # Series thumbnail from the encyclopedia
    patron = '<img src="([^"]+)" width="300".*?class="title_pic" />'
    matches = re.compile(patron,re.DOTALL).findall(data)
    if len(matches)>0:
        thumbnail = matches[0]

    # Plot - the post body, HTML-cleaned and flattened onto one line
    patron = '<div id="download_detail">(.*?)</div>'
    matches = re.compile(patron,re.DOTALL).findall(data)
    if len(matches)>0:
        plot = scrapertools.htmlclean(matches[0])
        plot = plot.replace("\r\n"," ")
        plot = plot.replace("\r"," ")
        plot = plot.replace("\n"," ")
        plot = plot.strip()

    # ------------------------------------------------------------------
    # Find the known video links in the post
    # ------------------------------------------------------------------
    listavideos = servertools.findvideos(data)

    for i, video in enumerate(listavideos, 1):
        # FIX: the old code concatenated a literal " (%d) " into the title
        # and applied the % operator to the whole string afterwards, which
        # crashed (or misformatted) whenever the title or the video name
        # contained a stray "%". Format the counter on its own instead.
        marker = " (%d) " % i
        try:
            fulltitle = unicode( title.strip() + marker + video[0], "utf-8" ).encode("iso-8859-1")
        except:
            fulltitle = title.strip() + marker + video[0]
        videourl = video[1]
        server = video[2]
        xbmctools.addnewvideo( CHANNELNAME , "play" , category , server , fulltitle , videourl , thumbnail , plot )

    # ------------------------------------------------------------------
    # "Add every video to the download list" entry.
    # NOTE(review): the literal below is mojibake of "(Añadir todos los
    # vídeos a la lista de descarga)" — left byte-identical on purpose;
    # fixing the encoding is a file-wide decision.
    # ------------------------------------------------------------------
    xbmctools.addnewvideo( CHANNELNAME , "addalltodownloadlist" , title , "" , "(AÒadir todos los vÌdeos a la lista de descarga)" , url , thumbnail , plot )

    # Close the directory
    xbmcplugin.setPluginCategory( handle=pluginhandle, category=category )
    xbmcplugin.addSortMethod( handle=pluginhandle, sortMethod=xbmcplugin.SORT_METHOD_NONE )
    xbmcplugin.endOfDirectory( handle=pluginhandle, succeeded=True )
def ddseriedetail(params,url,category):
    """Show the user-submitted download posts for one mcanime series.

    params   -- request dict with url-quoted "title", "thumbnail", "plot".
    url      -- encyclopedia page of the series.
    category -- XBMC category label.

    Adds one "ddpostdetail" folder per user post and closes the directory.
    """
    logger.info("[mcanime.py] ddseriedetail")

    title = urllib.unquote_plus( params.get("title") )
    thumbnail = urllib.unquote_plus( params.get("thumbnail") )
    plot = urllib.unquote_plus( params.get("plot") )

    # Download the page
    data = scrapertools.cachePage(url)

    # Series thumbnail from the encyclopedia
    patron = '<img src="([^"]+)" width="300".*?class="title_pic" />'
    matches = re.compile(patron,re.DOTALL).findall(data)
    if len(matches)>0:
        thumbnail = matches[0]
        logger.info("[mcanime.py] thumbnail="+thumbnail)

    # Plot (synopsis), flattened onto one line then HTML-cleaned
    patron = '<h6>Sinopsis.*?</h6>(.*?)<h6>'
    matches = re.compile(patron,re.DOTALL).findall(data)
    if len(matches)>0:
        plot = matches[0]
        plot = plot.replace("\n"," ")
        plot = plot.replace("\r"," ")
        plot = plot.replace("\r\n"," ")
        plot = plot.strip()
        # FIX: clean the normalized text. The old code did
        # htmlclean(matches[0]), silently throwing away the whitespace
        # normalization performed just above.
        plot = scrapertools.htmlclean(plot)
        logger.info("[mcanime.py] plot="+plot)

    # User contributions section
    patron = '<h6 class="m">Por los Usuarios</h6>[^<]+'
    patron += '<div id="user_actions">(.*?)</div>'
    matches = re.compile(patron,re.DOTALL).findall(data)
    if len(matches)>0:
        data = matches[0]
        #logger.info("[mcanime.py] data="+data)

    # One row per user post. Capture groups:
    # 0 = last-update text, 1 = post href, 2 = post title
    patron = '<ul class="dd_row">[^<]+'
    patron += '<li class="dd_type"><img[^>]+></li>[^<]+'
    patron += '<li class="dd_update"> <img[^>]+>([^<]+)</li>[^<]+'
    patron += '<li class="dd_title">[^<]+'
    patron += '<h5><a href="([^"]+)">([^<]+)</a></h5>'
    matches = re.compile(patron,re.DOTALL).findall(data)
    for match in matches:
        # Attributes
        scrapedtitle = match[2].strip()+" ("+match[0].strip()+")"
        scrapedurl = urlparse.urljoin(url,match[1])
        scrapedthumbnail = thumbnail
        scrapedplot = plot
        if (DEBUG):
            logger.info("title=["+scrapedtitle+"], url=["+scrapedurl+"], thumbnail=["+scrapedthumbnail+"]")
        # Add to the XBMC listing
        xbmctools.addnewfolder( CHANNELNAME , "ddpostdetail" , category , scrapedtitle , scrapedurl , scrapedthumbnail, scrapedplot )

    # Close the directory
    xbmcplugin.setPluginCategory( handle=pluginhandle, category=category )
    xbmcplugin.addSortMethod( handle=pluginhandle, sortMethod=xbmcplugin.SORT_METHOD_NONE )
    xbmcplugin.endOfDirectory( handle=pluginhandle, succeeded=True )
def home(params, url, category):
    """Render the mcanime front page: one folder per anime release plus a
    "next page" folder when the pager is present. `url` is the page to
    scrape (and the base for relative links); `category` the XBMC label."""
    logger.info("[mcanime.py] listvideos")

    html = scrapertools.cachePage(url)
    # logger.info(html)

    # One match per release. Groups: 0 background icon, 1 text before the
    # link, 2 href, 3 release name, 4 date, 5 thumbnail, 6 description,
    # 7 file info, 8 links block, 9 info block
    release_pattern = "".join([
        '<div class="release" style="background-image.url\(\'([^\']+)\'\)\;">[^<]+',
        '<h4>([^<]+)<a href="([^"]+)">([^<]+)</a> <span class="date">([^<]+)</span></h4>[^<]+',
        '<div class="rimg"><img src="([^"]+)"[^>]+></div>[^<]+',
        '<div class="rtext">(.*?)</div>[^<]+',
        '<div class="rfinfo">(.*?)</div>[^<]+',
        '<div class="rflinks">(.*?)</div>[^<]+',
        '<div class="rinfo">(.*?)</div>',
    ])
    releases = re.findall(release_pattern, html, re.DOTALL)
    scrapertools.printMatches(releases)

    for icon, prefix, link, name, date, img, text, _finfo, links, _info in releases:
        # Only anime releases carry the anime.gif background icon
        if not icon.endswith("anime.gif"):
            continue
        entry_title = name.strip() + " " + prefix.strip() + " (" + date + ")"
        entry_url = urlparse.urljoin(url, link)
        entry_thumb = urlparse.urljoin(url, img)
        entry_plot = scrapertools.htmlclean(text)
        if DEBUG:
            logger.info("title=[" + entry_title + "], url=[" + entry_url + "], thumbnail=[" + entry_thumb + "]")
        # Add to the XBMC listing (extra = the links block)
        xbmctools.addnewfolderextra(CHANNELNAME, "homedetail", category, entry_title, entry_url, entry_thumb, entry_plot, links)

    # "Next page" marker
    pager = re.findall('<span class="next"><a href="([^"]+)">Anteriores</a>...</span>', html, re.DOTALL)
    scrapertools.printMatches(pager)
    if len(pager) > 0:
        next_url = urlparse.urljoin(url, pager[0])
        xbmctools.addnewfolder(CHANNELNAME, "home", category, "Página siguiente", next_url, "", "")

    # Properties
    handle = int(sys.argv[1])
    xbmcplugin.setPluginCategory(handle=handle, category=category)
    xbmcplugin.addSortMethod(handle=handle, sortMethod=xbmcplugin.SORT_METHOD_NONE)
    xbmcplugin.endOfDirectory(handle=handle, succeeded=True)
def novedades(item):
    """Scrape the documentalesatonline blog front page: returns a list of
    "findvideos" Items, one per post, plus a "next page" Item when the
    older-entries pager link is present."""
    logger.info("[documentalesatonline.py] novedades")

    # Fetch the page
    html = scrapertools.cachePage(item.url)
    #logger.info(html)

    # One match per post: 0 href, 1 title, 2 thumbnail, 3 description
    post_pattern = ("<div class='post hentry'>.*?"
                    "<h3 class='post-title entry-title'>[^<]+"
                    "<a href='([^']+)'>([^<]+)</a>.*?"
                    '<img.*?src="([^"]+)"[^>]+>(.*?)<div')
    posts = re.findall(post_pattern, html, re.DOTALL)
    scrapertools.printMatches(posts)

    itemlist = []
    for link, name, thumb, desc in posts:
        entry_url = urlparse.urljoin(item.url, link)
        entry_plot = scrapertools.htmlclean(desc)
        if (DEBUG):
            logger.info("title=[" + name + "], url=[" + entry_url + "], thumbnail=[" + thumb + "]")
        itemlist.append( Item(channel=CHANNELNAME, action="findvideos", title=name, url=entry_url, thumbnail=thumb, plot=entry_plot))

    # "Next page" (older entries) link
    pager_pattern = "<a class='blog-pager-older-link' href='([^']+)' id='Blog1_blog-pager-older-link' title='Entradas antiguas'>Entradas antiguas</a>"
    pager = re.findall(pager_pattern, html, re.DOTALL)
    scrapertools.printMatches(pager)
    for older in pager:
        next_url = urlparse.urljoin(item.url, older)
        if (DEBUG):
            logger.info("title=[Página siguiente], url=[" + next_url + "], thumbnail=[]")
        itemlist.append( Item(channel=CHANNELNAME, action="novedades", title="Página siguiente", url=next_url, thumbnail="", plot=""))

    return itemlist
def detail(params,url,category):
    """Detail view for a dibujosanimadosgratis.net post: extract the post
    body, use it as plot, take its first image as thumbnail, and add one
    playable entry per recognized video link found inside it."""
    logger.info("[dibujosanimadosgratis.py] detail")

    # Incoming parameters
    title = urllib.unquote_plus( params.get("title") )
    thumbnail = urllib.unquote_plus( params.get("thumbnail") )
    plot = urllib.unquote_plus( params.get("plot") )

    # Fetch the detail page and narrow it down to the post body
    data = scrapertools.cachePage(url)
    post = re.findall('<div class="post">(.*?<div class="postMeta">.*?<div class="postContent">.*?)</div>', data, re.DOTALL)
    if post:
        data = post[0]
        logger.info(data)

    # Plot: the cleaned post text on a single line
    scrapedplot = scrapertools.htmlclean(data).replace("\n"," ").replace("\r"," ")

    # Thumbnail: first image of the post, if any
    images = re.findall('<img src="([^"]+)"', data, re.DOTALL)
    scrapedthumbnail = images[0] if images else ""

    # ------------------------------------------------------------------
    # Known video links inside the post body
    # ------------------------------------------------------------------
    for video in servertools.findvideos(data):
        xbmctools.addnewvideo( CHANNELNAME , "play" , category , video[2] , title.strip() + " - " + video[0] , video[1] , scrapedthumbnail , scrapedplot )

    # Label (top-right) and directory close
    xbmcplugin.setPluginCategory( handle=pluginhandle, category=category )
    xbmcplugin.addSortMethod( handle=pluginhandle, sortMethod=xbmcplugin.SORT_METHOD_NONE )
    xbmcplugin.endOfDirectory( handle=pluginhandle, succeeded=True )
def novedades(params,url,category):
    """List the newest documentalesatonline posts as "detail" folders,
    append a "next page" folder per older-entries pager link, honor the
    single-channel option and close the directory."""
    logger.info("[documentalesatonline.py] novedades")

    # Fetch the page
    data = scrapertools.cachePage(url)
    #logger.info(data)

    # One match per post: 0 href, 1 title, 2 thumbnail, 3 description
    post_pattern = ("<div class='post hentry'>.*?"
                    "<h3 class='post-title entry-title'>[^<]+"
                    "<a href='([^']+)'>([^<]+)</a>.*?"
                    '<img.*?src="([^"]+)"[^>]+>(.*?)<div')
    posts = re.findall(post_pattern, data, re.DOTALL)
    scrapertools.printMatches(posts)

    for link, name, thumb, desc in posts:
        post_url = urlparse.urljoin(url, link)
        post_plot = scrapertools.htmlclean(desc)
        # Debugging
        if (DEBUG):
            logger.info("scrapedtitle="+name)
            logger.info("scrapedurl="+post_url)
            logger.info("scrapedthumbnail="+thumb)
        xbmctools.addnewfolder( CHANNELNAME , "detail" , category , name , post_url , thumb , post_plot)

    # "Next page" (older entries) link
    pager_pattern = "<a class='blog-pager-older-link' href='([^']+)' id='Blog1_blog-pager-older-link' title='Entradas antiguas'>Entradas antiguas</a>"
    pager = re.findall(pager_pattern, data, re.DOTALL)
    scrapertools.printMatches(pager)
    for older in pager:
        next_url = urlparse.urljoin(url, older)
        # Debugging
        if (DEBUG):
            logger.info("scrapedtitle=Página siguiente")
            logger.info("scrapedurl="+next_url)
            logger.info("scrapedthumbnail=")
        xbmctools.addnewfolder( CHANNELNAME , "novedades" , category , "Página siguiente" , next_url , "" , "")

    # Single-channel mode extras
    if config.getSetting("singlechannel")=="true":
        xbmctools.addSingleChannelOptions(params,url,category)

    # Label (top-right) and directory close
    xbmcplugin.setPluginCategory( handle=pluginhandle, category=category )
    xbmcplugin.addSortMethod( handle=pluginhandle, sortMethod=xbmcplugin.SORT_METHOD_NONE )
    xbmcplugin.endOfDirectory( handle=pluginhandle, succeeded=True )
def buscamirrors(itemlist,patronvideos,data,thumbnail,plot):
    """Append one "findvideos" Item to itemlist for every (url, label)
    pair that patronvideos captures in data; thumbnail and plot are shared
    by all appended Items."""
    logger.info("patron="+patronvideos)
    found = re.findall(patronvideos, data, re.DOTALL)
    scrapertools.printMatches(found)
    for link, label in found:
        clean = scrapertools.htmlclean(label.replace("&#8211;","-")).strip()
        itemlist.append( Item(channel=CHANNELNAME, action="findvideos" , title=clean , url=link, thumbnail=thumbnail, plot=plot))
def extraevideos(patronvideos,data,category,thumbnail,plot):
    """Add one playable "Directo" entry (via xbmctools) for every
    (url, label) pair captured by patronvideos in data."""
    logger.info("patron="+patronvideos)
    found = re.findall(patronvideos, data, re.DOTALL)
    scrapertools.printMatches(found)
    for link, label in found:
        clean = scrapertools.htmlclean(label.replace("&#8211;","-")).strip()
        xbmctools.addnewvideo( CHANNELNAME , "play" , category , "Directo" , clean , link , thumbnail , plot )
def novedades(item):
    """List the newest xhamster videos.

    item.url points at the listing page. Returns a list of Items: one
    "findvideos" entry per video plus a "next page" entry when the pager
    is present.
    """
    logger.info("[xhamster.py] novedades")

    # Download the page
    data = scrapertools.cachePage(item.url)
    #logger.info(data)

    # Each entry on the "new" section looks like:
    '''
    <a href="/movies/496069/the_cheerleader.html" class="hRotator"> <img src="http://st5.xhamster.com/t/069/2_496069.jpg" height="120" width="160"> <img class="hSprite" src="http://static.xhamster.com/images/spacer.gif" sprite="http://st5.xhamster.com/t/069/s_496069.jpg" id="496069" onmouseover="hRotator.start2(this);" height="120" width="160"> </a> <div class="moduleFeaturedTitle"> <a href="/movies/496069/the_cheerleader.html">The Cheerleader</a> </div> <div class="moduleFeaturedDetails">Runtime: 35m51s<br><span style="color: Green;">
    '''
    # Capture groups: 0 = relative video href, 1 = thumbnail URL,
    # 2 = video title, 3 = runtime text
    patronvideos = '<a href="(/movies/[^"]+.html)"[^>]*?>[^<]*?'
    patronvideos += '<img src=\'([^\']+.xhamster.com[^\']+)\'[^>]+>[^<]*?'
    patronvideos += '<img[^<]*?>[^<]*?</a>[^<]*?'
    patronvideos += '<div[^<]*?>[^<]*?'
    patronvideos += '<a href="/movies/[^"]+.html"[^>]*?>([^<]+)</a>[^<]*?'
    patronvideos += '</div[^<]*?>[^<]*?'
    patronvideos += '<div[^<]*?>Runtime: ([^<]+)<'
    matches = re.compile(patronvideos,re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)

    itemlist = []
    for match in matches:
        # On-screen title: "<name> [<runtime>]"
        scrapedtitle = match[2] + " [" + match[3] + "]"
        scrapedurl = urlparse.urljoin(item.url,match[0])
        # Thumbnail URLs may contain spaces; escape them for XBMC
        scrapedthumbnail = match[1].replace(" ", "%20")
        scrapedplot = scrapertools.htmlclean(match[2].strip())
        if (DEBUG):
            logger.info("title=["+scrapedtitle+"], url=["+scrapedurl+"], thumbnail=["+scrapedthumbnail+"]")
        # Add to the XBMC listing
        itemlist.append( Item(channel=CHANNELNAME, action="findvideos", title=scrapedtitle , url=scrapedurl , thumbnail=scrapedthumbnail , plot=scrapedplot , folder=True) )

    # Pager: <A HREF="/new/2.html">Next</A> (matched case-insensitively)
    patronvideos = '<a href="(\/new\/[^\.]+\.html)"[^>]*?>Next[^<]*?<\/a>'
    matches = re.compile(patronvideos,re.DOTALL | re.IGNORECASE).findall(data)
    scrapertools.printMatches(matches)
    if len(matches)>0:
        scrapedurl = urlparse.urljoin(item.url,matches[0])
        logger.info("[xhamster.py] " + scrapedurl)
        # The leading "!" in the title presumably keeps this entry sorted
        # first in the listing — TODO confirm
        itemlist.append( Item(channel=CHANNELNAME, action="novedades", title="!Página siguiente" , url=scrapedurl , folder=True) )

    return itemlist
def home(params, url, category): logger.info("[mcanime.py] listvideos") # Descarga la p·gina data = scrapertools.cachePage(url) #logger.info(data) # Extrae las entradas (carpetas) patronvideos = '<div class="release" style="background-image.url\(\'([^\']+)\'\)\;">[^<]+' patronvideos += '<h4>([^<]+)<a href="([^"]+)">([^<]+)</a> <span class="date">([^<]+)</span></h4>[^<]+' patronvideos += '<div class="rimg"><img src="([^"]+)"[^>]+></div>[^<]+' patronvideos += '<div class="rtext">(.*?)</div>[^<]+' patronvideos += '<div class="rfinfo">(.*?)</div>[^<]+' patronvideos += '<div class="rflinks">(.*?)</div>[^<]+' patronvideos += '<div class="rinfo">(.*?)</div>' matches = re.compile(patronvideos, re.DOTALL).findall(data) scrapertools.printMatches(matches) for match in matches: if match[0].endswith("anime.gif"): scrapedtitle = match[3].strip() + " " + match[1].strip( ) + " (" + match[4] + ")" scrapedurl = urlparse.urljoin(url, match[2]) scrapedthumbnail = urlparse.urljoin(url, match[5]) scrapedplot = scrapertools.htmlclean(match[6]) scrapedextra = match[8] if (DEBUG): logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]") # AÒade al listado de XBMC xbmctools.addnewfolderextra(CHANNELNAME, "homedetail", category, scrapedtitle, scrapedurl, scrapedthumbnail, scrapedplot, scrapedextra) # Extrae la marca de siguiente p·gina patronvideos = '<span class="next"><a href="([^"]+)">Anteriores</a>...</span>' matches = re.compile(patronvideos, re.DOTALL).findall(data) scrapertools.printMatches(matches) if len(matches) > 0: scrapedtitle = "P·gina siguiente" scrapedurl = urlparse.urljoin(url, matches[0]) scrapedthumbnail = "" scrapedplot = "" xbmctools.addnewfolder(CHANNELNAME, "home", category, scrapedtitle, scrapedurl, scrapedthumbnail, scrapedplot) # Propiedades xbmcplugin.setPluginCategory(handle=int(sys.argv[1]), category=category) xbmcplugin.addSortMethod(handle=int(sys.argv[1]), sortMethod=xbmcplugin.SORT_METHOD_NONE) 
xbmcplugin.endOfDirectory(handle=int(sys.argv[1]), succeeded=True)
def getnovedades(params,url,category):
    """Build the tubehentai "new videos" listing.

    Scrapes the page at *url* for video links and thumbnails (the <img>
    alt text is the title) and returns playable Items, plus a paginator
    entry when a "Next" link exists.
    """
    logger.info("[tubehentai.py] getnovedades")

    # Fetch the listing page (cached).
    data = scrapertools.cachePage(url)

    # Entry = video link followed by its thumbnail.
    entry_pattern = ('<a href="(http://www.tubehentai.com/videos/[^"]+)"[^>]*?>[^<]*?'
                     '<img src="(http://media.tubehentai.com/thumbs/[^"]+)" alt="([^"]+)"[^>]+>[^<]*?</a>')
    entries = re.compile(entry_pattern, re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(entries)

    itemlist = []
    for video_url, thumb, name in entries:
        clean_thumb = thumb.replace(" ", "%20")
        clean_plot = scrapertools.htmlclean(name.strip())
        if (DEBUG):
            logger.info("title=[" + name + "], url=[" + video_url + "], thumbnail=[" + clean_thumb + "]")
        # Add the playable entry to the XBMC listing.
        itemlist.append(
            Item(channel=CHANNELNAME, action="play", title=name,
                 url=video_url, thumbnail=clean_thumb, plot=clean_plot,
                 folder=False))

    # Pager: <a href='page2.html' class='next'>Next »</a>
    next_links = re.compile('<a href=\'(page[^\.]+\.html)\'[^>]*?>Next[^<]*?<\/a>',
                            re.DOTALL).findall(data)
    scrapertools.printMatches(next_links)
    if len(next_links) > 0:
        next_url = urlparse.urljoin(url, "/" + next_links[0])
        logger.info("[tubehentai.py] " + next_url)
        itemlist.append(
            Item(channel=CHANNELNAME, action="novedades",
                 title="!Página siguiente", url=next_url, folder=True))

    return itemlist
def temporadas(item): logger.info("[cinetube.py] temporadas") itemlist = [] # Descarga la página data = scrapertools.cachePage(item.url) # Busca el argumento patronvideos = '<div class="ficha_des des_move">(.*?)</div>' matches = re.compile(patronvideos, re.DOTALL).findall(data) if len(matches) > 0: data = matches[0] scrapedplot = scrapertools.htmlclean(matches[0]) logger.info("plot actualizado en detalle") else: logger.info("plot no actualizado en detalle") # Busca las temporadas patronvideos = '<li><h2><a href="([^"]+)">([^<]+)<' matches = re.compile(patronvideos, re.DOTALL).findall(data) if DEBUG: scrapertools.printMatches(matches) for match in matches: scrapedtitle = match[1].strip() scrapedurl = urlparse.urljoin(item.url, match[0]) scrapedthumbnail = item.thumbnail if (DEBUG): logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]") itemlist.append( Item(channel=CHANNELNAME, action="episodios", title=scrapedtitle, url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot)) # Una trampa, si la serie enlaza no con la temporada sino con la lista de episodios, se resuelve aquí if len(itemlist) == 0: itemlist = episodios(item) # Si la serie lleva directamente a la página de detalle de un episodio (suele pasar en novedades) se detecta aquí if len(itemlist) == 0: itemlist.extend(findvideos(item)) return itemlist
def episodelist(item): logger.info("[seriespepito.py] list") # Descarga la página data = scrapertools.cachePage(item.url) #logger.info(data) # Extrae el argumento patron = '<div class="contenido" id="noticia">.*?<span[^>]+>(.*?)</div>' matches = re.compile(patron,re.DOTALL).findall(data) if len(matches)>0: scrapedplot = matches[0] scrapedplot = scrapertools.htmlclean(scrapedplot).strip() # Unas series están en utf-8, otras en iso-8859-1. Esta doble conversión funciona :) try: intermedia = unicode( scrapedplot, "utf-8" , errors="replace" ).encode("iso-8859-1") intermedia = unicode( intermedia, "iso-8859-1" , errors="replace" ).encode("utf-8") #print item.title+" encoding 1" scrapedplot = intermedia except: #print item.title+" encoding 2" scrapedplot = unicode( scrapedplot, "iso-8859-1" , errors="replace" ).encode("utf-8") item.plot = scrapedplot else: scrapedplot = "" # Extrae los capítulos patron = "<li class='li_capitulo'><a class='capitulo1' href='([^']+)' title='[^']+'>([^<]+)</a> <img src='([^']+)'[^>]+></li>" matches = re.compile(patron,re.DOTALL).findall(data) scrapertools.printMatches(matches) itemlist = [] for match in matches: scrapedtitle = match[1] + " ["+match[2][49:-4]+"]" scrapedurl = match[0] scrapedthumbnail = item.thumbnail #scrapedplot = "" if (DEBUG): logger.info("title=["+scrapedtitle+"], url=["+scrapedurl+"], thumbnail=["+scrapedthumbnail+"]") # Ajusta el encoding a UTF-8 scrapedtitle = unicode( scrapedtitle, "iso-8859-1" , errors="replace" ).encode("utf-8") itemlist.append( Item(channel=CHANNELNAME, action="findvideos" , title=scrapedtitle , url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot, show=item.show)) return itemlist
def getcapitulos(params,url,category): logger.info("[anifenix.py] getcapitulos") title = urllib.unquote_plus( params.get("title") ) thumbnail = urllib.unquote_plus( params.get("thumbnail") ) plot = urllib.unquote_plus( params.get("plot") ) # ------------------------------------------------------------------------------------ # Descarga la página # ------------------------------------------------------------------------------------ data = scrapertools.cachePage(url) #logger.info(data) # ------------------------------------------------------------------------------------ # Busca el argumento # ------------------------------------------------------------------------------------ patronvideos = '<div class="ficha_des">(.*?)</div>' matches = re.compile(patronvideos,re.DOTALL).findall(data) if len(matches)>0: plot = scrapertools.htmlclean(matches[0]) logger.info("plot actualizado en detalle"); else: logger.info("plot no actualizado en detalle"); # ------------------------------------------------------------------------------------ # Busca los enlaces a los mirrors, o a los capítulos de las series... # ------------------------------------------------------------------------------------ ''' <a href="video-hentai-2349.html"><strong>Episodio 01(sub Ingles) sorry..</strong></a> ''' patronvideos = '<a href="(video-hentai[^"]+)"><strong>([^<]+)</strong></a>' matches = re.compile(patronvideos,re.DOTALL).findall(data) itemlist = [] for match in matches: # Titulo scrapedtitle = match[1] scrapedurl = urlparse.urljoin(url,match[0]) scrapedthumbnail = thumbnail scrapedplot = plot if (DEBUG): logger.info("title=["+scrapedtitle+"], url=["+scrapedurl+"], thumbnail=["+scrapedthumbnail+"]") # Añade al listado de XBMC itemlist.append( Item(channel=CHANNELNAME, action="detail", title=scrapedtitle , url=scrapedurl , thumbnail=scrapedthumbnail , plot=scrapedplot , folder=True) ) return itemlist
def getnovedades(params,url,category): logger.info("[myhentaitube.py] getnovedades") # Descarga la pagina data = scrapertools.cachePage(url) #logger.info(data) # Extrae las entradas # seccion novedades ''' <a href="/index.php?option=com_content&view=article&id=29:ai-shimai-hentai-movie-anime-&catid=1:movies&Itemid=2"> <img src="/images/stories/ai_shimai_dvd copy.gif" border="0" /> </a> ''' #patronvideos = '<p style="text-align: center;">.*?' patronvideos = '<a href="(/index.php[^"]+view=article[^"]+id=[^:]([^"]+)catid=1+[^"]+)">[^<]*?' patronvideos += '<img src="([^"]+)".*?</a>' matches = re.compile(patronvideos,re.DOTALL).findall(data) if DEBUG: scrapertools.printMatches(matches) itemlist = [] for match in matches: # Titulo scrapedtitle = match[1] scrapedurl = urlparse.urljoin(url,match[0]) scrapedthumbnail = urlparse.urljoin(url,match[2]).replace(" ", "%20") scrapedplot = scrapertools.htmlclean(match[1].strip()) if (DEBUG): logger.info("title=["+scrapedtitle+"], url=["+scrapedurl+"], thumbnail=["+scrapedthumbnail+"]") # Añade al listado de XBMC itemlist.append( Item(channel=CHANNELNAME, action="capitulos", title=scrapedtitle , url=scrapedurl , thumbnail=scrapedthumbnail , plot=scrapedplot , folder=True) ) # Extrae el paginador patronvideos = '<a href="(\/index.php\?pageNum[^"]+)">[^<]+</a></span>' matches = re.compile(patronvideos,re.DOTALL).findall(data) scrapertools.printMatches(matches) if len(matches)>0: scrapedurl = urlparse.urljoin(url,matches[0]) itemlist.append( Item(channel=CHANNELNAME, action="novedades", title="!P�gina siguiente" , url=scrapedurl , folder=True) ) return itemlist
def getcapitulos(params,url,category): logger.info("[myhentaitube.py] getcapitulos") title = urllib.unquote_plus( params.get("title") ) thumbnail = urllib.unquote_plus( params.get("thumbnail") ) plot = urllib.unquote_plus( params.get("plot") ) # Descarga la pagina data = scrapertools.cachePage(url) #logger.info(data) # Busca el argumento patronvideos = '<div class="ficha_des">(.*?)</div>' matches = re.compile(patronvideos,re.DOTALL).findall(data) if len(matches)>0: plot = scrapertools.htmlclean(matches[0]) logger.info("plot actualizado en detalle"); else: logger.info("plot no actualizado en detalle"); # Busca los enlaces a los mirrors, o a los capitulos de las series... ''' <h3 style="text-align: center;"> <a href="/index.php?option=com_content&view=article&id=8&Itemid=2">CAPITULO 1 </a></h3> ''' patronvideos = '<a href="(/index.php[^"]+view=article[^"]+id=[^"]+)">([^<]+)<' matches = re.compile(patronvideos,re.DOTALL).findall(data) itemlist = [] for match in matches: # Titulo scrapedtitle = match[1] scrapedurl = urlparse.urljoin(url,match[0]) scrapedthumbnail = thumbnail scrapedplot = plot if (DEBUG): logger.info("title=["+scrapedtitle+"], url=["+scrapedurl+"], thumbnail=["+scrapedthumbnail+"]") # Añade al listado de XBMC itemlist.append( Item(channel=CHANNELNAME, action="play", title=scrapedtitle , url=scrapedurl , thumbnail=scrapedthumbnail , plot=scrapedplot , folder=False) ) return itemlist
def novedades(item): logger.info("[documentalesatonline.py] novedades") # Descarga la página data = scrapertools.cachePage(item.url) #logger.info(data) # Extrae las entradas (carpetas) patronvideos = "<div class='post hentry'>.*?" patronvideos += "<h3 class='post-title entry-title'>[^<]+" patronvideos += "<a href='([^']+)'>([^<]+)</a>.*?" patronvideos += '<img.*?src="([^"]+)"[^>]+>(.*?)<div' matches = re.compile(patronvideos,re.DOTALL).findall(data) scrapertools.printMatches(matches) itemlist = [] for match in matches: scrapedtitle = match[1] scrapedurl = urlparse.urljoin(item.url,match[0]) scrapedthumbnail = match[2] scrapedplot = scrapertools.htmlclean(match[3]) if (DEBUG): logger.info("title=["+scrapedtitle+"], url=["+scrapedurl+"], thumbnail=["+scrapedthumbnail+"]") itemlist.append( Item(channel=CHANNELNAME, action="findvideos" , title=scrapedtitle , url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot)) # Página siguiente patronvideos = "<a class='blog-pager-older-link' href='([^']+)' id='Blog1_blog-pager-older-link' title='Entradas antiguas'>Entradas antiguas</a>" matches = re.compile(patronvideos,re.DOTALL).findall(data) scrapertools.printMatches(matches) for match in matches: scrapedtitle = "Página siguiente" scrapedurl = urlparse.urljoin(item.url,match) scrapedthumbnail = "" scrapedplot = "" if (DEBUG): logger.info("title=["+scrapedtitle+"], url=["+scrapedurl+"], thumbnail=["+scrapedthumbnail+"]") itemlist.append( Item(channel=CHANNELNAME, action="novedades" , title=scrapedtitle , url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot)) return itemlist
def temporadas(item): logger.info("[cinetube.py] temporadas") itemlist = [] # Descarga la página data = scrapertools.cachePage(item.url) # Busca el argumento patronvideos = '<div class="ficha_des des_move">(.*?)</div>' matches = re.compile(patronvideos,re.DOTALL).findall(data) if len(matches)>0: data = matches[0] scrapedplot = scrapertools.htmlclean(matches[0]) logger.info("plot actualizado en detalle"); else: logger.info("plot no actualizado en detalle"); # Busca las temporadas patronvideos = '<li><h2><a href="([^"]+)">([^<]+)<' matches = re.compile(patronvideos,re.DOTALL).findall(data) if DEBUG: scrapertools.printMatches(matches) for match in matches: scrapedtitle = match[1].strip() scrapedurl = urlparse.urljoin(item.url,match[0]) scrapedthumbnail = item.thumbnail if (DEBUG): logger.info("title=["+scrapedtitle+"], url=["+scrapedurl+"], thumbnail=["+scrapedthumbnail+"]") itemlist.append( Item(channel=CHANNELNAME, action="episodios", title=scrapedtitle , url=scrapedurl , thumbnail=scrapedthumbnail, plot=scrapedplot) ) # Una trampa, si la serie enlaza no con la temporada sino con la lista de episodios, se resuelve aquí if len(itemlist)==0: itemlist = episodios(item) # Si la serie lleva directamente a la página de detalle de un episodio (suele pasar en novedades) se detecta aquí if len(itemlist)==0: itemlist.extend(findvideos(item)) return itemlist
def findvideos(item):
    """Resolve the mirror links of a cinetube title into playable Items.

    Refreshes the plot and thumbnail from the detail page, then follows
    every "tit_opts" mirror link, fetches the intermediate page and scans
    it with servertools.findvideos() for the actual video URLs.
    """
    logger.info("[cinetube.py] findvideos")

    url = item.url
    title = item.title
    thumbnail = item.thumbnail
    plot = item.plot

    # Fetch the detail page (cached).
    data = scrapertools.cachePage(url)

    # Refresh the plot from the description block when available.
    found_plot = re.compile('<div class="ficha_des">(.*?)</div>', re.DOTALL).findall(data)
    if len(found_plot) > 0:
        plot = scrapertools.htmlclean(found_plot[0])
        logger.info("plot actualizado en detalle")
    else:
        logger.info("plot no actualizado en detalle")

    # Refresh the thumbnail likewise.
    found_thumb = re.compile('<div class="ficha_img pelicula_img">[^<]+'
                             '<img src="([^"]+)"', re.DOTALL).findall(data)
    if len(found_thumb) > 0:
        thumbnail = found_thumb[0]
        logger.info("thumb actualizado en detalle")
    else:
        logger.info("thumb no actualizado en detalle")

    # Mirror block: link + "Mirror n: <server>" text + quality/language line.
    mirror_pattern = ('<div class="tit_opts"><a href="([^"]+)".*?>[^<]+'
                      "<p>([^<]+)</p>[^<]+"
                      "<p><span>([^<]+)</span>")
    mirrors = re.compile(mirror_pattern, re.DOTALL).findall(data)

    itemlist = []
    for href, option, info in mirrors:
        logger.info("Encontrado iframe mirrors " + href)

        # Fetch the intermediate mirror page with a browser User-Agent.
        mirror = urlparse.urljoin(url, href.replace(" ", "%20"))
        req = urllib2.Request(mirror)
        req.add_header("User-Agent", "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3")
        response = urllib2.urlopen(req)
        mirror_html = response.read()
        response.close()

        # Let servertools pick the actual video links out of the page.
        for video in servertools.findvideos(mirror_html):
            label = title.strip() + " " + option + " " + info + " " + video[0]
            itemlist.append(
                Item(channel=CHANNELNAME, action="play", title=label,
                     url=video[1], thumbnail=item.thumbnail, plot=item.plot,
                     server=video[2], folder=False))

    return itemlist
def getnovedades(params,url,category):
    """Build the anifenix front-page listing: one folder Item per series.

    Each DWLayout table block holds a cover image and a titled link to the
    series description page; the "Estilo59" paragraph after it becomes the
    plot.
    """
    logger.info("[anifenix.py] getnovedades")

    # Fetch the front page (cached).
    data = scrapertools.cachePage(url)

    # Series block: cover image, then the "Estilo57" titled link, then the
    # "Estilo59" description paragraph.
    pattern = ('<!--DWLayoutTable-->.*?<!--DWLayoutTable-->.*?<img src="([^"]+)".*?'
               '<span class="Estilo57"[^>]+><a href="([^"]+)">([^<]+)</a></span>.*?'
               '<p class="Estilo59">(.*?)</p>')
    entries = re.compile(pattern, re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(entries)

    itemlist = []
    for cover, href, name, description in entries:
        series_url = urlparse.urljoin(url, href)
        # Cover paths start with "../" — drop the leading ".." before joining.
        cover_url = urlparse.urljoin(url, cover[2:])
        series_plot = scrapertools.htmlclean(description.strip())
        if (DEBUG):
            logger.info("title=[" + name + "], url=[" + series_url + "], thumbnail=[" + cover_url + "]")
        # Add the series folder to the XBMC listing.
        itemlist.append(
            Item(channel=CHANNELNAME, action="capitulos", title=name,
                 url=series_url, thumbnail=cover_url, plot=series_plot,
                 folder=True))

    # Paginator (index.php?pageNum=... style links).
    pager = re.compile('<a href="(\/index.php\?pageNum[^"]+)">[^<]+</a></span>',
                       re.DOTALL).findall(data)
    scrapertools.printMatches(pager)
    if len(pager) > 0:
        next_url = urlparse.urljoin(url, pager[0])
        itemlist.append(
            Item(channel=CHANNELNAME, action="novedades",
                 title="!Página siguiente", url=next_url, folder=True))

    return itemlist
def series(item):
    """List cinetube series cards.

    One Item per <li> card; the title is annotated with the optional
    season/episode text and the names of the hosts whose icons appear in
    the card, with "megavideo" runs collapsed and "descarga directa"
    abbreviated to "DD".
    """
    logger.info("[cinetube.py] series")
    itemlist = []

    # Fetch the listing page (cached).
    data = scrapertools.cachePage(item.url)
    logger.info("Pagina de %d caracteres" % len(data))

    # Card = poster link, host-icon strip, title paragraph and an optional
    # season/episode paragraph.
    card_pattern = ('<li>[^<]+'
                    '<a href="([^"]+)"><img src="([^"]+)"[^>]*></a>[^<]+'
                    '<div class="icos_lg">(.*?)</div>[^<]+'
                    '<p class="tit_ficha">(.*?)</p>[^<]+'
                    '(?:<p class="tem_fich">([^<]+)</p>)?')
    cards = re.compile(card_pattern, re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(cards)

    for href, poster, icons, name, season in cards:
        # Season text is always appended (empty string when the optional
        # group did not match), mirroring the original behaviour.
        label = name.strip() + " " + season

        # Collect host names from the icon alt attributes; an empty alt is
        # treated as megavideo.
        host_names = re.compile('<img.*?alt="([^"]*)"', re.DOTALL).findall(icons)
        hosts = ""
        for host in host_names:
            logger.info("matchconector=" + host)
            if host == "":
                host = "megavideo"
            hosts = hosts + host + "/"
        if len(host_names) > 0:
            label = label + " (" + hosts[:-1] + ")"

        # Collapse duplicated megavideo entries (three passes handle the
        # repeated cases) and abbreviate direct downloads.
        label = label.replace("megavideo/megavideo", "megavideo")
        label = label.replace("megavideo/megavideo", "megavideo")
        label = label.replace("megavideo/megavideo", "megavideo")
        label = label.replace("descarga directa", "DD")
        label = scrapertools.htmlclean(label)
        label = scrapertools.entityunescape(label)

        show_url = urlparse.urljoin(item.url, href)
        if (DEBUG):
            logger.info("title=[" + label + "], url=[" + show_url + "], thumbnail=[" + poster + "]")
        itemlist.append(
            Item(channel=CHANNELNAME, action="temporadas", title=label,
                 url=show_url, thumbnail=poster, plot="", folder=True))

    # Pager: <li class="navs"><a class="pag_next" href="..."></a></li>
    pager = re.compile('<li class="navs"><a class="pag_next" href="([^"]+)"></a></li>',
                       re.DOTALL).findall(data)
    scrapertools.printMatches(pager)
    if len(pager) > 0:
        next_url = urlparse.urljoin(item.url, pager[0])
        itemlist.append(
            Item(channel=CHANNELNAME, action="series",
                 title="!Página siguiente", url=next_url, folder=True))

    return itemlist
def findvideos(item):
    """Resolve the mirror pages of a cinetube title into playable Items.

    Variant used for series pages: the mirror pattern tolerates markup
    inside the option paragraphs and the final title is HTML-cleaned.
    """
    logger.info("[cinetube.py] findvideos")

    url = item.url
    title = item.title
    thumbnail = item.thumbnail
    plot = item.plot

    # Fetch the detail page (cached).
    data = scrapertools.cachePage(url)

    # Refresh the plot from the description block when available.
    found_plot = re.compile('<div class="ficha_des">(.*?)</div>', re.DOTALL).findall(data)
    if len(found_plot) > 0:
        plot = scrapertools.htmlclean(found_plot[0])
        logger.info("plot actualizado en detalle")
    else:
        logger.info("plot no actualizado en detalle")

    # Refresh the thumbnail likewise.
    found_thumb = re.compile('<div class="ficha_img pelicula_img">[^<]+'
                             '<img src="([^"]+)"', re.DOTALL).findall(data)
    if len(found_thumb) > 0:
        thumbnail = found_thumb[0]
        logger.info("thumb actualizado en detalle")
    else:
        logger.info("thumb no actualizado en detalle")

    # Mirror block: link + option text + quality/language line, e.g.
    # <div class="tit_opts"><a href="/series/.../212498.html">
    # <p>Opción 1: Ver online en Megavideo ...</p> <p><span>IDIOMA: SUB</span>
    mirror_pattern = ('<div class="tit_opts"><a href="([^"]+)"[^>]*>[^<]+'
                      '<p>(.*?)</p>[^<]+'
                      '<p><span>(.*?)</span>')
    mirrors = re.compile(mirror_pattern, re.DOTALL).findall(data)

    itemlist = []
    for href, option, info in mirrors:
        logger.info("Encontrado iframe mirrors " + href)

        # Fetch the intermediate mirror page with a browser User-Agent.
        mirror = urlparse.urljoin(url, href.replace(" ", "%20"))
        req = urllib2.Request(mirror)
        req.add_header('User-Agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3')
        response = urllib2.urlopen(req)
        mirror_html = response.read()
        response.close()

        # Let servertools pick the actual video links out of the page.
        for video in servertools.findvideos(mirror_html):
            label = title.strip() + " " + option + " " + info + " " + video[0]
            label = scrapertools.htmlclean(label)
            itemlist.append(
                Item(channel=CHANNELNAME, action="play", title=label,
                     url=video[1], thumbnail=item.thumbnail, plot=item.plot,
                     server=video[2], folder=False))

    return itemlist
def novedades(item):
    """Scrape the xhamster "new videos" page referenced by item.url.

    Returns one Item per video (action="findvideos") and, when a Next
    link is present, a final Item pointing at the following page.
    """
    logger.info("[xhamster.py] novedades")

    # Download the listing page.
    data = scrapertools.cachePage(item.url)
    #logger.info(data)

    # Sample of the markup being matched:
    '''
    <a href="/movies/496069/the_cheerleader.html" class="hRotator"> <img src="http://st5.xhamster.com/t/069/2_496069.jpg" height="120" width="160"> <img class="hSprite" src="http://static.xhamster.com/images/spacer.gif" sprite="http://st5.xhamster.com/t/069/s_496069.jpg" id="496069" onmouseover="hRotator.start2(this);" height="120" width="160"> </a> <div class="moduleFeaturedTitle"> <a href="/movies/496069/the_cheerleader.html">The Cheerleader</a> </div> <div class="moduleFeaturedDetails">Runtime: 35m51s<br><span style="color: Green;">
    '''
    # Groups: 1=relative url, 2=thumbnail, 3=title, 4=runtime.
    #patronvideos = '<p style="text-align: center;">.*?'
    patronvideos = '<a href="(/movies/[^"]+.html)"[^>]*?>[^<]*?'
    patronvideos += '<img src=\'([^\']+.xhamster.com[^\']+)\'[^>]+>[^<]*?'
    patronvideos += '<img[^<]*?>[^<]*?</a>[^<]*?'
    patronvideos += '<div[^<]*?>[^<]*?'
    patronvideos += '<a href="/movies/[^"]+.html"[^>]*?>([^<]+)</a>[^<]*?'
    patronvideos += '</div[^<]*?>[^<]*?'
    patronvideos += '<div[^<]*?>Runtime: ([^<]+)<'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)

    itemlist = []
    for match in matches:
        # Title is "<name> [<runtime>]".
        scrapedtitle = match[2] + " [" + match[3] + "]"
        scrapedurl = urlparse.urljoin(item.url, match[0])
        scrapedthumbnail = match[1].replace(" ", "%20")
        scrapedplot = scrapertools.htmlclean(match[2].strip())
        if (DEBUG):
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")
        # Add the entry to the XBMC listing.
        itemlist.append(
            Item(channel=CHANNELNAME, action="findvideos", title=scrapedtitle,
                 url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot,
                 folder=True))

    # Pager link, e.g. <A HREF="/new/2.html">Next</A> (case-insensitive).
    patronvideos = '<a href="(\/new\/[^\.]+\.html)"[^>]*?>Next[^<]*?<\/a>'
    matches = re.compile(patronvideos, re.DOTALL | re.IGNORECASE).findall(data)
    scrapertools.printMatches(matches)
    if len(matches) > 0:
        scrapedurl = urlparse.urljoin(item.url, matches[0])
        logger.info("[xhamster.py] " + scrapedurl)
        itemlist.append(
            Item(channel=CHANNELNAME, action="novedades",
                 title="!Página siguiente", url=scrapedurl, folder=True))
    return itemlist
def list(params,url,category):
    """Render the movie index for newcineonline and queue the pager link."""
    xbmc.output("[newcineonline.py] list")

    # Fetch the index page
    data = scrapertools.cachePage(url)

    # Folder entries: title link + thumbnail + description table
    entry_pattern = ('<div id="post-info-mid">[^<]+<div class="post-title">'
                     '<a href="([^"]+)">([^<]+)</a></div>'
                     '.*?<td class="post-story">.*?<tbody>.*?'
                     '<img src="([^"]+)"[^>]+>(.*?)</tbody>')
    entries = re.compile(entry_pattern, re.DOTALL).findall(data)
    scrapertools.printMatches(entries)

    for link, name, thumb, rawdesc in entries:
        folder_url = urlparse.urljoin(url, link)
        folder_thumb = urlparse.urljoin(url, thumb)

        # Drop the colour markers the site's editor leaves in the text
        description = scrapertools.htmlclean(rawdesc)
        for marker in ("<!--colorstart:#589BB9-->", "<!--colorend-->",
                       "<!--/colorend-->", "<!--/colorstart-->"):
            description = description.replace(marker, "")
        description = description.strip()

        if (DEBUG):
            xbmc.output("scrapedtitle=" + name)
            xbmc.output("scrapedurl=" + folder_url)
            xbmc.output("scrapedthumbnail=" + folder_thumb)

        xbmctools.addnewfolder(CHANNELNAME, "detail", CHANNELNAME, name,
                               folder_url, folder_thumb, description)

    xbmc.output("Busca el enlace de página siguiente...")
    try:
        # Next-page link.  NOTE(review): the broad except looks deliberate —
        # it also covers addfolder() not being defined in this module.
        pager = '<a href\="([^"]+)"><span class\="navigation"[^>]+>Sigu'
        url = re.compile(pager, re.DOTALL).findall(data)[0]
        addfolder("!Siguiente", url, "list")
    except:
        xbmc.output("No encuentro la pagina...")

    # Label (top-right), disable sorting, close the directory
    xbmcplugin.setPluginCategory(handle=int(sys.argv[1]), category=category)
    xbmcplugin.addSortMethod(handle=int(sys.argv[1]),
                             sortMethod=xbmcplugin.SORT_METHOD_NONE)
    xbmcplugin.endOfDirectory(handle=int(sys.argv[1]), succeeded=True)
def series(item):
    """Build the cinetube series listing plus an optional pager entry."""
    logger.info("[cinetube.py] series")
    itemlist = []

    # Fetch the listing page
    data = scrapertools.cachePage(item.url)
    logger.info("Pagina de %d caracteres" % len(data))

    # Each <li> holds: link+thumb, connector icons, title and (optionally)
    # a season/episode line.
    li_pattern = ('<li>[^<]+'
                  '<a href="([^"]+)"><img src="([^"]+)"[^>]*></a>[^<]+'
                  '<div class="icos_lg">(.*?)</div>[^<]+'
                  '<p class="tit_ficha">(.*?)</p>[^<]+'
                  '(?:<p class="tem_fich">([^<]+)</p>)?')
    entries = re.compile(li_pattern, re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(entries)

    for entry in entries:
        title = entry[3].strip()
        if len(entry) >= 5:
            title = title + " " + entry[4]

        # Connector names come from the alt attribute of each icon;
        # an empty alt means megavideo on this site.
        icon_alts = re.compile('<img.*?alt="([^"]*)"',
                               re.DOTALL).findall(entry[2])
        names = []
        for alt in icon_alts:
            logger.info("matchconector=" + alt)
            names.append(alt if alt != "" else "megavideo")
        if len(icon_alts) > 0:
            title = title + " (" + "/".join(names) + ")"

        # Collapse repeated "megavideo/megavideo" runs (up to three passes,
        # like the original chained replaces)
        for _ in range(3):
            title = title.replace("megavideo/megavideo", "megavideo")
        title = title.replace("descarga directa", "DD")
        title = scrapertools.htmlclean(title)
        title = scrapertools.entityunescape(title)

        plot = ""
        url = urlparse.urljoin(item.url, entry[0])
        thumb = entry[1]

        if (DEBUG):
            logger.info("title=[" + title + "], url=[" + url +
                        "], thumbnail=[" + thumb + "]")

        itemlist.append(
            Item(channel=CHANNELNAME, action="temporadas", title=title,
                 url=url, thumbnail=thumb, plot=plot, folder=True))

    # Pager: <li class="navs"><a class="pag_next" href="..."></a></li>
    pager = '<li class="navs"><a class="pag_next" href="([^"]+)"></a></li>'
    nexts = re.compile(pager, re.DOTALL).findall(data)
    scrapertools.printMatches(nexts)
    if len(nexts) > 0:
        itemlist.append(
            Item(channel=CHANNELNAME, action="series",
                 title="!Página siguiente",
                 url=urlparse.urljoin(item.url, nexts[0]), folder=True))

    return itemlist
def ddpostdetail(params, url, category):
    """Show the videos found inside a mcanime direct-download post.

    params   -- request dict with at least "title" and "thumbnail"
    url      -- post URL to scrape
    category -- XBMC plugin category label
    """
    logger.info("[mcanime.py] ddpostdetail")
    title = urllib.unquote_plus(params.get("title"))
    thumbnail = urllib.unquote_plus(params.get("thumbnail"))
    plot = unicode(xbmc.getInfoLabel("ListItem.Plot"), "utf-8")

    # Download the page
    data = scrapertools.cachePage(url)

    # Series thumbnail from the encyclopedia
    patron = '<img src="([^"]+)" width="300".*?class="title_pic" />'
    matches = re.compile(patron, re.DOTALL).findall(data)
    if len(matches) > 0:
        thumbnail = matches[0]

    # Plot - the post body text.  Replace "\r\n" FIRST so a Windows line
    # ending collapses to a single space (the old order made that replace
    # dead code and produced two spaces for every CRLF).
    patron = '<div id="download_detail">(.*?)</div>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    if len(matches) > 0:
        plot = scrapertools.htmlclean(matches[0])
        plot = plot.replace("\r\n", " ")
        plot = plot.replace("\r", " ")
        plot = plot.replace("\n", " ")
        plot = plot.strip()

    # ------------------------------------------------------------------
    # Video links found in the post
    # ------------------------------------------------------------------
    listavideos = servertools.findvideos(data)
    i = 1
    for video in listavideos:
        # Build "Title (n) server-label"; fall back to the raw string when
        # the title is not valid UTF-8.
        try:
            fulltitle = unicode(title.strip() + " (%d) " + video[0],
                                "utf-8").encode("iso-8859-1")
        except:
            fulltitle = title.strip() + " (%d) " + video[0]
        fulltitle = fulltitle % i
        i = i + 1
        videourl = video[1]
        server = video[2]
        xbmctools.addnewvideo(CHANNELNAME, "play", category, server,
                              fulltitle, videourl, thumbnail, plot)

    # ------------------------------------------------------------------
    # "Add every video to the download list" entry
    # (label kept byte-for-byte, including its legacy mojibake)
    # ------------------------------------------------------------------
    xbmctools.addnewvideo(CHANNELNAME, "addalltodownloadlist", title, "",
                          "(AÒadir todos los vÌdeos a la lista de descarga)",
                          url, thumbnail, plot)

    # Close the directory
    xbmcplugin.setPluginCategory(handle=pluginhandle, category=category)
    xbmcplugin.addSortMethod(handle=pluginhandle,
                             sortMethod=xbmcplugin.SORT_METHOD_NONE)
    xbmcplugin.endOfDirectory(handle=pluginhandle, succeeded=True)
def ddseriedetail(params, url, category):
    """List the user-contributed download posts for a mcanime series.

    params   -- request dict with "title", "thumbnail" and "plot"
    url      -- encyclopedia page of the series
    category -- XBMC plugin category label
    """
    logger.info("[mcanime.py] ddseriedetail")
    title = urllib.unquote_plus(params.get("title"))
    thumbnail = urllib.unquote_plus(params.get("thumbnail"))
    plot = urllib.unquote_plus(params.get("plot"))

    # Download the page
    data = scrapertools.cachePage(url)

    # Series thumbnail from the encyclopedia
    patron = '<img src="([^"]+)" width="300".*?class="title_pic" />'
    matches = re.compile(patron, re.DOTALL).findall(data)
    if len(matches) > 0:
        thumbnail = matches[0]
        logger.info("[mcanime.py] thumbnail=" + thumbnail)

    # Plot.  Clean the HTML first, then flatten line endings ("\r\n" before
    # the single characters so a CRLF becomes ONE space).  BUG FIX: the old
    # code normalised a copy and then discarded it by re-cleaning the raw
    # match ("plot = scrapertools.htmlclean(matches[0])" at the end).
    patron = '<h6>Sinopsis.*?</h6>(.*?)<h6>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    if len(matches) > 0:
        plot = scrapertools.htmlclean(matches[0])
        plot = plot.replace("\r\n", " ")
        plot = plot.replace("\r", " ")
        plot = plot.replace("\n", " ")
        plot = plot.strip()
        logger.info("[mcanime.py] plot=" + plot)

    # Narrow the search to the user-contributions block when present
    patron = '<h6 class="m">Por los Usuarios</h6>[^<]+'
    patron += '<div id="user_actions">(.*?)</div>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    if len(matches) > 0:
        data = matches[0]

    # One row per contribution: type icon, update date, link + title
    patron = '<ul class="dd_row">[^<]+'
    patron += '<li class="dd_type"><img[^>]+></li>[^<]+'
    patron += '<li class="dd_update"> <img[^>]+>([^<]+)</li>[^<]+'
    patron += '<li class="dd_title">[^<]+'
    patron += '<h5><a href="([^"]+)">([^<]+)</a></h5>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    for match in matches:
        scrapedtitle = match[2].strip() + " (" + match[0].strip() + ")"
        scrapedurl = urlparse.urljoin(url, match[1])
        scrapedthumbnail = thumbnail
        scrapedplot = plot
        if (DEBUG):
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl +
                        "], thumbnail=[" + scrapedthumbnail + "]")
        xbmctools.addnewfolder(CHANNELNAME, "ddpostdetail", category,
                               scrapedtitle, scrapedurl, scrapedthumbnail,
                               scrapedplot)

    # Close the directory
    xbmcplugin.setPluginCategory(handle=pluginhandle, category=category)
    xbmcplugin.addSortMethod(handle=pluginhandle,
                             sortMethod=xbmcplugin.SORT_METHOD_NONE)
    xbmcplugin.endOfDirectory(handle=pluginhandle, succeeded=True)
def list(params, url, category):
    """Render the movie index for newcineonline and queue the pager link."""
    logger.info("[newcineonline.py] list")

    # Fetch the index page
    data = scrapertools.cachePage(url)

    # Folder entries: title link + thumbnail + description table
    entry_pattern = ('<div id="post-info-mid">[^<]+<div class="post-title">'
                     '<a href="([^"]+)">([^<]+)</a></div>'
                     '.*?<td class="post-story">.*?<tbody>.*?'
                     '<img src="([^"]+)"[^>]+>(.*?)</tbody>')
    entries = re.compile(entry_pattern, re.DOTALL).findall(data)
    scrapertools.printMatches(entries)

    for link, name, thumb, rawdesc in entries:
        folder_url = urlparse.urljoin(url, link)
        folder_thumb = urlparse.urljoin(url, thumb)

        # Drop the colour markers the site's editor leaves in the text
        description = scrapertools.htmlclean(rawdesc)
        for marker in ("<!--colorstart:#589BB9-->", "<!--colorend-->",
                       "<!--/colorend-->", "<!--/colorstart-->"):
            description = description.replace(marker, "")
        description = description.strip()

        if (DEBUG):
            logger.info("scrapedtitle=" + name)
            logger.info("scrapedurl=" + folder_url)
            logger.info("scrapedthumbnail=" + folder_thumb)

        xbmctools.addnewfolder(CHANNELNAME, "detail", CHANNELNAME, name,
                               folder_url, folder_thumb, description)

    logger.info("Busca el enlace de página siguiente...")
    try:
        # Next-page link.  NOTE(review): the broad except looks deliberate —
        # it also covers addfolder() not being defined in this module.
        pager = '<a href\="([^"]+)"><span class\="navigation"[^>]+>Sigu'
        url = re.compile(pager, re.DOTALL).findall(data)[0]
        addfolder("!Siguiente", url, "list")
    except:
        logger.info("No encuentro la pagina...")

    # Label (top-right), disable sorting, close the directory
    xbmcplugin.setPluginCategory(handle=int(sys.argv[1]), category=category)
    xbmcplugin.addSortMethod(handle=int(sys.argv[1]),
                             sortMethod=xbmcplugin.SORT_METHOD_NONE)
    xbmcplugin.endOfDirectory(handle=int(sys.argv[1]), succeeded=True)
def findvideos(data):
    """Scan raw HTML and return every recognised video link.

    data -- page HTML to scan.

    Returns a list of [title, url_or_id, server] triples; each url is
    reported only once (the first pattern that finds it wins).

    Refactor notes: the original repeated the same dedupe/append stanza
    for every pattern — that is now the agrega() helper — and used the
    Python-2-only "<>" operator (replaced with "!=").
    """
    logger.info("[servertools.py] findvideos")
    encontrados = set()
    devuelve = []

    def agrega(titulo, url, server):
        # Append one entry, skipping urls already collected.
        if url in encontrados:
            logger.info("  url duplicada=" + url)
            return
        logger.info("  url=" + url)
        devuelve.append([titulo, url, server])
        encontrados.add(url)

    # Megavideo split in parts (cinetube markup), e.g.
    # id="http://www.megavideo.com/?v=XXXXXXXX"><a href="#parte1">...
    logger.info("0) Megavideo con partes para cinetube")
    patron = 'id.+?http://www.megavideo.com..v.(.+?)".+?(parte\d+)'
    for code, parte in re.compile(patron).findall(data):
        agrega("[Megavideo " + parte + "]", code, 'Megavideo')

    # Megavideo, titled <param name="movie"> embed
    logger.info("1) Megavideo con titulo...")
    patron = '<div align="center">([^<]+)<.*?<param name="movie" value="http://www.megavideo.com/v/([A-Z0-9a-z]{8})[^"]+"'
    for titulo, code in re.compile(patron, re.DOTALL).findall(data):
        agrega(titulo.strip() or "[Megavideo]", code, 'Megavideo')

    # Megavideo, titled anchor
    logger.info("1b) Megavideo con titulo...")
    patron = '<a href\="http\:\/\/www.megavideo.com/\?v\=([A-Z0-9a-z]{8})".*?>([^<]+)</a>'
    for code, titulo in re.compile(patron, re.DOTALL).findall(data):
        agrega(titulo.strip() or "[Megavideo]", code, 'Megavideo')

    # Megavideo, bare ?v= link followed by a quote
    logger.info("1c) Megavideo sin titulo...")
    patron = 'http\:\/\/www.megavideo.com/\?v\=([A-Z0-9a-z]{8})"'
    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    for code in matches:
        agrega("[Megavideo]", code, 'Megavideo')

    # Megavideo, bare ?v= link anywhere
    logger.info("1d) Megavideo sin titulo...")
    patron = 'http\:\/\/www.megavideo.com/\?v\=([A-Z0-9a-z]{8})'
    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    for code in matches:
        agrega("[Megavideo]", code, 'Megavideo')

    # Megaupload, titled anchor
    logger.info("1k1) Megaupload...")
    patron = '<a.*?href="http://www.megaupload.com/\?d=([A-Z0-9a-z]{8})".*?>(.*?)</a>'
    for code, texto in re.compile(patron).findall(data):
        agrega(scrapertools.htmlclean(texto.strip()) + " - [Megaupload]",
               code, 'Megaupload')

    # Megaupload, generic d= link with optional anchor text (2/12/2010)
    logger.info("1k) Megaupload...")
    patron = 'http\://www.megaupload.com/(?:es/)?\?.*?d\=([A-Z0-9a-z]{8})(?:[^>]*>([^<]+)</a>)?'
    for code, texto in re.compile(patron).findall(data):
        if texto != "":
            agrega(texto.strip() + " - [Megaupload]", code, 'Megaupload')
        else:
            agrega("[Megaupload]", code, 'Megaupload')

    # Megavideo, untitled player embed
    logger.info("2) Megavideo sin titulo...")
    patron = '<param name="movie" value="http://wwwstatic.megavideo.com/mv_player.swf\?v=([^"]+)">'
    for code in re.compile(patron, re.DOTALL).findall(data):
        agrega("[Megavideo]", code, 'Megavideo')

    # Vreel, bold title
    logger.info("3) Vreel con título...")
    patron = '<div align="center"><b>([^<]+)</b>.*?<a href\="(http://beta.vreel.net[^"]+)"'
    for titulo, link in re.compile(patron, re.DOTALL).findall(data):
        agrega(titulo.strip() or "[Vreel]", link, 'Vreel')

    # Vreel, plain title
    logger.info("4) Vreel con titulo...")
    patron = '<div align="center">([^<]+)<.*?<a href\="(http://beta.vreel.net[^"]+)"'
    for titulo, link in re.compile(patron, re.DOTALL).findall(data):
        agrega(titulo.strip() or "[Vreel]", link, 'Vreel')

    # Stagevu, any quoted link
    logger.info("7) Stagevu sin título...")
    patron = '"(http://stagevu.com[^"]+)"'
    for link in re.compile(patron, re.DOTALL).findall(data):
        agrega("[Stagevu]", link, 'Stagevu')

    # tu.tv embeds (three markup variants)
    logger.info("8) Tu.tv sin título...")
    patron = '<param name="movie" value="(http://tu.tv[^"]+)"'
    for link in re.compile(patron, re.DOTALL).findall(data):
        agrega("[tu.tv]", link, 'tu.tv')

    logger.info("9) Tu.tv sin título...")
    patron = '<param name="movie" value="(http://www.tu.tv[^"]+)"'
    for link in re.compile(patron, re.DOTALL).findall(data):
        agrega("[tu.tv]", link, 'tu.tv')

    logger.info("9b) Tu.tv sin título...")
    patron = '<embed src="(http://tu.tv/[^"]+)"'
    for link in re.compile(patron, re.DOTALL).findall(data):
        agrega("[tu.tv]", link, 'tu.tv')

    # Megavideo /v/ url.  Sections 10 and 11 used the same pattern in the
    # original; both passes are kept (agrega filters the duplicates).
    logger.info("10 ) Megavideo sin titulo...")
    patron = '"http://www.megavideo.com/v/([A-Z0-9a-z]{8})[^"]+"'
    for code in re.compile(patron, re.DOTALL).findall(data):
        agrega("[Megavideo]", code, 'Megavideo')

    logger.info("11) Megavideo sin titulo...")
    for code in re.compile(patron, re.DOTALL).findall(data):
        agrega("[Megavideo]", code, 'Megavideo')

    # 12) Stagevu text link — disabled in the original source:
    #patron = '(http://stagevu.com[^<]+)<'

    # Vreel, bare text link
    logger.info("13) Vreel sin titulo...")
    patron = '(http://beta.vreel.net[^<]+)<'
    for link in re.compile(patron, re.DOTALL).findall(data):
        agrega("[Vreel]", link, 'Vreel')

    # Megavideo, anchor with free-form code
    logger.info("14) Megavideo con titulo...")
    patron = '<a href="http://www.megavideo.com/\?v\=([^"]+)".*?>(.*?)</a>'
    for code, titulo in re.compile(patron, re.DOTALL).findall(data):
        agrega(titulo.strip() or "[Megavideo]", code, 'Megavideo')

    logger.info("14b) Megavideo con titulo...")
    patron = '<param name="movie" value=".*?v\=([A-Z0-9]{8})" />'
    for code in re.compile(patron, re.DOTALL).findall(data):
        agrega("[Megavideo]", code, 'Megavideo')

    # Stagevu by uid (double- then single-quoted attribute values)
    logger.info("0) Stagevu...")
    patron = '"http://stagevu.com.*?uid\=([^"]+)"'
    for uid in re.compile(patron, re.DOTALL).findall(data):
        agrega("[Stagevu]", "http://stagevu.com/video/" + uid, 'Stagevu')

    logger.info("0) Stagevu...")
    patron = "'http://stagevu.com.*?uid\=([^']+)'"
    for uid in re.compile(patron, re.DOTALL).findall(data):
        agrega("[Stagevu]", "http://stagevu.com/video/" + uid, 'Stagevu')

    logger.info("0) Megavideo... formato d=XXXXXXX")
    patron = 'http://www.megavideo.com/.*?\&d\=([^"]+)"'
    for code in re.compile(patron, re.DOTALL).findall(data):
        agrega("[Megavideo]", code, 'Megavideo')

    logger.info("0) Megavideo... formato watchanimeon")
    patron = 'src="http://wwwstatic.megavideo.com/mv_player.swf.*?\&v\=([^"]+)"'
    for code in re.compile(patron, re.DOTALL).findall(data):
        agrega("[Megavideo]", code, 'Megavideo')

    # d= codes are Megaupload files served through megavideo.com, hence
    # the Megaupload server label with a Megavideo title (kept as-is)
    logger.info("0) Megaupload... formato megavideo con d=XXXXXXX")
    patron = 'http://www.megavideo.com/\?d\=([^"]+)"'
    for code in re.compile(patron, re.DOTALL).findall(data):
        agrega("[Megavideo]", code, 'Megaupload')

    logger.info("0) Movshare...")
    patron = '"(http://www.movshare.net/video/[^"]+)"'
    for link in re.compile(patron, re.DOTALL).findall(data):
        agrega("[Movshare]", link, 'movshare')

    logger.info("0) Movshare...")
    patron = "'(http://www.movshare.net/embed/[^']+)'"
    for link in re.compile(patron, re.DOTALL).findall(data):
        agrega("[Movshare]", link, 'movshare')

    # Veoh: keep only the permalink id, dropping trailing &params
    logger.info("0) Veoh...")
    patron = '"http://www.veoh.com/.*?permalinkId=([^"]+)"'
    for code in re.compile(patron, re.DOTALL).findall(data):
        agrega("[Veoh]", code.split("&")[0], 'veoh')

    logger.info("0) Directo - myspace")
    patron = 'flashvars="file=(http://[^\.]+.myspacecdn[^\&]+)&'
    for link in re.compile(patron, re.DOTALL).findall(data):
        agrega("[Directo]", link, 'Directo')

    logger.info("0) Directo - myspace")
    patron = '(http://[^\.]+\.myspacecdn.*?\.flv)'
    for link in re.compile(patron, re.DOTALL).findall(data):
        agrega("[Directo]", link, 'Directo')

    logger.info("0) Directo - ning")
    patron = '(http://api.ning.com.*?\.flv)'
    for link in re.compile(patron, re.DOTALL).findall(data):
        agrega("[Directo]", link, 'Directo')

    logger.info("0) Videoweed...")
    patron = '(http://www.videoweed.com/file/*?\.flv)'
    for link in re.compile(patron, re.DOTALL).findall(data):
        agrega("[Videoweed]", link, 'videoweed')

    logger.info("0) YouTube...")
    patron = '"http://www.youtube.com/v/([^"]+)"'
    for code in re.compile(patron, re.DOTALL).findall(data):
        agrega("[YouTube]", code, 'youtube')

    # e.g. http://video.ak.facebook.com/cfs-ak-ash2/33066/239/..._27745.mp4
    logger.info("0) Facebook...")
    patron = '(http://video.ak.facebook.com/.*?\.mp4)'
    for link in re.compile(patron, re.DOTALL).findall(data):
        agrega("[Facebook]", link, 'facebook')

    # e.g. http://www.4shared.com/embed/392975628/ff297d3f
    logger.info("0) 4shared...")
    patron = '"(http://www.4shared.com.*?)"'
    for link in re.compile(patron, re.DOTALL).findall(data):
        agrega("[4shared]", link, '4shared')

    logger.info("0) 4shared...")
    patron = "'(http://www.4shared.com.*?)'"
    for link in re.compile(patron, re.DOTALL).findall(data):
        agrega("[4shared]", link, '4shared')

    # file=http://es.video.netlogstatic.com//v/oo/004/398/4398830.flv&
    logger.info("0) netlogicstat...")
    patron = "file\=(http\:\/\/es.video.netlogstatic[^\&]+)\&"
    for link in re.compile(patron, re.DOTALL).findall(data):
        agrega("[Directo]", link, 'Directo')

    return devuelve
def estrenos(params, url, category):
    """List new releases (direct-download entries) from mcanime.

    params   -- request dict (part of the channel API; unused here)
    url      -- listing page to scrape
    category -- XBMC plugin category label
    """
    logger.info("[mcanime.py] estrenos")

    # Download the page
    data = scrapertools.cachePage(url)

    # One <dl> block per release: thumb, title link, synopsis, DD link
    patron = '<dl id="[^"]+" class="min row.">(.*?)</dl>'
    bloques = re.compile(patron, re.DOTALL).findall(data)
    for bloque in bloques:
        # BUG FIX: reset the per-entry values on every block.  The old code
        # reassigned the outer "data" variable and reused the previous
        # iteration's scrapedtitle/thumbnail when a sub-pattern missed
        # (NameError on the very first block).
        scrapedtitle = ""
        scrapedthumbnail = ""
        scrapedplot = ""
        scrapedurl = ""

        # Thumbnail + "Title (Type)" header
        patron = '<dd class="thumb">[^<]+'
        patron += '<img src="([^"]+)"[^>]+>[^<]+'
        patron += '</dd>[^<]+'
        patron += '<dt><a href="[^"]+">([^<]+)</a>\s*<i>([^<]+)</i>\s*</dt>[^<]+'
        matches2 = re.compile(patron, re.DOTALL).findall(bloque)
        if len(matches2) > 0:
            scrapedtitle = matches2[0][1].strip() + " " + matches2[0][2].strip()
            scrapedthumbnail = urlparse.urljoin(url, matches2[0][0])
            if (DEBUG):
                logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl +
                            "], thumbnail=[" + scrapedthumbnail + "]")

        # Synopsis + direct-download link
        patron = '</dt>(.*?)<dd class="small mgn"><a href="([^"]+)"'
        matches2 = re.compile(patron, re.DOTALL).findall(bloque)
        if len(matches2) > 0:
            # Latin-1 re-encode when the text is valid UTF-8, else raw
            try:
                scrapedplot = unicode(matches2[0][0].strip(),
                                      "utf-8").encode("iso-8859-1")
            except:
                scrapedplot = matches2[0][0].strip()
            scrapedplot = scrapertools.htmlclean(scrapedplot)
            # "\r\n" first so a Windows line break becomes ONE space
            scrapedplot = scrapedplot.replace("\r\n", " ")
            scrapedplot = scrapedplot.replace("\r", " ")
            scrapedplot = scrapedplot.replace("\n", " ")
            scrapedurl = urlparse.urljoin(url, matches2[0][1])

        # Skip blocks whose header could not be parsed
        if scrapedtitle == "":
            continue

        xbmctools.addnewfolder(CHANNELNAME, "ddseriedetail", category,
                               scrapedtitle, scrapedurl, scrapedthumbnail,
                               scrapedplot)

    # Label, disable sorting, close the directory
    xbmcplugin.setPluginCategory(handle=int(sys.argv[1]), category=category)
    xbmcplugin.addSortMethod(handle=int(sys.argv[1]),
                             sortMethod=xbmcplugin.SORT_METHOD_NONE)
    xbmcplugin.endOfDirectory(handle=int(sys.argv[1]), succeeded=True)
def findvideos(item):
    """Resolve a cinetube detail page into playable Items.

    Every mirror/episode link found on the page is fetched and scanned
    with servertools.findvideos(); each hit becomes one Item.
    """
    logger.info("[cinetube.py] findvideos")

    page_url = item.url
    base_title = item.title
    thumbnail = item.thumbnail
    plot = item.plot

    # Fetch the detail page
    page = scrapertools.cachePage(page_url)
    #logger.info(page)

    # Refresh the synopsis when the page carries one.
    # NOTE(review): the refreshed plot/thumbnail are computed (and logged)
    # but the Items below keep item.plot / item.thumbnail, as in the original.
    hits = re.compile('<div class="ficha_des">(.*?)</div>', re.DOTALL).findall(page)
    if hits:
        plot = scrapertools.htmlclean(hits[0])
        logger.info("plot actualizado en detalle")
    else:
        logger.info("plot no actualizado en detalle")

    # Refresh the thumbnail
    thumb_pattern = '<div class="ficha_img pelicula_img">[^<]+'
    thumb_pattern += '<img src="([^"]+)"'
    hits = re.compile(thumb_pattern, re.DOTALL).findall(page)
    if hits:
        thumbnail = hits[0]
        logger.info("thumb actualizado en detalle")
    else:
        logger.info("thumb no actualizado en detalle")

    # Mirror links, or episode links for series. Series format:
    '''
    <div class="tit_opts"><a href="/series/hawai-five/temporada-1/capitulo-13/212498.html">
    <p>Opción 1: Ver online en Megavideo <span class="bold"></span></p>
    <p><span>IDIOMA: SUB</span></p>
    <p class="v_ico"><img src="http://caratulas.cinetube.es/img/cont/megavideo.png" alt="Megavideo" /></p>
    '''
    link_pattern = '<div class="tit_opts"><a href="([^"]+)"[^>]*>[^<]+'
    link_pattern += '<p>(.*?)</p>[^<]+'
    link_pattern += '<p><span>(.*?)</span>'

    itemlist = []
    for link, option, language in re.compile(link_pattern, re.DOTALL).findall(page):
        logger.info("Encontrado iframe mirrors " + link)

        # Fetch the mirror page with a browser User-Agent
        mirror = urlparse.urljoin(page_url, link.replace(" ", "%20"))
        request = urllib2.Request(mirror)
        request.add_header('User-Agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3')
        response = urllib2.urlopen(request)
        mirror_page = response.read()
        response.close()

        # One Item per video the server scanner recognises
        for video_title, video_url, server in servertools.findvideos(mirror_page):
            label = base_title.strip() + " " + option + " " + language + " " + video_title
            label = scrapertools.htmlclean(label)
            itemlist.append(Item(channel=CHANNELNAME, action="play", title=label, url=video_url, thumbnail=item.thumbnail, plot=item.plot, server=server, folder=False))

    return itemlist
def generico(params, url, category):
    """Generic rtve.es mediateca scraper: one video entry per "news" block.

    params   -- plugin parameters (unused)
    url      -- page URL to scrape, also base for relative links
    category -- XBMC category; the special value "allowblanktitles" keeps
                entries without a title, naming them from the URL path
    """
    xbmc.output("[rtveprogramas.py] generico")

    # The pseudo-category "allowblanktitles" enables videos without a title
    allowblanktitles = False
    if category == "allowblanktitles":
        allowblanktitles = True
        category = CHANNELNAME

    # --------------------------------------------------------
    # Download the page
    # --------------------------------------------------------
    data = scrapertools.cachePage(url)
    #xbmc.output(data)

    # --------------------------------------------------------
    # Each entry is a <div class="news...">...</div> block, e.g.:
    # <span class="imgL"><a href="/mediateca/videos/20100218/aguila-roja-cap20/698541.shtml" title="Capítulo 20"><img src="/imagenes/capitulo-20/1267703445964.jpg" .../></a></span>
    # <h3 class="M "><a href="/mediateca/videos/20100218/aguila-roja-cap20/698541.shtml" title="Capítulo 20">Capítulo 20</a></h3>
    # <div class="chapeaux">Emitido el 18/02/10</div>
    # --------------------------------------------------------
    patron = '<div class="news[^"]+">(.*?</div>)'
    bloques = re.compile(patron, re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(bloques)

    for bloque in bloques:
        scrapedtitle = ""
        scrapedurl = ""
        scrapedthumbnail = ""
        scrapedplot = ""

        # Link plus title
        patron = '<a href="([^"]+)"[^>]+>([^<]+)<'
        matches = re.compile(patron, re.DOTALL).findall(bloque)
        if DEBUG:
            scrapertools.printMatches(matches)
        if len(matches) > 0:
            scrapedurl = urlparse.urljoin(url, matches[0][0])
            scrapedtitle = scrapertools.entityunescape(matches[0][1])

        # No titled link found: fall back to the first plain link
        if scrapedurl == "":
            patron = '<a href="([^"]+)"'
            matches = re.compile(patron, re.DOTALL).findall(bloque)
            if DEBUG:
                scrapertools.printMatches(matches)
            if len(matches) > 0:
                scrapedurl = urlparse.urljoin(url, matches[0])

        # Thumbnail
        patron = '<img src="([^"]+)"'
        matches = re.compile(patron, re.DOTALL).findall(bloque)
        if DEBUG:
            scrapertools.printMatches(matches)
        if len(matches) > 0:
            scrapedthumbnail = urlparse.urljoin(url, matches[0])

        # Plot ("Emitido el ...")
        patron = '<div class="chapeaux">(.*?)</div>'
        matches = re.compile(patron, re.DOTALL).findall(bloque)
        if DEBUG:
            scrapertools.printMatches(matches)
        if len(matches) > 0:
            scrapedplot = scrapertools.htmlclean(matches[0])

        if (DEBUG):
            xbmc.output("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")

        if allowblanktitles:
            # Title from the second-to-last URL segment, e.g. ".../aguila-roja-cap20/698541.shtml"
            # -> "aguila-roja-cap20". Kept as len()-2 (not [-2]) on purpose:
            # for a 1-element split the original wraps to index -1 instead of raising.
            titulos = scrapedurl.split("/")
            scrapedtitle = titulos[len(titulos) - 2]

        # Add to the XBMC listing (FIX: "<>" operator replaced with "!=")
        if scrapedtitle != "" and scrapedurl != "":
            xbmctools.addnewvideo("rtve", "play", category, "Directo", scrapedtitle, scrapedurl, scrapedthumbnail, scrapedplot)

    # Close the directory
    xbmcplugin.setPluginCategory(handle=pluginhandle, category=category)
    xbmcplugin.addSortMethod(handle=pluginhandle, sortMethod=xbmcplugin.SORT_METHOD_NONE)
    xbmcplugin.endOfDirectory(handle=pluginhandle, succeeded=True)
def listmirrors(params, url, category):
    """List all mirrors for a gratisdocumentales title as playable entries.

    params   -- plugin parameters carrying title/thumbnail/plot (url-encoded)
    url      -- detail page URL
    category -- XBMC category label
    """
    # FIX: log tag said "detail" (copy-paste); this is listmirrors
    logger.info("[gratisdocumentales.py] listmirrors")
    title = urllib.unquote_plus(params.get("title"))
    thumbnail = urllib.unquote_plus(params.get("thumbnail"))
    #plot = unicode( xbmc.getInfoLabel( "ListItem.Plot" ), "utf-8" )
    plot = urllib.unquote_plus(params.get("plot"))

    # Download the page
    data = scrapertools.cachePage(url)
    #logger.info(data)

    # Updated synopsis, when present
    patronvideos = '<div class="ficha_des">(.*?)</div>'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    if len(matches) > 0:
        plot = scrapertools.htmlclean(matches[0])

    # Updated thumbnail
    patronvideos = '<div class="ficha_img pelicula_img">[^<]+'
    patronvideos += '<img src="([^"]+)"'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    if len(matches) > 0:
        thumbnail = matches[0]

    # The mirrors live in a separate AJAX fragment keyed by the tabs-nav id,
    # e.g. id="video-4637" ->
    # http://www.gratisdocumentales.es/inc/mostrar_contenido.php?sec=pelis_ficha&zona=online&id=video-4637
    patronvideos = '<div class="ver_des_peli iframe2">[^<]+'
    patronvideos += '<ul class="tabs-nav" id="([^"]+)">'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)

    # FIX: original indexed matches[0] unconditionally -> IndexError when
    # the page has no tabs-nav block; now the whole mirror scan is guarded.
    if len(matches) > 0:
        data = scrapertools.cachePage("http://www.gratisdocumentales.es/inc/mostrar_contenido.php?sec=pelis_ficha&zona=online&id=" + matches[0])

        # Fragment format:
        # <div class="tit_opts" style="cursor:pointer;" onclick="location.href='...-6026.html'">
        # <p>Mirror 1: Megavideo</p>
        # <p><span>CALIDAD: DVD-RIP | IDIOMA: ESPAÑOL</span></p>
        patronvideos = '<div class="tit_opts" style="cursor:pointer;" onclick="location.href=\'([^\']+)\'">[^<]+'
        patronvideos += '<p>([^<]+)</p>[^<]+'
        patronvideos += '<p><span>([^<]+)</span>'
        matches = re.compile(patronvideos, re.DOTALL).findall(data)

        for match in matches:
            logger.info("Encontrado iframe mirrors " + match[0])

            # Fetch the mirror page with a browser User-Agent
            mirror = urlparse.urljoin(url, match[0].replace(" ", "%20"))
            req = urllib2.Request(mirror)
            req.add_header('User-Agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3')
            response = urllib2.urlopen(req)
            data = response.read()
            response.close()

            # One entry per video the server scanner recognises
            listavideos = servertools.findvideos(data)
            for video in listavideos:
                videotitle = video[0]
                scrapedurl = video[1]
                server = video[2]
                xbmctools.addnewvideo(CHANNELNAME, "play", category, server, title.strip() + " " + match[1] + " " + match[2] + " " + videotitle, scrapedurl, thumbnail, plot)

    # Label (top-right)...
    xbmcplugin.setPluginCategory(handle=int(sys.argv[1]), category=category)
    xbmcplugin.addSortMethod(handle=int(sys.argv[1]), sortMethod=xbmcplugin.SORT_METHOD_NONE)
    xbmcplugin.endOfDirectory(handle=int(sys.argv[1]), succeeded=True)
def _add_video(devuelve, encontrados, titulo, url, server, log_title=False):
    """Append [titulo, url, server] to devuelve unless url was already seen.

    encontrados -- set of urls added so far (mutated in place)
    log_title   -- also log the title on a hit (Megaupload sections did this)
    """
    if url not in encontrados:
        if log_title:
            logger.info(" titulo=" + titulo)
        logger.info(" url=" + url)
        devuelve.append([titulo, url, server])
        encontrados.add(url)
    else:
        logger.info(" url duplicada=" + url)


def findvideos(data):
    """Scan raw HTML for known video hosters; return [title, url, server] triples.

    Each hoster has one or more regex patterns, tried in a fixed order.
    URLs are de-duplicated across all patterns, so the first pattern to
    match a given url decides its title and server.
    """
    logger.info("[servertools.py] findvideos")
    encontrados = set()
    devuelve = []

    # Megavideo, multi-part links (cinetube style):
    # id="http://www.megavideo.com/?v=CN7DWZ8S"><a href="#parte1">Parte 1 de 2</a></li>
    logger.info("0) Megavideo con partes para cinetube")
    patronvideos = 'id.+?http://www.megavideo.com..v.(.+?)".+?(parte\d+)'
    for match in re.compile(patronvideos).findall(data):
        _add_video(devuelve, encontrados, "[Megavideo " + match[1] + "]", match[0], 'Megavideo')

    # Megavideo - videos with a title
    logger.info("1) Megavideo con titulo...")
    patronvideos = '<div align="center">([^<]+)<.*?<param name="movie" value="http://www.megavideo.com/v/([A-Z0-9a-z]{8})[^"]+"'
    for match in re.compile(patronvideos, re.DOTALL).findall(data):
        _add_video(devuelve, encontrados, match[0].strip() or "[Megavideo]", match[1], 'Megavideo')

    logger.info("1b) Megavideo con titulo...")
    patronvideos = '<a href\="http\:\/\/www.megavideo.com/\?v\=([A-Z0-9a-z]{8})".*?>([^<]+)</a>'
    for match in re.compile(patronvideos, re.DOTALL).findall(data):
        _add_video(devuelve, encontrados, match[1].strip() or "[Megavideo]", match[0], 'Megavideo')

    # http://www.megavideo.com/?v=OYGXMZBM
    logger.info("1c) Megavideo sin titulo...")
    patronvideos = 'http\:\/\/www.megavideo.com/\?v\=([A-Z0-9a-z]{8})"'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    for match in matches:
        _add_video(devuelve, encontrados, "[Megavideo]", match, 'Megavideo')

    logger.info("1d) Megavideo sin titulo...")
    patronvideos = 'http\:\/\/www.megavideo.com/\?v\=([A-Z0-9a-z]{8})'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    for match in matches:
        _add_video(devuelve, encontrados, "[Megavideo]", match, 'Megavideo')

    # Megaupload with a title
    logger.info("1k1) Megaupload...")
    patronvideos = '<a.*?href="http://www.megaupload.com/\?d=([A-Z0-9a-z]{8})".*?>(.*?)</a>'
    for match in re.compile(patronvideos).findall(data):
        titulo = scrapertools.htmlclean(match[1].strip()) + " - [Megaupload]"
        _add_video(devuelve, encontrados, titulo, match[0], 'Megaupload', log_title=True)

    # 2/12/2010 Megaupload (optional title group; "<>" replaced with "!=")
    logger.info("1k) Megaupload...")
    patronvideos = 'http\://www.megaupload.com/(?:es/)?\?.*?d\=([A-Z0-9a-z]{8})(?:[^>]*>([^<]+)</a>)?'
    for match in re.compile(patronvideos).findall(data):
        if match[1] != "":
            titulo = match[1].strip() + " - [Megaupload]"
        else:
            titulo = "[Megaupload]"
        _add_video(devuelve, encontrados, titulo, match[0], 'Megaupload', log_title=True)

    # Megavideo - untitled player embeds
    logger.info("2) Megavideo sin titulo...")
    patronvideos = '<param name="movie" value="http://wwwstatic.megavideo.com/mv_player.swf\?v=([^"]+)">'
    for match in re.compile(patronvideos, re.DOTALL).findall(data):
        _add_video(devuelve, encontrados, "[Megavideo]", match, 'Megavideo')

    # Vreel - videos with a title
    logger.info("3) Vreel con título...")
    patronvideos = '<div align="center"><b>([^<]+)</b>.*?<a href\="(http://beta.vreel.net[^"]+)"'
    for match in re.compile(patronvideos, re.DOTALL).findall(data):
        _add_video(devuelve, encontrados, match[0].strip() or "[Vreel]", match[1], 'Vreel')

    logger.info("4) Vreel con titulo...")
    patronvideos = '<div align="center">([^<]+)<.*?<a href\="(http://beta.vreel.net[^"]+)"'
    for match in re.compile(patronvideos, re.DOTALL).findall(data):
        _add_video(devuelve, encontrados, match[0].strip() or "[Vreel]", match[1], 'Vreel')

    # Stagevu
    logger.info("7) Stagevu sin título...")
    patronvideos = '"(http://stagevu.com[^"]+)"'
    for match in re.compile(patronvideos, re.DOTALL).findall(data):
        _add_video(devuelve, encontrados, "[Stagevu]", match, 'Stagevu')

    # tu.tv
    logger.info("8) Tu.tv sin título...")
    patronvideos = '<param name="movie" value="(http://tu.tv[^"]+)"'
    for match in re.compile(patronvideos, re.DOTALL).findall(data):
        _add_video(devuelve, encontrados, "[tu.tv]", match, 'tu.tv')

    # <param name="movie" value="http://www.tu.tv/tutvweb.swf?kpt=...&xtp=669149_VIDEO"
    logger.info("9) Tu.tv sin título...")
    patronvideos = '<param name="movie" value="(http://www.tu.tv[^"]+)"'
    for match in re.compile(patronvideos, re.DOTALL).findall(data):
        _add_video(devuelve, encontrados, "[tu.tv]", match, 'tu.tv')

    # <embed src="http://tu.tv/tutvweb.swf?kpt=...
    logger.info("9b) Tu.tv sin título...")
    patronvideos = '<embed src="(http://tu.tv/[^"]+)"'
    for match in re.compile(patronvideos, re.DOTALL).findall(data):
        _add_video(devuelve, encontrados, "[tu.tv]", match, 'tu.tv')

    # Megavideo - untitled /v/ links
    logger.info("10 ) Megavideo sin titulo...")
    patronvideos = '"http://www.megavideo.com/v/([A-Z0-9a-z]{8})[^"]+"'
    for match in re.compile(patronvideos, re.DOTALL).findall(data):
        _add_video(devuelve, encontrados, "[Megavideo]", match, 'Megavideo')

    # NOTE(review): pattern 11 is byte-identical to pattern 10 in the
    # original; kept for fidelity (the dedupe set makes it a no-op).
    logger.info("11) Megavideo sin titulo...")
    patronvideos = '"http://www.megavideo.com/v/([A-Z0-9a-z]{8})[^"]+"'
    for match in re.compile(patronvideos, re.DOTALL).findall(data):
        _add_video(devuelve, encontrados, "[Megavideo]", match, 'Megavideo')

    # Stagevu (disabled in the original; kept for reference)
    '''
    logger.info("12) Stagevu...")
    patronvideos = '(http://stagevu.com[^<]+)<'
    matches = re.compile(patronvideos,re.DOTALL).findall(data)
    for match in matches:
        titulo = "Ver el vídeo en Stagevu"
        url = match
        ...
    '''

    # Vreel - untitled
    logger.info("13) Vreel sin titulo...")
    patronvideos = '(http://beta.vreel.net[^<]+)<'
    for match in re.compile(patronvideos, re.DOTALL).findall(data):
        _add_video(devuelve, encontrados, "[Vreel]", match, 'Vreel')

    # Megavideo - titled anchor links
    logger.info("14) Megavideo con titulo...")
    patronvideos = '<a href="http://www.megavideo.com/\?v\=([^"]+)".*?>(.*?)</a>'
    for match in re.compile(patronvideos, re.DOTALL).findall(data):
        _add_video(devuelve, encontrados, match[1].strip() or "[Megavideo]", match[0], 'Megavideo')

    logger.info("14b) Megavideo con titulo...")
    patronvideos = '<param name="movie" value=".*?v\=([A-Z0-9]{8})" />'
    for match in re.compile(patronvideos, re.DOTALL).findall(data):
        _add_video(devuelve, encontrados, "[Megavideo]", match, 'Megavideo')

    # Stagevu by uid (double- then single-quoted attribute)
    logger.info("0) Stagevu...")
    patronvideos = '"http://stagevu.com.*?uid\=([^"]+)"'
    for match in re.compile(patronvideos, re.DOTALL).findall(data):
        _add_video(devuelve, encontrados, "[Stagevu]", "http://stagevu.com/video/" + match, 'Stagevu')

    logger.info("0) Stagevu...")
    patronvideos = "'http://stagevu.com.*?uid\=([^']+)'"
    for match in re.compile(patronvideos, re.DOTALL).findall(data):
        _add_video(devuelve, encontrados, "[Stagevu]", "http://stagevu.com/video/" + match, 'Stagevu')

    logger.info("0) Megavideo... formato d=XXXXXXX")
    patronvideos = 'http://www.megavideo.com/.*?\&d\=([^"]+)"'
    for match in re.compile(patronvideos, re.DOTALL).findall(data):
        _add_video(devuelve, encontrados, "[Megavideo]", match, 'Megavideo')

    logger.info("0) Megavideo... formato watchanimeon")
    patronvideos = 'src="http://wwwstatic.megavideo.com/mv_player.swf.*?\&v\=([^"]+)"'
    for match in re.compile(patronvideos, re.DOTALL).findall(data):
        _add_video(devuelve, encontrados, "[Megavideo]", match, 'Megavideo')

    # Megavideo-style ?d= links are served by Megaupload (title kept as
    # "[Megavideo]" to match the original behaviour)
    logger.info("0) Megaupload... formato megavideo con d=XXXXXXX")
    patronvideos = 'http://www.megavideo.com/\?d\=([^"]+)"'
    for match in re.compile(patronvideos, re.DOTALL).findall(data):
        _add_video(devuelve, encontrados, "[Megavideo]", match, 'Megaupload')

    logger.info("0) Movshare...")
    patronvideos = '"(http://www.movshare.net/video/[^"]+)"'
    for match in re.compile(patronvideos, re.DOTALL).findall(data):
        _add_video(devuelve, encontrados, "[Movshare]", match, 'movshare')

    logger.info("0) Movshare...")
    patronvideos = "'(http://www.movshare.net/embed/[^']+)'"
    for match in re.compile(patronvideos, re.DOTALL).findall(data):
        _add_video(devuelve, encontrados, "[Movshare]", match, 'movshare')

    logger.info("0) Veoh...")
    patronvideos = '"http://www.veoh.com/.*?permalinkId=([^"]+)"'
    for match in re.compile(patronvideos, re.DOTALL).findall(data):
        # Trim any trailing query parameters from the permalink id
        _add_video(devuelve, encontrados, "[Veoh]", match.partition("&")[0], 'veoh')

    logger.info("0) Directo - myspace")
    patronvideos = 'flashvars="file=(http://[^\.]+.myspacecdn[^\&]+)&'
    for match in re.compile(patronvideos, re.DOTALL).findall(data):
        _add_video(devuelve, encontrados, "[Directo]", match, 'Directo')

    logger.info("0) Directo - myspace")
    patronvideos = '(http://[^\.]+\.myspacecdn.*?\.flv)'
    for match in re.compile(patronvideos, re.DOTALL).findall(data):
        _add_video(devuelve, encontrados, "[Directo]", match, 'Directo')

    logger.info("0) Directo - ning")
    patronvideos = '(http://api.ning.com.*?\.flv)'
    for match in re.compile(patronvideos, re.DOTALL).findall(data):
        _add_video(devuelve, encontrados, "[Directo]", match, 'Directo')

    # FIX: original pattern had "file/*?" (zero-or-more slashes) where
    # "file/.*?" (any path) was clearly intended
    logger.info("0) Videoweed...")
    patronvideos = '(http://www.videoweed.com/file/.*?\.flv)'
    for match in re.compile(patronvideos, re.DOTALL).findall(data):
        _add_video(devuelve, encontrados, "[Videoweed]", match, 'videoweed')

    logger.info("0) YouTube...")
    patronvideos = '"http://www.youtube.com/v/([^"]+)"'
    for match in re.compile(patronvideos, re.DOTALL).findall(data):
        _add_video(devuelve, encontrados, "[YouTube]", match, 'youtube')

    # http://video.ak.facebook.com/cfs-ak-ash2/33066/239/133241463372257_27745.mp4
    logger.info("0) Facebook...")
    patronvideos = '(http://video.ak.facebook.com/.*?\.mp4)'
    for match in re.compile(patronvideos, re.DOTALL).findall(data):
        _add_video(devuelve, encontrados, "[Facebook]", match, 'facebook')

    # http://www.4shared.com/embed/392975628/ff297d3f (double- then single-quoted)
    logger.info("0) 4shared...")
    patronvideos = '"(http://www.4shared.com.*?)"'
    for match in re.compile(patronvideos, re.DOTALL).findall(data):
        _add_video(devuelve, encontrados, "[4shared]", match, '4shared')

    logger.info("0) 4shared...")
    patronvideos = "'(http://www.4shared.com.*?)'"
    for match in re.compile(patronvideos, re.DOTALL).findall(data):
        _add_video(devuelve, encontrados, "[4shared]", match, '4shared')

    # file=http://es.video.netlogstatic.com//v/oo/004/398/4398830.flv&
    logger.info("0) netlogicstat...")
    patronvideos = "file\=(http\:\/\/es.video.netlogstatic[^\&]+)\&"
    for match in re.compile(patronvideos, re.DOTALL).findall(data):
        _add_video(devuelve, encontrados, "[Directo]", match, 'Directo')

    logger.info("videobb...")
    patronvideos = "(http\:\/\/videobb.com\/video\/[a-zA-Z0-9]+)"
    for match in re.compile(patronvideos, re.DOTALL).findall(data):
        _add_video(devuelve, encontrados, "[videobb]", match, 'videobb')

    # FIX: the original pattern contained a bare "p**n" which is an invalid
    # regex ("multiple repeat") and raised re.error on every call. The
    # asterisks are escaped here so the pattern compiles; NOTE(review): the
    # hoster name looks censored in the source — confirm the intended literal.
    logger.info("0) megaporn...")
    patronvideos = 'http\:\/\/www.mega(?:(?:p\*\*n)|(?:rotic)?)?\.com/(?:(?:e/)?|\?(?:s=.+?&(?:amp;)?)?(?:(?:v\=)|)?)?([A-Z0-9]{8})'
    for match in re.compile(patronvideos, re.DOTALL).findall(data):
        print(match)  # debugging print kept from the original
        _add_video(devuelve, encontrados, "[megaporn]", match, 'megaporn')

    return devuelve