def listserie(item):
    logger.info("[cineblog01.py] listserie")
    itemlist = []

    # Download the page
    data = scrapertools.cache_page(item.url)
    if DEBUG: logger.info(data)

    # Extract the entries (folders)
    patronvideos = '<div id="covershot"><a[^<]+<p[^<]+<img.*?src="([^"]+)".*?'
    patronvideos += '<div id="post-title"><a href="([^"]+)"><h3>([^<]+)</h3></a></div>[^<]+'
    patronvideos += '<div id="description"><p>(.*?)</p>'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for match in matches:
        scrapedtitle = scrapertools.unescape(match[2])
        scrapedurl = urlparse.urljoin(item.url, match[1])
        scrapedthumbnail = urlparse.urljoin(item.url, match[0])
        scrapedplot = scrapertools.unescape(match[3])
        if DEBUG: logger.info("title=["+scrapedtitle+"], url=["+scrapedurl+"], thumbnail=["+scrapedthumbnail+"]")

        # Add to the XBMC listing
        itemlist.append(
            Item(channel=item.channel, action="findvideos", title=scrapedtitle, url=scrapedurl,
                 thumbnail=scrapedthumbnail, plot=scrapedplot))

    # Put the next page mark
    try:
        next_page = scrapertools.get_match(data, "<link rel='next' href='([^']+)'")
        itemlist.append(
            Item(channel=item.channel, action="listserie", title=">> Next page", url=next_page,
                 thumbnail=scrapedthumbnail, plot=scrapedplot))
    except:
        pass

    return itemlist

def listvideos(params, url, category):
    logger.info("[discoverymx.py] listvideos")
    scrapedthumbnail = ""
    scrapedplot = ""

    # Download the page
    data = scrapertools.cachePage(url)
    #logger.info(data)

    # Extract the entries (folders)
    patronvideos = "<h3 class='post-title entry-title'>[^<]+"
    patronvideos += "<a href='([^']+)'>([^<]+)</a>.*?"
    patronvideos += "<div class='post-body entry-content'>(.*?)<div class='post-footer'>"
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for match in matches:
        scrapedtitle = match[1]
        scrapedtitle = re.sub("<[^>]+>", " ", scrapedtitle)
        scrapedtitle = scrapertools.unescape(scrapedtitle)
        scrapedurl = match[0]

        regexp = re.compile(r'src="(http[^"]+)"')
        matchthumb = regexp.search(match[2])
        if matchthumb is not None:
            scrapedthumbnail = matchthumb.group(1)

        matchplot = re.compile('<div align="center">(<img.*?)</span></div>', re.DOTALL).findall(match[2])
        if len(matchplot) > 0:
            scrapedplot = matchplot[0]
            #print matchplot
        else:
            scrapedplot = ""
        scrapedplot = re.sub("<[^>]+>", " ", scrapedplot)
        scrapedplot = scrapertools.unescape(scrapedplot)
        #scrapedplot = scrapedplot.replace("…","")
        if DEBUG: logger.info("title=["+scrapedtitle+"], url=["+scrapedurl+"], thumbnail=["+scrapedthumbnail+"]")

        # Add to the XBMC listing
        xbmctools.addnewfolder(CHANNELNAME, "detail", category, scrapedtitle, scrapedurl, scrapedthumbnail, scrapedplot)

    # Extract the next page mark
    patronvideos = "<a class='blog-pager-older-link' href='([^']+)'"
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    if len(matches) > 0:
        scrapedtitle = "Página siguiente"
        scrapedurl = urlparse.urljoin(url, matches[0])
        scrapedthumbnail = ""
        scrapedplot = ""
        xbmctools.addnewfolder(CHANNELNAME, "listvideos", category, scrapedtitle, scrapedurl, scrapedthumbnail, scrapedplot)

    # Directory properties
    xbmcplugin.setPluginCategory(handle=int(sys.argv[1]), category=category)
    xbmcplugin.addSortMethod(handle=int(sys.argv[1]), sortMethod=xbmcplugin.SORT_METHOD_NONE)
    xbmcplugin.endOfDirectory(handle=int(sys.argv[1]), succeeded=True)

def peliculasrobalo(item):
    logger.info("[cineblog01.py] peliculasrobalo")
    itemlist = []

    if item.url == "":
        item.url = sito

    # Download the page
    data = scrapertools.cache_page(item.url)
    logger.info(data)

    # Extract the entries (folders)
    patronvideos = '<div class="span4".*?<a.*?<p><img src="([^"]+)".*?'
    patronvideos += '<div class="span8">.*?<a href="([^"]+)"> <h1>([^"]+)</h1></a>.*?'
    patronvideos += '<p><strong>.*?</strong>.*?<br />([^"]+)<a href'
    matches = re.compile(patronvideos, re.DOTALL).finditer(data)

    for match in matches:
        scrapedtitle = scrapertools.unescape(match.group(3))
        scrapedurl = urlparse.urljoin(item.url, match.group(2))
        scrapedthumbnail = urlparse.urljoin(item.url, match.group(1))
        scrapedthumbnail = scrapedthumbnail.replace(" ", "%20")
        scrapedplot = scrapertools.unescape(match.group(4))
        scrapedplot = scrapertools.htmlclean(scrapedplot).strip()
        if DEBUG:
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")
        itemlist.append(
            Item(channel=__channel__, action="findvid", title="[COLOR azure]" + scrapedtitle + "[/COLOR]",
                 url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot,
                 viewmode="movie_with_plot", fanart=scrapedthumbnail))

    # Next page mark
    try:
        bloque = scrapertools.get_match(data, "<div id='wp_page_numbers'>(.*?)</div>")
        patronvideos = '<a href="([^"]+)">></a></li>'
        matches = re.compile(patronvideos, re.DOTALL).findall(bloque)
        scrapertools.printMatches(matches)
        if len(matches) > 0:
            scrapedtitle = "[COLOR orange]Successivo>>[/COLOR]"
            scrapedurl = matches[0]
            scrapedthumbnail = ""
            scrapedplot = ""
            if DEBUG:
                logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")
            itemlist.append(
                Item(channel=__channel__, action="peliculasrobalo", title=scrapedtitle, url=scrapedurl,
                     thumbnail="http://2.bp.blogspot.com/-fE9tzwmjaeQ/UcM2apxDtjI/AAAAAAAAeeg/WKSGM2TADLM/s1600/pager+old.png",
                     plot=scrapedplot))
    except:
        pass

    return itemlist

def novita(item):
    logger.info("[cb01anime.py] novita")
    itemlist = []

    # Download the page
    data = scrapertools.anti_cloudflare(item.url, headers)

    # Extract the entries (folders)
    patronvideos = '<div class="span4"> <a.*?<img src="(.*?)".*?'
    patronvideos += '<div class="span8">.*?<a href="(.*?)">.*?'
    patronvideos += '<h1>(.*?)</h1></a>.*?<br />(.*?)<br>.*?'
    matches = re.compile(patronvideos, re.DOTALL).finditer(data)

    for match in matches:
        scrapedthumbnail = match.group(1)
        scrapedurl = match.group(2)
        scrapedtitle = scrapertools.unescape(match.group(3))
        scrapedplot = scrapertools.unescape(match.group(4))
        scrapedplot = scrapertools.decodeHtmlentities(scrapedplot)
        # NOTE: startswith("") is always True, so the first 64 characters are always stripped
        if scrapedplot.startswith(""):
            scrapedplot = scrapedplot[64:]
        if DEBUG:
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")

        ## ------------------------------------------------
        scrapedthumbnail = httptools.get_url_headers(scrapedthumbnail)
        ## ------------------------------------------------

        # Add to the XBMC listing
        itemlist.append(
            Item(channel=__channel__,
                 action="listacompleta" if scrapedtitle == "Lista Alfabetica Completa Anime/Cartoon" else "episodios",
                 fulltitle=scrapedtitle, show=scrapedtitle, title=scrapedtitle, url=scrapedurl,
                 thumbnail=scrapedthumbnail, viewmode="movie_with_plot", plot=scrapedplot))

    # Put the next page mark
    try:
        next_page = scrapertools.get_match(data, "<link rel='next' href='([^']+)'")
        itemlist.append(
            Item(channel=__channel__, action="HomePage", title="[COLOR yellow]Torna Home[/COLOR]", folder=True))
        itemlist.append(
            Item(channel=__channel__, action="novita", title="[COLOR orange]Successivo>>[/COLOR]", url=next_page,
                 thumbnail="http://2.bp.blogspot.com/-fE9tzwmjaeQ/UcM2apxDtjI/AAAAAAAAeeg/WKSGM2TADLM/s1600/pager+old.png"))
    except:
        pass

    return itemlist

def mainlist(item):
    logger.info("[simpsonita.py] mainlist")
    itemlist = []

    # Download the page
    data = scrapertools.cachePage(sito)
    logger.info(data)

    itemlist.append(
        Item(channel=__channel__, action="mainlist", title="[COLOR green]Ricarica...[/COLOR]"))

    patronvideos = '<div class="random-article random-k2-article ">\s*<div class="title">\s*<h4>\s*<a href="([^"]+)">([^<]+)<\/a>'
    matches = re.compile(patronvideos, re.DOTALL).finditer(data)

    for match in matches:
        scrapedtitle = scrapertools.unescape(match.group(2))
        scrapedurl = sito + match.group(1)
        if DEBUG:
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "]")
        itemlist.append(
            Item(channel=__channel__, action="play", fulltitle=scrapedtitle, show=scrapedtitle,
                 title="[COLOR green]Puntata Random - " + scrapedtitle + "[/COLOR]", url=scrapedurl))

    # Extract the entries (folders)
    patronvideos = '<li><a href="([^"]+)"><span class="catTitle">([^<]+)<\/span>'
    matches = re.compile(patronvideos, re.DOTALL).finditer(data)

    for match in matches:
        scrapedtitle = scrapertools.unescape(match.group(2))
        scrapedurl = sito + match.group(1)
        if DEBUG:
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "]")
        # Add to the XBMC listing
        itemlist.append(
            Item(channel=__channel__, action="listepisodes", fulltitle=scrapedtitle, show=scrapedtitle,
                 title="[COLOR azure]" + scrapedtitle + "[/COLOR]", url=scrapedurl))

    return itemlist

def listvideos(item):
    logger.info("[discoverymx.py] listvideos")
    itemlist = []
    scrapedthumbnail = ""
    scrapedplot = ""

    # Download the page
    data = scrapertools.cache_page(item.url)

    patronvideos = "<h3 class='post-title entry-title'[^<]+"
    patronvideos += "<a href='([^']+)'>([^<]+)</a>.*?"
    patronvideos += "<div class='post-body entry-content'(.*?)<div class='post-footer'>"
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for match in matches:
        scrapedtitle = match[1]
        scrapedtitle = re.sub("<[^>]+>", " ", scrapedtitle)
        scrapedtitle = scrapertools.unescape(scrapedtitle)
        scrapedurl = match[0]

        regexp = re.compile(r'src="(http[^"]+)"')
        matchthumb = regexp.search(match[2])
        if matchthumb is not None:
            scrapedthumbnail = matchthumb.group(1)

        matchplot = re.compile('<div align="center">(<img.*?)</span></div>', re.DOTALL).findall(match[2])
        if len(matchplot) > 0:
            scrapedplot = matchplot[0]
            #print matchplot
        else:
            scrapedplot = ""
        scrapedplot = re.sub("<[^>]+>", " ", scrapedplot)
        scrapedplot = scrapertools.unescape(scrapedplot)
        if DEBUG: logger.info("title=["+scrapedtitle+"], url=["+scrapedurl+"], thumbnail=["+scrapedthumbnail+"]")

        # Add to the XBMC listing
        #xbmctools.addnewfolder( __channel__ , "findevi" , category , scrapedtitle , scrapedurl , scrapedthumbnail, scrapedplot )
        itemlist.append(
            Item(channel=__channel__, action="findvideos", title=scrapedtitle, url=scrapedurl,
                 thumbnail=scrapedthumbnail, plot=scrapedplot, folder=True))

    # Extract the next page mark
    patronvideos = "<a class='blog-pager-older-link' href='([^']+)'"
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    if len(matches) > 0:
        scrapedtitle = "Página siguiente"
        scrapedurl = urlparse.urljoin(item.url, matches[0])
        scrapedthumbnail = ""
        scrapedplot = ""
        itemlist.append(
            Item(channel=__channel__, action="listvideos", title=scrapedtitle, url=scrapedurl,
                 thumbnail=scrapedthumbnail, plot=scrapedplot, folder=True))

    return itemlist

def programas(item):
    logger.info("[clantv.py] programas")
    itemlist = []

    # Download the page
    data = scrapertools.cache_page(item.url)

    # Extract the shows
    patron = '<div class="informacion-serie">[^<]+'
    patron += '<h3>[^<]+'
    patron += '<a href="([^"]+)">([^<]+)</a>[^<]+'
    patron += '</h3>[^<]+'
    patron += '<a[^>]+>[^<]+</a><img.*?src="([^"]+)"><div>(.*?)</div>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    if len(matches) == 0:
        patron = '<div class="informacion-serie"><h3><a href="([^"]+)">([^<]+)</a></h3><a[^>]+>[^<]+</a><img.*?src="([^"]+)"><div>(.*?)</div>'
        matches = re.compile(patron, re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)

    for match in matches:
        scrapedtitle = match[1]
        scrapedtitle = scrapertools.unescape(scrapedtitle)
        scrapedurl = urlparse.urljoin(item.url, match[0])
        scrapedurl = urlparse.urljoin(scrapedurl, "videos")
        scrapedthumbnail = urlparse.urljoin(item.url, match[2])
        scrapedplot = match[3]
        scrapedplot = scrapertools.unescape(scrapedplot).strip()
        scrapedplot = scrapertools.htmlclean(scrapedplot).strip()
        scrapedpage = urlparse.urljoin(item.url, match[0])
        if DEBUG:
            logger.info("scraped title=["+scrapedtitle+"], url=["+scrapedurl+"], thumbnail=["+scrapedthumbnail+"] plot=["+scrapedplot+"]")
        #logger.info(scrapedplot)

        # Add to the listing
        itemlist.append(
            Item(channel=CHANNELNAME, title=scrapedtitle, action="episodios", url=scrapedurl,
                 thumbnail=scrapedthumbnail, plot=scrapedplot, page=scrapedpage, show=scrapedtitle,
                 folder=True))

    # Add the remaining pages
    patron = '<li class="siguiente">[^<]+<a rel="next" title="Ir a la página siguiente" href="([^"]+)">Siguiente'
    matches = re.compile(patron, re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)
    if len(matches) > 0:
        match = matches[0]
        newitem = Item(channel=CHANNELNAME, url=urlparse.urljoin(item.url, match))
        itemlist.extend(programas(newitem))

    return itemlist

def listserie(item):
    logger.info("[cineblog01.py] listserie")
    itemlist = []

    # Download the page
    data = anti_cloudflare(item.url)
    logger.info(data)

    # Extract the entries (folders)
    patronvideos = '<div class="span4">\s*<a href="([^"]+)"><img src="([^"]+)".*?<div class="span8">.*?<h1>([^<]+)</h1></a>(.*?)<br><a'
    matches = re.compile(patronvideos, re.DOTALL).finditer(data)

    for match in matches:
        scrapedtitle = scrapertools.unescape(match.group(3))
        scrapedurl = match.group(1)
        scrapedthumbnail = match.group(2)
        scrapedplot = scrapertools.unescape(match.group(4))
        scrapedplot = scrapertools.htmlclean(scrapedplot).strip()
        if DEBUG:
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")
        # Add to the XBMC listing
        itemlist.append(
            Item(channel=__channel__, action="episodios", fulltitle=scrapedtitle, show=scrapedtitle,
                 title="[COLOR azure]" + scrapedtitle + "[/COLOR]", url=scrapedurl,
                 thumbnail=scrapedthumbnail, plot=scrapedplot))

    # Put the next page mark
    try:
        next_page = scrapertools.get_match(data, "<link rel='next' href='([^']+)'")
        itemlist.append(
            Item(channel=__channel__, action="HomePage", title="[COLOR yellow]Torna Home[/COLOR]", folder=True))
        itemlist.append(
            Item(channel=__channel__, action="listserie", title="[COLOR orange]Successivo>>[/COLOR]",
                 url=next_page,
                 thumbnail="http://2.bp.blogspot.com/-fE9tzwmjaeQ/UcM2apxDtjI/AAAAAAAAeeg/WKSGM2TADLM/s1600/pager+old.png"))
    except:
        pass

    return itemlist

def peliculas(item):
    logger.info("[cineblog01.py] peliculas")
    itemlist = []

    if item.url == "":
        item.url = sito

    # Download the page
    data = scrapertools.cache_page(item.url)
    logger.info(data)

    # Extract the entries (folders)
    patronvideos = '<div id="covershot".*?<a.*?<img src="(.*?)".*?'
    patronvideos += '<div id="post-title"><a href="(.*?)".*?'
    patronvideos += '<h3>(.*?)</h3>(.*?)</p>'
    #patronvideos += '<div id="description"><p>(.?*)</div>'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for match in matches:
        scrapedtitle = scrapertools.unescape(match[2])
        scrapedurl = urlparse.urljoin(item.url, match[1])
        scrapedthumbnail = urlparse.urljoin(item.url, match[0])
        scrapedthumbnail = scrapedthumbnail.replace(" ", "%20")
        scrapedplot = scrapertools.unescape(match[3])
        scrapedplot = scrapertools.htmlclean(scrapedplot).strip()
        if DEBUG:
            logger.info("title=["+scrapedtitle+"], url=["+scrapedurl+"], thumbnail=["+scrapedthumbnail+"]")
        itemlist.append(
            Item(channel=__channel__, action="findvideos", title=scrapedtitle, url=scrapedurl,
                 thumbnail=scrapedthumbnail, plot=scrapedplot, viewmode="movie_with_plot",
                 fanart=scrapedthumbnail))

    # Next page mark
    try:
        bloque = scrapertools.get_match(data, "<div id='wp_page_numbers'>(.*?)</div>")
        # <a href="http://cineblog01.com/page/2/">Avanti
        # <a href="http://www.cineblog01.com/category/streaming/vk/animazione-vk/page/2/">Avanti > </a></li>
        patronvideos = '<a href="([^"]+)">Avanti'
        matches = re.compile(patronvideos, re.DOTALL).findall(data)
        scrapertools.printMatches(matches)
        if len(matches) > 0:
            scrapedtitle = ">> Avanti"
            scrapedurl = matches[0]
            scrapedthumbnail = ""
            scrapedplot = ""
            if DEBUG:
                logger.info("title=["+scrapedtitle+"], url=["+scrapedurl+"], thumbnail=["+scrapedthumbnail+"]")
            itemlist.append(
                Item(channel=__channel__, action="peliculas", title=scrapedtitle, url=scrapedurl,
                     thumbnail=scrapedthumbnail, plot=scrapedplot))
    except:
        pass

    return itemlist

def listserie(item):
    logger.info("[cineblog01.py] listserie")
    itemlist = []

    # Download the page
    data = scrapertools.anti_cloudflare(item.url, headers)

    # Extract the entries (folders)
    patronvideos = '<div class="span4">\s*<a href="([^"]+)"><img src="([^"]+)".*?<div class="span8">.*?<h1>([^<]+)</h1></a>(.*?)<br><a'
    matches = re.compile(patronvideos, re.DOTALL).finditer(data)

    for match in matches:
        scrapedtitle = scrapertools.unescape(match.group(3))
        scrapedurl = match.group(1)
        scrapedthumbnail = match.group(2)
        scrapedplot = scrapertools.unescape(match.group(4))
        scrapedplot = scrapertools.htmlclean(scrapedplot).strip()
        if DEBUG:
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")
        itemlist.append(infoSod(
            Item(channel=__channel__, action="episodios", fulltitle=scrapedtitle, show=scrapedtitle,
                 title="[COLOR azure]" + scrapedtitle + "[/COLOR]", url=scrapedurl,
                 thumbnail=scrapedthumbnail, extra=item.extra, plot=scrapedplot),
            tipo='tv'))

    # Put the next page mark
    try:
        next_page = scrapertools.get_match(data, "<link rel='next' href='([^']+)'")
        itemlist.append(
            Item(channel=__channel__, action="HomePage", title="[COLOR yellow]Torna Home[/COLOR]", folder=True))
        itemlist.append(
            Item(channel=__channel__, action="listserie", title="[COLOR orange]Successivo>>[/COLOR]",
                 url=next_page, extra=item.extra,
                 thumbnail="https://raw.githubusercontent.com/orione7/Pelis_images/master/vari/successivo_P.png"))
    except:
        pass

    return itemlist

def trailer(item):
    logger.info("pelisalacarta.bricocine trailer")
    itemlist = []

    data = get_page(item.url)

    # Trailer iframe pattern (defined but not used below)
    patron = "<iframe width='570' height='400' src='//([^']+)"

    # Look for the links to the videos
    listavideos = servertools.findvideos(data)
    if len(listavideos) == 0:
        itemlist.append(
            Item(channel=__channel__,
                 title="[COLOR gold][B]Esta pelicula no tiene trailer,lo sentimos...[/B][/COLOR]",
                 thumbnail="http://s6.postimg.org/fay99h9ox/briconoisethumb.png",
                 fanart="http://s6.postimg.org/uie8tu1jl/briconoisefan.jpg", folder=False))

    for video in listavideos:
        videotitle = scrapertools.unescape(video[0])
        url = video[1]
        server = video[2]
        #xbmctools.addnewvideo( __channel__ , "play" , category , server , , url , thumbnail , plot )
        title = "[COLOR crimson]Trailer - [/COLOR]"
        itemlist.append(
            Item(channel=__channel__, action="play", server=server, title=title + videotitle, url=url,
                 thumbnail=item.extra, plot=item.plot, fulltitle=item.title,
                 fanart="http://s23.postimg.org/84vkeq863/movietrailers.jpg", folder=False))

    return itemlist

def Videosnuevoslist(params, url, category):
    logger.info("[sonolatino.py] VideosNuevos")

    # Download the page
    data = scrapertools.cachePage(url)
    #logger.info(data)

    # Extract the entries (folders)
    patronvideos = '<tr><td.*?<a href="([^"]+)">'
    patronvideos += '<img src="([^"]+)" '
    patronvideos += 'alt="[^"]+".*?'
    patronvideos += 'width="250">([^<]+)<'
    patronvideos += 'td class.*?<a href="[^>]+>([^<]+)</a></td><td class.*?>([^<]+)</td></tr>'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    #logger.info("matches = "+str(matches))
    if DEBUG:
        scrapertools.printMatches(matches)

    for match in matches:
        # Title
        scrapedtitle = match[2] + " - " + match[3] + " - " + match[4].replace('í', 'i')
        scrapedtitle = scrapertools.unescape(scrapedtitle)
        # URL
        scrapedurl = match[0]
        # Thumbnail
        scrapedthumbnail = match[1]
        imagen = ""
        # Process the rest
        scrapedplot = match[3]
        tipo = match[3]
        # Debugging
        if DEBUG:
            logger.info("scrapedtitle="+scrapedtitle)
            logger.info("scrapedurl="+scrapedurl)
            logger.info("scrapedthumbnail="+scrapedthumbnail)

        # Add to the XBMC listing
        #xbmctools.addthumbnailfolder( __channel__ , scrapedtitle, scrapedurl , scrapedthumbnail, "detail" )
        xbmctools.addnewfolder(__channel__, "detail", category, scrapedtitle, scrapedurl, scrapedthumbnail, scrapedplot)

    # Look for links to the following pages...
    cat = "nuevo"
    patronvideo = patronvideos
    paginasiguientes(patronvideo, data, category, cat)

    # Label (top-right)...
    xbmcplugin.setPluginCategory(handle=int(sys.argv[1]), category=category)
    # Disable sorting...
    xbmcplugin.addSortMethod(handle=int(sys.argv[1]), sortMethod=xbmcplugin.SORT_METHOD_NONE)
    # End of directory...
    xbmcplugin.endOfDirectory(handle=int(sys.argv[1]), succeeded=True)

def pagina_(item):
    logger.info("pelisalacarta.channels.daramatv pagina_ " + item.url)
    itemlist = []
    headers = DEFAULT_HEADERS[:]

    data = scrapertools.cache_page(item.url, headers=headers)
    data1 = scrapertools.get_match(data, '<div class="animes-bot">(.+?)<!-- fin -->')
    data1 = data1.replace('\n', '')
    data1 = data1.replace('\r', '')

    patron = 'href="(\/drama.+?)".+?<\/div>(.+?)<\/div>.+?src="(.+?)".+?titulo">(.+?)<'
    matches = re.compile(patron, re.DOTALL).findall(data1)
    for scrapedurl, scrapedplot, scrapedthumbnail, scrapedtitle in matches:
        title = scrapertools.unescape(scrapedtitle).strip()
        url = urlparse.urljoin(item.url, scrapedurl)
        thumbnail = urlparse.urljoin(host, scrapedthumbnail)
        plot = scrapertools.decodeHtmlentities(scrapedplot)
        itemlist.append(
            Item(channel=__channel__, action="episodios", title=title, url=url,
                 thumbnail=thumbnail, plot=plot, show=title))

    patron = 'href="([^"]+)" class="next"'
    matches = re.compile(patron, re.DOTALL).findall(data)
    for match in matches:
        scrapedurl = urlparse.urljoin(item.url, match)
        scrapedtitle = "Pagina Siguiente >>"
        scrapedthumbnail = ""
        scrapedplot = ""
        itemlist.append(
            Item(channel=__channel__, action="pagina_", title=scrapedtitle, url=scrapedurl,
                 thumbnail=scrapedthumbnail, plot=scrapedplot, folder=True))

    return itemlist

def topVideos(params, url, category):
    url2 = url
    data = scrapertools.cachePage(url)

    patron = '<option value="([^"]+)" >([^<]+)</option>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    if len(matches) > 0:
        opciones = []
        urls = []
        opciones.append("Todo el Tiempo")
        urls.append("http://www.sonolatino.com/topvideos.html")
        opciones.append("Ultimos 2 dias")
        urls.append("http://www.sonolatino.com/topvideos.html?do=recent")
        for match in matches:
            opciones.append(scrapertools.unescape(match[1]))
            urls.append(match[0])

        # Open the selection dialog
        dia = xbmcgui.Dialog()
        seleccion = dia.select("Elige Listar Top por :", opciones)
        logger.info("seleccion=%d" % seleccion)
        if seleccion == -1:
            return
        url2 = urls[seleccion]

    toplist(params, url2, category)

def findvideos(item):
    logger.info("[pelisadicto.py] findvideos")
    itemlist = []

    data = scrapertools.cache_page(item.url)
    data = scrapertools.unescape(data)

    titulo = item.title
    titulo_tmdb = re.sub("([0-9+])", "", titulo.strip())
    oTmdb = Tmdb(texto_buscado=titulo_tmdb, idioma_busqueda="es")
    item.fanart = oTmdb.get_backdrop()

    # Download the page
    # data = scrapertools.cache_page(item.url)

    # Pattern includes the quality
    patron = '#div_\d_\D.+?<img id="([^"]+).*?<span>.*?</span>.*?<span>(.*?)</span>.*?imgdes.*?imgdes/([^\.]+).*?<a href=([^\s]+)'
    matches = re.compile(patron, re.DOTALL).findall(data)

    for scrapedidioma, scrapedcalidad, scrapedserver, scrapedurl in matches:
        title = titulo + "_" + scrapedidioma + "_" + scrapedserver + "_" + scrapedcalidad
        itemlist.append(
            Item(channel=__channel__, action="play", title=title, fulltitle=title, url=scrapedurl,
                 thumbnail=item.thumbnail, plot=item.plot, show=item.show, fanart=item.fanart))

    return itemlist

def novedades_series(item):
    logger.info("[shurweb.py] novedades_series")

    data = scrapertools.cachePage(item.url)
    data = scrapertools.unescape(data)
    data = scrapertools.get_match(
        data, '<div class="tab-pane fade in active" id="series">(.*?)<div class="tab-pane fade" id="pelis">')

    patron = '<a class="video_thumb" href="([^"]+)" rel="bookmark" title="([^"]+)">.*?<img.*?src="([^"]+)"'
    matches = re.compile(patron, re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)

    itemlist = []
    for url, title, thumbnail in matches:
        if DEBUG:
            logger.info("title=[" + title + "], url=[" + url + "], thumbnail=[" + thumbnail + "]")
        itemlist.append(
            Item(channel=__channel__, action="findvideos", title=title, url=url, thumbnail=thumbnail,
                 fanart="http://pelisalacarta.mimediacenter.info/fanart/shurweb.jpg"))

    return itemlist

def play(item):
    logger.info("pelisalacarta.bricocine play")

    itemlist = servertools.find_video_items(data=item.url)

    data = scrapertools.cache_page(item.url)
    listavideos = servertools.findvideos(data)

    for video in listavideos:
        videotitle = scrapertools.unescape(video[0])
        url = item.url
        server = video[2]
        # xbmctools.addnewvideo( __channel__ , "play" , category , server , , url , thumbnail , plot )
        itemlist.append(
            Item(channel=__channel__, action="play", server=server, title="Trailer - " + videotitle,
                 url=url, thumbnail=item.thumbnail, plot=item.plot, fulltitle=item.title,
                 fanart="http://s23.postimg.org/84vkeq863/movietrailers.jpg", folder=False))

    return itemlist

def mainlist(item):
    logger.info("[southparkita.py] mainlist")
    itemlist = []

    # Download the page
    data = scrapertools.cache_page(host)
    logger.info(data)

    itemlist.append(
        Item(channel=__channel__, action="mainlist", title="[COLOR green]Ricarica...[/COLOR]"))

    # Extract the entries (folders)
    patronvideos = '<li id="menu-item-\d{4}.*?\d{4}"><a href="([^"]+)">([^<]+)<\/a><\/li>'
    matches = re.compile(patronvideos, re.DOTALL).finditer(data)

    for match in matches:
        scrapedtitle = scrapertools.unescape(match.group(2))
        scrapedurl = match.group(1)
        if DEBUG:
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "]")
        # Add to the XBMC listing
        itemlist.append(
            Item(channel=__channel__, action="listepisodes", fulltitle=scrapedtitle, show=scrapedtitle,
                 title="[COLOR azure]" + scrapedtitle + "[/COLOR]", url=scrapedurl))

    return itemlist

def findvideos(item):
    logger.info("[cinevos.py] findvideos")

    # Download the page
    data = scrapertools.cachePage(item.url)
    logger.info(data)

    # Check whether there is a subtitle
    patronvideos = '<a href="(http://www.cinevos.com/sub/[^"]+)"'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    sub = ""
    if len(matches):
        sub = matches[0]
        logger.info("con subtitulo :%s" % sub)

    # Look for the description
    patronvideos = '<p>(<div.*?</div>) </p>'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    plot = ""
    if len(matches):
        plot = re.sub("<[^>]+>", "", matches[0])

    # Look for the links to the videos
    listavideos = servertools.findvideos(data)
    itemlist = []
    for video in listavideos:
        videotitle = scrapertools.unescape(video[0])
        #print videotitle
        url = video[1]
        server = video[2]
        if "Megaupload" in videotitle:
            videotitle = item.title + " - [Megaupload]"
        else:
            videotitle = item.title + " - " + videotitle
        itemlist.append(
            Item(channel=CHANNELNAME, action="play", server=server, title=videotitle, url=url,
                 thumbnail=item.thumbnail, plot=plot, subtitle=sub, folder=False))

    return itemlist

def mainlist(item):
    logger.info("[griffinita.py] mainlist")
    itemlist = []

    # Download the page
    data = scrapertools.cache_page(host)

    patronvideos = '<li><a href="([^"]+)"><span class="catTitle">([^<]+)<\/span>'
    matches = re.compile(patronvideos, re.DOTALL).finditer(data)

    for match in matches:
        scrapedtitle = scrapertools.unescape(match.group(2))
        scrapedurl = host + match.group(1)
        if DEBUG:
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "]")
        # Add to the XBMC listing
        itemlist.append(
            Item(channel=__channel__, action="listepisodes", fulltitle=scrapedtitle, show=scrapedtitle,
                 title="[COLOR azure]" + scrapedtitle + "[/COLOR]", url=scrapedurl))

    return itemlist

def listepisodes(item):
    logger.info("[simpsonita.py] listepisodes")
    itemlist = []

    # Download the page
    data = scrapertools.cachePage(item.url)
    logger.info(data)

    # Extract the entries (folders)
    patronvideos = '<h3 class="catItemTitle">\s*<a href="([^"]+)">([^<]+)<\/a>'
    matches = re.compile(patronvideos, re.DOTALL).finditer(data)

    for match in matches:
        scrapedtitle = scrapertools.unescape(match.group(2)).strip()
        scrapedurl = sito + match.group(1)
        if DEBUG:
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "]")
        # Add to the XBMC listing
        itemlist.append(
            Item(channel=__channel__, action="play", fulltitle=scrapedtitle, show=scrapedtitle,
                 title="[COLOR azure]" + scrapedtitle + "[/COLOR]", url=scrapedurl))

    return itemlist

def peliculas(item):
    logger.info("[pornhub.py] peliculas")
    itemlist = []

    # Download the page
    data = scrapertools.downloadpage(item.url)
    data = scrapertools.unescape(data)
    data = scrapertools.find_single_match(data, '<ul class="nf-videos videos row-4-thumbs">(.*?)<div class="pre-footer">')

    # Extract the movies
    patron = '<div class="phimage">.*?'
    patron += '<a href="/view_video.php\?viewkey=([^"]+)" title="([^"]+).*?'
    patron += '<var class="duration">([^<]+)</var>(.*?)</div>.*?'
    patron += 'data-smallthumb="([^"]+)"'
    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for viewkey, scrapedtitle, duration, scrapedhd, thumbnail in matches:
        title = scrapedtitle
        scrapedhd = scrapertools.find_single_match(scrapedhd, '<span class="hd-thumbnail">(.*?)</span>')
        url = 'http://es.pornhub.com/embed/' + urllib.quote(viewkey, safe="%/:&?")
        thumbnail = urllib.quote(thumbnail, safe="%/:&?")
        if DEBUG:
            logger.info("title=["+title+"], url=["+url+"], thumbnail=["+thumbnail+"]")
        itemlist.append(
            Item(channel=__channel__, action="play", title=title, url=url, duration=duration,
                 quality=scrapedhd, fanart=__fanart__, thumbnail=thumbnail))

    # Paginator
    patron = '<li class="page_next"><a href="([^"]+)"'
    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    if len(matches) > 0:
        url = urlparse.urljoin("http://es.pornhub.com", matches[0])
        itemlist.append(
            Item(channel=__channel__, action="peliculas", title="Página siguiente >", fanart=__fanart__, url=url))

    return itemlist

def series(item):
    logger.info()

    data = scrapertools.cache_page(item.url)

    patron = '<div class="post" id="post"[^<]+<center><h1 class="post-title entry-title"[^<]+<a href="([^"]+)">' \
             '(.*?)</a>[^<]+</h1></center>[^<]+<div[^<]+</div>[^<]+<div[^<]+<div.+?<img src="([^"]+)"'
    matches = re.compile(patron, re.DOTALL).findall(data)
    itemlist = []

    for scrapedurl, scrapedtitle, scrapedthumbnail in matches:
        title = scrapertools.unescape(scrapedtitle)
        fulltitle = title
        url = urlparse.urljoin(item.url, scrapedurl)
        thumbnail = urlparse.urljoin(item.url, scrapedthumbnail)
        show = title
        logger.debug("title=[{0}], url=[{1}], thumbnail=[{2}]".format(title, url, thumbnail))
        itemlist.append(
            Item(channel=item.channel, action="episodios", title=title, url=url, thumbnail=thumbnail,
                 show=show, fulltitle=fulltitle, fanart=thumbnail, folder=True))

    patron = '</span><a class="page larger" href="([^"]+)"'
    matches = re.compile(patron, re.DOTALL).findall(data)
    for match in matches:
        scrapedurl = match
        scrapedtitle = ">> Pagina Siguiente"
        itemlist.append(
            Item(channel=item.channel, action="series", title=scrapedtitle, url=scrapedurl,
                 folder=True, viewmode="movies_with_plot"))

    return itemlist

def generos(item):
    logger.info("[pornhub.py] generos")
    itemlist = []

    # Download the page
    data = scrapertools.downloadpage(item.url)
    data = scrapertools.unescape(data)
    data = scrapertools.find_single_match(data, '<div id="categoriesStraightImages">(.*?)</ul>')

    # Extract the categories
    patron = '<li class="cat_pic" data-category="\d+">.*?'
    patron += '<a href="([^"]+)">'
    patron += '<img src="([^"]+)" '
    patron += 'alt="([^"]+)"'
    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for scrapedurl, scrapedthumbnail, scrapedtitle in matches:
        title = scrapedtitle
        if "?" in scrapedurl:
            url = urlparse.urljoin(item.url, scrapedurl + "&o=cm")
        else:
            url = urlparse.urljoin(item.url, scrapedurl + "?o=cm")
        thumbnail = urllib.quote(scrapedthumbnail, safe="%/:&?")
        itemlist.append(
            Item(channel=__channel__, action="peliculas", title=title, url=url,
                 fanart=__fanart__, thumbnail=thumbnail))

    itemlist.sort(key=lambda x: x.title)
    return itemlist

def menuaz(item):
    logger.info("[shurweb.py] menuaz")

    data = scrapertools.cachePage(item.url)
    data = scrapertools.unescape(data)
    data = scrapertools.get_match(data, '<ul class="pagination pagination-lg">(.*?)</div>')

    patron = '<li><a href="(.*?)" rel="nofollow">(.*?)</a></li>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)

    itemlist = []
    for url, letra in matches:
        if "peliculas" in item.url or "documentales" in item.url:
            itemlist.append(
                Item(channel=__channel__, title=letra, action="peliculas", url=url,
                     fanart="http://pelisalacarta.mimediacenter.info/fanart/shurweb.jpg"))
        else:
            itemlist.append(
                Item(channel=__channel__, title=letra, action="series", url=url,
                     fanart="http://pelisalacarta.mimediacenter.info/fanart/shurweb.jpg"))

    return itemlist

def novedades_peliculas(item):
    logger.info("[shurweb.py] novedades_peliculas")

    data = scrapertools.cachePage(item.url)
    data = scrapertools.unescape(data)
    data = scrapertools.get_match(
        data, '<div class="tab-pane fade" id="pelis">(.*?)<div class="tab-pane fade" id="docus"')

    return peliculas(item, data=data)

def listanime(item):
    logger.info("[cineblog01.py] listanime")
    itemlist = []

    # Download the page
    data = scrapertools.cache_page(item.url)
    logger.info(data)

    # Extract the entries (folders)
    patronvideos = '<div class="span4"> <a.*?<img.*?src="(.*?)".*?'
    patronvideos += '<div class="span8">.*?<a href="(.*?)">.*?'
    patronvideos += '<h1>(.*?)</h1></a>.*?<br />(.*?)<br>.*?'
    matches = re.compile(patronvideos, re.DOTALL).finditer(data)

    for match in matches:
        scrapedthumbnail = match.group(1)
        scrapedurl = match.group(2)
        scrapedtitle = scrapertools.unescape(match.group(3))
        scrapedplot = scrapertools.unescape(match.group(4))
        # NOTE: startswith("") is always True, so the first 149 characters are always stripped
        if scrapedplot.startswith(""):
            scrapedplot = scrapedplot[149:]
        if DEBUG:
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")
        # Add to the XBMC listing
        itemlist.append(
            Item(channel=__channel__, action="findvid_anime", title="[COLOR azure]" + scrapedtitle + "[/COLOR]",
                 url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot))

    # Put the next page mark
    try:
        next_page = scrapertools.get_match(data, "<link rel='next' href='([^']+)'")
        itemlist.append(
            Item(channel=__channel__, action="listanime", title="[COLOR orange]Successivo>>[/COLOR]",
                 url=next_page,
                 thumbnail="http://2.bp.blogspot.com/-fE9tzwmjaeQ/UcM2apxDtjI/AAAAAAAAeeg/WKSGM2TADLM/s1600/pager+old.png"))
    except:
        pass

    return itemlist

def listepisodes(item):
    logger.info("[southparkita.py] listepisodes")
    logger.info(item.url)
    itemlist = []

    # Download the page
    data = scrapertools.cache_page(item.url)

    # Keep appending numbered pages until a "Pagina non trovata" page comes back
    cicla = True
    cnt = 2
    while cicla:
        page_data = scrapertools.cache_page(item.url + 'page/' + str(cnt) + '/')
        data = data + page_data
        logger.info(item.url + 'page/' + str(cnt) + '/')
        patronvideos = '<title>Pagina non trovata.*?<\/title>'
        cnt += 1
        logger.info(str(cnt))
        # finditer() always returns a (truthy) iterator, so test for an actual
        # match on the page just fetched instead
        if re.search(patronvideos, page_data, re.DOTALL):
            cicla = False

    logger.info(data)

    # Extract the entries (folders)
    patronvideos = '<h1 class="entry-title noMarginTop"><a href="([^"]+)".*?>([^<]+)<\/a><\/h1>'
    matches = re.compile(patronvideos, re.DOTALL).finditer(data)

    for match in matches:
        scrapedtitle = scrapertools.unescape(match.group(2)).strip()
        scrapedurl = match.group(1)
        if DEBUG:
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "]")
        # Add to the XBMC listing
        itemlist.append(
            Item(channel=__channel__, action="findvideos", fulltitle=item.fulltitle, show=item.show,
                 title="[COLOR azure]" + scrapedtitle + "[/COLOR]", url=scrapedurl))

    if config.get_library_support() and len(itemlist) != 0:
        itemlist.append(
            Item(channel=__channel__, title=item.title, url=item.url, action="add_serie_to_library",
                 extra="listepisodes", show=item.show))
        itemlist.append(
            Item(channel=item.channel, title="Scarica tutti gli episodi della serie", url=item.url,
                 action="download_all_episodes", extra="listepisodes", show=item.show))

    return itemlist

def devuelveListaEpisodios(params, url, category):
    """Scrapes the episodes page and returns it as a list of dictionaries

    UPDATE 25-02-2011: The latest changes on the tvshack site made this procedure
    needlessly complex. Redesigned --> now follows the usual pelisalacarta channel conventions. <--

    [{Episodio}] List of dictionaries with the data for each episode

    Returning the episodes as dictionaries lets us add or remove keys at will,
    leaving the design open to extra information in the channels where it exists.
    The basic keys at the time this channel was written are:
        'title'     : Episode title - for the library, preferably in the format
                      SHOW_NAME - SEASONxEPISODE EPISODE_TITLE WHATEVER
        'url'       : Episode URL
        'plot'      : Episode summary (or the series summary if the episode has none on this channel)
        'thumbnail' : Episode still or series cover art
    """
    if params.has_key("Serie"):
        serie = params.get("Serie")
    else:
        serie = ""

    # Download the page
    data = scrapertools.cachePage(url)
    temporada = '0'

    # Extract the episodes with regular expressions (pattern)
    # e.g. Series: <li class="listm"><a href="/tv/Family_Guy/season_1/episode_1/">ep1. Death Has a Shadow</a><a href=""><span>31/1/1999</span></a></li>
    # e.g. Anime:  DEPRECATED <li><a href="http://tvshack.bz/anime/07_Ghost/season_1/episode_5/">ep5. Episode 5</a><a href=""><span>??/??/????</span></a></li>
    # e.g. Music:  DEPRECATED <li><a href="http://tvshack.bz/music/Michael_Jackson/C85E8225E45E/">Black Or White<span>2,301 views</span></a></li><li><a
    patronepisodios = '''(?x)                 # Enable VERBOSE mode.
        <li\ class="listm"><a\ href="         # Noise
        (?:http://tvshack\.bz)?([^"]+)"       # \g1 = (relative) path of the episode/video
        [^>]*>                                # Noise
        ([0-9]+)                              # \g2 = Season
        x([0-9]+)                             # \g3 = Episode number
        \ ([^<]+)                             # \g4 = Episode name
        <\/a><\/li>                           # Noise
        '''
    episodiosREO = re.compile(patronepisodios)  # Regular Expression Object (REO)

    listaEp = []  # Episode list
    Ep = {}       # Dictionary with each episode's info

    # UPDATE 25-2-2011: The new site has no series info or air dates either
    Ep['thumbnail'] = ""
    Ep['plot'] = ""

    for match in episodiosREO.finditer(data):
        if category != 'Musica':
            title = match.expand(serie + ' - \g<2>x\g<3> - \g<4>')  # with expand, referenced groups start at 1
        else:
            # legacy branch for the old (deprecated) Musica pattern; the current pattern has no group 5
            title = match.expand('\g<3> (visto \g<5> veces)')
        # Episode URL
        Ep['title'] = scrapertools.unescape(title)
        Ep['url'] = TVSHACK_URL + match.group(1)
        listaEp.append(Ep.copy())  # Add the episode to the list (it must be copied)

    return listaEp

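# For reference, a minimal sketch of the list devuelveListaEpisodios() returns,
# following the key set described in its docstring. The titles and URLs below are
# illustrative values built from the docstring's own example markup, not real
# scraped data:
#
#     [
#         {'title': 'Family Guy - 1x01 - Death Has a Shadow',            # SHOW - SEASONxEPISODE TITLE
#          'url': 'http://tvshack.bz/tv/Family_Guy/season_1/episode_1/',
#          'plot': '',          # the new site exposes no summaries...
#          'thumbnail': ''},    # ...or artwork, so these stay empty
#     ]
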
def listvideos(item):
    logger.info("[peliculasid.py] listvideos")

    # Download the page
    data = scrapertools.cachePage(item.url)
    logger.info(data)

    # Extract the entries (folders)
    patronvideos = 'id="filmga[^"]+" class="filmgal">(.*?<strong>Duraci[^<]+</strong>[^<]+</div>)'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    itemlist = []

    for match in matches:
        # URL
        try:
            scrapedurl = re.compile(r'href="(.+?)"').findall(match)[0]
        except:
            continue
        # Title
        try:
            scrapedtitle = re.compile(r'<span class="titulotool">(.+?)</div>').findall(match.replace("\n", ""))[0]
            scrapedtitle = re.sub("<[^>]+>", "", scrapedtitle)
            try:
                scrapedtitle = scrapertools.unescape(scrapedtitle)
            except:
                pass
        except:
            scrapedtitle = "sin titulo"
        # Thumbnail
        try:
            scrapedthumbnail = re.compile(r'src="(.+?)"').findall(match)[0]
        except:
            scrapedthumbnail = ""
        # Plot
        try:
            scrapedplot = re.compile(r'<div class="sinopsis">(.+?)</div>').findall(match)[0]
            scrapedplot = re.sub("<[^>]+>", " ", scrapedplot).strip()
        except:
            scrapedplot = ""

        # Add to the XBMC listing
        itemlist.append(
            Item(channel=__channel__, action="play", title=scrapedtitle.strip(), url=scrapedurl,
                 thumbnail=scrapedthumbnail, plot=scrapedplot, extra=scrapedplot, context="4|5",
                 viewmode="movie", folder=False))

    # Extract the next page mark
    patronvideos = "<span class='current'>[^<]+</span><a href='(.+?)' class='page larger'>"
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    if len(matches) > 0:
        scrapedtitle = "Página siguiente"
        scrapedurl = urlparse.urljoin(item.url, matches[0])
        scrapedthumbnail = ""
        scrapedplot = ""
        itemlist.append(
            Item(channel=__channel__, action="listvideos", title=scrapedtitle, url=scrapedurl,
                 thumbnail=scrapedthumbnail, plot=scrapedplot))

    return itemlist

def peliculas(item):
    logger.info("streamondemand.hdstreamingit peliculas")
    itemlist = []

    # Download the page
    data = scrapertools.cache_page(item.url, timeout=5)

    # Extract the entries (folders)
    patronvideos = '<a class="link_image" href="[^"]+" title="Permalink to (.*?)">.*?'
    patronvideos += '<img src="([^"]+)" alt="">.*?'
    patronvideos += '<div class="button_yellow"><a(?: target="_blank")? href="([^"]+)"'
    matches = re.compile(patronvideos, re.DOTALL).finditer(data)

    for match in matches:
        scrapedurl = urlparse.urljoin(item.url, match.group(3))
        scrapedthumbnail = urlparse.urljoin(item.url, match.group(2))
        scrapedtitle = scrapertools.unescape(match.group(1))
        scrapedplot = ""
        if DEBUG:
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")
        if 'adfoc.us' in scrapedurl:
            scrapedurl = importio_url + scrapedurl
        if 'adf.ly' in scrapedurl:
            scrapedurl = dec_fly + scrapedurl
        itemlist.append(
            infoSod(Item(channel=__channel__,
                         action="episodios" if item.extra == "serie" else "findvideos",
                         fulltitle=scrapedtitle, show=scrapedtitle,
                         title="[COLOR azure]" + scrapedtitle + "[/COLOR]",
                         url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot,
                         extra=item.extra, folder=True), tipo='movie'))

    # Extract the paginator
    patronvideos = "<link rel='next' href='([^']+)' />"
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    if len(matches) > 0:
        scrapedurl = urlparse.urljoin(item.url, matches[0])
        itemlist.append(
            Item(channel=__channel__, action="HomePage", title="[COLOR yellow]Torna Home[/COLOR]", folder=True))
        itemlist.append(
            Item(channel=__channel__, action="peliculas", extra=item.extra,
                 title="[COLOR orange]Successivo >>[/COLOR]", url=scrapedurl,
                 thumbnail="http://2.bp.blogspot.com/-fE9tzwmjaeQ/UcM2apxDtjI/AAAAAAAAeeg/WKSGM2TADLM/s1600/pager+old.png",
                 folder=True))

    return itemlist

def animestream(item):
    logger.info("[cineblog01.py] animestream")
    itemlist = []

    if item.url == "":
        item.url = sito

    # Download the page
    data = scrapertools.cache_page(item.url)
    logger.info(data)

    # Extract the entries (folders)
    patronvideos = '<div class="span4"> <a.*?<p><img.*?src="(.*?)".*?'
    patronvideos += '<div class="span8">.*?<a href="(.*?)">.*?'
    patronvideos += '<h1>(.*?)</h1></a>.*?<br>-->(.*?)<br>.*?'
    matches = re.compile(patronvideos, re.DOTALL).finditer(data)

    for match in matches:
        scrapedtitle = scrapertools.unescape(match.group(3))
        scrapedurl = urlparse.urljoin(item.url, match.group(2))
        scrapedthumbnail = urlparse.urljoin(item.url, match.group(1))
        scrapedthumbnail = scrapedthumbnail.replace(" ", "%20")
        scrapedplot = scrapertools.unescape(match.group(4))
        scrapedplot = scrapertools.htmlclean(scrapedplot).strip()
        if DEBUG:
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")
        itemlist.append(
            Item(channel=__channel__, action="findvid_anime", title="[COLOR azure]" + scrapedtitle + "[/COLOR]",
                 url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot,
                 viewmode="movie_with_plot", fanart=scrapedthumbnail))

    # Next page mark
    try:
        bloque = scrapertools.get_match(data, "<div id='wp_page_numbers'>(.*?)</div>")
        patronvideos = '<a href="([^"]+)">></a></li>'
        matches = re.compile(patronvideos, re.DOTALL).findall(bloque)
        scrapertools.printMatches(matches)
        if len(matches) > 0:
            scrapedtitle = "[COLOR orange]Successivo>>[/COLOR]"
            scrapedurl = matches[0]
            scrapedthumbnail = ""
            scrapedplot = ""
            if DEBUG:
                logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")
            itemlist.append(
                Item(channel=__channel__, action="animestream", title=scrapedtitle, url=scrapedurl,
                     thumbnail="http://2.bp.blogspot.com/-fE9tzwmjaeQ/UcM2apxDtjI/AAAAAAAAeeg/WKSGM2TADLM/s1600/pager+old.png",
                     plot=scrapedplot))
    except:
        pass

    return itemlist

def peliculas_tmdb(item):
    logger.info("streamondemand-pureita streaminghd peliculas")
    itemlist = []

    # Download the page
    data = httptools.downloadpage(item.url).data

    # Extract the entries (folders)
    patron = '<img src="([^"]+)" alt="([^<]+)">\s*<div class="mepo"><span class="quality">\s*([^<]+).*?'
    patron += '</div><div class="rating"><span class="icon-star2"></span>\s*(.*?)</div>\s*'
    patron += '<a href="([^"]+)">'
    matches = re.compile(patron, re.DOTALL).finditer(data)

    for match in matches:
        scrapedurl = urlparse.urljoin(item.url, match.group(5))
        votes = scrapertools.unescape(match.group(4))
        quality = scrapertools.unescape(match.group(3))
        scrapedtitle = scrapertools.unescape(match.group(2))
        scrapedthumbnail = urlparse.urljoin(item.url, match.group(1))
        scrapedthumbnail = httptools.get_url_headers(scrapedthumbnail)

        if "SUB-ITA" in scrapedtitle or "sub-ita" in scrapedtitle:
            lang = " ([COLOR yellow]Sub-Ita[/COLOR])"
        else:
            lang = ""
        if votes:
            votes = " ([COLOR yellow]" + votes.strip() + "[/COLOR])"
        if quality:
            quality = " ([COLOR yellow]" + quality.lower().strip() + "[/COLOR])"

        scrapedtitle = scrapedtitle.replace("’", "'").replace(" & ", " ").replace("[SUN-ITA]", "").strip()

        itemlist.append(
            infoSod(Item(channel=__channel__, action="findvideos", contentType="movie",
                         fulltitle=scrapedtitle, show=scrapedtitle,
                         title=scrapedtitle + quality + votes,
                         url=scrapedurl, thumbnail=scrapedthumbnail, plot="", folder=True),
                    tipo='movie'))

    # Extract the paginator
    patronvideos = '<a href="([^"]+)"><span class="icon-chevron-right">'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    if len(matches) > 0:
        scrapedurl = urlparse.urljoin(item.url, matches[0])
        itemlist.append(
            Item(channel=__channel__, action="peliculas",
                 title="[COLOR orange]Successivi >>[/COLOR]", url=scrapedurl,
                 thumbnail="https://raw.githubusercontent.com/orione7/Pelis_images/master/channels_icon_pureita/next_1.png",
                 folder=True))

    return itemlist

def listatipoVideo(params, url, category):
    logger.info("[sonolatino.py] listatipoVideo")

    # Searches for the listing type: FECHA | VISTAS | RATING
    # url = listarpor(params,url,category)
    if len(url) == 0:
        return

    # Download the page
    data = scrapertools.cachePage(url)

    # Extract the entries (folders)
    if url == "http://www.sonolatino.com/index.html":
        patronvideos = '<li class="item">[^<]+<a href="([^"]+)"><img src="([^"]+)" alt="([^"]+)" class="imag".*?/></a>'
        cat = "viendose"
    else:
        patronvideos = '<li class="video">[^<]+<div class="video_i">[^<]+<a href="([^"]+)"'
        patronvideos += '>[^<]+<img src="([^"]+)" alt="([^"]+)".*?<span class="artist_name">([^<]+)</span>'
        cat = "tipo"

    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    #logger.info("matches = "+matches[0])

    scrapedplot = ""
    for match in matches:
        # Title
        scrapedtitle = scrapertools.unescape(match[2])
        # URL
        scrapedurl = match[0]
        # Thumbnail
        scrapedthumbnail = match[1]
        # Process the rest
        if cat == "tipo":
            scrapedplot = scrapertools.unescape(match[3])
        else:
            for campo in re.findall("/(.*?)/", match[0]):
                scrapedplot = campo
        # Debugging
        if DEBUG:
            logger.info("scrapedtitle="+scrapedtitle)
            logger.info("scrapedurl="+scrapedurl)
            logger.info("scrapedthumbnail="+scrapedthumbnail)

        # Add to the XBMC listing
        xbmctools.addnewfolder(__channel__, "detail", category, scrapedtitle + " - " + scrapedplot,
                               scrapedurl, scrapedthumbnail, scrapedplot)

    # -------------------------------------------
    # Search for the next page
    if cat == "tipo":
        patron_pagina_sgte = '</span><a href="([^"]+)"'
        paginasiguientes(patron_pagina_sgte, data, category, cat)

    # Label (top-right)...
    xbmcplugin.setPluginCategory(handle=int(sys.argv[1]), category=category)
    # Disable sorting...
    xbmcplugin.addSortMethod(handle=int(sys.argv[1]), sortMethod=xbmcplugin.SORT_METHOD_NONE)
    # End of directory...
    xbmcplugin.endOfDirectory(handle=int(sys.argv[1]), succeeded=True)

def series(item):
    logger.info("pelisalacarta.channels.animeflv series")

    data = scrapertools.anti_cloudflare(item.url, headers=CHANNEL_DEFAULT_HEADERS, host=CHANNEL_HOST)
    '''
    <div class="aboxy_lista">
        <a href="/ova/nurarihyon-no-mago-ova.html" title="Nurarihyon no Mago OVA">
            <img class="lazy portada" src="/img/blank.gif" data-original="http://cdn.animeflv.net/img/portada/1026.jpg" alt="Nurarihyon no Mago OVA"/>
        </a>
        <span style="float: right; margin-top: 0px;" class="tipo_1"></span>
        <a href="/ova/nurarihyon-no-mago-ova.html" title="Nurarihyon no Mago OVA" class="titulo">
            Nurarihyon no Mago OVA
        </a>
        <div class="generos_links">
            <b>Generos:</b>
            <a href="/animes/genero/accion/">Acción</a>,
            <a href="/animes/genero/shonen/">Shonen</a>,
            <a href="/animes/genero/sobrenatural/">Sobrenatural</a>
        </div>
        <div class="sinopsis">
            La historia empieza en alrededor de 100 años después de la desaparición de Yamabuki Otome,
            la primera esposa Rihan Nura. Rihan por fin recobró la compostura y la vida vuelve a la
            normalidad. A medida que la cabeza del Clan Nura, está ocupado trabajando en la construcción
            de un mundo armonioso para los seres humanos y youkai. Un día, él ve a Setsura molesta por
            lo que decide animarla tomando el clan para ir a disfrutar de las aguas termales …
        </div>
    </div>
    '''
    patron = '<div class="aboxy_lista"[^<]+'
    patron += '<a href="([^"]+)"[^<]+<img class="[^"]+" src="[^"]+" data-original="([^"]+)"[^<]+</a[^<]+'
    patron += '<span[^<]+</span[^<]+'
    patron += '<a[^>]+>([^<]+)</a.*?'
    patron += '<div class="sinopsis">(.*?)</div'
    matches = re.compile(patron, re.DOTALL).findall(data)
    itemlist = []

    for scrapedurl, scrapedthumbnail, scrapedtitle, scrapedplot in matches:
        title = scrapertools.unescape(scrapedtitle)
        fulltitle = title
        url = urlparse.urljoin(item.url, scrapedurl)
        thumbnail = urlparse.urljoin(item.url, scrapedthumbnail)
        plot = scrapertools.htmlclean(scrapedplot)
        show = title
        #if DEBUG: logger.info("title=[{0}], url=[{1}], thumbnail=[{2}]".format(title, url, thumbnail))
        itemlist.append(
            Item(channel=item.channel, action="episodios", title=title, url=url, thumbnail=thumbnail,
                 plot=plot, show=show, fulltitle=fulltitle, fanart=thumbnail, folder=True))

    patron = '<a href="([^"]+)">\»\;</a>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    for match in matches:
        scrapedurl = urlparse.urljoin(item.url, match)
        scrapedtitle = ">> Pagina Siguiente"
        scrapedthumbnail = ""
        scrapedplot = ""
        itemlist.append(
            Item(channel=item.channel, action="series", title=scrapedtitle, url=scrapedurl,
                 thumbnail=scrapedthumbnail, plot=scrapedplot, folder=True, viewmode="movies_with_plot"))

    return itemlist

def findvideos(item):
    logger.info()
    itemlist = list()

    data = httptools.downloadpage(item.url).data
    data = scrapertools.unescape(data)
    soup = BeautifulSoup(data, "html5lib", from_encoding="utf-8")

    lang = soup.find_all("b")
    lang_list = get_langs(lang)
    lang_count = 0

    for tab_soup in soup.find_all("div", class_="contenedor_tab"):
        lang = lang_list[lang_count]
        for elem in tab_soup.find_all("iframe"):
            title = ""
            # The "data-data" attribute holds a base64 chunk plus a plain suffix
            enc_url = scrapertools.find_single_match(elem["data-data"], '([^\+]+)\+(.+)?')
            s = base64.b64decode(enc_url[0]).decode('utf-8')
            i = enc_url[1]
            hidden_url = "https://encriptando.com" + s + i
            hidden_data = httptools.downloadpage(hidden_url, follow_redirects=False,
                                                 headers={'Referer': host}).data
            var, val = scrapertools.find_single_match(hidden_data.replace("'", '"'),
                                                      'var (k|s)="([^"]+)";')
            url = decrypt(var, val)
            if var == "k":
                url += "|%s" % item.url
            if not config.get_setting('unify'):
                title = ' [%s]' % lang
            # the '%s' placeholder is filled with the server name below
            itemlist.append(
                Item(channel=item.channel, title='%s' + title, url=url, action='play',
                     language=lang, infoLabels=item.infoLabels))
        lang_count += 1

    itemlist = servertools.get_servers_itemlist(itemlist, lambda x: x.title % x.server.capitalize())

    # Required by FilterTools
    itemlist = filtertools.get_links(itemlist, item, list_language)

    # Required by AutoPlay
    autoplay.start(itemlist, item)

    if config.get_videolibrary_support() and len(itemlist) > 0 and item.extra != 'findvideos':
        itemlist.append(
            Item(channel=item.channel,
                 title='[COLOR yellow]Añadir esta pelicula a la videoteca[/COLOR]',
                 url=item.url, action="add_pelicula_to_library", extra="findvideos",
                 contentTitle=item.contentTitle))

    return itemlist

def peliculas_list(item):
    logger.info("[streamondemand-pureita altadefinizione01_zone] peliculas_list")
    itemlist = []

    # Download the page
    data = httptools.downloadpage(item.url, headers=headers).data

    # Extract the entries (folders)
    patron = '<img\s*[^>]+src="([^"]+)[^>]+>\s*</a>\s*</td>\s*[^>]+>'
    patron += '<h2>\s*<a href="([^"]+)"\s*title=".*?">([^<]+)</a>\s*</h2></td>.*?'
    patron += '<td class="mlnh-3">(.*?)</td>.*?<td class="mlnh-4">(.*?)</td>'
    matches = re.compile(patron, re.DOTALL).finditer(data)

    for match in matches:
        scrapedplot = ""
        quality = scrapertools.unescape(match.group(5))
        year = scrapertools.unescape(match.group(4))
        scrapedtitle = scrapertools.unescape(match.group(3))
        scrapedurl = scrapertools.unescape(match.group(2))
        scrapedthumbnail = urlparse.urljoin(item.url, match.group(1))

        if year:
            fulltitle = scrapedtitle.strip() + " (" + year + ")"
        else:
            fulltitle = scrapedtitle
        if quality:
            quality = " ([COLOR yellow]" + quality + "[/COLOR])"
        if year:
            year = " ([COLOR yellow]" + year + "[/COLOR])"

        itemlist.append(
            infoSod(Item(channel=__channel__, action="findvideos", contentType="movie",
                         fulltitle=fulltitle, show=fulltitle,
                         title="[COLOR azure]" + scrapedtitle + "[/COLOR]" + year + quality,
                         url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot,
                         folder=True), tipo='movie'))

    # Extract the paginator
    patronvideos = 'href="([^"]+)">»</a></i>'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    if len(matches) > 0:
        scrapedurl = urlparse.urljoin(item.url, matches[0])
        itemlist.append(
            Item(channel=__channel__, action="peliculas_list",
                 title="[COLOR orange]Successivi >>[/COLOR]", url=scrapedurl,
                 thumbnail="https://raw.githubusercontent.com/orione7/Pelis_images/master/channels_icon_pureita/next_1.png",
                 folder=True))

    return itemlist

def episodios(item):
    logger.info()
    itemlist = []

    data = httptools.downloadpage(item.url).data
    datas = re.sub(r"\n|\r|\t|&nbsp;|<br>", "", data)
    # logger.info(datas)

    patron = '<div class="imagen"><a href="([^"]+)">.*?'   # episode url, img
    patron += '<div class="numerando">(.*?)</div>.*?'      # episode numbering
    patron += '<a href="[^"]+">([^<]+)</a>'                # episode title
    matches = scrapertools.find_multiple_matches(datas, patron)

    for scrapedurl, scrapedtitle, scrapedname in matches:
        scrapedtitle = scrapedtitle.replace('--', '0')
        patron = '(\d+) - (\d+)'
        match = re.compile(patron, re.DOTALL).findall(scrapedtitle)
        season, episode = match[0]

        if 'season' in item.infoLabels and int(item.infoLabels['season']) != int(season):
            continue

        title = "%sx%s: %s" % (season, episode.zfill(2), scrapertools.unescape(scrapedname))
        new_item = item.clone(title=title, url=scrapedurl, action="findvideos", text_color=color3,
                              fulltitle=title, contentType="episode")
        if 'infoLabels' not in new_item:
            new_item.infoLabels = {}
        new_item.infoLabels['season'] = season
        new_item.infoLabels['episode'] = episode.zfill(2)
        itemlist.append(new_item)

    # TODO: skip this when adding to the video library
    if not item.extra:
        # Fetch the data for every episode of the season using multiple threads
        tmdb.set_infoLabels(itemlist, __modo_grafico__)
        for i in itemlist:
            if i.infoLabels['title']:
                # If the episode has its own name, add it to the item title
                i.title = "%sx%s %s" % (i.infoLabels['season'], i.infoLabels['episode'], i.infoLabels['title'])
            if i.infoLabels.has_key('poster_path'):
                # If the episode has its own image, use it instead of the poster
                i.thumbnail = i.infoLabels['poster_path']
        itemlist.sort(key=lambda it: int(it.infoLabels['episode']),
                      reverse=config.get_setting('orden_episodios', __channel__))

    tmdb.set_infoLabels_itemlist(itemlist, __modo_grafico__)

    # "Añadir esta serie a la videoteca" option
    if config.get_videolibrary_support() and len(itemlist) > 0:
        itemlist.append(
            Item(channel=__channel__, title="Añadir esta serie a la videoteca", url=item.url,
                 action="add_serie_to_library", extra="episodios", show=item.show,
                 category="Series", text_color=color1, thumbnail=thumbnail_host, fanart=fanart_host))

    return itemlist

def findvid_film(item):
    def load_links(itemlist, re_txt, color, desc_txt):
        streaming = scrapertools.find_single_match(data, re_txt)
        patron = '<td><a[^h]href="([^"]+)"[^>]+>([^<]+)<'
        matches = re.compile(patron, re.DOTALL).findall(streaming)
        for scrapedurl, scrapedtitle in matches:
            logger.debug("##### findvideos %s ## %s ## %s ##" % (desc_txt, scrapedurl, scrapedtitle))
            title = "[COLOR " + color + "]" + desc_txt + ":[/COLOR] " + item.title + " [COLOR grey]" + QualityStr + "[/COLOR] [COLOR blue][" + scrapedtitle + "][/COLOR]"
            itemlist.append(
                Item(channel=__channel__, action="play", title=title, url=scrapedurl,
                     fulltitle=item.fulltitle, thumbnail=item.thumbnail, show=item.show, folder=False))

    logger.info("[cineblog01.py] findvid_film")
    itemlist = []

    # Load the page
    data = httptools.downloadpage(item.url, headers=headers).data
    data = scrapertools.decodeHtmlentities(data)

    # Extract the quality format
    patronvideos = '>([^<]+)</strong></div>'
    matches = re.compile(patronvideos, re.DOTALL).finditer(data)
    QualityStr = ""
    for match in matches:
        QualityStr = scrapertools.unescape(match.group(1))[6:]

    # STREAMANGO
    # matches = []
    # u = scrapertools.find_single_match(data, '(?://|\.)streamango\.com/(?:f/|embed/)?[0-9a-zA-Z]+')
    # if u: matches.append((u, 'Streamango'))

    # Extract the contents - Streaming
    load_links(itemlist, '<strong>Streaming:</strong>(.*?)<table height="30">', "orange", "Streaming")
    # Extract the contents - Streaming HD
    load_links(itemlist, '<strong>Streaming HD[^<]+</strong>(.*?)<table height="30">', "yellow", "Streaming HD")
    # Extract the contents - Streaming 3D
    load_links(itemlist, '<strong>Streaming 3D[^<]+</strong>(.*?)<table height="30">', "pink", "Streaming 3D")
    # Extract the contents - Download
    load_links(itemlist, '<strong>Download:</strong>(.*?)<table height="30">', "aqua", "Download")
    # Extract the contents - Download HD
    load_links(itemlist, '<strong>Download HD[^<]+</strong>(.*?)<table width="100%" height="20">', "azure", "Download HD")

    if len(itemlist) == 0:
        itemlist = servertools.find_video_items(item=item)

    return itemlist

def videos(item):
    logger.info("[pornoactricesx.py] videos")
    itemlist = []
    mas = True
    data = ""
    url = item.url

    while len(itemlist) < 25 and mas:
        data = scrapertools.cachePage(url)
        data = scrapertools.unescape(data)

        patron = '<div class="field field-name-title field-type-ds field-label-hidden view-mode-teaser"><div class="field-items"><div class="field-item even"><h1><a href="([^"]+)">([^"]+)</a></h1></div></div></div> </div>'
        patron += '[^<]{4}<div class="group-left">[^<]{5}<div class="field field-name-field-imagen-del-video field-type-image field-label-hidden view-mode-teaser"><div class="field-items">'
        patron += '<figure class="clearfix field-item even"><a href="([^"]+)"><img class="image-style-medium" src="([^"]+)"'
        matches = re.compile(patron, re.DOTALL).findall(data)

        for url, title, url2, thumbnail in matches:
            scrapedtitle = title.replace(" Vídeo porno completo.", "")
            scrapedurl = urlparse.urljoin("http://www.pornoactricesx.com", url)
            scrapedthumbnail = thumbnail
            scrapedplot = ""
            # Debug
            if DEBUG:
                logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")
            itemlist.append(Item(channel=__channel__, action='play', title=scrapedtitle,
                                 url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot))

        # Pattern 2 for search results
        patron = '<div class="field field-name-title field-type-ds field-label-hidden view-mode-search_result">'
        patron += '<div class="field-items"><div class="field-item even"><h1><a href="([^"]+)">([^"]+)</a></h1></div></div></div> </div>'
        patron += '[^<]{4}<div class="group-left">[^<]{5}<div class="field field-name-field-imagen-del-video field-type-image field-label-hidden view-mode-search_result"><div class="field-items"><figure class="clearfix field-item even"><a href="([^"]+)"><img class="image-style-medium" src="([^"]+)" width='
        matches = re.compile(patron, re.DOTALL).findall(data)

        for url, title, url2, thumbnail in matches:
            scrapedtitle = title.replace(" Vídeo porno completo.", "")
            scrapedurl = urlparse.urljoin("http://www.pornoactricesx.com", url)
            scrapedthumbnail = thumbnail
            scrapedplot = ""
            # Debug
            if DEBUG:
                logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")
            itemlist.append(Item(channel=__channel__, action='play', title=scrapedtitle,
                                 url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot))

        patron = '<a title="Ir a la página siguiente" href="([^<]+)">siguiente ›</a>'
        matches = re.compile(patron, re.DOTALL).findall(data)
        if len(matches) > 0:
            url = "http://www.pornoactricesx.com" + matches[0]
            mas = True
        else:
            mas = False

    # Pager
    patron = '<a title="Ir a la página siguiente" href="([^<]+)">siguiente ›</a>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    if len(matches) > 0:
        scrapedurl = "http://www.pornoactricesx.com" + matches[0]
        itemlist.append(Item(channel=__channel__, action="videos", title="Página Siguiente",
                             url=scrapedurl, thumbnail="", folder=True))

    return itemlist
def findvideos(item):
    logger.info()
    itemlist = []

    data = get_source(item.url)
    data = scrapertools.find_single_match(data, '<div id="marco-post">.*?<div id="sidebar">')
    data = scrapertools.unescape(data)
    data = scrapertools.decodeHtmlentities(data)

    options_regex = '<a href="#tab.*?">.*?<b>(.*?)</b>'
    option_matches = re.compile(options_regex, re.DOTALL).findall(data)
    video_regex = '<iframe.*?src="(.*?)".*?</iframe>'
    video_matches = re.compile(video_regex, re.DOTALL).findall(data)

    # for option, scrapedurl in matches:
    # map(None, ...) pairs both lists, padding the shorter one with None (Python 2)
    for option, scrapedurl in map(None, option_matches, video_matches):
        if scrapedurl is None:
            continue

        scrapedurl = scrapedurl.replace('"', '').replace('&amp;', '&')

        try:
            data_video = get_source(scrapedurl)
        except Exception as e:
            logger.info('Error in url: ' + scrapedurl)
            continue

        # logger.info(data_video)

        # This site uses several intermediate pages, each with its own rules.
        source_headers = dict()
        source_headers["Content-Type"] = "application/x-www-form-urlencoded; charset=UTF-8"
        source_headers["X-Requested-With"] = "XMLHttpRequest"

        if scrapedurl.find("https://repro") != 0:
            logger.info("Case 0: external url")
            url = scrapedurl
            itemlist.append(Item(channel=item.channel, title=option, url=url, action='play',
                                 language=IDIOMA))
        elif scrapedurl.find("pi76823.php") > 0:
            logger.info("Case 1")
            source_data = get_source(scrapedurl)
            source_regex = 'post\( "(.*?)", { acc: "(.*?)", id: \'(.*?)\', tk: \'(.*?)\' }'
            source_matches = re.compile(source_regex, re.DOTALL).findall(source_data)
            for source_page, source_acc, source_id, source_tk in source_matches:
                source_url = scrapedurl[0:scrapedurl.find("pi76823.php")] + source_page
                source_result = httptools.downloadpage(source_url,
                                                       post='acc=' + source_acc + '&id=' + source_id + '&tk=' + source_tk,
                                                       headers=source_headers)
                if source_result.code == 200:
                    source_json = jsontools.load(source_result.data)
                    itemlist.append(Item(channel=item.channel, title=option,
                                         url=source_json['urlremoto'], action='play',
                                         language=IDIOMA))
        elif scrapedurl.find("pi7.php") > 0:
            logger.info("Case 2")
            source_data = get_source(scrapedurl)
            source_regex = 'post\( "(.*?)", { acc: "(.*?)", id: \'(.*?)\', tk: \'(.*?)\' }'
            source_matches = re.compile(source_regex, re.DOTALL).findall(source_data)
            for source_page, source_acc, source_id, source_tk in source_matches:
                source_url = scrapedurl[0:scrapedurl.find("pi7.php")] + source_page
                source_result = httptools.downloadpage(source_url,
                                                       post='acc=' + source_acc + '&id=' + source_id + '&tk=' + source_tk,
                                                       headers=source_headers)
                if source_result.code == 200:
                    source_json = jsontools.load(source_result.data)
                    itemlist.append(Item(channel=item.channel, title=option,
                                         url=source_json['urlremoto'], action='play',
                                         language=IDIOMA))
        elif scrapedurl.find("reproducir120.php") > 0:
            logger.info("Case 3")
            source_data = get_source(scrapedurl)
            videoidn = scrapertools.find_single_match(source_data, 'var videoidn = \'(.*?)\';')
            tokensn = scrapertools.find_single_match(source_data, 'var tokensn = \'(.*?)\';')
            source_regex = 'post\( "(.*?)", { acc: "(.*?)"'
            source_matches = re.compile(source_regex, re.DOTALL).findall(source_data)
            for source_page, source_acc in source_matches:
                source_url = scrapedurl[0:scrapedurl.find("reproducir120.php")] + source_page
                source_result = httptools.downloadpage(source_url,
                                                       post='acc=' + source_acc + '&id=' + videoidn + '&tk=' + tokensn,
                                                       headers=source_headers)
                if source_result.code == 200:
                    source_json = jsontools.load(source_result.data)
                    urlremoto_regex = "file:'(.*?)'"
                    urlremoto_matches = re.compile(urlremoto_regex, re.DOTALL).findall(source_json['urlremoto'])
                    if len(urlremoto_matches) == 1:
                        itemlist.append(Item(channel=item.channel, title=option,
                                             url=urlremoto_matches[0], action='play',
                                             language=IDIOMA))
        elif scrapedurl.find("reproducir14.php") > 0:
            logger.info("Case 4")
            source_data = get_source(scrapedurl)
            source_regex = '<div id="player-contenido" vid="(.*?)" name="(.*?)"'
            source_matches = re.compile(source_regex, re.DOTALL).findall(source_data)
            videoidn = source_matches[0][0]
            tokensn = source_matches[0][1]
            source_regex = 'post\( "(.*?)", { acc: "(.*?)"'
            source_matches = re.compile(source_regex, re.DOTALL).findall(source_data)
            for source_page, source_acc in source_matches:
                source_url = scrapedurl[0:scrapedurl.find("reproducir14.php")] + source_page
                source_result = httptools.downloadpage(source_url,
                                                       post='acc=' + source_acc + '&id=' + videoidn + '&tk=' + tokensn,
                                                       headers=source_headers)
                if source_result.code == 200:
                    source_json = jsontools.load(source_result.data)
                    itemlist.append(Item(channel=item.channel, title=option,
                                         url=source_json['urlremoto'], action='play',
                                         language=IDIOMA))
        else:
            logger.info("New case")

    itemlist = servertools.get_servers_itemlist(itemlist)

    # Required by FilterTools
    itemlist = filtertools.get_links(itemlist, item, list_language)

    # Required by AutoPlay
    autoplay.start(itemlist, item)

    if config.get_videolibrary_support() and len(itemlist) > 0 and item.extra != 'findvideos':
        itemlist.append(Item(channel=item.channel,
                             title='[COLOR yellow]Añadir esta pelicula a la videoteca[/COLOR]',
                             url=item.url, action="add_pelicula_to_library", extra="findvideos",
                             contentTitle=item.contentTitle))

    return itemlist
def peliculas(item): logger.info("streamondemand-pureita.cineblogrun peliculas") itemlist = [] # Descarga la pagina data = httptools.downloadpage(item.url, headers=headers).data # Extrae las entradas (carpetas) patron = r'<a href="([^"]+)">\s*<div class="Image">\s*<figure clas[^>]+><img[^>]+src="([^"]+)"\s*' patron += r'class[^>]+><\/figure>\s*<\/div>\s*<h3 class="Title">(.*?)<\/h3>.*?' patron += r'<span[^>]+>([^<]+)</span><span class="Qlty">([^<]+)</span>.*?' patron += r'.*?<p>(.*?)</p>' matches = re.compile(patron, re.DOTALL).finditer(data) for match in matches: scrapedplot = scrapertools.unescape(match.group(6)) quality = scrapertools.unescape(match.group(5)) year = scrapertools.unescape(match.group(4)) scrapedtitle = scrapertools.unescape(match.group(3)) scrapedthumbnail = scrapertools.unescape(match.group(2)) scrapedurl = urlparse.urljoin(item.url, match.group(1)) scrapedtitle = scrapedtitle.replace("&", "e") if "." in year or "h" in year: year = "" else: year = " ([COLOR yellow]" + year + "[/COLOR])" if "1080" in quality or "720" in quality: quality = " ([COLOR yellow]HD[/COLOR])" else: if "Unknown" in quality: quality = " ([COLOR yellow]NA[/COLOR])" else: quality = " ([COLOR yellow]LQ[/COLOR])" itemlist.append( infoSod(Item(channel=__channel__, action="findvideos", contentType="movie", fulltitle=scrapedtitle, show=scrapedtitle, title="[COLOR azure]" + scrapedtitle + "[/COLOR]" + year + quality, url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot, folder=True), tipo='movie')) patronvideos = '<a class="next page-numbers" href="([^"]+)">' matches = re.compile(patronvideos, re.DOTALL).findall(data) if len(matches) > 0: scrapedurl = urlparse.urljoin(item.url, matches[0]) itemlist.append( Item( channel=__channel__, action="peliculas", title="[COLOR orange]Successivi >>[/COLOR]", url=scrapedurl, thumbnail= "https://raw.githubusercontent.com/orione7/Pelis_images/master/channels_icon_pureita/next_1.png", folder=True)) return itemlist
def registerOrLogin(page_url):
    if config.get_setting('username', server='hdmario') and config.get_setting('password', server='hdmario'):
        if login():
            return True
    if platformtools.dialog_yesno('HDmario',
                                  'Questo server necessita di un account, ne hai già uno oppure vuoi tentare una registrazione automatica?',
                                  yeslabel='Accedi', nolabel='Tenta registrazione'):
        from specials import setting
        from core.item import Item
        user_pre = config.get_setting('username', server='hdmario')
        password_pre = config.get_setting('password', server='hdmario')
        setting.server_config(Item(config='hdmario'))
        user_post = config.get_setting('username', server='hdmario')
        password_post = config.get_setting('password', server='hdmario')

        if user_pre != user_post or password_pre != password_post:
            return registerOrLogin(page_url)
        else:
            return False
    else:
        import random
        import string

        logger.debug('Automatic registration in progress')
        mailbox = Gmailnator()
        randPsw = ''.join(random.choice(string.ascii_letters + string.digits) for i in range(10))

        captcha = httptools.downloadpage(baseUrl + '/captchaInfo').json
        logger.debug('email: ' + mailbox.address)
        # NOTE: the next two statements were masked ("******") in the source; they are
        # restored from context: the generated password is logged, then the registration
        # dialog is opened with the temporary mailbox and password prefilled.
        logger.debug('pass: ' + randPsw)
        reg = platformtools.dialog_register(baseUrl + '/register/', email=True, password=True,
                                            email_default=mailbox.address, password_default=randPsw,
                                            captcha_img=captcha['captchaUrl'])
        if not reg:
            return False

        regPost = httptools.downloadpage(baseUrl + '/register/',
                                         post={'email': reg['email'],
                                               'email_confirmation': reg['email'],
                                               'password': reg['password'],
                                               'password_confirmation': reg['password'],
                                               'captchaUuid': captcha['captchaUuid'],
                                               'captcha': reg['captcha']})

        if '/register' in regPost.url:
            error = scrapertools.htmlclean(scrapertools.find_single_match(regPost.data, 'Impossibile proseguire.*?</div>'))
            error = scrapertools.unescape(scrapertools.re.sub('\n\s+', ' ', error))
            platformtools.dialog_ok('HDmario', error)
            return False

        if reg['email'] == mailbox.address:
            mail = mailbox.waitForMail()
            if mail:
                checkUrl = scrapertools.find_single_match(mail.body, 'href="([^"]+)">Premi qui').replace(r'\/', '/')
                logger.debug('CheckURL: ' + checkUrl)
                httptools.downloadpage(checkUrl)
                config.set_setting('username', mailbox.address, server='hdmario')
                config.set_setting('password', randPsw, server='hdmario')
                # NOTE: part of this dialog call was masked in the source; the password
                # argument is restored from context.
                platformtools.dialog_ok('HDmario',
                                        'Registrato automaticamente con queste credenziali:\nemail:' + mailbox.address + '\npass: ' + randPsw)
            else:
                platformtools.dialog_ok('HDmario', 'Impossibile registrarsi automaticamente')
                return False
        else:
            platformtools.dialog_ok('HDmario',
                                    'Hai modificato la mail quindi KoD non sarà in grado di effettuare la verifica in autonomia, apri la casella ' + reg['email'] + ' e clicca sul link. Premi ok quando fatto')

    logger.debug('Registration completed')
    return True
def listado(item): logger.info("pelisalacarta.channels.pelispedia listado") itemlist = [] action = "findvideos" if item.extra == 'serie': action = "episodios" data = anti_cloudflare(item.url) data = re.sub(r"\n|\r|\t|\s{2}| |<Br>|<BR>|<br>|<br/>|<br />|-\s", "", data) # logger.info("data -- {}".format(data)) patron = '<li[^>]+><a href="([^"]+)" alt="([^<]+).*?<img src="([^"]+).*?>.*?<span>\(([^)]+).*?' \ '<p class="font12">(.*?)</p>' matches = re.compile(patron, re.DOTALL).findall(data) for scrapedurl, scrapedtitle, scrapedthumbnail, scrapedyear, scrapedplot in matches: title = "{title} ({year})".format(title=scrapertools.unescape( scrapedtitle.strip()), year=scrapedyear) plot = scrapertools.entityunescape(scrapedplot) itemlist.append( Item(channel=__channel__, title=title, url=urlparse.urljoin(HOST, scrapedurl), action=action, thumbnail=scrapedthumbnail, plot=plot, context="", show=scrapertools.unescape(scrapedtitle.strip()), extra=item.extra)) # no se muestra ordenado porque la paginación de la página no se hace correctamente # itemlist.sort(key=lambda item: item.title) # numero de registros que se muestran por página, es fijo por cada paginación if len(matches) == 48: file_php = "more" tipo_serie = "" if item.extra == "movies": anio = scrapertools.find_single_match(item.url, "(?:year=)(\w+)") letra = scrapertools.find_single_match(item.url, "(?:letra=)(\w+)") genero = scrapertools.find_single_match(item.url, "(?:gender=|genre=)(\w+)") params = "letra={letra}&year={year}&genre={genero}".format( letra=letra, year=anio, genero=genero) else: tipo2 = scrapertools.find_single_match(item.url, "(?:series/|tipo2=)(\w+)") tipo_serie = "&tipo=serie" if tipo2 != "all": file_php = "letra" tipo_serie += "&tipo2=" + tipo2 genero = "" if tipo2 == "anio": genero = scrapertools.find_single_match( item.url, "(?:anio/|genre=)(\w+)") if tipo2 == "genero": genero = scrapertools.find_single_match( item.url, "(?:genero/|genre=)(\w+)") if tipo2 == "letra": genero = scrapertools.find_single_match( item.url, "(?:letra/|genre=)(\w+)") params = "genre={genero}".format(genero=genero) url = "http://www.pelispedia.tv/api/{file}.php?rangeStart=48&rangeEnd=48{tipo_serie}&{params}".\ format(file=file_php, tipo_serie=tipo_serie, params=params) if "rangeStart" in item.url: ant_inicio = scrapertools.find_single_match( item.url, "rangeStart=(\d+)&") inicio = str(int(ant_inicio) + 48) url = item.url.replace("rangeStart=" + ant_inicio, "rangeStart=" + inicio) itemlist.append( Item(channel=__channel__, action="listado", title=">> Página siguiente", extra=item.extra, url=url)) return itemlist
def peliculas(item): logger.info("[streamondemand-pureita altadefinizione01_zone] peliculas") itemlist = [] # Descarga la pagina data = httptools.downloadpage(item.url, headers=headers).data # Extrae las entradas (carpetas) patron = '<h2>\s*<a href="([^"]+)">([^"]+)<\/a>\s*<\/h2>\s*[^>]+>[^>]+.*?\s*' patron += '</div>\s*<a href[^>]+>[^>]+src="([^"]+)"[^>]+>\s*</a>\s*' patron += '<div class="trdublaj">\s*(.*?)</div>\s*[^>]+>(.*?)\s*<' patron += '.*?<li>\s*<span class="ml[^"]+">(.*?)<\/.*?span>\s*<\/li>\s*' patron += '<li><span class="ml-label">([^<]+)</span></li>.*?<p>(.*?)</p>' matches = re.compile(patron, re.DOTALL).finditer(data) for match in matches: scrapedplot = scrapertools.unescape(match.group(8)) year = scrapertools.unescape(match.group(7)) rating = scrapertools.unescape(match.group(6)) sub = scrapertools.unescape(match.group(5)) quality = scrapertools.unescape(match.group(4)) scrapedthumbnail = urlparse.urljoin(item.url, match.group(3)) scrapedtitle = scrapertools.unescape(match.group(2)) scrapedurl = scrapertools.unescape(match.group(1)) if year: scrapetitle = scrapedtitle.strip() + " (" + year + ")" else: scrapetitle = scrapedtitle if sub: sub = " ([COLOR yellow]" + sub + "[/COLOR])" if quality: quality = " ([COLOR yellow]" + quality + "[/COLOR])" if year: year = " ([COLOR yellow]" + year + "[/COLOR])" if rating: rating = rating.replace("<b>", "") rating = " ([COLOR yellow]" + rating + "[/COLOR])" itemlist.append( infoSod(Item(channel=__channel__, action="findvideos", contentType="movie", fulltitle=scrapetitle, show=scrapetitle, title="[COLOR azure]" + scrapedtitle + "[/COLOR] " + sub + year + quality + rating, url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot, folder=True), tipo='movie')) # Extrae el paginador patronvideos = 'href="([^"]+)">»</a></i>' matches = re.compile(patronvideos, re.DOTALL).findall(data) if len(matches) > 0: scrapedurl = urlparse.urljoin(item.url, matches[0]) itemlist.append( Item( channel=__channel__, action="peliculas", title="[COLOR orange]Successivi >>[/COLOR]", url=scrapedurl, thumbnail= "https://raw.githubusercontent.com/orione7/Pelis_images/master/channels_icon_pureita/next_1.png", folder=True)) return itemlist
def novedades_tv(item):
    logger.info("[streamondemand-pureita filmsenzalimiti] novedades")
    itemlist = []

    # Download the page
    data = httptools.downloadpage(item.url, headers=headers).data

    # Extract the entries (folders)
    patron = '<li><a href="([^"]+)" data-thumbnail="([^"]+)"><div>\s*'
    patron += '<div class="title">([^<]+)</div>\s*'
    patron += '<div class="episode" title="Voto IMDb">([^<]+)</div>'
    matches = re.compile(patron, re.DOTALL).finditer(data)

    for match in matches:
        imdb = scrapertools.unescape(match.group(4))
        scrapedtitle = scrapertools.unescape(match.group(3))
        scrapedthumbnail = scrapertools.unescape(match.group(2))
        scrapedurl = urlparse.urljoin(item.url, match.group(1))

        if "[HD]" in scrapedtitle:
            quality = " ([COLOR yellow]HD[/COLOR])"
        else:
            quality = ""
        if "HD" in imdb or "N/A" in imdb or "N/D" in imdb:
            imdb = ""
        else:
            imdb = " ([COLOR yellow]" + imdb + "[/COLOR])"

        scrapedplot = ""
        scrapedtitle = scrapedtitle.replace(" [HD]", "").replace(" & ", " e ")
        scrapedtitle = scrapedtitle.replace(" – ", " - ").replace("’", "'")
        scrapedtitle = scrapedtitle.strip()
        scrapedtitle = scrapertools.decodeHtmlentities(scrapedtitle)

        itemlist.append(infoSod(Item(channel=__channel__, action="episodios", contentType="tv",
                                     fulltitle=scrapedtitle, show=scrapedtitle,
                                     title="[COLOR azure]" + scrapedtitle + "[/COLOR]" + quality + imdb,
                                     url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot,
                                     folder=True), tipo='tv'))

    # Extract the pager
    patronvideos = '<li><a href="([^"]+)" >Pagina successiva'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    if len(matches) > 0:
        scrapedurl = urlparse.urljoin(item.url, matches[0])
        itemlist.append(Item(channel=__channel__, action="novedades_tv",
                             title="[COLOR orange]Successivi >>[/COLOR]", url=scrapedurl,
                             thumbnail="https://raw.githubusercontent.com/orione7/Pelis_images/master/channels_icon_pureita/next_1.png",
                             folder=True))

    return itemlist
def peliculas_update(item):
    logger.info("[streamondemand-pureita altadefinizione01_zone] peliculas_update")
    itemlist = []

    # Download the page
    data = httptools.downloadpage(item.url, headers=headers).data
    patron = '<div class="son_eklenen_head"></div>(.*?)<div id="right_bar">'
    data = scrapertools.find_single_match(data, patron)

    # Extract the entries (folders)
    patron = '</div>\s*<a href="([^"]+)">\s*' \
             '<img width=".*?"\s*height=".*?" src="([^"]+)" [^>]+ alt="([^<]+)"\s*title="".*?/>.*?' \
             '</a>\s*<div class="trdublaj">\s*(.*?)</div>\s*[^>]+>(.*?)\s*<' \
             '.*?<li><span class="ml-label">([^<]+)</span></li>.*?<p>(.*?)</p>'
    matches = re.compile(patron, re.DOTALL).finditer(data)

    for match in matches:
        scrapedplot = scrapertools.unescape(match.group(7))
        year = scrapertools.unescape(match.group(6))
        sub = scrapertools.unescape(match.group(5))
        quality = scrapertools.unescape(match.group(4))
        scrapedtitle = scrapertools.unescape(match.group(3))
        scrapedthumbnail = urlparse.urljoin(item.url, match.group(2))
        scrapedurl = scrapertools.unescape(match.group(1))

        if sub:
            sub = " ([COLOR yellow]" + sub + "[/COLOR])"
        if quality:
            quality = " ([COLOR yellow]" + quality + "[/COLOR])"
        if year:
            year = " ([COLOR yellow]" + year + "[/COLOR])"

        itemlist.append(infoSod(Item(channel=__channel__, action="findvideos", contentType="movie",
                                     fulltitle=scrapedtitle, show=scrapedtitle,
                                     title="[COLOR azure]" + scrapedtitle + "[/COLOR] " + sub + year + quality,
                                     url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot,
                                     folder=True), tipo='movie'))

    # Extract the pager
    patronvideos = 'href="([^"]+)">»</a></i>'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    if len(matches) > 0:
        scrapedurl = urlparse.urljoin(item.url, matches[0])
        itemlist.append(Item(channel=__channel__, action="peliculas_update",
                             title="[COLOR orange]Successivi >>[/COLOR]", url=scrapedurl,
                             thumbnail="https://raw.githubusercontent.com/orione7/Pelis_images/master/channels_icon_pureita/next_1.png",
                             folder=True))

    return itemlist
def findvideos(item): logger.info("pelisalacarta.sinluces findvideos") itemlist = [] # Descarga la pagina data = scrapertools.cache_page(item.url) data = re.sub(r"\n|\r|\t|\s{2}| ", "", data) #extra enlaces patron = '<div class="play-c">(.*?)<div class="datos">' matches = re.compile(patron, re.DOTALL).findall(data) if not "hqq" in data: itemlist.append( Item( channel=__channel__, title= "[COLOR orange][B]Sin servidores para Pelisalacarta...[/B][/COLOR]", thumbnail="http://s6.postimg.org/55zljwr4h/sinnoisethumb.png", fanart="http://s6.postimg.org/avfu47xap/sinnoisefan.jpg", folder=False)) for bloque_enlaces_idiomas in matches: patronenlaces = '<div id="play-(.*?)".*?src="([^"]+)"' matchesenlaces = re.compile(patronenlaces, re.DOTALL).findall(bloque_enlaces_idiomas) patronidiomas = '<a href="#play-(.*?)">([^<]+)' matchesidiomas = re.compile(patronidiomas, re.DOTALL).findall(bloque_enlaces_idiomas) for numero, scrapedurl in matchesenlaces: url = scrapedurl for numero2, idiomas in matchesidiomas: if numero == numero2: title = idiomas idiomas = re.sub(r"[0-9]", "", idiomas) listavideos = servertools.findvideos(url) for video in listavideos: idiomas = idiomas.replace( idiomas, "[COLOR white]" + idiomas + "[/COLOR]") videotitle = scrapertools.unescape( video[0]) + "-" + idiomas url = video[1] server = video[2] videotitle = videotitle.replace( videotitle, "[COLOR skyblue]" + videotitle + "[/COLOR]") title_first = "[COLOR gold]Ver en--[/COLOR]" title = title_first + videotitle itemlist.append( Item(channel=__channel__, action="play", server=server, title=title, url=url, thumbnail=item.extra, fulltitle=item.title, fanart=item.show, folder=False)) #otro patronenlaces patronenlaces = '<div id="play-(.*?)".*?src=\'([^\']+)\'' matchesenlaces = re.compile(patronenlaces, re.DOTALL).findall(bloque_enlaces_idiomas) patronidiomas = '<a href="#play-(.*?)">([^<]+)' matchesidiomas = re.compile(patronidiomas, re.DOTALL).findall(bloque_enlaces_idiomas) for numero, url in matchesenlaces: pepe = url for numero2, idiomas in matchesidiomas: if numero == numero2: title = idiomas idiomas = re.sub(r"[0-9]", "", idiomas) listavideos = servertools.findvideos(pepe) for video in listavideos: idiomas = idiomas.replace( idiomas, "[COLOR white]" + idiomas + "[/COLOR]") videotitle = scrapertools.unescape( video[0]) + "-" + idiomas url = video[1] server = video[2] videotitle = videotitle.replace( videotitle, "[COLOR skyblue]" + videotitle + "[/COLOR]") title_first = "[COLOR gold]Ver en--[/COLOR]" title = title_first + videotitle itemlist.append( Item(channel=__channel__, action="play", server=server, title=title, url=url, thumbnail=item.extra, fulltitle=item.title, fanart=item.show, folder=False)) patron = '<em>opción \d+, ([^<]+)</em>.*?' 
# Datos que contienen los enlaces para sacarlos con servertools.findvideos patron += '<div class="contenedor_tab">(.*?)<div style="clear:both;">' matches = re.compile(patron, re.DOTALL).findall(data) for idioma, datosEnlaces in matches: listavideos = servertools.findvideos(datosEnlaces) for video in listavideos: videotitle = scrapertools.unescape(video[0]) + "-" + idioma url = video[1] server = video[2] videotitle = videotitle.replace( videotitle, "[COLOR skyblue]" + videotitle + "[/COLOR]") title_first = "[COLOR gold]Ver en--[/COLOR]" title = title_first + videotitle idioma = idioma.replace(idioma, "[COLOR white]" + idioma + "[/COLOR]") itemlist.append( Item(channel=__channel__, action="play", server=server, title=title, url=url, thumbnail=item.extra, fulltitle=item.title, fanart=item.show, folder=False)) return itemlist
def episodios(item): logger.info("pelisalacarta.channels.animeflv episodios") itemlist = [] data = scrapertools.anti_cloudflare(item.url, headers=CHANNEL_DEFAULT_HEADERS, host=CHANNEL_HOST) ''' <div class="tit">Listado de episodios <span class="fecha_pr">Fecha Próximo: 2013-06-11</span></div> <ul class="anime_episodios" id="listado_epis"> <li><a href="/ver/aiura-9.html">Aiura 9</a></li> <li><a href="/ver/aiura-8.html">Aiura 8</a></li> <li><a href="/ver/aiura-7.html">Aiura 7</a></li> <li><a href="/ver/aiura-6.html">Aiura 6</a></li> <li><a href="/ver/aiura-5.html">Aiura 5</a></li> <li><a href="/ver/aiura-4.html">Aiura 4</a></li> <li><a href="/ver/aiura-3.html">Aiura 3</a></li> <li><a href="/ver/aiura-2.html">Aiura 2</a></li> <li><a href="/ver/aiura-1.html">Aiura 1</a></li> </ul> ''' data = scrapertools.find_single_match( data, '<div class="tit">Listado de episodios.*?</div>(.*?)</ul>') patron = '<li><a href="([^"]+)">([^<]+)</a></li>' matches = re.compile(patron, re.DOTALL).findall(data) for scrapedurl, scrapedtitle in matches: title = scrapertools.unescape(scrapedtitle) url = urlparse.urljoin(item.url, scrapedurl) thumbnail = item.thumbnail plot = item.plot # TODO crear funcion que pasandole el titulo y buscando en un array de series establezca el valor el nombre # y temporada / capitulo para que funcione con trak.tv season = 1 episode = 1 patron = re.escape(item.show) + "\s+(\d+)" # logger.info("title {0}".format(title)) # logger.info("patron {0}".format(patron)) try: episode = scrapertools.get_match(title, patron) episode = int(episode) # logger.info("episode {0}".format(episode)) except IndexError: pass except ValueError: pass episode_title = scrapertools.find_single_match(title, "\d+:\s*(.*)") if episode_title == "": episode_title = "Episodio " + str(episode) season, episode = numbered_for_tratk(item.show, season, episode) if len(str(episode)) == 1: title = str(season) + "x0" + str(episode) else: title = str(season) + "x" + str(episode) title = item.show + " - " + title + " " + episode_title #if DEBUG: logger.info("title=[{0}], url=[{1}], thumbnail=[{2}]".format(title, url, thumbnail)) itemlist.append( Item(channel=item.channel, action="findvideos", title=title, url=url, thumbnail=thumbnail, plot=plot, show=item.show, fulltitle="{0} {1}".format(item.show, title), fanart=thumbnail, viewmode="movies_with_plot", folder=True)) if config.get_library_support() and len(itemlist) > 0: itemlist.append( Item(channel=item.channel, title="Añadir esta serie a la biblioteca de XBMC", url=item.url, action="add_serie_to_library", extra="episodios", show=item.show)) itemlist.append( Item(channel=item.channel, title="Descargar todos los episodios de la serie", url=item.url, action="download_all_episodes", extra="episodios", show=item.show)) return itemlist
def peliculas(item): logger.info("[cineblog01.py] mainlist") itemlist = [] if item.url == "": item.url = sito # Descarga la página data = scrapertools.anti_cloudflare(item.url, headers) # Extrae las entradas (carpetas) patronvideos = '<div class="span4".*?<a.*?<p><img src="([^"]+)".*?' patronvideos += '<div class="span8">.*?<a href="([^"]+)"> <h1>([^"]+)</h1></a>.*?' patronvideos += '<strong>([^<]*)</strong>.*?<br />([^<+]+)' matches = re.compile(patronvideos, re.DOTALL).finditer(data) for match in matches: scrapedtitle = scrapertools.unescape(match.group(3)) scrapedurl = urlparse.urljoin(item.url, match.group(2)) scrapedthumbnail = urlparse.urljoin(item.url, match.group(1)) scrapedthumbnail = scrapedthumbnail.replace(" ", "%20") scrapedplot = scrapertools.unescape("[COLOR orange]" + match.group(4) + "[/COLOR]\n" + match.group(5).strip()) scrapedplot = scrapertools.htmlclean(scrapedplot).strip() if DEBUG: logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]") itemlist.append( infoSod(Item(channel=__channel__, action="findvideos", fulltitle=scrapedtitle, show=scrapedtitle, title=scrapedtitle, url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot, extra=item.extra, viewmode="movie_with_plot"), tipo='movie')) # Next page mark try: bloque = scrapertools.get_match( data, "<div id='wp_page_numbers'>(.*?)</div>") patronvideos = '<a href="([^"]+)">></a></li>' matches = re.compile(patronvideos, re.DOTALL).findall(bloque) scrapertools.printMatches(matches) if len(matches) > 0: scrapedtitle = "[COLOR orange]Successivo>>[/COLOR]" scrapedurl = matches[0] scrapedthumbnail = "" scrapedplot = "" if (DEBUG): logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]") itemlist.append( Item(channel=__channel__, action="HomePage", title="[COLOR yellow]Torna Home[/COLOR]", folder=True)), itemlist.append( Item( channel=__channel__, action="peliculas", title=scrapedtitle, url=scrapedurl, thumbnail= "http://2.bp.blogspot.com/-fE9tzwmjaeQ/UcM2apxDtjI/AAAAAAAAeeg/WKSGM2TADLM/s1600/pager+old.png", extra=item.extra, plot=scrapedplot)) except: pass return itemlist
def listado(item):
    logger.info()
    itemlist = []

    # ~ data = httptools.downloadpage(item.url).data
    data = obtener_data(item.url)
    data = re.sub(r"\n|\r|\t|\s{2}|&nbsp;|<Br>|<BR>|<br>|<br/>|<br />|-\s", "", data)

    if item.extra == 'movies':
        action = "findvideos"
        content_type = "movie"
        patron = '<li[^>]+><a href="([^"]+)" alt="([^<|\(]+).*?<img src="([^"]+).*?>.*?<span>\(([^)]+).*?' \
                 '<p class="font12">(.*?)</p>'
        matches = re.compile(patron, re.DOTALL).findall(data)

        for scrapedurl, scrapedtitle, scrapedthumbnail, scrapedyear, scrapedplot in matches:
            title = "%s (%s)" % (scrapertools.unescape(scrapedtitle.strip()), scrapedyear)
            plot = scrapertools.entityunescape(scrapedplot)
            new_item = Item(channel=__channel__, title=title,
                            url=urlparse.urljoin(CHANNEL_HOST, scrapedurl), action=action,
                            thumbnail=scrapedthumbnail, plot=plot, context="", extra=item.extra,
                            contentType=content_type)
            new_item.fulltitle = scrapertools.unescape(scrapedtitle.strip())
            new_item.infoLabels = {'year': scrapedyear}
            itemlist.append(new_item)
    else:
        action = "temporadas"
        content_type = "tvshow"
        patron = '<li[^>]+><a href="([^"]+)" alt="([^<|\(]+).*?<img src="([^"]+)'
        matches = re.compile(patron, re.DOTALL).findall(data)

        for scrapedurl, scrapedtitle, scrapedthumbnail in matches:
            title = scrapertools.unescape(scrapedtitle.strip())
            new_item = Item(channel=__channel__, title=title,
                            url=urlparse.urljoin(CHANNEL_HOST, scrapedurl), action=action,
                            thumbnail=scrapedthumbnail, context="", extra=item.extra,
                            contentType=content_type, fulltitle=title)
            new_item.show = title
            # Fix: in some cases the url is wrong
            new_item.url = new_item.url.replace(CHANNEL_HOST + "pelicula", CHANNEL_HOST + "serie")
            itemlist.append(new_item)

    # Fetch the basic data for all movies using multiple threads
    tmdb.set_infoLabels(itemlist, __modo_grafico__)

    if '<ul class="pagination"' in data:
        url_next = scrapertools.find_single_match(data, 'href="([^"]*)" rel="next"')
        if url_next:
            url = urlparse.urljoin(CHANNEL_HOST, url_next)
            itemlist.append(Item(channel=__channel__, action="listado", title=">> Página siguiente",
                                 extra=item.extra, url=url, thumbnail=thumbnail_host,
                                 fanart=fanart_host))

    return itemlist
def findvid_film(item): logger.info("[cineblog01.py] findvideos") itemlist = [] # Descarga la página data = scrapertools.anti_cloudflare(item.url, headers) data = scrapertools.decodeHtmlentities(data).replace( 'http://cineblog01.pw', 'http://k4pp4.pw') # Extract the quality format patronvideos = '>([^<]+)</strong></div>' matches = re.compile(patronvideos, re.DOTALL).finditer(data) QualityStr = "" for match in matches: QualityStr = scrapertools.unescape(match.group(1))[6:] # Extrae las entradas streaming = scrapertools.find_single_match( data, '<strong>Streaming:</strong>(.*?)<table height="30">') patron = '<td><a href="([^"]+)" target="_blank">([^<]+)</a></td>' matches = re.compile(patron, re.DOTALL).findall(streaming) for scrapedurl, scrapedtitle in matches: print "##### findvideos Streaming ## %s ## %s ##" % (scrapedurl, scrapedtitle) title = "[COLOR orange]Streaming:[/COLOR] " + item.title + " [COLOR grey]" + QualityStr + "[/COLOR] [COLOR blue][" + scrapedtitle + "][/COLOR]" itemlist.append( Item(channel=__channel__, action="play", title=title, url=scrapedurl, fulltitle=item.fulltitle, thumbnail=item.thumbnail, show=item.show, folder=False)) streaming_hd = scrapertools.find_single_match( data, '<strong>Streaming HD[^<]+</strong>(.*?)<table height="30">') patron = '<td><a href="([^"]+)" target="_blank">([^<]+)</a></td>' matches = re.compile(patron, re.DOTALL).findall(streaming_hd) for scrapedurl, scrapedtitle in matches: print "##### findvideos Streaming HD ## %s ## %s ##" % (scrapedurl, scrapedtitle) title = "[COLOR yellow]Streaming HD:[/COLOR] " + item.title + " [COLOR grey]" + QualityStr + "[/COLOR] [COLOR blue][" + scrapedtitle + "][/COLOR]" itemlist.append( Item(channel=__channel__, action="play", title=title, url=scrapedurl, fulltitle=item.fulltitle, thumbnail=item.thumbnail, show=item.show, folder=False)) streaming_3D = scrapertools.find_single_match( data, '<strong>Streaming 3D[^<]+</strong>(.*?)<table height="30">') patron = '<td><a href="([^"]+)" target="_blank">([^<]+)</a></td>' matches = re.compile(patron, re.DOTALL).findall(streaming_3D) for scrapedurl, scrapedtitle in matches: print "##### findvideos Streaming 3D ## %s ## %s ##" % (scrapedurl, scrapedtitle) title = "[COLOR pink]Streaming 3D:[/COLOR] " + item.title + " [COLOR grey]" + QualityStr + "[/COLOR] [COLOR blue][" + scrapedtitle + "][/COLOR]" itemlist.append( Item(channel=__channel__, action="play", title=title, url=scrapedurl, fulltitle=item.fulltitle, thumbnail=item.thumbnail, show=item.show, folder=False)) download = scrapertools.find_single_match( data, '<strong>Download:</strong>(.*?)<table height="30">') patron = '<td><a href="([^"]+)" target="_blank">([^<]+)</a></td>' matches = re.compile(patron, re.DOTALL).findall(download) for scrapedurl, scrapedtitle in matches: print "##### findvideos Download ## %s ## %s ##" % (scrapedurl, scrapedtitle) title = "[COLOR aqua]Download:[/COLOR] " + item.title + " [COLOR grey]" + QualityStr + "[/COLOR] [COLOR blue][" + scrapedtitle + "][/COLOR]" itemlist.append( Item(channel=__channel__, action="play", title=title, url=scrapedurl, fulltitle=item.fulltitle, thumbnail=item.thumbnail, show=item.show, folder=False)) download_hd = scrapertools.find_single_match( data, '<strong>Download HD[^<]+</strong>(.*?)<table width="100%" height="20">' ) patron = '<td><a href="([^"]+)" target="_blank">([^<]+)</a></td>' matches = re.compile(patron, re.DOTALL).findall(download_hd) for scrapedurl, scrapedtitle in matches: print "##### findvideos Download HD ## %s ## %s ##" % (scrapedurl, 
scrapedtitle) title = "[COLOR azure]Download HD:[/COLOR] " + item.title + " [COLOR grey]" + QualityStr + "[/COLOR] [COLOR blue][" + scrapedtitle + "][/COLOR]" itemlist.append( Item(channel=__channel__, action="play", title=title, url=scrapedurl, fulltitle=item.fulltitle, thumbnail=item.thumbnail, show=item.show, folder=False)) if len(itemlist) == 0: itemlist = servertools.find_video_items(item=item) return itemlist
def novedades_documentales(item):
    logger.info("[shurweb.py] novedades_documentales")
    data = scrapertools.cachePage(item.url)
    data = scrapertools.unescape(data)
    data = scrapertools.get_match(data, '<div class="tab-pane fade" id="docus">(.*?)<div class="panel panel-primary">')
    return peliculas(item, data=data)
def listado(item):
    logger.info()
    itemlist = []

    action = "findvideos"
    content_type = "movie"
    if item.extra == 'serie':
        action = "temporadas"
        content_type = "tvshow"

    # ~ data = httptools.downloadpage(item.url).data
    data = obtener_data(item.url)
    data = re.sub(r"\n|\r|\t|\s{2}|&nbsp;|<Br>|<BR>|<br>|<br/>|<br />|-\s", "", data)
    # logger.info("data -- {}".format(data))

    patron = '<li[^>]+><a href="([^"]+)" alt="([^<|\(]+).*?<img src="([^"]+).*?>.*?<span>\(([^)]+).*?' \
             '<p class="font12">(.*?)</p>'
    matches = re.compile(patron, re.DOTALL).findall(data)

    for scrapedurl, scrapedtitle, scrapedthumbnail, scrapedyear, scrapedplot in matches[:28]:
        title = "%s (%s)" % (scrapertools.unescape(scrapedtitle.strip()), scrapedyear)
        plot = scrapertools.entityunescape(scrapedplot)
        new_item = Item(channel=__channel__, title=title,
                        url=urlparse.urljoin(CHANNEL_HOST, scrapedurl), action=action,
                        thumbnail=scrapedthumbnail, plot=plot, context="", extra=item.extra,
                        contentType=content_type, fulltitle=title)
        if item.extra == 'serie':
            new_item.show = scrapertools.unescape(scrapedtitle.strip())
            # Fix: in some cases the url is wrong
            new_item.url = new_item.url.replace(CHANNEL_HOST + "pelicula", CHANNEL_HOST + "serie")
        else:
            new_item.fulltitle = scrapertools.unescape(scrapedtitle.strip())
            new_item.infoLabels = {'year': scrapedyear}
        # logger.debug(new_item.tostring())
        itemlist.append(new_item)

    # Fetch the basic data for all movies using multiple threads
    tmdb.set_infoLabels(itemlist, __modo_grafico__)

    # Number of records shown per page; fixed at 28 for every pagination
    if len(matches) >= 28 and '/buscar/?' not in item.url:
        file_php = "666more"
        tipo_serie = ""

        if item.extra == "movies":
            anio = scrapertools.find_single_match(item.url, "(?:year=)(\w+)")
            letra = scrapertools.find_single_match(item.url, "(?:letra=)(\w+)")
            genero = scrapertools.find_single_match(item.url, "(?:gender=|genre=)(\w+)")
            params = "letra=%s&year=%s&genre=%s" % (letra, anio, genero)
        else:
            tipo2 = scrapertools.find_single_match(item.url, "(?:series/|tipo2=)(\w+)")
            tipo_serie = "&tipo=serie"
            if tipo2 != "all":
                file_php = "letra"
                tipo_serie += "&tipo2=" + tipo2
            genero = ""
            if tipo2 == "anio":
                genero = scrapertools.find_single_match(item.url, "(?:anio/|genre=)(\w+)")
            if tipo2 == "genero":
                genero = scrapertools.find_single_match(item.url, "(?:genero/|genre=)(\w+)")
            if tipo2 == "letra":
                genero = scrapertools.find_single_match(item.url, "(?:letra/|genre=)(\w+)")
            params = "genre=%s" % genero

        url = "http://www.pelispedia.tv/api/%s.php?rangeStart=28&rangeEnd=28%s&%s" % (file_php, tipo_serie, params)

        if "rangeStart" in item.url:
            ant_inicio = scrapertools.find_single_match(item.url, "rangeStart=(\d+)&")
            inicio = str(int(ant_inicio) + 28)
            url = item.url.replace("rangeStart=" + ant_inicio, "rangeStart=" + inicio)

        itemlist.append(Item(channel=__channel__, action="listado", title=">> Página siguiente",
                             extra=item.extra, url=url, thumbnail=thumbnail_host,
                             fanart=fanart_host))

    return itemlist
def episodios(item):
    logger.info()
    itemlist = []

    # ~ data = httptools.downloadpage(item.url).data
    data = obtener_data(item.url)
    data = re.sub(r"\n|\r|\t|\s{2}|&nbsp;|<Br>|<BR>|<br>|<br/>|<br />|-\s", "", data)

    patron = '<li class="clearfix gutterVertical20"><a href="([^"]+)".*?><small>(.*?)</small>.*?' \
             '<span class.+?>(.*?)</span>'
    matches = re.compile(patron, re.DOTALL).findall(data)

    for scrapedurl, scrapedtitle, scrapedname in matches:
        # logger.info("scrap {}".format(scrapedtitle))
        patron = 'Season\s+(\d),\s+Episode\s+(\d+)'
        match = re.compile(patron, re.DOTALL).findall(scrapedtitle)
        season, episode = match[0]

        if 'season' in item.infoLabels and int(item.infoLabels['season']) != int(season):
            continue

        title = "%sx%s: %s" % (season, episode.zfill(2), scrapertools.unescape(scrapedname))
        new_item = item.clone(title=title, url=scrapedurl, action="findvideos", fulltitle=title,
                              contentType="episode")
        if 'infoLabels' not in new_item:
            new_item.infoLabels = {}
        new_item.infoLabels['season'] = season
        new_item.infoLabels['episode'] = episode.zfill(2)
        itemlist.append(new_item)

    # TODO: skip this when adding to the video library
    if not item.extra:
        # Fetch the data for all episodes of the season using multiple threads
        tmdb.set_infoLabels(itemlist, __modo_grafico__)

        for i in itemlist:
            if i.infoLabels['title']:
                # If the episode has its own name, append it to the item title
                i.title = "%sx%s %s" % (i.infoLabels['season'], i.infoLabels['episode'],
                                        i.infoLabels['title'])
            if 'poster_path' in i.infoLabels:
                # If the episode has its own image, use it instead of the poster
                i.thumbnail = i.infoLabels['poster_path']

        itemlist.sort(key=lambda it: int(it.infoLabels['episode']),
                      reverse=config.get_setting('orden_episodios', __channel__))

    # "Add this series to the video library" option
    if config.get_videolibrary_support() and len(itemlist) > 0:
        itemlist.append(Item(channel=__channel__, title="Añadir esta serie a la videoteca",
                             url=item.url, action="add_serie_to_library", extra="episodios",
                             show=item.show, category="Series", thumbnail=thumbnail_host,
                             fanart=fanart_host))

    return itemlist
def novita(item): logger.info("[cb01anime.py] mainlist") itemlist = [] # Descarga la página data = scrapertools.anti_cloudflare(item.url, headers) ## ------------------------------------------------ cookies = "" matches = config.get_cookie_data(item.url).splitlines()[4:] for cookie in matches: name = cookie.split('\t')[5] value = cookie.split('\t')[6] cookies += name + "=" + value + ";" headers.append(['Cookie', cookies[:-1]]) import urllib _headers = urllib.urlencode(dict(headers)) ## ------------------------------------------------ # Extrae las entradas (carpetas) patronvideos = '<div class="span4"> <a.*?<img src="(.*?)".*?' patronvideos += '<div class="span8">.*?<a href="(.*?)">.*?' patronvideos += '<h1>(.*?)</h1></a>.*?<br />(.*?)<br>.*?' matches = re.compile(patronvideos, re.DOTALL).finditer(data) for match in matches: scrapedthumbnail = match.group(1) scrapedurl = match.group(2) scrapedtitle = scrapertools.unescape(match.group(3)) scrapedplot = scrapertools.unescape(match.group(4)) scrapedplot = scrapertools.decodeHtmlentities(scrapedplot) if scrapedplot.startswith(""): scrapedplot = scrapedplot[64:] if DEBUG: logger.info( "title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]") ## ------------------------------------------------ scrapedthumbnail += "|" + _headers ## ------------------------------------------------ # Añade al listado de XBMC itemlist.append( Item(channel=__channel__, action="listacompleta" if scrapedtitle == "Lista Alfabetica Completa Anime/Cartoon" else "episodi", fulltitle=scrapedtitle, show=scrapedtitle, title=scrapedtitle, url=scrapedurl, thumbnail=scrapedthumbnail, viewmode="movie_with_plot", plot=scrapedplot)) # Put the next page mark try: next_page = scrapertools.get_match(data, "<link rel='next' href='([^']+)'") itemlist.append( Item(channel=__channel__, action="HomePage", title="[COLOR yellow]Torna Home[/COLOR]", folder=True)), itemlist.append( Item(channel=__channel__, action="novita", title="[COLOR orange]Successivo>>[/COLOR]", url=next_page, thumbnail="http://2.bp.blogspot.com/-fE9tzwmjaeQ/UcM2apxDtjI/AAAAAAAAeeg/WKSGM2TADLM/s1600/pager+old.png")) except: pass return itemlist
def downloadpage(url, **opt):
    # logger.info()
    """
    Open a url and return the data obtained

    @param url: url to open.
    @type url: str
    @param post: If it contains any value, it is sent by POST.
    @type post: str
    @param headers: Headers for the request, if it contains nothing the default headers will be used.
    @type headers: dict, list
    @param timeout: Timeout for the request.
    @type timeout: int
    @param follow_redirects: Indicates if redirects are to be followed.
    @type follow_redirects: bool
    @param cookies: Indicates whether cookies are to be used.
    @type cookies: bool
    @param replace_headers: If True, headers passed by the "headers" parameter will completely replace the default headers.
                            If False, the headers passed by the "headers" parameter will modify the default headers.
    @type replace_headers: bool
    @param add_referer: Indicates whether to add the "Referer" header using the domain of the url as a value.
    @type add_referer: bool
    @param only_headers: If True, only headers will be downloaded, omitting the content of the url.
    @type only_headers: bool
    @param random_headers: If True, use the method of selecting random headers.
    @type random_headers: bool
    @param ignore_response_code: If True, ignore the WebErrorException for errors like 404 in veseriesonline, where the data is still usable.
    @type ignore_response_code: bool
    @param use_requests: Use requests.session()
    @type use_requests: bool
    @return: Result of the request
    @rtype: HTTPResponse

    HTTPResponse fields:
        HTTPResponse.success: bool   True: request successful | False: error when making the request
        HTTPResponse.code:    int    Server response code, or error code if an error occurs
        HTTPResponse.error:   str    Description of the error, in case of an error
        HTTPResponse.headers: dict   Dictionary with the server response headers
        HTTPResponse.data:    str    Response obtained from the server
        HTTPResponse.json:    dict   Response obtained from the server in json format
        HTTPResponse.time:    float  Time taken to make the request
    """
    url = scrapertools.unescape(url)
    parse = urlparse.urlparse(url)
    domain = parse.netloc

    if opt.get('cloudscraper'):
        from lib import cloudscraper
        session = cloudscraper.create_scraper()
    else:
        from lib import requests
        session = requests.session()

    if config.get_setting('resolver_dns') and not opt.get('use_requests', False):
        from core import resolverdns
        session.mount('https://', resolverdns.CipherSuiteAdapter(domain))

    req_headers = default_headers.copy()

    # Headers passed as parameters
    if opt.get('headers', None) is not None:
        if not opt.get('replace_headers', False):
            req_headers.update(dict(opt['headers']))
        else:
            req_headers = dict(opt['headers'])

    if domain in directIP.keys() and not opt.get('disable_directIP', False):
        req_headers['Host'] = domain
        url = urlparse.urlunparse(parse._replace(netloc=directIP.get(domain)))

    if opt.get('random_headers', False) or HTTPTOOLS_DEFAULT_RANDOM_HEADERS:
        req_headers['User-Agent'] = random_useragent()

    url = urllib.quote(url, safe="%/:=&?~#+!$,;'@()*[]")

    opt['url_save'] = url
    opt['post_save'] = opt.get('post', None)

    response = {}
    info_dict = []
    payload = dict()
    files = {}
    file_name = ''

    session.verify = opt.get('verify', True)

    if opt.get('cookies', True):
        session.cookies = cj
    session.headers.update(req_headers)

    proxy_data = {'dict': {}}

    inicio = time.time()

    if opt.get('timeout', None) is None and HTTPTOOLS_DEFAULT_DOWNLOAD_TIMEOUT is not None:
        opt['timeout'] = HTTPTOOLS_DEFAULT_DOWNLOAD_TIMEOUT
    if opt['timeout'] == 0:
        opt['timeout'] = None

    if len(url) > 0:
        try:
            if opt.get('post', None) is not None or opt.get('file', None) is not None:
                if opt.get('post', None) is not None:
                    # Convert string post in dict
                    try:
                        json.loads(opt['post'])
                        payload = opt['post']
                    except:
                        if not isinstance(opt['post'], dict):
                            post = urlparse.parse_qs(opt['post'], keep_blank_values=1)
                            payload = dict()
                            for key, value in post.items():
                                try:
                                    payload[key] = value[0]
                                except:
                                    payload[key] = ''
                        else:
                            payload = opt['post']

                # Verify 'file' and 'file_name' options to upload a buffer or file
                if opt.get('file', None) is not None:
                    if os.path.isfile(opt['file']):
                        if opt.get('file_name', None) is None:
                            path_file, opt['file_name'] = os.path.split(opt['file'])
                        files = {'file': (opt['file_name'], open(opt['file'], 'rb'))}
                        file_name = opt['file']
                    else:
                        files = {'file': (opt.get('file_name', 'Default'), opt['file'])}
                        file_name = opt.get('file_name', 'Default') + ', memory buffer'

                info_dict = fill_fields_pre(url, opt, proxy_data, file_name)
                if opt.get('only_headers', False):
                    # Makes the request with HEAD method
                    req = session.head(url, allow_redirects=opt.get('follow_redirects', True),
                                       timeout=opt['timeout'])
                else:
                    # Makes the request with POST method
                    req = session.post(url, data=payload,
                                       allow_redirects=opt.get('follow_redirects', True),
                                       files=files, timeout=opt['timeout'])
            elif opt.get('only_headers', False):
                info_dict = fill_fields_pre(url, opt, proxy_data, file_name)
                # Makes the request with HEAD method
                req = session.head(url, allow_redirects=opt.get('follow_redirects', True),
                                   timeout=opt['timeout'])
            else:
                info_dict = fill_fields_pre(url, opt, proxy_data, file_name)
                # Makes the request with GET method
                req = session.get(url, allow_redirects=opt.get('follow_redirects', True),
                                  timeout=opt['timeout'])
        except Exception as e:
            from lib import requests
            req = requests.Response()
            if not opt.get('ignore_response_code', False) and not proxy_data.get('stat', ''):
                response['data'] = ''
                response['success'] = False
                info_dict.append(('Success', 'False'))
                import traceback
                response['code'] = traceback.format_exc()
                info_dict.append(('Response code', str(e)))
                info_dict.append(('Finished in', time.time() - inicio))
                if not opt.get('alfa_s', False):
                    show_infobox(info_dict)
                return type('HTTPResponse', (), response)
            else:
                req.status_code = str(e)
    else:
        response['data'] = ''
        response['success'] = False
        response['code'] = ''
        return type('HTTPResponse', (), response)

    response_code = req.status_code
    response['url'] = req.url
    response['data'] = req.content if req.content else ''
    if type(response['data']) != str:
        try:
            response['data'] = response['data'].decode('utf-8')
        except:
            response['data'] = response['data'].decode('ISO-8859-1')

    if req.headers.get('Server', '').startswith('cloudflare') and response_code in [429, 503, 403] \
            and not opt.get('CF', False) and 'Ray ID' in response['data'] and not opt.get('post', None):
        logger.debug("CF retry... for domain: %s" % domain)
        from lib import proxytranslate
        gResp = proxytranslate.process_request_proxy(url)
        if gResp:
            req = gResp['result']
            response_code = req.status_code
            response['url'] = gResp['url']
            response['data'] = gResp['data']

    if not response['data']:
        response['data'] = ''

    try:
        response['json'] = to_utf8(req.json())
    except:
        response['json'] = dict()

    response['code'] = response_code
    response['headers'] = req.headers
    response['cookies'] = req.cookies

    info_dict, response = fill_fields_post(info_dict, req, response, req_headers, inicio)

    if opt.get('cookies', True):
        save_cookies(alfa_s=opt.get('alfa_s', False))

    if not 'api.themoviedb' in url and not opt.get('alfa_s', False):
        show_infobox(info_dict)
    if not config.get_setting("debug"):
        logger.info('Page URL:', url)

    return type('HTTPResponse', (), response)
def get_episodios(item, recursion):
    logger.info("[rtve.py] get_episodios")
    itemlist = []
    data = scrapertools.cachePage(item.url)

    # Extract the videos
    '''
    <li class="odd">
    <span class="col_tit" id="2851919" name="progname">
    <a href="/alacarta/videos/atencion-obras/atencion-obras-josep-maria-flotats-ferran-adria-sanchis-sinisterra/2851919/">Atención Obras - 07/11/14</a>
    </span>
    <span class="col_tip">
    <span>Completo</span>
    </span>
    <span class="col_dur">55:35</span>
    <span class="col_pop"><span title="32% popularidad" class="pc32"><em><strong><span>32%</span></strong></em></span></span>
    <span class="col_fec">07 nov 2014</span>
    <div id="popup2851919" class="tultip hddn">
    <span id="progToolTip" class="tooltip curved">
    <span class="pointer"></span>
    <span class="cerrar" id="close2851919"></span>
    <span class="titulo-tooltip"><a href="/alacarta/videos/atencion-obras/atencion-obras-josep-maria-flotats-ferran-adria-sanchis-sinisterra/2851919/" title="Ver Atención Obras - 07/11/14">Atención Obras - 07/11/14</a></span>
    <span class="fecha">07 nov 2014</span>
    <span class="detalle">Josep María Flotats trae al Teatro María Guerrero de Madrid “El juego del amor y del azar” de Pierre de Marivaux. Un texto que ya ha sido estrenado en el Teatre Nacional de Catalunya. C...</span>
    '''
    patron = '<li class="[^"]+">.*?'
    patron += '<span class="col_tit"[^<]+'
    patron += '<a href="([^"]+)">(.*?)</a[^<]+'
    patron += '</span>[^<]+'
    patron += '<span class="col_tip"[^<]+<span>([^<]+)</span[^<]+</span[^<]+'
    patron += '<span class="col_dur">([^<]+)</span>.*?'
    patron += '<span class="col_fec">([^<]+)</span>.*?'
    patron += '<span class="detalle">([^>]+)</span>'
    matches = re.findall(patron, data, re.DOTALL)
    if DEBUG:
        scrapertools.printMatches(matches)

    # Build a list with the entries
    for match in matches:
        if not "developer" in config.get_platform():
            scrapedtitle = match[1] + " (" + match[2].strip() + ") (" + match[3].strip() + ") (" + match[4] + ")"
        else:
            scrapedtitle = match[1]
        scrapedtitle = scrapedtitle.replace("<em>Nuevo</em> ", "")
        scrapedtitle = scrapertools.unescape(scrapedtitle)
        scrapedtitle = scrapedtitle.strip()
        scrapedurl = urlparse.urljoin(item.url, match[0])
        scrapedthumbnail = item.thumbnail
        scrapedplot = scrapertools.unescape(match[5].strip())
        scrapedplot = scrapertools.htmlclean(scrapedplot).strip()
        scrapedextra = match[2]
        if DEBUG:
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")
        itemlist.append(Item(channel=CHANNELNAME, title=scrapedtitle, action="play", server="rtve",
                             url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot,
                             show=item.show, category=item.category, extra=scrapedextra,
                             folder=False))

    # Pagination
    if len(itemlist) > 0:
        next_page_url = scrapertools.find_single_match(data, '<a name="paginaIR" href="([^"]+)"><span>Siguiente</span></a>')
        if next_page_url != "":
            next_page_url = urlparse.urljoin(item.url, next_page_url).replace("&amp;", "&")
            # http://www.rtve.es/alacarta/interno/contenttable.shtml?pbq=2&modl=TOC&locale=es&pageSize=15&ctx=36850&advSearchOpen=false
            if not next_page_url.endswith("&advSearchOpen=false"):
                next_page_url = next_page_url + "&advSearchOpen=false"

            siguiente_item = Item(channel=CHANNELNAME, action="episodios",
                                  url=urlparse.urljoin(item.url, next_page_url), title=item.title,
                                  show=item.show, category=item.category)
            logger.info("siguiente_item=" + siguiente_item.tostring())

            # To avoid endless lists: recurse up to 3 pages, then add a "next page" item instead
            if recursion <= 3:
                itemlist.extend(get_episodios(siguiente_item, recursion + 1))
            else:
                siguiente_item.title = ">> Página siguiente"
                itemlist.append(siguiente_item)

    return itemlist
def listvideos(item):
    logger.info("[discoverymx.py] listvideos")
    itemlist = []
    scrapedthumbnail = ""
    scrapedplot = ""

    # Download the page
    data = scrapertools.cache_page(item.url)

    patronvideos = "<h3 class='post-title entry-title'[^<]+"
    patronvideos += "<a href='([^']+)'>([^<]+)</a>.*?"
    patronvideos += "<div class='post-body entry-content'(.*?)<div class='post-footer'>"
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for match in matches:
        scrapedtitle = match[1]
        scrapedtitle = re.sub("<[^>]+>", " ", scrapedtitle)
        scrapedtitle = scrapertools.unescape(scrapedtitle)
        scrapedurl = match[0]
        regexp = re.compile(r'src="(http[^"]+)"')
        matchthumb = regexp.search(match[2])
        if matchthumb is not None:
            scrapedthumbnail = matchthumb.group(1)
        matchplot = re.compile('<div align="center">(<img.*?)</span></div>', re.DOTALL).findall(match[2])
        if len(matchplot) > 0:
            scrapedplot = matchplot[0]
            # print matchplot
        else:
            scrapedplot = ""
        scrapedplot = re.sub("<[^>]+>", " ", scrapedplot)
        scrapedplot = scrapertools.unescape(scrapedplot)
        if DEBUG:
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")
        # Add to the XBMC listing
        # xbmctools.addnewfolder( __channel__ , "findevi" , category , scrapedtitle , scrapedurl , scrapedthumbnail, scrapedplot )
        itemlist.append(Item(channel=__channel__, action="findvideos", title=scrapedtitle,
                             url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot,
                             folder=True))

    # Extract the next page marker
    patronvideos = "<a class='blog-pager-older-link' href='([^']+)'"
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    if len(matches) > 0:
        scrapedtitle = "Página siguiente"
        scrapedurl = urlparse.urljoin(item.url, matches[0])
        scrapedthumbnail = ""
        scrapedplot = ""
        itemlist.append(Item(channel=__channel__, action="listvideos", title=scrapedtitle,
                             url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot,
                             folder=True))

    return itemlist
def getlistchannel(item):
    logger.info("[justintv.py] getlistchannel")
    url = item.url
    title = item.title
    if "|Next Page >>" in item.title:
        item.title = item.title.split('|')[0]

    if item.title == 'favorites':
        context = '|9'  # Remove a channel from favorites; in the favorites listing, removal only
    else:
        context = '|8'  # Add a channel to favorites; in the other listings, adding only

    data = scrapertools.cache_page(url)
    logger.info(data)
    datadict = json.loads(data)
    totalItems = len(datadict)
    itemlist = []
    # print item.action
    c = 0
    try:
        datadict = sorted(datadict, key=lambda k: k['video_bitrate'], reverse=True)
    except:
        pass

    for match in datadict:
        try:
            name = match['name'].split('user_')[-1]
        except:
            try:
                name = match['channel']['login']
                if name is None or name == '':
                    raise
            except:
                name = match['login']
        try:
            title = match['channel']['title']
            if title is None or title == '':
                raise
        except:
            try:
                title = match['title']
                if title is None:
                    title = ''
            except:
                title = ''
        try:
            title = title
            if title is None or title == '':
                raise
        except:
            title = name
        try:
            tags = scrapertools.unescape(match['channel']['tags'])
            if tags is None or tags == '':
                raise
        except:
            try:
                tags = scrapertools.unescape(match['tags']).strip()
                if tags is None or tags == '':
                    raise
            except:
                tags = ''
        try:
            status = scrapertools.unescape(match['channel']['status']).strip()
            if status is None or status == '':
                raise
        except:
            try:
                status = scrapertools.unescape(match['status']).strip()
                if status is None or status == '':
                    raise
            except:
                status = ''
        try:
            subcat = match['channel']['category_title']
            if subcat is None or subcat == '':
                raise
        except:
            try:
                subcat = match['category']
                if subcat is None:
                    raise
            except:
                subcat = ''
        try:
            views = match['channel']['views_count']
        except:
            try:
                views = match['channel_view_count']
            except:
                views = ''
        try:
            bitrate = str(match['video_bitrate']).split('.')[0]
        except:
            bitrate = ''
        try:
            lang = match['language']
        except:
            lang = ''
        try:
            scrapedthumbnail = match['channel']['screen_cap_url_medium']
        except:
            scrapedthumbnail = match['screen_cap_url_medium']
        try:
            fanart_thumb = match['channel']['image_url_huge']
        except:
            try:
                fanart_thumb = match['image_url_huge']
            except:
                fanart_thumb = fanart

        scrapedurl = name
        idx = abbrev.index(lang)
        lang = languages[idx].decode('utf-8')
        # str(views) because the API may return the view count as a number
        scrapedplot = title + '\nStatus: ' + status + '\nTags: ' + tags + '\nChannel Name: ' + name + '\nBitrate: ' + bitrate + '\nLanguage: ' + lang + '\nViews: ' + str(views)

        if config.get_setting("streamlive") == "true":
            scrapedtitle = title + ' [%s] BitRate: %s (%s)' % (name, bitrate, lang)
            itemlist.append(Item(channel=item.channel, action="playVideo",
                                 title=scrapedtitle.encode("utf-8"), url=scrapedurl,
                                 thumbnail=scrapedthumbnail, plot=scrapedplot.encode("utf-8"),
                                 category=item.plot, totalItems=totalItems,
                                 fanart=scrapedthumbnail,
                                 context='7',  # 7: list archived videos
                                 folder=False))
        else:
            scrapedtitle = title + ' [%s] (%s)' % (name, lang)
            itemlist.append(Item(channel=item.channel, action="listarchives",
                                 title=scrapedtitle.encode("utf-8"), url=scrapedurl,
                                 thumbnail=scrapedthumbnail, plot=scrapedplot.encode("utf-8"),
                                 category=item.plot, totalItems=totalItems, fanart=fanart_thumb,
                                 extra=fanart_thumb,
                                 context='6',  # 6: watch the channel live
                                 folder=True))
        if DEBUG:
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")

    if totalItems >= limit:
        offset1 = re.compile('offset=(.+?)&').findall(url)[0]
        offset2 = str(int(offset1) + limit + 1)
        scrapedurl = item.url.replace("offset=" + offset1, "offset=" + offset2)
        scrapedtitle = item.title + "|Next Page >>"
        scrapedthumbnail = ''
        scrapedplot = ''
        itemlist.append(Item(channel=item.channel, action="listchannel", title=scrapedtitle,
                             url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot,
                             category=item.category, fanart=fanart))

    return itemlist