def episodios(item):
    """Build the episode list for a mundonick show page.

    item.url points at the show page; returns a list of playable Items.
    NOTE(review): Python 2 only — relies on str.decode('iso-8859-1').
    """
    logger.info("[mundonick.py] episodios")
    data = scrapertools.cachePage(item.url)
    #logger.info(data)
    # Sample of the markup being matched:
    #<a href=""><img class="linkImgTurbo" src="/shared/media/images/shows/l/legend_of_korra/101_3_82x55.jpg" alt="" title=""></a><a href="/nickturbo/?gid=2418&cid=1696825&vid=853875"><img class="linkImgTurbo" src="images/thumbsLitleFrame.png" alt="Legend Of Korra | Episodio 01" title="Legend Of Korra | Episodio 01"
    patron = '<a href=""><img class="linkImgTurbo" src="([^"]+)" alt="" title=""></a><a href="([^"]+)"><img class="linkImgTurbo" src="images/thumbsLitleFrame.png" alt="(.*?)" title="(.*?)"'
    matches = re.compile(patron,re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)
    itemlist = []
    #video_urls = []
    for match in matches:
        # The href looks like /nickturbo/?gid=...&cid=...&vid=NNN, so splitting
        # on '=' and taking index 3 keeps only the vid value.
        scrapedurl = match[1].split('=')[3];
        scrapedthumbnail = __urlbase__ + match[0]
        # Title arrives as latin-1 HTML; clean it, then re-encode as UTF-8.
        scrapedtitle = scrapertools.htmlclean(match[3]).decode('iso-8859-1').encode("utf8","ignore")
        itemlist.append( Item(channel=__channel__, action="play", title=scrapedtitle, url=scrapedurl, thumbnail=scrapedthumbnail, folder=False) )
        #video_urls.append(get_video_url_internal(scrapedurl ))
    #for video_url in video_urls:
    #    logger.info(str(video_url))
    return itemlist
def categorias(item):
    """Return the category folders scraped from the page's first <ul> menu."""
    itemlist = []

    # Fetch the page and keep only the first <ul> block (the category menu).
    data = scrapertools.cache_page(item.url, headers=headers)
    bloque = scrapertools.get_match(data, '<ul>(.*?)</ul>')

    # Each entry: link, visible name, trailing text (usually an item count).
    patron = '<a href="([^"]+)" >(.*?)</a>(.*?)\s*</li>'
    matches = re.compile(patron, re.DOTALL).findall(bloque)
    scrapertools.printMatches(matches)

    for link, caption, tot in matches:
        plot = ""
        thumb = ""
        caption = scrapertools.decodeHtmlentities(caption.replace("Animazione", ""))
        link = scrapertools.decodeHtmlentities(link.replace("%s/category/animazione/" % host, ""))
        if DEBUG:
            logger.info("title=[" + caption + "], url=[" + link + "]")
        itemlist.append(
            Item(channel=__channel__,
                 action="pelicat",
                 title="[COLOR azure]" + caption + "[/COLOR][COLOR gray]" + tot + "[/COLOR]",
                 url=link,
                 thumbnail="http://xbmc-repo-ackbarr.googlecode.com/svn/trunk/dev/skin.cirrus%20extended%20v2/extras/moviegenres/All%20Movies%20by%20Genre.png",
                 folder=True))

    return itemlist
def lomasvisto(item):
    """Most-watched-this-week listing for seriespepito.

    Scrapes the weekly top block and returns one folder Item per show.
    FIX: the plot was round-tripped through unicode()/encode() even though
    it is always the empty string; the dead conversion has been removed.
    """
    logger.info("[seriespepito.py] lomasvisto")

    data = scrapertools.cachePage(item.url)
    data = scrapertools.get_match(data,'s visto de esta semana en Pepito</div><ul(.*?)</ul>')

    patron = '<a title="([^"]+)" href="([^"]+)"[^<]+'
    patron += '<img.*?src="([^"]+)"'
    matches = re.compile(patron,re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    itemlist = []
    for scrapedtitle,scrapedurl,scrapedthumbnail in matches:
        logger.info("title="+scrapedtitle)
        title = scrapertools.htmlclean(scrapedtitle).strip()
        title = title.replace("\r","").replace("\n","")
        # Collapse runs of whitespace into single spaces.
        title = re.compile("\s+",re.DOTALL).sub(" ",title)
        logger.info("title="+title)
        url = scrapedurl
        thumbnail = scrapedthumbnail
        plot = ""
        if DEBUG:
            logger.info("title=["+title+"], url=["+url+"], thumbnail=["+thumbnail+"]")
        itemlist.append( Item(channel=__channel__, action="episodios" , title=title , url=url, thumbnail=thumbnail, plot=plot, show=title, viewmode="movie", fanart="http://pelisalacarta.mimediacenter.info/fanart/seriespepito.jpg"))

    return itemlist
def categorias(item):
    """List documoo categories found inside the page's sub-menu block."""
    logger.info("streamondemand.documoo categorias")
    itemlist = []

    data = scrapertools.cache_page(item.url)
    logger.info(data)

    # Cut the page down to the sub-menu that carries the categories.
    start = data.find('<ul class="sub-menu sub-menu-2">')
    end = data.find('<li id="menu-item-18011"', start)
    bloque = data[start:end]

    patron = '<li[^>]+><a href="(.*?)"><span[^>]+>(.*?)</span></a></li>'
    matches = re.compile(patron, re.DOTALL).findall(bloque)
    scrapertools.printMatches(matches)

    for link, label in matches:
        title = scrapertools.decodeHtmlentities(label)
        full_url = urlparse.urljoin(item.url, link)
        thumb = ""
        plot = ""
        if DEBUG:
            logger.info(
                "title=[" + title + "], url=[" + full_url + "], thumbnail=[" + thumb + "]")
        itemlist.append(
            Item(channel=__channel__,
                 action="peliculas",
                 title="[COLOR azure]" + title + "[/COLOR]",
                 url=full_url,
                 thumbnail=thumb,
                 plot=plot))

    return itemlist
def categorias(item):
    """Return one folder Item per jcl category found on the page.

    Markup shape:
    <li class="jcl_category" ...><a href="URL" >Name (count)</a></li>
    """
    logger.info("[documentalesatonline2.py] novedades")
    itemlist = []

    data = scrapertools.cache_page(item.url)

    patronvideos = '<li class="jcl_category"[^>]+><a href="([^"]+)"[^>]*>([^<]+)</a></li>'
    matches = re.compile(patronvideos).findall(data)
    scrapertools.printMatches(matches)

    for link, label in matches:
        itemlist.append(
            Item(channel=__channel__, action="novedades", title=label, url=link, folder=True))

    return itemlist
def buscador(item):
    """Search results for torrentestrenos; shows a placeholder entry when empty."""
    logger.info("pelisalacarta.torrentstrenos buscador")
    itemlist = []

    data = scrapertools.cache_page(item.url)
    data = re.sub(r"\n|\r|\t|\s{2}| ","",data)

    patron = '<div class="torrent-container-2 clearfix">.*?'
    patron += 'src="([^"]+)".*? '
    patron += 'href ="([^"]+)".*?'
    patron += '>([^<]+)</a>.*?'
    patron += '<p>([^<]+)</p>'
    matches = re.compile(patron,re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    if not matches:
        itemlist.append(
            Item(channel=__channel__,
                 title="[COLOR gold][B]No se encontraron coincidencias...[/B][/COLOR]",
                 thumbnail="http://s6.postimg.org/w7nc1wh8x/torrnoisethumb.png",
                 fanart="http://s6.postimg.org/jez81z5n5/torrnoisefan.jpg",
                 folder=False))

    for thumb, link, name, created in matches:
        name = name + "(Torrent:" + created + ")"
        thumb = "http://www.torrentestrenos.com/" + thumb
        link = "http://www.torrentestrenos.com" + link
        itemlist.append(
            Item(channel=__channel__, title=name, url=link, action="findvideos",
                 thumbnail=thumb,
                 fanart="http://s6.postimg.org/44tc7dtg1/tefanartgeneral.jpg",
                 fulltitle=name, folder=True))

    return itemlist
def novedades(item):
    """Latest entries for serieonline, plus a next-page link when present."""
    logger.info("[serieonline.py] novedades")

    data = scrapertools.cachePage(item.url)

    # One entry per captify-thumbnail link.
    patronvideos = '<a href="([^"]+)" title="([^"]+)"><img src="([^"]+)" alt="([^"]+)" class="captify" /></a>'
    matches = re.compile(patronvideos,re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)

    itemlist = []
    for link, title1, img, title2 in matches:
        title = title1 + " " + title2
        plot = ""
        full_url = urlparse.urljoin(item.url, link)
        thumb = urlparse.urljoin(item.url, img)
        if DEBUG:
            logger.info("title=["+title+"], url=["+full_url+"], thumbnail=["+thumb+"]")
        itemlist.append(
            Item(channel=CHANNELNAME, action="findvideos", title=title,
                 url=full_url, thumbnail=thumb, plot=plot, folder=True))

    # Pager link.
    patronvideos = '<div class="paginacion-num"><a href="([^"]+)">'
    matches = re.compile(patronvideos,re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    if matches:
        itemlist.append(
            Item(channel=CHANNELNAME, action="novedades",
                 title="Página siguiente",
                 url=urlparse.urljoin(item.url,matches[0]), folder=True))

    return itemlist
def quality(item):
    """Quality filter entries scraped from the 'Qualità' menu."""
    logger.info("[itastreaming.py] genere")
    itemlist = []

    data = scrapertools.anti_cloudflare(item.url, headers)
    # Keep only the menu that follows the 'Qualità' anchor.
    data = scrapertools.find_single_match(data, '<a>Qualità</a>(.+?)</ul>')

    patron = '<li id=".*?'
    patron += 'href="([^"]+)".*?'
    patron += '>([^"]+)</a>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for link, label in matches:
        itemlist.append(
            Item(channel=__channel__, action="fichas",
                 title=label.replace('&', '-'), url=link, folder=True))

    return itemlist
def idiomas(item):
    """Language folders (Castellano/Latino/VO/VOS) with their entry counts."""
    logger.info("[oranline.py] idiomas")
    itemlist = []

    data = get_main_page(item.url)
    # Restrict parsing to the 'Últimos estrenos' widget.
    data = scrapertools.get_match(data,'<div class="widget"><h3>Últimos estrenos</h3>(.*?)</ul>')

    # Captures: category href, visible name, entry count.
    patron = '<li class="cat-item cat-item-\d+"><a href="([^"]+)"[^>]+>([^<]+)</a>\s+\((\d+)\)'
    matches = re.compile(patron,re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for link, label, count in matches:
        title = label.strip() + " (" + count + ")"
        full_url = urlparse.urljoin(item.url, link)
        thumbnail = ""
        plot = ""
        if DEBUG:
            logger.info("title=["+title+"], url=["+full_url+"], thumbnail=["+thumbnail+"]")
        itemlist.append(
            Item(channel=__channel__, action="novedades", title=title,
                 url=full_url, thumbnail=thumbnail, plot=plot, folder=True))

    return itemlist
def listaaz(item):
    """A-Z anime index for cineblog01."""
    logger.info("[cineblog01.py] listaaz")
    itemlist = []

    data = scrapertools.cache_page(item.url)
    logger.info(data)

    # Restrict parsing to the block that follows the A-Z letter bar.
    bloque = scrapertools.get_match(data, '<a href="#char_5a" title="Go to the letter Z">Z</a></span></div>(.*?)</ul></div><div style="clear:both;"></div></div>')

    patron = '<li><a href="([^"]+)"><span class="head">([^<]+)</span></a></li>'
    matches = re.compile(patron, re.DOTALL).findall(bloque)
    scrapertools.printMatches(matches)

    for link, label in matches:
        plot = ""
        if DEBUG:
            logger.info("title=[" + label + "], url=[" + link + "]")
        itemlist.append(
            Item(channel=__channel__, action="findvid_anime",
                 title="[COLOR azure]" + label + "[/COLOR]",
                 url=link,
                 thumbnail="http://www.justforpastime.net/uploads/3/8/1/5/38155083/273372_orig.jpg",
                 plot=plot))

    return itemlist
def animegenere(item):
    """Anime genre menu built from the options of the 'select2' combo."""
    logger.info("[cineblog01.py] animegenere")
    itemlist = []

    data = scrapertools.cache_page(item.url)
    logger.info(data)

    # Only the options inside the second combo are relevant.
    bloque = scrapertools.get_match(data, '<select name="select2"(.*?)</select')

    patron = '<option value="([^"]+)">([^<]+)</option>'
    matches = re.compile(patron, re.DOTALL).findall(bloque)
    scrapertools.printMatches(matches)

    for link, label in matches:
        thumb = ""
        plot = ""
        if DEBUG:
            logger.info(
                "title=[" + label + "], url=[" + link + "], thumbnail=[" + thumb + "]")
        itemlist.append(
            Item(channel=__channel__, action="animestream",
                 title="[COLOR azure]" + label + "[/COLOR]",
                 url=link, thumbnail=thumb, plot=plot))

    return itemlist
def peliculas(item):
    """Movie listing for cineblogfm with pager handling.

    For every entry the detail page is fetched to extract a short plot.
    FIX: the per-entry download was unguarded, so one unreachable detail
    page aborted the whole listing; it now degrades to an empty plot.
    NOTE(review): one HTTP request per listed movie is slow — consider
    deferring plot extraction to the detail view.
    """
    logger.info("pelisalacarta.cineblogfm peliculas")
    itemlist = []

    data = scrapertools.cache_page(item.url)

    # Captures: detail URL, title, thumbnail (from inline background style).
    patron = '<div class="short-story">.*?<a href="(.*?)" title="(.*?)">.*?<img.*?style="background:url[^h]+(.*?)\) no-repeat'
    matches = re.compile(patron,re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for scrapedurl,scrapedtitle,scrapedthumbnail in matches:
        scrapedplot = ""
        try:
            html = urllib2.urlopen(scrapedurl).read()
            start = html.find("<div class=\"post-title\">")
            end = html.find("<td class=\"full-right\">", start)
            scrapedplot = re.sub(r'<.*?>', '', html[start:end])
        except Exception:
            # Network/HTTP failure on the detail page: keep the entry, drop the plot.
            logger.info("pelisalacarta.cineblogfm peliculas: no plot for " + scrapedurl)
        if DEBUG:
            logger.info("title=["+scrapedtitle+"], url=["+scrapedurl+"], thumbnail=["+scrapedthumbnail+"]")
        itemlist.append( Item(channel=__channel__, action="findvideos", title=scrapedtitle , url=scrapedurl , thumbnail=scrapedthumbnail , plot=scrapedplot , folder=True) )

    # Pager link.
    patronvideos = '<span class="nav_ext">...</span> <a href=.*?</a> <a href="(.*?)">Avanti</a></div></div>'
    matches = re.compile(patronvideos,re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    if len(matches)>0:
        scrapedurl = urlparse.urljoin(item.url,matches[0])
        itemlist.append( Item(channel=__channel__, action="peliculas", title="[COLOR orange]Avanti >>[/COLOR]" , url=scrapedurl , folder=True) )

    return itemlist
def findvideos(item):
    """Playable links for a zentorrents detail page."""
    logger.info("pelisalacarta.zentorrents findvideos")
    itemlist = []

    data = scrapertools.cache_page(item.url)
    data = re.sub(r"\n|\r|\t|\s{2}| |</p>|<p>|&|amp;","",data)

    patron = '<div class="descargatext">.*?'
    patron += '<img alt="([^<]+)" '
    patron += 'src="([^"]+)".*?'
    patron += 'type.*?href="([^"]+)"'
    matches = re.compile(patron,re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for name, thumb, link in matches:
        # Prefix + white-colored title (same text the original produced).
        title = "[COLOR yellow][B]Ver--[/B][/COLOR]" + "[COLOR white]" + name + "[/COLOR]"
        itemlist.append(
            Item(channel=__channel__, title=title, thumbnail=thumb, url=link,
                 fanart=item.fanart, action="play", folder=True))

    return itemlist
def peliculas(item):
    """Listing for cucinarefacile with home/next navigation entries."""
    logger.info("streamondemand.cucinarefacile peliculas")
    itemlist = []

    data = scrapertools.cache_page(item.url)

    patron = '<div class="post-thumb single-img-box">\s*'
    patron += '<a href="(.*?)"><img[^=]+=[^=]+=[^=]+="(.*?)"[^>]+></a>\s*'
    patron += '</div>\s*'
    patron += '<div[^>]+>\s*'
    patron += '<h2>[^>]+>(.*?)</a>'
    matches = re.compile(patron,re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for link, thumb, label in matches:
        label = scrapertools.decodeHtmlentities(label)
        if DEBUG:
            logger.info("title=["+label+"], url=["+link+"], thumbnail=["+thumb+"]")
        itemlist.append(
            Item(channel=__channel__, action="findvideos", fulltitle=label,
                 show=label, title=label, url=link, thumbnail=thumb,
                 plot="", folder=True))

    # Pager: a "next" link means one more page exists.
    patronvideos = 'a class="nextpostslink" rel="next" href="(.*?)">»'
    matches = re.compile(patronvideos,re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    if matches:
        next_url = urlparse.urljoin(item.url,matches[0])
        itemlist.append(
            Item(channel=__channel__, action="HomePage",
                 title="[COLOR yellow]Torna Home[/COLOR]", folder=True))
        itemlist.append(
            Item(channel=__channel__, action="peliculas",
                 title="[COLOR orange]Avanti >>[/COLOR]", url=next_url, folder=True))

    return itemlist
def getvideo(item):
    """Resolve the flv URL embedded in the tresmellizas cinema player page.

    item.url carries the POST payload for the player endpoint; the response
    initialises the player with 'movie','videos/bess/NAME'.
    FIX: the original indexed matches[0] unconditionally and raised
    IndexError when the page layout changed; it now returns an empty list.
    """
    logger.info("[tresmellizas.py] play")
    postdata = item.url
    data = postespecial("http://styleguide.thetriplets.com/bessones/cinema/cine.php", postdata)
    # logger.info("[tresmellizas.py] data="+data)
    patron = "'movie','([^']+)'"
    matches = re.compile(patron, re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)

    itemlist = []
    if not matches:
        logger.info("[tresmellizas.py] no movie reference found")
        return itemlist

    # e.g. 'movie','videos/bess/BESS_Sireneta_castellano' ->
    # http://styleguide.thetriplets.com/bessones/cinema/videos/bess/BESS_Sireneta_castellano.flv
    url = "http://styleguide.thetriplets.com/bessones/cinema/" + matches[0] + ".flv"
    itemlist.append(
        Item(
            channel=CHANNELNAME,
            title=item.title,
            action="play",
            url=url,
            thumbnail=item.thumbnail,
            plot=item.plot,
            server="directo",
            folder=False,
        )
    )
    return itemlist
def peliculas(item,paginacion=True,data=None):
    """Movie listing for shurweb; appends a next-page entry when available.

    data -- optional pre-downloaded markup; fetched from item.url when None.
    """
    logger.info("[shurweb.py] peliculas")

    if data is None:
        data = scrapertools.cachePage(item.url)

    patronvideos = '<a class="video_thumb" href="([^"]+)" rel="bookmark" title="([^"]+)">.*?<img.*?src="([^"]+)"'
    matches = re.compile(patronvideos,re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)

    itemlist = []
    for link, rawtitle, thumb in matches:
        title = scrapertools.entityunescape(rawtitle)
        if DEBUG:
            logger.info("title=["+title+"], url=["+link+"], thumbnail=["+thumb+"]")
        itemlist.append(
            Item(channel=__channel__, action='findvideos', title=title,
                 fulltitle=title, url=link, thumbnail=thumb, plot="",
                 extra=title, viewmode="movie", context="4|5",
                 fanart="http://pelisalacarta.mimediacenter.info/fanart/shurweb.jpg"))

    #<span class="i_next fr" ><a href="http://www.shurweb.es/videoscategory/animacion/page/2/" >Ver Más Videos</a> </span>
    try:
        next_page_url = scrapertools.get_match(data,'<span class="i_next fr" ><a href="([^"]+)" >Ver M')
        itemlist.append(
            Item(channel=__channel__, title=">> Página siguiente", action="peliculas",
                 url=urlparse.urljoin(item.url,next_page_url),
                 fanart="http://pelisalacarta.mimediacenter.info/fanart/shurweb.jpg"))
    except:
        pass

    return itemlist
def play(item):
    """Resolve an rtva video: detail page -> XML descriptor -> media URL."""
    logger.info("[rtva.py] play")
    url = item.url

    # Step 1: the detail page exposes the XML metadata service URL, e.g.
    # _url_xml_datos=http://www.canalsuralacarta.es/webservice/video/2590"
    data = scrapertools.cachePage(url)
    matches = re.compile('_url_xml_datos=([^"]+)"',re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    if matches:
        url = urlparse.urljoin(url,matches[0])
        logger.info("[rtva.py] url="+url)

    # Step 2: the XML lists one <url> per quality; keep the last entry.
    data = scrapertools.cachePage(url)
    matches = re.compile('<url>([^<]+)</url>',re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    if matches:
        url = matches[-1]

    itemlist = []
    itemlist.append(
        Item(channel=CHANNELNAME, title=item.title, action="play",
             server="directo", url=url, thumbnail=item.thumbnail,
             plot=item.plot, show=item.show, folder=False))
    return itemlist
def series(item,paginacion=True):
    """Series listing for shurweb.

    FIX: the title clean-up called replace("&", "&") — a no-op; the intent
    is to decode HTML-escaped ampersands, so it now replaces "&amp;".
    """
    logger.info("[shurweb.py] series")
    url = item.url
    data = scrapertools.cachePage(url)

    # Markup shape:
    # <li class="clearfix">
    # <a class="video_thumb" href="URL" rel="bookmark" title="TITLE">
    # <img width=".." height=".." src="THUMB" class="wp-post-image">
    patron = '<li class="clearfix">[^<]+'
    patron += '<a class="video_thumb" href="([^"]+)" rel="bookmark" title="([^"]+)">[^<]+'
    patron += '<img width="[^"]+" height="[^"]+" src="([^"]+)"'
    matches = re.compile(patron,re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)

    itemlist = []
    for url,title,thumbnail in matches:
        scrapedtitle = title.replace("&amp;","&")
        fulltitle = scrapedtitle
        scrapedplot = ""
        scrapedurl = url
        scrapedthumbnail = thumbnail
        if DEBUG:
            logger.info("title=["+scrapedtitle+"], url=["+scrapedurl+"], thumbnail=["+scrapedthumbnail+"]")
        itemlist.append( Item(channel=__channel__, action='episodios', title=scrapedtitle , fulltitle=fulltitle , url=scrapedurl , thumbnail=scrapedthumbnail , plot=scrapedplot , extra=scrapedtitle , viewmode="movie", show=scrapedtitle, context="4|5",fanart="http://pelisalacarta.mimediacenter.info/fanart/shurweb.jpg") )

    return itemlist
def episodios(item):
    """Episode listing for a shurweb series, plus library/download helpers on XBMC."""
    logger.info("[shurweb.py] episodios")

    data = scrapertools.cachePage(item.url)
    # Enrich the item with show details (plot, thumbnail, ...).
    item = detalle_programa(item,data)

    # <li><div class="video"><a class="video_title" href="...">Name 1x10</a>
    patron = '<li>[^<]+'
    patron += '<div class="video">[^<]+'
    patron += '<a class="video_title" href="([^"]+)">([^<]+)</a>'
    matches = re.compile(patron,re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)

    itemlist = []
    for link, name in matches:
        if DEBUG:
            logger.info("title=["+name+"], url=["+link+"], thumbnail=["+item.thumbnail+"]")
        itemlist.append(
            Item(channel=__channel__, action='findvideos', title=name,
                 fulltitle=name, url=link, thumbnail=item.thumbnail,
                 plot=item.plot, extra=name, show=item.show, context="4|5",
                 fanart="http://pelisalacarta.mimediacenter.info/fanart/shurweb.jpg",
                 viewmode="movie_with_plot"))

    # On XBMC/Boxee, offer library and batch-download helpers.
    if config.get_platform().startswith("xbmc") or config.get_platform().startswith("boxee"):
        itemlist.append(
            Item(channel=item.channel, title="Añadir esta serie a la biblioteca de XBMC",
                 url=item.url, action="add_serie_to_library", extra="episodios", show=item.show))
        itemlist.append(
            Item(channel=item.channel, title="Descargar todos los episodios de la serie",
                 url=item.url, action="download_all_episodes", extra="episodios", show=item.show))

    return itemlist
def parsewebcategorias(params,url,category):
    """Fill the XBMC directory with the videos of one redes-tv category.

    params   -- plugin invocation parameters (unused here)
    url      -- category id fragment spliced into the sitemap regex below
    category -- label used as the plugin category in XBMC

    Side effects only: adds directory entries via xbmctools/xbmcplugin and
    closes the directory; returns nothing.
    """
    logger.info("[redestv.py] buscacategorias")
    # The whole sitemap is fetched once; the category section is cut out of it.
    data = scrapertools.cachePage("http://www.redes-tv.com/index.php?option=com_xmap&sitemap=1&Itemid=31")
    #href='http://www.redestv.com/category/arte/' title="ARTE">ARTE</a></li><li><a
    #href="/index.php?option=com_content&view=category&layout=blog&id=1&Itemid=9" title="Biotecnolog\xc3\xada y Salud"
    # NOTE(review): "url" is interpolated unescaped and the "." after
    # "index.php" matches any character — presumably the "?" of the real
    # URLs; confirm callers always pass regex-safe fragments.
    patronvideos = "index.php." + url + '(.*?)</ul>'
    #patronvideos=patronvideos.replace("&","\&")
    #patronvideos=patronvideos.replace(";","\;")
    #patronvideos=patronvideos.replace("=","\=")
    #patronvideos=patronvideos.replace("_","\_")
    #logger.info(patronvideos)
    #logger.info("web"+data)
    matches = re.compile(patronvideos,re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)
    if len(matches)>0:
        # Inside the category section, every video is an href/title pair, e.g.
        #href="/index.php?option=com_content&view=article&id=65:473-farmacos-para-las-emociones&catid=1:biosalud&Itemid=9" title="473: Fármacos para las emociones"
        patronvideos = 'href="(.+?)" title="(.+?)"'
        matches1 = re.compile(patronvideos).findall(matches[0])
        for i in range(len(matches1)):
            #xbmctools.addnewvideo( CHANNELNAME , "buscavideos" , category, matches1[i][1] , matches1[i][0] , "thumbnail" , "")
            xbmctools.addnewvideo( CHANNELNAME , "buscavideos" , category , "redestv", matches1[i][1] , matches1[i][0] , "thumbnail" , "")
    # Finish the XBMC directory listing.
    xbmcplugin.setPluginCategory( handle=int( sys.argv[ 1 ] ), category=category )
    xbmcplugin.addSortMethod( handle=int( sys.argv[ 1 ] ), sortMethod=xbmcplugin.SORT_METHOD_NONE )
    xbmcplugin.endOfDirectory( handle=int( sys.argv[ 1 ] ), succeeded=True )
def novedades(params,url,category):
    """List the latest redes-tv episodes and close the XBMC directory.

    params   -- plugin invocation parameters (unused here)
    url      -- page to scrape
    category -- label passed through to each directory entry

    Side effects only: adds directory entries via xbmctools/xbmcplugin and
    closes the directory; returns nothing.
    """
    logger.info("[redestv.py] parseweb")
    # ------------------------------------------------------
    # Download the page
    # ------------------------------------------------------
    data = scrapertools.cachePage(url)
    #logger.info(data)
    #<div style="text-align: justify;">Cre?amos que el ser humano era el ?nico animal capaz de sentir empat?a. Sin embargo, el altruismo existe en muchos otros animales. Estar conectado con los dem?s, entenderlos y sentir su dolor no es exclusivo del ser humano. El prim?tologo Frans de Waal, gran estudiador de las emociones animales, habla con Punset sobre empat?a y simpat?a, capacidades clave para el ?xito en la vida social.</div><div class="jcomments-links"> <a href="/index.php?option=com_content&view=article&id=161:501-nuestro-cerebro-altruista&catid=2:cermen&Itemid=10#addcomments" class="comment-link">Escribir un comentario</a></div>
    # Captures: episode URL and its numbered title.
    patronvideos = '<td class="contentheading" width="100%">.+?<a href="(.+?)" class="contentpagetitle">\s+(\d+.+?)</a>'
    #patronvideos = '<div style="text-align: justify;">.+?</div>.+?<a href="(.+?)#'
    #logger.info("web"+data)
    matches = re.compile(patronvideos,re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)
    #xbmctools.addnewfolder( CHANNELNAME , "buscavideos" , category, "redestv" , "http://www.redes-tv.com"+matches[0][0] , "" , "")
    #scrapertools.printMatches(matches)
    # patronvideos1 = 'src="http://www.megavideo.com/v/(.{8}).+?".+?></embed>.*?<p>(.+?)</p><div'
    # matches1 = re.compile(patronvideos1,re.DOTALL).findall(data)
    # if DEBUG:
    #     scrapertools.printMatches(matches1)
    # One directory entry per scraped episode.
    for i in range(len(matches)):
        xbmctools.addnewvideo( CHANNELNAME , "buscavideos" , category , "redestv" , matches[i][1] , matches[i][0] , "thumbnail" , "")
    # Finish the XBMC directory listing.
    xbmcplugin.setPluginCategory( handle=int( sys.argv[ 1 ] ), category=category )
    xbmcplugin.addSortMethod( handle=int( sys.argv[ 1 ] ), sortMethod=xbmcplugin.SORT_METHOD_NONE )
    xbmcplugin.endOfDirectory( handle=int( sys.argv[ 1 ] ), succeeded=True )
def listaletra(item):
    """Letter-based anime index built from the 'Anime per Lettera' combo."""
    logger.info("[cineblog01.py] listaaz")
    itemlist = []

    data = anti_cloudflare(item.url)

    # Options that follow the placeholder entry of the letters combo.
    bloque = scrapertools.get_match(data, '<option value=\'-1\'>Anime per Lettera</option>(.*?)</select>')

    # NOTE: group 2 opens at the "(" of the option label, so the captured
    # title keeps a leading parenthesis but not the closing one.
    patron = '<option value="([^"]+)">(\([^<]+)\)</option>'
    matches = re.compile(patron, re.DOTALL).findall(bloque)
    scrapertools.printMatches(matches)

    for link, label in matches:
        plot = ""
        if DEBUG:
            logger.info("title=[" + label + "], url=[" + link + "]")
        itemlist.append(
            Item(channel=__channel__, action="listanime", title=label,
                 url=sitoanime + link,
                 thumbnail="http://www.justforpastime.net/uploads/3/8/1/5/38155083/273372_orig.jpg",
                 plot=plot))

    return itemlist
def peliculas(item):
    """Film thread listing for scambioetico with pager handling.

    FIX: both href clean-ups called url.replace("&", "&"), which is a
    no-op; the intent is to decode HTML-escaped ampersands in hrefs, so
    they now replace "&amp;" with "&".
    """
    logger.info("streamondemand.scambioetico peliculas")
    itemlist = []

    data = scrapertools.cache_page(item.url,headers=headers,timeout=95)

    #patron = '<td class=\'row4\'>\s*<a href="(.*?)"[^>]+>(.*?)</a>'
    patron = '<a href="(.*?)" title="discussione inviata[^>]+>(.*?)</a>'
    matches = re.compile(patron,re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for scrapedurl, scrapedtitle in matches:
        scrapedtitle = scrapertools.decodeHtmlentities(scrapedtitle)
        url = scrapedurl
        url = url.replace("&amp;", "&")
        scrapedplot = ""
        scrapedthumbnail = ""
        if DEBUG:
            logger.info("title=["+scrapedtitle+"], url=["+url+"], thumbnail=["+scrapedthumbnail+"]")
        itemlist.append( Item(channel=__channel__, action="play", fulltitle=scrapedtitle, show=scrapedtitle, title="[COLOR azure]"+scrapedtitle+"[/COLOR]" , url=url , thumbnail=scrapedthumbnail , plot=scrapedplot , folder=True) )

    # Pager link.
    patronvideos = ']</b> <a href=\'(.*?)\'>'
    matches = re.compile(patronvideos,re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    if len(matches)>0:
        url = urlparse.urljoin(item.url,matches[0])
        url = url.replace("&amp;", "&")
        itemlist.append( Item(channel=__channel__, action="peliculas", title="[COLOR orange]Successivo>>[/COLOR]" , url=url , thumbnail="http://2.bp.blogspot.com/-fE9tzwmjaeQ/UcM2apxDtjI/AAAAAAAAeeg/WKSGM2TADLM/s1600/pager+old.png", folder=True) )

    return itemlist
def peliculas(item):
    """Movie listing for liberostreaming with pager support."""
    logger.info("streamondemand.liberostreaming peliculas")
    itemlist = []

    data = scrapertools.cache_page(item.url, headers=headers)

    patron = '<div class="entry-thumbnails"><a class=[^=]+="(.*?)"><img[^s]+src="(.*?)" class=[^=]+="(.*?)" title="(.*?)"/>'
    matches = re.compile(patron,re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for link, thumb, rawplot, rawtitle in matches:
        title = scrapertools.decodeHtmlentities(rawtitle)
        title = scrapertools.decodeHtmlentities(title.replace("Streaming",""))
        plot = scrapertools.decodeHtmlentities(rawplot)
        if DEBUG:
            logger.info("title=["+title+"], url=["+link+"], thumbnail=["+thumb+"]")
        itemlist.append(
            Item(channel=__channel__, action="findvideos", fulltitle=title,
                 show=title, title=title, url=link, thumbnail=thumb,
                 plot=plot, folder=True))

    # Pager link.
    patronvideos = '<a class="nextpostslink" rel="next" href="(.*?)">»</a>'
    matches = re.compile(patronvideos,re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    if matches:
        itemlist.append(
            Item(channel=__channel__, action="peliculas",
                 title="[COLOR orange]Avanti >>[/COLOR]",
                 url=urlparse.urljoin(item.url,matches[0]), folder=True))

    return itemlist
def novedades_episodios(item):
    """Daily new telenovela episodes for tusnovelas.

    Markup shape:
    <div class="dia"><img src="THUMB" .../>
    <div class="dia-titulo"><a href="URL" class="tts">TITLE</a></div>
    """
    logger.info("[tusnovelas.py] novedades_episodios")
    itemlist = []

    data = scrapertools.cachePage(item.url)

    patron = '<div class="dia">[^<]+'
    patron += '<img src="([^"]+)"[^<]+'
    patron += '<div class="dia-titulo"><a href="([^"]+)"[^>]+>([^<]+)</a></div>'
    matches = re.compile(patron,re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)

    for thumb, link, name in matches:
        full_url = urlparse.urljoin(item.url, link)
        if DEBUG:
            logger.info("title=["+name+"], url=["+full_url+"], thumbnail=["+thumb+"]")
        itemlist.append(
            Item(channel=__channel__, action="findvideos", title=name,
                 url=full_url, thumbnail=thumb, plot="", folder=True))

    return itemlist
def searchlist(item):
    """Search results for newhd.

    FIX: the per-field extraction used bare "except:" clauses, which also
    swallow SystemExit/KeyboardInterrupt; they now catch IndexError, the
    only expected failure ([0] on an empty findall). The field regexes are
    also compiled once, outside the loop.
    """
    logger.info("[newhd.py] searchlist")

    data = scrapertools.cachePage(item.url)

    # One blob of markup per result row.
    patronvideos = 'class="newstitle">N°(.*?)</div></td>'
    matches = re.compile(patronvideos,re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)

    # Field extractors, hoisted out of the loop.
    url_re = re.compile(r'href="(.+?)"')
    title_re = re.compile(r'html" >(.+?)</a>')
    thumb_re = re.compile(r'<img src="([^"]+)" width=')
    plot_re = re.compile(r'(<div id=.+?)</div></td>')

    itemlist = []
    for match in matches:
        try:
            scrapedurl = url_re.findall(match)[0]
        except IndexError:
            # Without a URL the row is useless — skip it.
            continue
        try:
            scrapedtitle = title_re.findall(match)[0]
        except IndexError:
            scrapedtitle = "untitle"
        try:
            scrapedthumbnail = urlparse.urljoin(item.url, thumb_re.findall(match)[0])
        except IndexError:
            scrapedthumbnail = ""
        try:
            scrapedplot = plot_re.findall(match)[0]
            scrapedplot = re.sub("<[^>]+>"," ",scrapedplot).strip()
        except IndexError:
            scrapedplot = ""
        logger.info(scrapedtitle)
        itemlist.append( Item(channel=__channel__, action="videos", title=scrapedtitle , url=scrapedurl , thumbnail=scrapedthumbnail , plot=scrapedplot ,context='4' , folder=True) )

    return itemlist
def peliculas(item):
    """List the movies of the current cinestreaming01 page plus a pager entry.

    Fix: the original entry regex embedded the literal URL fragment
    'timthumb.php?src=' without escaping, so the '?' acted as a regex
    quantifier (making the preceding 'p' optional) and the literal '?'
    present in the HTML could never be matched -- the pattern returned
    no entries at all.  '?' and '.' are now escaped.
    """
    logger.info("streamondemand.cinestreaming01 peliculas")
    itemlist = []

    # Download the page
    data = scrapertools.cache_page(item.url)

    # Extract the entries (folders); the timthumb query string is matched literally.
    patron = '<div class="box " id="post-.*?">.*?<a href="(.*?)"><img class="boximg" src="http://cinestreaming01\.com/wp-content/themes/Boxoffice/timthumb\.php\?src=(.*?)&h=270&w=180&zc=1" alt=""/></a>\s*'
    patron += '<h2><a href=".*?" rel="bookmark" title=".*?">(.*?)</a></h2>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for scrapedurl, scrapedthumbnail, scrapedtitle in matches:
        if (DEBUG):
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")
        itemlist.append(
            Item(channel=__channel__, action="findvideos", fulltitle=scrapedtitle,
                 show=scrapedtitle, title="[COLOR azure]" + scrapedtitle + "[/COLOR]",
                 url=scrapedurl, thumbnail=scrapedthumbnail, folder=True))

    # Pager: the "Avanti" link points to the next results page.
    patronvideos = '<span class="pnext"><a href="(.*?)">Avanti</a></span>'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    if len(matches) > 0:
        scrapedurl = urlparse.urljoin(item.url, matches[0])
        itemlist.append(
            Item(channel=__channel__, action="peliculas",
                 title="[COLOR orange]Successivo>>[/COLOR]", url=scrapedurl,
                 thumbnail="http://2.bp.blogspot.com/-fE9tzwmjaeQ/UcM2apxDtjI/AAAAAAAAeeg/WKSGM2TADLM/s1600/pager+old.png",
                 folder=True))

    return itemlist
def menuanyos(item):
    """Return one menu entry per option of the 'select3' combo box."""
    logger.info("[cineblog01.py] menuvk")
    itemlist = []

    data = anti_cloudflare(item.url)

    # Restrict the search to the relevant <select> element only.
    combo = scrapertools.get_match(data, '<select name="select3"(.*?)</select>')

    # Every <option> of the combo becomes one menu entry.
    options = re.findall('<option value="([^"]+)">([^<]+)</option>', combo, re.DOTALL)
    scrapertools.printMatches(options)

    for relative_url, label in options:
        absolute_url = urlparse.urljoin(item.url, relative_url)
        thumbnail = ""
        plot = ""
        if (DEBUG):
            logger.info(
                "title=[" + label + "], url=[" + absolute_url + "], thumbnail=[" + thumbnail + "]")
        itemlist.append(
            Item(channel=__channel__, action="peliculasrobalo",
                 title="[COLOR azure]" + label + "[/COLOR]",
                 url=absolute_url, thumbnail=thumbnail, plot=plot))

    return itemlist
def alfa(item):
    """List the alphabetical index entries (one single-letter link each).

    Fix: the original called scrapedurl.replace("&", "&"), a no-op; the
    intent (seen in sibling channels) is to decode HTML-escaped '&amp;'
    back into a plain '&' before using the URL.
    """
    logger.info("[newhd.py] alfa")

    # Download the page
    data = scrapertools.cachePage(item.url)

    # Extract the entries: one link per letter.
    patronvideos = '<a href="([^"]+)" class="blue">(.)</a>'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)

    itemlist = []
    for match in matches:
        scrapedurl = urlparse.urljoin(item.url, match[0])
        # Unescape ampersands left HTML-encoded by the page.
        scrapedurl = scrapedurl.replace("&amp;", "&")
        scrapedtitle = match[1]
        scrapedthumbnail = ""
        scrapedplot = ""
        logger.info(scrapedtitle)

        # Add to the listing
        itemlist.append(
            Item(channel=__channel__, action="novedades", title=scrapedtitle,
                 url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot,
                 folder=True))

    return itemlist
def animegenere(item):
    """List anime genres scraped from the 'select2' combo box."""
    logger.info("[cineblog01.py] animegenere")
    itemlist = []

    data = anti_cloudflare(item.url)

    # Only look inside the genre selector.
    combo = scrapertools.get_match(data, '<select name="select2"(.*?)</select>')

    # Each <option> is one genre.
    options = re.findall('<option value="([^"]+)">([^<]+)</option>', combo, re.DOTALL)
    scrapertools.printMatches(options)

    for relative_url, raw_label in options:
        genre = scrapertools.decodeHtmlentities(raw_label)
        if DEBUG:
            logger.info("title=[" + genre + "], url=[" + relative_url + "]")
        itemlist.append(
            Item(channel=__channel__, action="listanime",
                 title="[COLOR azure]" + genre + "[/COLOR]",
                 url=sitoanime + relative_url))

    return itemlist
def peliculas(item):
    """List oranline movies with quality tag, language flags and plot.

    Language availability is inferred from the icon file names inside the
    'campos_idiomas' block: 's.png' = Spanish, 'l.png' = Latin Spanish,
    'i.png' = English, 'v.png' = subtitled VO (the 'x' variants mean
    unavailable and therefore do not match).  Two alternative pager
    layouts are tried for the "next page" link.
    """
    logger.info("pelisalacarta.channels.oranline peliculas")
    itemlist = []

    # Download the page
    data = get_main_page(item.url)

    # Extract the entries (folders)
    '''
    <div class="review-box review-box-compact" style="width: 140px;"> <!--Begin Image1--> <div class="post-thumbnail"> <a href="http://www.oranline.com/pelicula/metro-manila-2013-ver-online-y-descargar-gratis/" title="Metro Manila (2013) Ver Online Y Descargar Gratis"> <img src="http://www.oranline.com/wp-content/uploads/2013/10/metro-manila-140x210.jpg" alt="Metro Manila (2013) Ver Online Y Descargar Gratis" /> </a> <div id="mejor_calidad"> <a href="http://www.oranline.com/pelicula/metro-manila-2013-ver-online-y-descargar-gratis/" title="Metro Manila (2013) Ver Online Y Descargar Gratis"><img id="espanol" src="http://www.oranline.com/wp-content/themes/reviewit/images/HD-R_calidad.png" class="idiomas" alt="Metro Manila (2013) Ver Online Y Descargar Gratis" /> </a> <span>HD-R</span></div> </div> <!--End Image--> <div class="review-box-text"> <h2><a href="http://www.oranline.com/pelicula/metro-manila-2013-ver-online-y-descargar-gratis/" title="Metro Manila (2013) Ver Online Y Descargar Gratis">Metro Manila (2013) Ver Online...</a></h2> <p>Sinopsis Buscando un futuro mejor, Óscar Ramírez y su familia dejan los campos de arroz del norte ...</p> </div> <div id="campos_idiomas"> <img id="espanol" src="http://www.oranline.com/wp-content/themes/reviewit/images/s.png" class="idiomas" alt="" /> <img id="latino" src="http://www.oranline.com/wp-content/themes/reviewit/images/lx.png" class="idiomas" alt="" /> <img id="ingles" src="http://www.oranline.com/wp-content/themes/reviewit/images/ix.png" class="idiomas" alt="" /> <img id="vose" src="http://www.oranline.com/wp-content/themes/reviewit/images/vx.png" class="idiomas" alt="" /> </div> </div> <div class="clear"></div>
    '''
    # The pattern walks the review-box structure field by field:
    # groups are (url, title, thumbnail, quality, plot, language-icons block).
    patron = '<div class="review-box.*?'
    patron += '<a href="([^"]+)" title="([^"]+)"[^<]+'
    patron += '<img src="([^"]+)"[^<]+'
    patron += '</a[^<]+'
    patron += '<div id="mejor_calidad"[^<]+'
    patron += '<a[^<]+<img[^<]+'
    patron += '</a[^<]+'
    patron += '<span>([^<]+)</span></div[^<]+'
    patron += '</div[^<]+'
    patron += '<![^<]+'
    patron += '<div class="review-box-text"[^<]+'
    patron += '<h2[^<]+<a[^<]+</a></h2[^<]+'
    patron += '<p>([^<]+)</p[^<]+'
    patron += '</div[^<]+'
    patron += '<div id="campos_idiomas">(.*?)</div>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for scrapedurl, scrapedtitle, scrapedthumbnail, calidad, scrapedplot, idiomas in matches:
        # Strip the site's boilerplate suffix (three capitalization variants).
        scrapedtitle = scrapedtitle.replace("Ver Online Y Descargar Gratis", "").strip()
        scrapedtitle = scrapedtitle.replace("Ver Online Y Descargar gratis", "").strip()
        scrapedtitle = scrapedtitle.replace("Ver Online Y Descargar", "").strip()
        # Compose "Title (QUALITY) (LANGS)" from the matched icon names.
        title = scrapedtitle + " (" + calidad + ") ("
        if "s.png" in idiomas:
            title = title + "ESP,"
        if "l.png" in idiomas:
            title = title + "LAT,"
        if "i.png" in idiomas:
            title = title + "ING,"
        if "v.png" in idiomas:
            title = title + "VOSE,"
        # Replace the trailing comma (or the opening paren if no language matched).
        title = title[:-1] + ")"
        url = urlparse.urljoin(item.url, scrapedurl)
        thumbnail = urlparse.urljoin(item.url, scrapedthumbnail)
        plot = scrapedplot.strip()
        if (DEBUG):
            logger.info("title=[" + title + "], url=[" + url + "], thumbnail=[" + thumbnail + "]")
        itemlist.append(
            Item(channel=__channel__, action="findvideos", title=title, url=url,
                 thumbnail=thumbnail, plot=plot, viewmode="movies_with_plot",
                 folder=True))

    # Pager: first try the '&rsaquo;' arrow, then the numbered-pager fallback.
    try:
        next_page = scrapertools.get_match(data, "<a href='([^']+)'>\&rsaquo\;</a>")
        itemlist.append(
            Item(channel=__channel__, action="peliculas", title=">> Página siguiente",
                 url=urlparse.urljoin(item.url, next_page), folder=True))
    except:
        try:
            next_page = scrapertools.get_match(
                data, "<span class='current'>\d+</span><a href='([^']+)'")
            itemlist.append(
                Item(channel=__channel__, action="peliculas", title=">> Página siguiente",
                     url=urlparse.urljoin(item.url, next_page), folder=True))
        except:
            pass
        pass
    return itemlist
def forum(item):
    """List mcanime forum topics (series / movies) plus a next-page entry.

    Fix: both calls to replace("&", "&") were no-ops; the intent was to
    decode HTML-escaped '&amp;' back to a plain '&' before joining the
    relative link with the base URL.
    """
    logger.info("[mcanime.py] forum")
    itemlist = []

    # Download the page
    data = scrapertools.cache_page(item.url)
    #logger.info(data)

    # Extract the forum entries (series / movies)
    patronvideos = '<ul class="topic_row">[^<]+<li class="topic_type"><img.*?'
    patronvideos += '<li class="topic_title"><h5><a href="([^"]+)">([^<]+)</a>'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for match in matches:
        # Re-encode the title; fall back to the raw text when it is not valid UTF-8.
        try:
            scrapedtitle = unicode(match[1], "utf-8").encode("iso-8859-1")
        except:
            scrapedtitle = match[1]
        scrapedurl = urlparse.urljoin(item.url, match[0].replace("&amp;", "&"))
        scrapedthumbnail = ""
        scrapedplot = ""

        # Debug trace
        if (DEBUG):
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")

        # Add to the XBMC listing
        itemlist.append(
            Item(channel=__channel__, action='forumdetail', title=scrapedtitle,
                 url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot,
                 folder=True))

    # Extract the next-page link
    patronvideos = '<a href="([^"]+)" class="next">(Siguiente »)</a>'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for match in matches:
        scrapedtitle = "P·gina siguiente"
        scrapedurl = urlparse.urljoin(item.url, match[0].replace("&amp;", "&"))
        scrapedthumbnail = ""
        scrapedplot = ""

        # Debug trace
        if (DEBUG):
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")

        # Add to the XBMC listing
        itemlist.append(
            Item(channel=__channel__, action='forum', title=scrapedtitle,
                 url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot,
                 folder=True))

    return itemlist
def home(item):
    """List front-page releases of mcanime, keeping only anime entries.

    Entries whose type icon does not end in 'anime.gif' are skipped.
    A trailing 'Anteriores' link, when present, is turned into a
    next-page item that calls back into home().
    """
    logger.info("[mcanime.py] home")
    itemlist = []

    # Download the page
    data = scrapertools.cache_page(item.url)
    #logger.info(data)

    # Extract the entries (folders).  Capture groups, in order:
    # 0=type icon, 1=prefix text, 2=url, 3=title, 4=date, 5=thumbnail,
    # 6=plot html, 7=file info (unused), 8=links block (kept as 'extra').
    patronvideos = '<div class="release" style="background-image.url\(\'([^\']+)\'\)\;">[^<]+'
    patronvideos += '<h4>([^<]+)<a href="([^"]+)">([^<]+)</a> <span class="date">([^<]+)</span></h4>[^<]+'
    patronvideos += '<div class="rimg"><img src="([^"]+)"[^>]+></div>[^<]+'
    patronvideos += '<div class="rtext">(.*?)</div>[^<]+'
    patronvideos += '<div class="rfinfo">(.*?)</div>[^<]+'
    patronvideos += '<div class="rflinks">(.*?)</div>[^<]+'
    patronvideos += '<div class="rinfo">(.*?)</div>'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for match in matches:
        # Only anime releases; other release types use different icons.
        if match[0].endswith("anime.gif"):
            scrapedtitle = match[3].strip() + " " + match[1].strip(
            ) + " (" + match[4] + ")"
            scrapedurl = urlparse.urljoin(item.url, match[2])
            scrapedthumbnail = urlparse.urljoin(item.url, match[5])
            scrapedplot = scrapertools.htmlclean(match[6])
            scrapedextra = match[8]
            # "[CR]" would be interpreted as a Kodi line break in labels.
            scrapedtitle = scrapedtitle.replace("[CR]", " CR ")
            if (DEBUG):
                logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")
            itemlist.append(
                Item(channel=__channel__, action='findvideos', title=scrapedtitle,
                     url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot,
                     extra=scrapedextra, folder=True))

    # Extract the next-page marker ("Anteriores" = older releases).
    patronvideos = '<span class="next"><a href="([^"]+)">Anteriores</a>...</span>'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    if len(matches) > 0:
        scrapedtitle = "P·gina siguiente"
        scrapedurl = urlparse.urljoin(item.url, matches[0])
        scrapedthumbnail = ""
        scrapedplot = ""
        itemlist.append(
            Item(channel=__channel__, action='home', title=scrapedtitle,
                 url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot,
                 folder=True))

    return itemlist
def listvideos(item):
    """List blog posts (Blogger layout) as video folders plus a pager entry.

    NOTE(review): scrapedthumbnail and scrapedplot are initialised once
    before the loop; when a post has no <img src="http...">, the previous
    iteration's thumbnail carries over.  This looks unintentional but is
    preserved as-is.
    """
    logger.info("[guaridavalencianista.py] listvideos")
    itemlist = []
    scrapedthumbnail = ""
    scrapedplot = ""

    # Download the page
    data = scrapertools.cache_page(item.url)

    # One match per post: (url, title, body html).
    patronvideos = "<h3 class='post-title entry-title'[^<]+"
    patronvideos += "<a href='([^']+)'>([^<]+)</a>.*?"
    patronvideos += "<div class='post-body entry-content'(.*?)<div class='post-footer'>"
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for match in matches:
        # Clean the title: strip tags, unescape entities, drop the
        # surrounding whitespace characters captured by the pattern.
        scrapedtitle = match[1]
        scrapedtitle = re.sub("<[^>]+>", " ", scrapedtitle)
        scrapedtitle = scrapertools.unescape(scrapedtitle)[1:-1]
        scrapedurl = match[0]
        # First http image inside the post body becomes the thumbnail.
        regexp = re.compile(r'src="(http[^"]+)"')
        matchthumb = regexp.search(match[2])
        if matchthumb is not None:
            scrapedthumbnail = matchthumb.group(1)
        # The plot is the centred block following the image, tags stripped.
        matchplot = re.compile('<div align="center">(<img.*?)</span></div>',
                               re.DOTALL).findall(match[2])
        if len(matchplot) > 0:
            scrapedplot = matchplot[0]
            #print matchplot
        else:
            scrapedplot = ""
        scrapedplot = re.sub("<[^>]+>", " ", scrapedplot)
        scrapedplot = scrapertools.unescape(scrapedplot)
        if (DEBUG):
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")

        # Add to the XBMC listing
        #xbmctools.addnewfolder( __channel__ , "findevi" , category , scrapedtitle , scrapedurl , scrapedthumbnail, scrapedplot )
        itemlist.append(
            Item(channel=__channel__, action="findvideos", title=scrapedtitle,
                 url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot,
                 folder=True))

    # Extract the next-page marker (Blogger "older posts" link).
    patronvideos = "<a class='blog-pager-older-link' href='([^']+)'"
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    if len(matches) > 0:
        scrapedtitle = "Página siguiente"
        scrapedurl = urlparse.urljoin(item.url, matches[0])
        scrapedthumbnail = ""
        scrapedplot = ""
        itemlist.append(
            Item(channel=__channel__, action="listvideos", title=scrapedtitle,
                 url=scrapedurl, thumbnail=scrapedthumbnail,
                 plot=scrapedplot, folder=True))

    return itemlist
def elenco(item):
    """List filmhdstreaming movies plus pagination and a home shortcut.

    Pagination uses two slightly different regexes depending on whether
    the listing was reached through a genre page (single-quoted hrefs)
    or not (double-quoted hrefs); in the genre case the next-page URL is
    rebuilt from the first three path components of item.url.
    """
    logger.info("filmhdstreaming elenco")
    itemlist = []

    data = scrapertools.cache_page(item.url)

    # Limit parsing to the movie box area of the page.
    patron = 'id="box_movies">(.*?)id="containerpage"'
    filtro = scrapertools.find_single_match(data, patron)

    patron = 'class="movie">[^>]+><a href="(.*?)"><img src="(.*?)".*?<h2>(.*?)<\/h2>'
    matches = scrapertools.find_multiple_matches(filtro, patron)
    for scrapedurl, scrapedimg, scrapedtitle in matches:
        logger.info("Url:" + scrapedurl + " thumbnail:" + scrapedimg + " title:" + scrapedtitle)
        # NOTE(review): computed but never used (full title is shown instead).
        title = scrapedtitle.split("(")[0]
        itemlist.append(
            infoSod(
                Item(channel=item.channel, action="findvideos",
                     title="[COLOR azure]" + scrapedtitle + "[/COLOR]",
                     fulltitle=scrapedtitle, url=scrapedurl,
                     thumbnail=scrapedimg, fanart="")))

    # Pagination
    # ===========================================================
    patron = '<div class="pagination dark">(.*?)</div>'
    paginazione = scrapertools.find_single_match(data, patron)
    # Genre pages use single quotes around hrefs, the others double quotes.
    if item.extra == "genere":
        patron = 'class="page dark active">.*?</a>.*?href=\'(.*?)\'>'
    else:
        patron = 'class="page dark active">.*?</a>.*?href="(.*?)">'
    matches = re.compile(patron, re.DOTALL).findall(paginazione)
    scrapertools.printMatches(matches)
    # ===========================================================
    if len(matches) > 0:
        paginaurl = matches[0]
        if item.extra == "genere":
            # Rebuild the absolute URL from the first three path parts.
            url = scrapertools.find_single_match(
                item.url, "http://(.*?)/(.*?)/(.*?)\/.*")
            ind = "http://" + url[0] + "/" + url[1] + "/" + url[
                2] + "/" + paginaurl
            itemlist.append(
                Item(channel=item.channel, action="elenco", title=AvantiTxt,
                     url=ind, thumbnail=AvantiImg, folder=True))
        else:
            itemlist.append(
                Item(channel=item.channel, action="elenco", title=AvantiTxt,
                     url=host + paginaurl, thumbnail=AvantiImg, folder=True))

    itemlist.append(
        Item(channel=item.channel, action="HomePage", title=HomeTxt,
             thumbnail=ThumbnailHome, folder=True))
    return itemlist
# -*- coding: utf-8 -*-
def series(item, paginacion=True):
    """List series from serieonline.

    When paginacion is True a "next page" item is appended; when False
    the function recurses into the next pages and flattens all results
    into a single list.
    """
    logger.info("[serieonline.py] series")

    # Download the page
    data = scrapertools.cachePage(item.url)

    # Extract the entries
    '''
    <div class="series"> <div class="mostrar-series-imagen"> <a href="http://www.serieonline.net/90210/"> <img src="http://www.serieonline.net/imagenes/90210.jpg" alt="90210" height="155" width="200" /> </a> </div> <div class="mostrar-series-texto"> <a href="http://www.serieonline.net/90210/">90210</a> </div> </div>
    '''
    patronvideos = '<div class="series">[^<]+'
    patronvideos += '<div class="mostrar-series-imagen">[^<]+'
    patronvideos += '<a href="([^"]+)">[^<]+'
    patronvideos += '<img src="([^"]+)"[^<]+'
    patronvideos += '</a>[^<]+'
    patronvideos += '</div>[^<]+'
    patronvideos += '<div class="mostrar-series-texto">[^<]+'
    patronvideos += '<a href="[^>]+>([^<]+)</a>'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)

    itemlist = []
    for match in matches:
        # Page is latin-1; re-encode the title to UTF-8 for display.
        scrapedtitle = unicode(match[2], "iso-8859-1",
                               errors="replace").encode("utf-8")
        scrapedplot = ""
        scrapedurl = urlparse.urljoin(item.url, match[0])
        scrapedthumbnail = urlparse.urljoin(item.url, match[1])
        if (DEBUG):
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")

        # Add to the XBMC listing
        itemlist.append(
            Item(channel=__channel__, action="episodios", title=scrapedtitle,
                 url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot,
                 show=scrapedtitle, viewmode="movie", folder=True))

    # Extract the pager
    patronvideos = '<div class="paginacion-num">\d+</div><div class="paginacion-num"><a href="([^"]+)">'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    if len(matches) > 0:
        scrapedtitle = ">> Página siguiente"
        scrapedurl = urlparse.urljoin(item.url, matches[0])
        newitem = Item(channel=__channel__, action="series", title=scrapedtitle,
                       url=scrapedurl, folder=True)
        if paginacion:
            itemlist.append(newitem)
        else:
            # Non-paginated mode: recurse and collect every page's entries.
            itemlist.extend(series(newitem, paginacion))
    return itemlist
def getlist(item):
    """List mejortorrent movies, series or documentaries depending on item.url.

    Two regex passes run over the same page: the first collects URLs and
    thumbnails and creates the Items; the second collects display titles
    and overwrites itemlist[i].title positionally.
    """
    logger.info("pelisalacarta.mejortorrent seriesydocs")
    itemlist = []

    data = scrapertools.cachePage(item.url)

    # movies
    # <a href="/peli-descargar-torrent-9578-Presentimientos.html">
    # <img src="/uploads/imagenes/peliculas/Presentimientos.jpg" border="1"></a
    #
    # series
    #
    #<a href="/serie-descargar-torrents-11589-11590-Ahora-o-nunca-4-Temporada.html">
    #<img src="/uploads/imagenes/series/Ahora o nunca4.jpg" border="1"></a>
    #
    # docs
    #
    #<a href="/doc-descargar-torrent-1406-1407-El-sueno-de-todos.html">
    #<img border="1" src="/uploads/imagenes/documentales/El sueno de todos.jpg"></a>

    # Select the patterns and the item action by URL section.
    if item.url.find("peliculas") > -1:
        patron = '<a href="(/peli-descargar-torrent[^"]+)">[^<]+'
        patron += '<img src="([^"]+)"[^<]+</a>'
        patron_enlace = "/peli-descargar-torrent-\d+(.*?)\.html"
        patron_title = '<a href="/peli-descargar-torrent[^"]+">([^<]+)</a>(\s*<b>([^>]+)</b>)?'
        action = "show_movie_info"
        folder = True
        extra = ""
    elif item.url.find("series-letra") > -1:
        # Letter index pages have no thumbnails; the empty group keeps
        # the 2-tuple shape expected by the unpacking loop below.
        patron = "<a href='(/serie-descargar-torrent[^']+)'>()"
        patron_enlace = "/serie-descargar-torrents-\d+-\d+-(.*?)\.html"
        patron_title = '<a href="/serie-descargar-torrent[^"]+">([^<]+)</a>(\s*<b>([^>]+)</b>)?'
        action = "episodios"
        folder = True
        extra = "series"
    elif item.url.find("series") > -1:
        patron = '<a href="(/serie-descargar-torrent[^"]+)">[^<]+'
        patron += '<img src="([^"]+)"[^<]+</a>'
        patron_enlace = "/serie-descargar-torrents-\d+-\d+-(.*?)\.html"
        patron_title = '<a href="/serie-descargar-torrent[^"]+">([^<]+)</a>(\s*<b>([^>]+)</b>)?'
        action = "episodios"
        folder = True
        extra = "series"
    else:
        patron = '<a href="(/doc-descargar-torrent[^"]+)">[^<]+'
        patron += '<img src="([^"]+)"[^<]+</a>'
        patron_enlace = "/doc-descargar-torrent-\d+-\d+-(.*?)\.html"
        patron_title = '<a href="/doc-descargar-torrent[^"]+">([^<]+)</a>(\s*<b>([^>]+)</b>)?'
        action = "episodios"
        folder = True
        extra = "docus"

    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for scrapedurl, scrapedthumbnail in matches:
        # Provisional title derived from the URL slug.
        title = scrapertools.get_match(scrapedurl, patron_enlace)
        title = title.replace("-", " ")
        url = urlparse.urljoin(item.url, scrapedurl)
        thumbnail = urlparse.urljoin(item.url, urllib.quote(scrapedthumbnail))
        plot = ""
        logger.debug("title=[" + title + "], url=[" + url + "], thumbnail=[" + thumbnail + "]")
        itemlist.append(
            Item(channel=item.channel, action=action, title=title, url=url,
                 thumbnail=thumbnail, plot=plot, folder=folder, extra=extra))

    matches = re.compile(patron_title, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    # Replace the URL-derived titles with richer ones.  This assumes the
    # second pass finds exactly the same entries, in the same order, as
    # the first pass -- technically fragile, but it holds as long as the
    # page layout does not change.
    cnt = 0
    for scrapedtitle, notused, scrapedinfo in matches:
        title = re.sub(
            '\r\n', '', scrapedtitle).decode('iso-8859-1').encode('utf8').strip()
        if title.endswith('.'):
            title = title[:-1]
        info = scrapedinfo.decode('iso-8859-1').encode('utf8')
        if info != "":
            title = '{0} {1}'.format(title, info)
        itemlist[cnt].title = title
        cnt += 1

    if len(itemlist) == 0:
        itemlist.append(
            Item(channel=item.channel, action="mainlist",
                 title="No se ha podido cargar el listado"))
    else:
        # Extract the pager
        patronvideos = "<a href='([^']+)' class='paginar'> Siguiente >>"
        matches = re.compile(patronvideos, re.DOTALL).findall(data)
        scrapertools.printMatches(matches)
        if len(matches) > 0:
            scrapedurl = urlparse.urljoin(item.url, matches[0])
            itemlist.append(
                Item(channel=item.channel, action="getlist",
                     title="Pagina siguiente >>", url=scrapedurl, folder=True))

    return itemlist
def episodios(item):
    """List kideos episodes: one playable item per video thumbnail block."""
    logger.info("[kideos.py] episodios")
    itemlist = []

    # Download the channel listing
    data = scrapertools.cache_page(item.url)
    '''
    <div id="VideoClip" style="width: 150px;"> <br/> <table width="150" border="0" cellpadding="0" cellspacing="0"> <tr> <td valign="bottom" height="70"> <a href="/video/oh-there-you-are-perry-phineas-and-ferb"> <span class="VideoTitles"> <h1>Oh, There You Are, Perry - Phineas and Ferb</h1> </span> </a> </td> </tr> <tr valign="top"> <td valign="top"> <div id="SearchThumbnail" style="background-position:-200px -224px;height:110px;width:140px;"> <a href="/video/oh-there-you-are-perry-phineas-and-ferb"> <h1> <img src="http://img.youtube.com/vi/sD8hqHSyYxw/0.jpg" width="132" height="99" hspace="4" vspace="4" /> </h1> </a> </div>
    '''
    # Walk the fixed table structure element by element; captures are
    # (relative url, title, thumbnail url).
    patron = '<div id="VideoClip"[^<]+'
    patron += '<br/>[^<]+'
    patron += '<table width="150" border="0" cellpadding="0" cellspacing="0">[^<]+'
    patron += '<tr>[^<]+'
    patron += '<td valign="bottom" height="70">[^<]+'
    patron += '<a href="([^"]+)">[^<]+'
    patron += '<span class="VideoTitles">[^<]+'
    patron += '<h1>([^<]+)</h1>[^<]+'
    patron += '</span>[^<]+'
    patron += '</a>[^<]+'
    patron += '</td>[^<]+'
    patron += '</tr>[^<]+'
    patron += '<tr valign="top">[^<]+'
    patron += '<td valign="top">[^<]+'
    patron += '<div id="SearchThumbnail"[^<]+'
    patron += '<a[^<]+'
    patron += '<h1>[^<]+'
    patron += '<img src="([^"]+)"'
    matches = re.compile(patron, re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)

    for url, title, thumbnail in matches:
        scrapedtitle = title
        scrapedurl = urlparse.urljoin(item.url, url)
        scrapedthumbnail = thumbnail
        scrapedplot = ""
        if (DEBUG):
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")
        itemlist.append(
            Item(channel=CHANNELNAME, title=scrapedtitle, action="play",
                 url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot,
                 show=scrapedtitle, folder=False))

    return itemlist
def episodios(item):
    """List the episodes of a mejortorrent series/doc detail page.

    Reads the hidden form fields (total_capis, tabla, titulo) needed to
    build the download POST for each episode, then enriches titles and
    artwork via a Tmdb lookup.  NOTE(review): item.thumbnail/fanart/plot
    are mutated inside the loop, so later episodes inherit the artwork
    of earlier ones when Tmdb has no data for them.
    """
    logger.info("pelisalacarta.mejortorrent episodios")
    itemlist = []

    # Download the page
    data = scrapertools.cachePage(item.url)

    # Hidden form fields required by the download endpoint.
    total_capis = scrapertools.get_match(
        data, "<input type='hidden' name='total_capis' value='(\d+)'>")
    tabla = scrapertools.get_match(
        data, "<input type='hidden' name='tabla' value='([^']+)'>")
    titulo = scrapertools.get_match(
        data, "<input type='hidden' name='titulo' value='([^']+)'>")

    # Poster image; quoted because file names may contain spaces.
    item.thumbnail = scrapertools.find_single_match(
        data, "src='http://www\.mejortorrent\.com(/uploads/imagenes/" + tabla +
        "/[a-zA-Z0-9_ ]+.jpg)'")
    item.thumbnail = 'http://www.mejortorrent.com' + urllib.quote(
        item.thumbnail)

    #<form name='episodios' action='secciones.php?sec=descargas&ap=contar_varios' method='post'>
    data = scrapertools.get_match(
        data,
        "<form name='episodios' action='secciones.php\?sec=descargas\&ap=contar_varios' method='post'>(.*?)</form>"
    )
    '''
    <td bgcolor='#C8DAC8' style='border-bottom:1px solid black;'><a href='/serie-episodio-descargar-torrent-18741-Juego-de-tronos-4x01.html'>4x01 - Episodio en V.O. Sub Esp.</a></td> <td width='120' bgcolor='#C8DAC8' align='right' style='border-right:1px solid black; border-bottom:1px solid black;'><div style='color:#666666; font-size:9px; margin-right:5px;'>Fecha: 2014-04-07</div></td> <td width='60' bgcolor='#F1F1F1' align='center' style='border-bottom:1px solid black;'> <input type='checkbox' name='episodios[1]' value='18741'>
    '''
    # Series rows wrap the title in a link; docs don't.
    if item.extra == "series":
        patron = "<td bgcolor[^>]+><a[^>]+>([^>]+)</a></td>[^<]+"
    else:
        patron = "<td bgcolor[^>]+>([^>]+)</td>[^<]+"
    patron += "<td[^<]+<div[^>]+>Fecha: ([^<]+)</div></td>[^<]+"
    patron += "<td[^<]+"
    patron += "<input type='checkbox' name='([^']+)' value='([^']+)'"
    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    # Strip season/miniseries suffixes and bracketed notes before the lookup.
    tmdb_title = re.sub(
        r'(\s*-\s*)?\d+.*?\s*Temporada|(\s*-\s*)?\s*Miniserie\.?|\(.*\)|\[.*\]',
        '', item.title).strip()
    logger.debug('pelisalacarta.mejortorrent episodios tmdb_title=' + tmdb_title)
    if item.extra == "series":
        oTmdb = Tmdb(texto_buscado=tmdb_title.strip(), tipo='tv',
                     idioma_busqueda="es")
    else:
        oTmdb = Tmdb(texto_buscado=tmdb_title.strip(), idioma_busqueda="es")

    for scrapedtitle, fecha, name, value in matches:
        scrapedtitle = scrapedtitle.strip()
        if scrapedtitle.endswith('.'):
            scrapedtitle = scrapedtitle[:-1]
        title = scrapedtitle + " (" + fecha + ")"
        url = "http://www.mejortorrent.com/secciones.php?sec=descargas&ap=contar_varios"
        #"episodios%5B1%5D=11744&total_capis=5&tabla=series&titulo=Sea+Patrol+-+2%AA+Temporada"
        post = urllib.urlencode({
            name: value,
            "total_capis": total_capis,
            "tabla": tabla,
            "titulo": titulo
        })
        logger.debug("post=" + post)

        if item.extra == "series":
            epi = scrapedtitle.split("x")
            # Only query Tmdb when the title follows the NxM format.
            if len(epi) > 1:
                temporada = re.sub("\D", "", epi[0])
                capitulo = re.sub("\D", "", epi[1])
                epi_data = oTmdb.get_episodio(temporada, capitulo)
                logger.debug("epi_data=" + str(epi_data))
                if epi_data:
                    item.thumbnail = epi_data["temporada_poster"]
                    item.fanart = epi_data["episodio_imagen"]
                    item.plot = epi_data["episodio_sinopsis"]
                    epi_title = epi_data["episodio_titulo"]
                    if epi_title != "":
                        title = scrapedtitle + " " + epi_title + " (" + fecha + ")"
        else:
            try:
                item.fanart = oTmdb.get_backdrop()
            except:
                pass
            item.plot = oTmdb.get_sinopsis()

        logger.debug("title=[" + title + "], url=[" + url + "], item=[" + str(item) + "]")
        itemlist.append(
            Item(channel=item.channel, action="play", title=title, url=url,
                 thumbnail=item.thumbnail, plot=item.plot, fanart=item.fanart,
                 extra=post, folder=False))

    return itemlist
def episodios(item):
    """Collect episode links using five alternative HTML layouts.

    The page mixes several markup styles, so five regex passes run in
    order; the 'encontrados' set deduplicates URLs across passes, which
    means the pass order determines which colour a link gets.  Library
    and download entries are appended at the end.
    """
    itemlist = []
    # URLs already emitted; used to deduplicate across the five passes.
    encontrados = set()

    data = scrapertools.cache_page(item.url)

    # 1 - links inside the centred <h4> header block (azure)
    bloque = scrapertools.find_single_match(data, '<h4 style="text-align: center(.*?)</h4>')
    patron = '<a href="(.*?)" target="_blank">(.*?)</a>'
    matches = re.compile(patron, re.DOTALL).findall(bloque)
    scrapertools.printMatches(matches)
    for scrapedurl, scrapedtitle in matches:
        if scrapedurl in encontrados:
            continue
        encontrados.add(scrapedurl)
        scrapedtitle = scrapertools.decodeHtmlentities(scrapedtitle)
        scrapedtitle = re.sub(r'<[^>]*?>', '', scrapedtitle)
        scrapedtitle = '[COLOR azure][B]' + scrapedtitle + '[/B][/COLOR]'
        itemlist.append(
            Item(channel=__channel__, action="findvideo", title=scrapedtitle,
                 url=scrapedurl, fulltitle=scrapedtitle, show=scrapedtitle,
                 plot=item.plot, fanart=item.thumbnail, thumbnail=item.thumbnail))

    # 2 - links wrapped in <strong> (pink)
    patron = '<strong><a href="(.*?)" target="_blank">([^<]+)<.*?a></strong>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    for scrapedurl, scrapedtitle in matches:
        if scrapedurl in encontrados:
            continue
        encontrados.add(scrapedurl)
        scrapedtitle = scrapertools.decodeHtmlentities(scrapedtitle)
        scrapedtitle = re.sub(r'<[^>]*?>', '', scrapedtitle)
        scrapedtitle = '[COLOR pink][B]' + scrapedtitle + '[/B][/COLOR]'
        itemlist.append(
            Item(channel=__channel__, action="findvideo", title=scrapedtitle,
                 url=scrapedurl, fulltitle=scrapedtitle, show=scrapedtitle,
                 plot=item.plot, fanart=item.thumbnail, thumbnail=item.thumbnail))

    # 3 - red-styled links (greenyellow)
    patron = '<a style="color: #ff0000;" href="(.*?)" target="_blank">(.*?)</a>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    for scrapedurl, scrapedtitle in matches:
        if scrapedurl in encontrados:
            continue
        encontrados.add(scrapedurl)
        scrapedtitle = scrapertools.decodeHtmlentities(scrapedtitle)
        scrapedtitle = re.sub(r'<[^>]*?>', '', scrapedtitle)
        scrapedtitle = '[COLOR greenyellow][B]' + scrapedtitle + '[/B][/COLOR]'
        itemlist.append(
            Item(channel=__channel__, action="findvideo", title=scrapedtitle,
                 url=scrapedurl, fulltitle=scrapedtitle, show=scrapedtitle,
                 plot=item.plot, fanart=item.thumbnail, thumbnail=item.thumbnail))

    # 4 - centred <h4> with <strong> title (cyan)
    patron = '<h4 style="text-align: center;"><a href="(.*?)" target="_blank"><strong>(.*?)</b>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    for scrapedurl, scrapedtitle in matches:
        if scrapedurl in encontrados:
            continue
        encontrados.add(scrapedurl)
        scrapedtitle = scrapertools.decodeHtmlentities(scrapedtitle)
        scrapedtitle = re.sub(r'<[^>]*?>', '', scrapedtitle)
        scrapedtitle = '[COLOR cyan][B]' + scrapedtitle + '[/B][/COLOR]'
        itemlist.append(
            Item(channel=__channel__, action="findvideo", title=scrapedtitle,
                 url=scrapedurl, fulltitle=scrapedtitle, show=scrapedtitle,
                 plot=item.plot, fanart=item.thumbnail, thumbnail=item.thumbnail))

    # 5 - mob psycho 100 layout (bisque)
    patron = '<h4 style="text-align: center;".*?<a href="(.*?)" target="_blank">(.*?)</h4>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    for scrapedurl, scrapedtitle in matches:
        if scrapedurl in encontrados:
            continue
        encontrados.add(scrapedurl)
        scrapedtitle = scrapertools.decodeHtmlentities(scrapedtitle)
        scrapedtitle = re.sub(r'<[^>]*?>', '', scrapedtitle)
        scrapedtitle = '[COLOR bisque][B]' + scrapedtitle + '[/B][/COLOR]'
        itemlist.append(
            Item(channel=__channel__, action="findvideo", title=scrapedtitle,
                 url=scrapedurl, fulltitle=scrapedtitle, show=scrapedtitle,
                 plot=item.plot, fanart=item.thumbnail, thumbnail=item.thumbnail))

    # Service commands (add to library / download all)
    if config.get_library_support() and len(itemlist) != 0:
        itemlist.append(
            Item(channel=__channel__,
                 title="Aggiungi " + item.title + " alla libreria",
                 url=item.url, action="add_serie_to_library", extra="episodios",
                 show=item.show))
        itemlist.append(
            Item(channel=item.channel,
                 title="Scarica tutti gli episodi della serie", url=item.url,
                 action="download_all_episodes", extra="episodios",
                 show=item.show))

    return itemlist
def buscador(item):
    """Search-results parser for mejortorrent.

    Scrapes the search page for three content kinds (series, movies,
    documentaries) and returns the matching items.  Falls back to a
    "nothing found" entry when no pattern matched.

    FIX: removed three ``patron_enlace`` locals that were assigned but
    never used in this function.
    """
    logger.info("pelisalacarta.mejortorrent buscador")
    itemlist = []

    data = scrapertools.cachePage(item.url)

    # Reference markup:
    # pelis
    # <a href="/peli-descargar-torrent-9578-Presentimientos.html">
    # <img src="/uploads/imagenes/peliculas/Presentimientos.jpg" border="1"></a
    #
    # series
    # <a href="/serie-descargar-torrents-11589-11590-Ahora-o-nunca-4-Temporada.html">
    # <img src="/uploads/imagenes/series/Ahora o nunca4.jpg" border="1"></a>
    #
    # docs
    # <a href="/doc-descargar-torrent-1406-1407-El-sueno-de-todos.html">
    # <img border="1" src="/uploads/imagenes/documentales/El sueno de todos.jpg"></a>

    # --- series ---
    patron = "<a href='(/serie-descargar-torrent[^']+)'[^>]+>(.*?)</a>"
    patron += ".*?<span style='color:gray;'>([^']+)</span>"
    matches = scrapertools.find_multiple_matches(data, patron)
    scrapertools.printMatches(matches)
    for scrapedurl, scrapedtitle, scrapedinfo in matches:
        # Site pages are latin-1; normalize to utf-8 (Python 2 str).
        title = scrapertools.remove_htmltags(scrapedtitle).decode(
            'iso-8859-1').encode('utf8') + ' ' + scrapedinfo.decode(
                'iso-8859-1').encode('utf8')
        url = urlparse.urljoin(item.url, scrapedurl)
        logger.debug("title=[" + title + "], url=[" + url + "]")
        itemlist.append(
            Item(channel=item.channel,
                 action="episodios",
                 title=title,
                 url=url,
                 folder=True,
                 extra="series",
                 viewmode="movie_with_plot"))

    # --- movies ---
    patron = "<a href='(/peli-descargar-torrent-[^']+)'[^>]+>(.*?)</a>"
    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    for scrapedurl, scrapedtitle in matches:
        title = scrapertools.remove_htmltags(scrapedtitle).decode(
            'iso-8859-1').encode('utf-8')
        url = urlparse.urljoin(item.url, scrapedurl)
        logger.debug("title=[" + title + "], url=[" + url + "]")
        itemlist.append(
            Item(channel=item.channel,
                 action="play",
                 title=title,
                 url=url,
                 folder=False,
                 extra=""))

    # --- documentaries ---
    patron = "<a href='(/doc-descargar-torrent[^']+)' .*?"
    patron += "<font Color='darkblue'>(.*?)</font>.*?"
    patron += "<td align='right' width='20%'>(.*?)</td>"
    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    for scrapedurl, scrapedtitle, scrapedinfo in matches:
        title = scrapedtitle.decode('iso-8859-1').encode(
            'utf8') + " " + scrapedinfo.decode('iso-8859-1').encode('utf8')
        url = urlparse.urljoin(item.url, scrapedurl)
        logger.debug("title=[" + title + "], url=[" + url + "]")
        itemlist.append(
            Item(channel=item.channel,
                 action="episodios",
                 title=title,
                 url=url,
                 folder=True,
                 extra="docu",
                 viewmode="movie_with_plot"))

    if len(itemlist) == 0:
        itemlist.append(
            Item(channel=item.channel,
                 action="mainlist",
                 title="No se han encontrado nada con ese término"))

    return itemlist
def peliculas(item):
    """Movie listing for cinetux: three layout variants plus a paginator.

    Each regex targets a different index layout the site has used
    (movielist, peli_item, index_item); every hit becomes an
    Item(action="findvideos").  A trailing "next page" item is appended
    when the pager marker is present.
    """
    logger.info("[cinetux.py] peliculas")
    itemlist = []

    # Fetch the index page.
    data = scrapertools.cachePage(item.url)

    # Shared fanart for every produced item.
    fanart_url = "http://pelisalacarta.mimediacenter.info/fanart/cinetux.jpg"

    # Layout 1: <!--PELICULA--> blocks with a titlecat anchor.
    patron = '<!--PELICULA--><div class="movielist textcenter[^<]+'
    patron += '<div id="titlecat[^<]+<a href="([^"]+)" rel="bookmark" title="([^"]+)"><img style="[^"]+" width="[^"]+" height="[^"]+" src=(.*?) /[^<]+'
    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    for scrapedurl, title, thumbnail in matches:
        scrapedplot = ""
        # Drop the trailing quote residue from the unquoted src attribute.
        scrapedthumbnail = thumbnail[:-2]
        # Drop the fixed "Ver Película " prefix.
        scrapedtitle = title[14:]
        if (DEBUG):
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl +
                        "], thumbnail=[" + scrapedthumbnail + "]")
        itemlist.append(
            Item(channel=__channel__,
                 action="findvideos",
                 title=scrapedtitle,
                 fulltitle=scrapedtitle,
                 url=scrapedurl,
                 thumbnail=scrapedthumbnail,
                 plot=scrapedplot,
                 viewmode="movie",
                 fanart=fanart_url,
                 folder=True))

    # Layout 2: peli_item cards; the title is rebuilt from the URL slug.
    patron = '<div class="peli_item textcenter"[^<]+'
    patron += '<div class="pelicula_img"[^<]+'
    patron += '<a href="([^"]+)[^<]+<img alt="" src="([^"]+)".*?'
    patron += '<span class="rosa">(.*?)</span>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    for scrapedurl, scrapedthumbnail, calidad in matches:
        partes = scrapedurl.split("/")
        titulo = partes[len(partes) - 1]
        titulo = titulo.replace("ver-pelicula", "")
        titulo = titulo.replace("online-gratis", "")
        titulo = titulo.replace(".html", "")
        titulo = titulo.replace("-", " ")
        titulo = titulo.strip().capitalize()
        scrapedplot = ""
        scrapedtitle = titulo + " [" + calidad + "]"
        if (DEBUG):
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl +
                        "], thumbnail=[" + scrapedthumbnail + "]")
        itemlist.append(
            Item(channel=__channel__,
                 action="findvideos",
                 title=scrapedtitle,
                 fulltitle=scrapedtitle,
                 url=scrapedurl,
                 thumbnail=scrapedthumbnail,
                 plot=scrapedplot,
                 viewmode="movie",
                 fanart=fanart_url,
                 folder=True))

    # Layout 3: index_item blocks.
    patron = '<div id="post-\d+">[^<]+'
    patron += '<div class="index_item index_item_ie"><a href="([^"]+)" rel="[^"]+" title="[^"]+"><img style="[^"]+" width="\d+" height="\d+" src=([^>]+)>[^<]+'
    patron += '<center><b>([^<]+)</b></center></a></div>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    for scrapedurl, thumbnail, title in matches:
        scrapedplot = ""
        scrapedthumbnail = thumbnail[:-2]
        scrapedtitle = title[14:]
        if (DEBUG):
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl +
                        "], thumbnail=[" + scrapedthumbnail + "]")
        itemlist.append(
            Item(channel=__channel__,
                 action="findvideos",
                 title=scrapedtitle,
                 fulltitle=scrapedtitle,
                 url=scrapedurl,
                 thumbnail=scrapedthumbnail,
                 plot=scrapedplot,
                 viewmode="movie",
                 fanart=fanart_url,
                 folder=True))

    # Pager marker — append a "next page" folder when present.
    patronvideos = '<a href="([^"]+)"\s*><strong>\»\;</strong></a>'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    if len(matches) > 0:
        scrapedurl = urlparse.urljoin(item.url, matches[0])
        itemlist.append(
            Item(channel=__channel__,
                 action="peliculas",
                 title="Página siguiente >>",
                 url=scrapedurl,
                 fanart=fanart_url,
                 folder=True))

    return itemlist
def programas(item): logger.info("[kideos.py] programas") itemlist = [] # Descarga la lista de canales item.url = MAIN_URL data = scrapertools.cache_page(item.url) ''' <div id="VideoClip"> <table width="100" border="0" cellpadding="0" cellspacing="0"> <tr> <td valign="bottom" height="70"> <a href="/cookie-monster"><span class="VideoTitles"><h1>Cookie Monster</h1></span></a> </td> </tr> <tr valign="top"> <td height="100" valign="top"> <div id="SearchThumbnail"> <a href="/cookie-monster"> <h1> <img src="http://img.youtube.com/vi/shbgRyColvE/0.jpg" width="100" alt="Cookie Monster" height="69" hspace="4" vspace="4" /> </h1> </a> </div> ''' patron = '<div id="VideoClip">[^<]+' patron += '<table width="100" border="0" cellpadding="0" cellspacing="0">[^<]+' patron += '<tr>[^<]+' patron += '<td[^<]+' patron += '<a href="([^"]+)"><span class="VideoTitles"><h1>([^>]+)</h1></span></a>[^<]+' patron += '</td>[^<]+' patron += '</tr>[^<]+' patron += '<tr valign="top">[^<]+' patron += '<td height="100" valign="top">[^<]+' patron += '<div id="SearchThumbnail">[^<]+' patron += '<a href="[^"]+">[^<]+' patron += '<h1>[^<]+' patron += '<img src="([^"]+)"' matches = re.compile(patron, re.DOTALL).findall(data) if DEBUG: scrapertools.printMatches(matches) for url, title, thumbnail in matches: scrapedtitle = title scrapedurl = urlparse.urljoin(item.url, url) scrapedthumbnail = thumbnail scrapedplot = "" if (DEBUG): logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]") itemlist.append( Item(channel=CHANNELNAME, title=scrapedtitle, action="episodios", url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot, show=scrapedtitle, folder=True)) return itemlist
def getlist(item):
    """List mejortorrent entries (movies / series / documentaries).

    The content kind is inferred from substrings of item.url; each branch
    sets the anchor pattern, the pattern used to derive a display title
    from the link, and the Item action/folder/extra values.  Appends a
    "next page" item when the pager link is present.
    """
    logger.info("[mejortorrent.py] seriesydocs")
    itemlist = []
    data = scrapertools.cachePage(item.url)

    # Reference markup:
    # pelis
    # <a href="/peli-descargar-torrent-9578-Presentimientos.html">
    # <img src="/uploads/imagenes/peliculas/Presentimientos.jpg" border="1"></a
    #
    # series
    #
    #<a href="/serie-descargar-torrents-11589-11590-Ahora-o-nunca-4-Temporada.html">
    #<img src="/uploads/imagenes/series/Ahora o nunca4.jpg" border="1"></a>
    #
    # docs
    #
    #<a href="/doc-descargar-torrent-1406-1407-El-sueno-de-todos.html">
    #<img border="1" src="/uploads/imagenes/documentales/El sueno de todos.jpg"></a>

    # Select scraping pattern and item metadata from the URL kind.
    # NOTE: "series-letra" must be tested before plain "series".
    if item.url.find("peliculas") > -1:
        patron = '<a href="(/peli-descargar-torrent[^"]+)">[^<]+'
        patron += '<img src="([^"]+)"[^<]+</a>'
        patron_enlace = "/peli-descargar-torrent-\d+(.*?)\.html"
        action = "play"
        folder = False
        extra = ""
    elif item.url.find("series-letra") > -1:
        # Letter-index pages have no thumbnail; the empty group () keeps
        # the two-value unpack below working.
        patron = "<a href='(/serie-descargar-torrent[^']+)'>()"
        patron_enlace = "/serie-descargar-torrents-\d+-\d+-(.*?)\.html"
        action = "episodios"
        folder = True
        extra = "series"
    elif item.url.find("series") > -1:
        patron = '<a href="(/serie-descargar-torrent[^"]+)">[^<]+'
        patron += '<img src="([^"]+)"[^<]+</a>'
        patron_enlace = "/serie-descargar-torrents-\d+-\d+-(.*?)\.html"
        action = "episodios"
        folder = True
        extra = "series"
    else:
        patron = '<a href="(/doc-descargar-torrent[^"]+)">[^<]+'
        patron += '<img src="([^"]+)"[^<]+</a>'
        patron_enlace = "/doc-descargar-torrent-\d+-\d+-(.*?)\.html"
        action = "episodios"
        folder = True
        extra = "docus"

    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for scrapedurl, scrapedthumbnail in matches:
        # Title is the slug captured from the link itself.
        title = scrapertools.get_match(scrapedurl, patron_enlace)
        title = title.replace("-", " ")
        url = urlparse.urljoin(item.url, scrapedurl)
        # Thumbnails may contain spaces — quote before joining.
        thumbnail = urlparse.urljoin(item.url, urllib.quote(scrapedthumbnail))
        plot = ""
        if (DEBUG):
            logger.info("title=[" + title + "], url=[" + url +
                        "], thumbnail=[" + thumbnail + "]")
        itemlist.append(
            Item(channel=__channel__,
                 action=action,
                 title=title,
                 url=url,
                 thumbnail=thumbnail,
                 plot=plot,
                 folder=folder,
                 extra=extra))

    # Pager link.
    patronvideos = "<a href='([^']+)' class='paginar'> Siguiente >>"
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    if len(matches) > 0:
        scrapedurl = urlparse.urljoin(item.url, matches[0])
        itemlist.append(
            Item(channel=__channel__,
                 action="getlist",
                 title="Página siguiente >>",
                 url=scrapedurl,
                 folder=True))
    return itemlist
def findvideos(item):
    """Extract the per-mirror video options from a cinetux movie page.

    Three table layouts are tried in order; if none matches, falls back to
    servertools.find_video_items over the whole page.

    FIXES:
    - The fallback option counter was never incremented, so every entry
      was labeled "Ver Opción 1 ..."; enumerate() numbers them correctly.
    - Removed the stray chained assignment ``channel = __channel__`` that
      only created a useless local.
    """
    logger.info("[cinetux.py] findvideos")
    itemlist = []

    data = scrapertools.cache_page(item.url)
    logger.info("data=" + data)

    # Layout 1: opcion/server/idioma/calidad/fuente/link cells.
    # NOTE(review): the calidad-td cell is NOT captured here, so the value
    # bound to scrapedquality is actually the fuente-td content — kept
    # as-is to preserve the displayed titles; confirm against live markup.
    patron = '<tr class="tabletr">[^<]+'
    patron += '<td class="opcion-td"><img[^>]+>([^>]+)</td>[^<]+'
    patron += '<td class="server-td[^>]+>([^<]+)</td>[^<]+'
    patron += '<td class="idioma-td[^>]+>([^>]+)</td>[^<]+'
    patron += '<td class="calidad-td[^<]+</td>[^<]+'
    patron += '<td class="fuente-td[^>]+>([^<]+)</td>[^<]+'
    patron += '<td class="link-td">(.*?)</td>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    for scrapedtitle, scrapedserver, scrapedlanguage, scrapedquality, scrapedlink in matches:
        title = "Ver " + scrapedtitle + " en " + scrapedserver + " (" + scrapedlanguage + ") (" + scrapedquality + ")"
        url = scrapedlink
        thumbnail = item.thumbnail
        plot = ""
        itemlist.append(
            Item(channel=__channel__,
                 action="play",
                 title=title,
                 fulltitle=item.fulltitle + " [" + scrapedlanguage + "][" + scrapedquality + "]",
                 url=url,
                 thumbnail=thumbnail,
                 plot=plot,
                 fanart="http://pelisalacarta.mimediacenter.info/fanart/cinetux.jpg",
                 folder=False))

    # Layout 2: same cells without a calidad-td column.
    patron = '<tr class="tabletr">[^<]+'
    patron += '<td class="opcion-td"><img[^>]+>([^>]+)</td>[^<]+'
    patron += '<td class="server-td[^>]+>([^<]+)</td>[^<]+'
    patron += '<td class="idioma-td[^>]+>([^<]+)</td>[^<]+'
    patron += '<td class="fuente-td[^>]+>([^<]+)</td>[^<]+'
    patron += '<td class="link-td">(.*?)</td>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    for scrapedtitle, scrapedserver, scrapedlanguage, scrapedquality, scrapedlink in matches:
        title = "Ver " + scrapedtitle + " en " + scrapedserver + " (" + scrapedlanguage + ") (" + scrapedquality + ")"
        url = scrapedlink
        thumbnail = item.thumbnail
        plot = ""
        itemlist.append(
            Item(channel=__channel__,
                 action="play",
                 title=title,
                 fulltitle=item.fulltitle + " [" + scrapedlanguage + "][" + scrapedquality + "]",
                 url=url,
                 thumbnail=thumbnail,
                 plot=plot,
                 fanart="http://pelisalacarta.mimediacenter.info/fanart/cinetux.jpg",
                 folder=False))

    # Layout 3: legacy episode-server table.
    patron = '<tr class="tabletr">[^<]+'
    patron += '<td class="episode-server[^>]+><img[^>]+>([^>]+)</td>[^<]+'
    patron += '<td class="episode-server-img[^>]+>([^<]+)</td>[^<]+'
    patron += '<td class="episode-lang[^>]+>([^>]+)</td>[^<]+'
    patron += '<td align="center">([^<]+)</td>[^<]+'
    patron += '<td(.*?)</td>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    for scrapedtitle, scrapedserver, scrapedlanguage, scrapedquality, scrapedlink in matches:
        title = "Ver " + scrapedtitle + " en " + scrapedserver + " (" + scrapedlanguage + ") (" + scrapedquality + ")"
        url = scrapedlink
        thumbnail = item.thumbnail
        plot = ""
        itemlist.append(
            Item(channel=__channel__,
                 action="play",
                 title=title,
                 fulltitle=item.fulltitle + " [" + scrapedlanguage + "][" + scrapedquality + "]",
                 url=url,
                 thumbnail=thumbnail,
                 plot=plot,
                 folder=False))

    # Fallback: let servertools sniff any recognizable video link.
    if len(itemlist) == 0:
        itemlist = servertools.find_video_items(data=data)
        for i, videoitem in enumerate(itemlist, 1):
            videoitem.title = "Ver Opción %d en %s" % (i, videoitem.server)
            videoitem.fulltitle = item.fulltitle
            videoitem.channel = __channel__

    return itemlist
def episodios(item):
    """List the downloadable episodes of a mejortorrent series/doc page.

    Reads the hidden form fields (total_capis / tabla / titulo) that the
    site's download form requires, then builds one Item per episode row
    whose ``extra`` carries the urlencoded POST body that play() will
    submit.
    """
    logger.info("[mejortorrent.py] episodios")
    itemlist = []

    # Download the page.
    data = scrapertools.cachePage(item.url)

    # Hidden form fields required by the download endpoint.
    total_capis = scrapertools.get_match(
        data, "<input type='hidden' name='total_capis' value='(\d+)'>")
    tabla = scrapertools.get_match(
        data, "<input type='hidden' name='tabla' value='([^']+)'>")
    titulo = scrapertools.get_match(
        data, "<input type='hidden' name='titulo' value='([^']+)'>")

    # Cover image; quoted because filenames may contain spaces.
    item.thumbnail = scrapertools.find_single_match(
        data, "src='http://www\.mejortorrent\.com(/uploads/imagenes/" +
        tabla + "/[a-zA-Z0-9_ ]+.jpg)'")
    item.thumbnail = 'http://www.mejortorrent.com' + urllib.quote(
        item.thumbnail)

    # Restrict further scraping to the episode form body.
    #<form name='episodios' action='secciones.php?sec=descargas&ap=contar_varios' method='post'>
    data = scrapertools.get_match(
        data,
        "<form name='episodios' action='secciones.php\?sec=descargas\&ap=contar_varios' method='post'>(.*?)</form>"
    )

    '''
    <td bgcolor='#C8DAC8' style='border-bottom:1px solid black;'><a href='/serie-episodio-descargar-torrent-18741-Juego-de-tronos-4x01.html'>4x01 - Episodio en V.O. Sub Esp.</a></td>
    <td width='120' bgcolor='#C8DAC8' align='right' style='border-right:1px solid black; border-bottom:1px solid black;'><div style='color:#666666; font-size:9px; margin-right:5px;'>Fecha: 2014-04-07</div></td>
    <td width='60' bgcolor='#F1F1F1' align='center' style='border-bottom:1px solid black;'>
    <input type='checkbox' name='episodios[1]' value='18741'>
    '''

    # Series rows wrap the title in an anchor; docs do not.
    if item.extra == "series":
        patron = "<td bgcolor[^>]+><a[^>]+>([^>]+)</a></td>[^<]+"
    else:
        patron = "<td bgcolor[^>]+>([^>]+)</td>[^<]+"
    patron += "<td[^<]+<div[^>]+>Fecha: ([^<]+)</div></td>[^<]+"
    patron += "<td[^<]+"
    patron += "<input type='checkbox' name='([^']+)' value='([^']+)'"
    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for scrapedtitle, fecha, name, value in matches:
        title = scrapedtitle.strip() + " (" + fecha + ")"
        # All episodes POST to the same endpoint; the payload selects one.
        url = "http://www.mejortorrent.com/secciones.php?sec=descargas&ap=contar_varios"
        #"episodios%5B1%5D=11744&total_capis=5&tabla=series&titulo=Sea+Patrol+-+2%AA+Temporada"
        thumbnail = item.thumbnail
        post = urllib.urlencode({
            name: value,
            "total_capis": total_capis,
            "tabla": tabla,
            "titulo": titulo
        })
        logger.info("post=" + post)
        plot = item.plot
        if (DEBUG):
            logger.info("title=[" + title + "], url=[" + url +
                        "], thumbnail=[" + thumbnail + "]")
        itemlist.append(
            Item(channel=__channel__,
                 action="play",
                 title=title,
                 url=url,
                 thumbnail=thumbnail,
                 plot=plot,
                 extra=post,
                 folder=False))
    return itemlist
def peliculas(item):
    """Video listing for pornhub.

    Scrapes the thumbnail grid into Item(action="play") entries (embed
    URLs) and appends a "next page" item when the pager link exists.

    FIX: the original called ``.replace('&','&')`` — a no-op — on the
    title and on the pager URL; the intent (decoding the HTML entity) is
    restored with ``.replace('&amp;','&')``.
    """
    logger.info("[pornhub.py] peliculas")
    itemlist = []

    # Download the page and keep only the video grid.
    data = get_main_page(item.url)
    data = scrapertools.find_single_match(
        data,
        '<ul class="nf-videos videos row-4-thumbs">(.*?)<div class="pre-footer">')

    # viewkey / title / duration / (optional HD badge block) / small thumb.
    patron = '<div class="phimage">.*?'
    patron += '<a href="/view_video.php\?viewkey=([^"]+)" title="([^"]+).*?'
    patron += '<var class="duration">([^<]+)</var>(.*?)</div>.*?'
    patron += 'data-smallthumb="([^"]+)"'
    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for viewkey, scrapedtitle, duration, scrapedhd, thumbnail in matches:
        title = scrapedtitle.replace('&amp;', '&') + " (" + duration + ")"
        scrapedhd = scrapertools.find_single_match(
            scrapedhd, '<span class="hd-thumbnail">(.*?)</span>')
        if (scrapedhd == 'HD'):
            title += ' [HD]'
        url = 'http://es.pornhub.com/embed/' + viewkey
        if (DEBUG):
            logger.info("title=[" + title + "], url=[" + url +
                        "], thumbnail=[" + thumbnail + "]")
        itemlist.append(
            Item(channel=__channel__,
                 action="play",
                 title=title,
                 url=url,
                 fanart=__fanart__,
                 thumbnail=thumbnail))

    # Pager.
    patron = '<li class="page_next"><a href="([^"]+)"'
    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    if len(matches) > 0:
        url = urlparse.urljoin("http://es.pornhub.com",
                               matches[0].replace('&amp;', '&'))
        itemlist.append(
            Item(channel=__channel__,
                 action="peliculas",
                 title=">> Página siguiente",
                 fanart=__fanart__,
                 url=url))

    return itemlist
def listvideosMirror(params, url, category): logger.info("[nolomires.py] listvideosMirror") if url == "": url = "http://www.nolomires.com/" # Descarga la p�gina data = scrapertools.cachePage(url) #logger.info(data) # Extrae las entradas (carpetas) patronvideos = '<div class="panel" id="[^"]+" style="background: url\(([^\)]+)\).*?' # IMAGEN 0 patronvideos += '<h2><a href="([^"]+)" ' # URL 1 patronvideos += 'title="([^"]+)">[^<]+</a>' # TITULO 2 patronvideos += '</h2>(.*?)</div>' # SINOPSIS 3 #patronvideos += 'style=.*?src="([^"]+)".*?alt=.*?bold.*?>(.*?)</div>' #patronvideos += '.*?flashvars="file=(.*?flv)\&' matches = re.compile(patronvideos, re.DOTALL).findall(data) scrapertools.printMatches(matches) for match in matches: # Titulo scrapedtitle = acentos(match[2]) # URL scrapedurl = match[1] # Thumbnail scrapedthumbnail = match[0] # Argumento scrapedplot = match[3] # Depuracion if (DEBUG): logger.info("scrapedtitle=" + scrapedtitle) logger.info("scrapedurl=" + scrapedurl) logger.info("scrapedthumbnail=" + scrapedthumbnail) # A�ade al listado de XBMC xbmctools.addnewfolder(__channel__, "detail", category, scrapedtitle, scrapedurl, scrapedthumbnail, scrapedplot) #Extrae la marca de siguiente p�gina patronvideos = '<a href="([^"]+)" class="nextpostslink">' matches = re.compile(patronvideos, re.DOTALL).findall(data) scrapertools.printMatches(matches) if len(matches) > 0: scrapedtitle = "P�gina siguiente" scrapedurl = matches[0] scrapedthumbnail = "" scrapedplot = "" xbmctools.addnewfolder(__channel__, "listvideos", category, scrapedtitle, scrapedurl, scrapedthumbnail, scrapedplot) # Label (top-right)... xbmcplugin.setPluginCategory(handle=pluginhandle, category=category) # Disable sorting... xbmcplugin.addSortMethod(handle=pluginhandle, sortMethod=xbmcplugin.SORT_METHOD_NONE) # End of directory... xbmcplugin.endOfDirectory(handle=pluginhandle, succeeded=True)
def play(item): logger.info("[mejortorrent.py] play") itemlist = [] if item.extra == "": data = scrapertools.cache_page(item.url) logger.info("data=" + data) patron = "<a href='(secciones.php\?sec\=descargas[^']+)'" matches = re.compile(patron, re.DOTALL).findall(data) scrapertools.printMatches(matches) for scrapedurl in matches: title = item.title url = urlparse.urljoin(item.url, scrapedurl) thumbnail = item.thumbnail plot = "" if (DEBUG): logger.info("title=[" + title + "], url=[" + url + "], thumbnail=[" + thumbnail + "]") torrent_data = scrapertools.cache_page(url) logger.info("torrent_data=" + torrent_data) #<a href='/uploads/torrents/peliculas/los-juegos-del-hambre-brrip.torrent'> link = scrapertools.get_match( torrent_data, "<a href='(/uploads/torrents/peliculas/.*?\.torrent)'>") link = urlparse.urljoin(url, link) logger.info("link=" + link) itemlist.append( Item(channel=__channel__, action="play", server="torrent", title=title, url=link, thumbnail=thumbnail, plot=plot, folder=False)) else: data = scrapertools.cache_page(item.url, post=item.extra) logger.info("data=" + data) # series # #<a href="http://www.mejortorrent.com/uploads/torrents/series/falling-skies-2-01_02.torrent" #<a href="http://www.mejortorrent.com/uploads/torrents/series/falling-skies-2-03.torrent" # # docus # #<a href="http://www.mejortorrent.com/uploads/torrents/documentales/En_Suenyos_De_Todos_DVDrip.torrent">El sueño de todos. </a> params = dict(urlparse.parse_qsl(item.extra)) patron = '<a href="(http://www.mejortorrent.com/uploads/torrents/' + params[ "tabla"] + '/.*?\.torrent)"' link = scrapertools.get_match(data, patron) logger.info("link=" + link) itemlist.append( Item(channel=__channel__, action="play", server="torrent", title=item.title, url=link, thumbnail=item.thumbnail, plot=item.plot, folder=False)) return itemlist
def episodios(item, load_all_pages=False):
    """List the episodes of a CRTVG (tvg) program.

    Reads the program page (or an AJAX page when item.url contains /ax/),
    extracts the program id, fetches the first AJAX results page and
    parses the episode rows.  When a "Seguinte" (next) link is present,
    either recurses into it (load_all_pages=True) or appends it as a
    next-page item.
    """
    logger.info("tvalacarta.channels.tvg episodios")
    itemlist = []

    # Read the program page and extract id_programa.
    if "/ax/" in item.url:
        # AJAX endpoint: needs XHR headers; response is JSON-escaped HTML.
        headers = []
        headers.append([
            "User-Agent",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:17.0) Gecko/20100101 Firefox/17.0"
        ])
        headers.append(["X-Requested-With", "XMLHttpRequest"])
        headers.append(["Referer", item.url])
        data = scrapertools.cache_page(item.url, post="", headers=headers)
        # Undo the JSON escaping so the HTML patterns below match.
        data = data.replace("\\n", " ")
        data = data.replace("\\\"", "\"")
        data = data.replace("\\/", "/")
    else:
        data = scrapertools.cache_page(item.url)

    try:
        id_programa = scrapertools.get_match(data,
                                             "initAlaCartaBuscador.(\d+)")
    except:
        # Page without the buscador widget — fall back to empty id.
        id_programa = ""

    # Read the first episodes page.
    #http://www.crtvg.es/ax/tvgalacartabuscador/programa:33517/pagina:1/seccion:294/titulo:/mes:null/ano:null/temporada:null
    logger.info("tvalacarta.channels.tvg videos - hay programa")
    url = "http://www.crtvg.es/ax/tvgalacartabuscador/programa:" + id_programa + "/pagina:1/seccion:294/titulo:/mes:null/ano:null/temporada:null"
    headers = []
    headers.append([
        "User-Agent",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:17.0) Gecko/20100101 Firefox/17.0"
    ])
    headers.append(["X-Requested-With", "XMLHttpRequest"])
    headers.append(["Referer", item.url])
    data = scrapertools.cache_page(url, post="", headers=headers)
    data = data.replace("\\n", " ")
    data = data.replace("\\\"", "\"")
    data = data.replace("\\/", "/")
    #logger.info("data="+data)

    # Extract the videos.
    '''
    <tr>
    <td class="a-carta-resultado-titulo">
    <a href="\/tvg\/a-carta\/rea-publica-74" title="\u00c1rea p\u00fablica">\u00c1rea p\u00fablica<\/a>
    <\/td>
    <td class="a-carta-resultado-tempada">
    <\/td>
    <td class="a-carta-resultado-data">
    26\/01\/2016 18:30
    <\/td>
    <\/tr>
    '''
    patron = '<tr[^<]+'
    patron += '<td class="a-carta-resultado-titulo[^<]+'
    patron += '<a href="([^"]+)"\s+title="([^"]+)".*?'
    patron += '<td class="a-carta-resultado-data">(.*?)</td>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)

    for scrapedurl, scrapedtitle, fecha in matches:
        title = scrapedtitle.strip()
        # Titles arrive as \uXXXX escapes; round-trip through JSON to
        # decode them.
        json_title = jsontools.load_json('{"title":"' + title + '"}')
        title = json_title["title"]
        title = scrapertools.htmlclean(title) + " - " + fecha.strip()
        url = urlparse.urljoin(item.url, scrapedurl)
        thumbnail = ""
        plot = ""
        aired_date = scrapertools.parse_date(fecha)
        if (DEBUG):
            logger.info("title=[" + title + "], url=[" + url +
                        "], thumbnail=[" + thumbnail + "]")
        # Add to the XBMC listing.
        itemlist.append(
            Item(channel=CHANNELNAME,
                 title=title,
                 action="play",
                 server="tvg",
                 url=url,
                 thumbnail=thumbnail,
                 plot=plot,
                 show=item.show,
                 aired_date=aired_date,
                 folder=False))

    # Next-page link (onclick carries program/page/section ids).
    #<a href=\"#\" title=\"Seguinte\" onclick=\"return posteriorpaginaclick(33517, 2, 294)
    patron = '<a href="\#" title="Seguinte" onclick="return posteriorpaginaclick\((\d+), (\d+), (\d+)'
    matches = re.compile(patron, re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)
    for match in matches:
        scrapedtitle = ">>> Página siguiente"
        #http://www.crtvg.es/ax/tvgalacartabuscador/programa:33517/pagina:2/seccion:294/titulo:/mes:null/ano:null/temporada:null
        scrapedurl = "http://www.crtvg.es/ax/tvgalacartabuscador/programa:%s/pagina:%s/seccion:%s/titulo:/mes:null/ano:null/temporada:null" % (
            match[0], match[1], match[2])
        scrapedthumbnail = urlparse.urljoin(item.url, match[2])
        scrapedplot = ""
        if (DEBUG):
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl +
                        "], thumbnail=[" + scrapedthumbnail + "]")
        next_page_item = Item(channel=CHANNELNAME,
                              title=scrapedtitle,
                              action="episodios",
                              url=scrapedurl,
                              thumbnail=scrapedthumbnail,
                              plot=scrapedplot,
                              show=item.show,
                              category=item.category,
                              folder=True)
        if load_all_pages:
            # Recurse so the caller gets the full flattened episode list.
            itemlist.extend(episodios(next_page_item, load_all_pages))
        else:
            itemlist.append(next_page_item)
        # Only the first next-page marker is relevant.
        break
    return itemlist
def detail(params, url, category): logger.info("[nolomires.py] detail") title = acentos(urllib.unquote_plus(params.get("title"))) thumbnail = urllib.unquote_plus(params.get("thumbnail")) plot = "" scrapedurl = "" # Descarga la p�gina data = scrapertools.cachePage(url) #logger.info(data) # Extrae el argumento patronarg = '<h[2-3]>(<span style.*?)</p>' matches = re.compile(patronarg, re.DOTALL).findall(data) if len(matches) > 0: plot = re.sub("<[^>]+>", " ", matches[0]) patronthumb = '<div id="textimg"><img src="([^"]+)"' matches = re.compile(patronthumb, re.DOTALL).findall(data) if len(matches) > 0: thumbnail = matches[0] # ------------------------------------------------------------------------------------ # Busca los enlaces a los videos en los servidores habilitados # ------------------------------------------------------------------------------------ listavideos = servertools.findvideos(data) for video in listavideos: videotitle = video[0] url = video[1] server = video[2] xbmctools.addnewvideo(__channel__, "play", category, server, title.strip() + " - " + videotitle, url, thumbnail, plot) ## --------------------------------------------------------------------------------------## # Busca enlaces a videos .flv o (.mp4 dentro de un xml) # ## --------------------------------------------------------------------------------------## patronvideos = 'file=(http\:\/\/[^\&]+)\&' matches = re.compile(patronvideos, re.DOTALL).findall(data) scrapertools.printMatches(matches) playWithSubt = "play" c = 0 if len(matches) > 0: for match in matches: print "link xml :%s" % match subtitle = "[FLV-Directo]" c += 1 sub = "" if ("/xml" in match): data2 = scrapertools.cachePage(match) logger.info("data2=" + data2) patronvideos = '<track>.*?' 
patronvideos += '<title>([^<]+)</title>[^<]+' patronvideos += '<location>([^<]+)</location>(?:[^<]+' patronvideos += '<meta rel="type">video</meta>[^<]+|[^<]+)' patronvideos += '<meta rel="captions">([^<]+)</meta>[^<]+' patronvideos += '</track>' matches2 = re.compile(patronvideos, re.DOTALL).findall(data2) scrapertools.printMatches(matches) if len(matches2) == 0: newpatron = '<title>([^<]+)</title>[^<]+<location>([^<]+)</location>' matches2 = re.compile(newpatron, re.DOTALL).findall(data2) sub = "None" for match2 in matches2: try: if match2[2].endswith( ".xml" ): # Subtitulos con formato xml son incompatibles con XBMC sub = "[Subtitulo incompatible con xbmc]" playWithSubt = "play" except: pass if ".mp4" in match2[1]: subtitle = "[MP4-Directo]" scrapedtitle = '%s (castellano) - %s %s' % ( title, match2[0], subtitle) scrapedurl = match2[1].strip() scrapedthumbnail = thumbnail scrapedplot = plot if ("cast.xml" or "mirror.xml") not in match and sub == "": scrapedtitle = '%s (V.O.S) - %s %s %s' % ( title, match2[0], subtitle, sub) try: if not match2[2].endswith("cine-adicto2.srt") and ( sub == ""): scrapedurl = scrapedurl + "|" + match2[2] playWithSubt = "play2" except: pass if (DEBUG): logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]") # A�ade al listado de XBMC xbmctools.addnewvideo(__channel__, playWithSubt, category, "Directo", scrapedtitle, scrapedurl, scrapedthumbnail, scrapedplot) else: if match.endswith(".srt"): scrapedurl = scrapedurl + "|" + match xbmctools.addnewvideo(__channel__, "play2", category, "Directo", title + " (V.O.S) - " + subtitle, scrapedurl, thumbnail, plot) if match.endswith(".xml"): sub = "[Subtitulo incompatible con xbmc]" xbmctools.addnewvideo( __channel__, "play", category, "Directo", title + " (V.O) - %s %s" % (subtitle, sub), scrapedurl, thumbnail, plot) scrapedurl = match print scrapedurl ## --------------------------------------------------------------------------------------## # 
Busca enlaces de videos para el servidor vk.com # ## --------------------------------------------------------------------------------------## #http://vkontakte.ru/video_ext.php?oid=93103247&id=149051583&hash=793cde84b05681fa&hd=1 ''' patronvideos = '(http\:\/\/vk.+?\/video_ext\.php[^"]+)"' matches = re.compile(patronvideos,re.DOTALL).findall(data) if len(matches)>0: for match in matches: print " encontro VK.COM :%s" %match videourl = scrapertools.unescape(match) xbmctools.addnewvideo( __channel__ , "play" , category , "vk" , title + " - "+"[VK]", videourl , thumbnail , plot ) ''' # Label (top-right)... xbmcplugin.setPluginCategory(handle=pluginhandle, category=category) xbmcplugin.addSortMethod(handle=pluginhandle, sortMethod=xbmcplugin.SORT_METHOD_NONE) xbmcplugin.endOfDirectory(handle=pluginhandle, succeeded=True)
def episodios(item):
    """Episode listing for a Boing series.

    Downloads the /videos/ page of the series, scrapes every episode
    (url, thumbnail, series name, title) inside the Contenedor100 block
    and returns them as playable Items, plus a ">> Página siguiente"
    Item when the pager advertises more pages.
    """
    logger.info("tvalacarta.channels.boing episodios")

    # The /serie/ landing page has a /videos/ twin that carries the list.
    html = scrapertools.cachePage(item.url.replace("/serie/", "/videos/"))
    #logger.info(html)

    listado = scrapertools.get_match(html, '<div class="Contenedor100">(.*?)<\!-- \/Contenedor100 -->', 1)
    logger.info(str(listado))

    # Primary layout: <img> carrying explicit style/height/width attributes.
    regex = (
        '<div class="pic3"[^<]+'
        '<a href="([^"]+)"[^<]+<img style="[^"]+" height="\d+" width="\d+" src="([^"]+)".*?'
        '<div class="series">(.*?)</div[^<]+'
        '<div class="title"><a[^>]+>([^<]+)</a>'
    )
    entries = re.compile(regex, re.DOTALL).findall(listado)
    scrapertools.printMatches(entries)

    if len(entries) == 0:
        # Fallback layout (imagecache markup): a plain <img src="...">.
        regex = (
            '<div class="pic3"[^<]+'
            '<a href="([^"]+)"[^<]+<img src="([^"]+)".*?'
            '<div class="series">(.*?)</div[^<]+'
            '<div class="title"><a[^>]+>([^<]+)</a>'
        )
        entries = re.compile(regex, re.DOTALL).findall(listado)
        scrapertools.printMatches(entries)

    itemlist = []
    for ep_url, ep_thumb, ep_show, ep_title in entries:
        title = ep_title
        if ep_show != "":
            # Prefix the episode title with the series name.
            title = scrapertools.find_single_match(ep_show, '<a[^>]+>([^<]+)</a>') + " - " + title

        if (DEBUG): logger.info("title=[" + title + "], url=[" + ep_url + "], thumbnail=[" + ep_thumb + "]")

        url = urlparse.urljoin(item.url, ep_url)
        itemlist.append(
            Item(channel=CHANNELNAME, title=title, action="play", server="boing", url=url,
                 thumbnail=ep_thumb, page=url, show=item.show, folder=False))

    # Pager link at the bottom of the full page.
    next_page = scrapertools.find_single_match(html, '<li class="pager-next"><a href="([^"]+)"')
    if next_page != "":
        itemlist.append(
            Item(channel=item.channel, title=">> Página siguiente", action="episodios",
                 url=urlparse.urljoin(item.url, next_page), folder=True))

    return itemlist
def novedades(item):
    """Latest movies on unsoloclic.

    Scrapes one Item per post on the listing page (permalink, title and
    first image of the entry) and appends a paging Item when the
    '« Peliculas anteriores' link is present.
    """
    logger.info("[unsoloclic.py] novedades")
    itemlist = []

    # Fetch the listing page.
    data = scrapertools.cachePage(item.url)

    # Each post block: permalink + title come from the <h2>, the thumbnail
    # from the first image inside the entry body.
    regex = (
        '<div class="post[^"]+" id="post-\d+">[^<]+'
        '<h2 class="title"><a href="([^"]+)" rel="bookmark" title="[^"]+">([^<]+)</a></h2>[^<]+'
        '<div class="postdate">.*?</div>[^<]+'
        '<div class="entry">[^<]+'
        '<p[^<]+<a[^<]+<img.*?src="([^"]+)"'
    )
    posts = re.compile(regex, re.DOTALL).findall(data)
    if DEBUG: scrapertools.printMatches(posts)

    for post_url, post_title, post_thumb in posts:
        post_plot = ""
        if (DEBUG): logger.info("title=[" + post_title + "], url=[" + post_url + "], thumbnail=[" + post_thumb + "]")
        itemlist.append(
            Item(channel=item.channel, action="findvideos", title=post_title, url=post_url,
                 thumbnail=post_thumb, plot=post_plot, folder=True))

    # Pager, e.g.: <a href="http://unsoloclic.info/page/2/" >« Peliculas anteriores</a>
    regex = '<a href="([^"]+)" >\«\; Peliculas anteriores</a>'
    pager = re.compile(regex, re.DOTALL).findall(data)
    if DEBUG: scrapertools.printMatches(pager)

    for next_link in pager:
        next_title = ">> Página siguiente"
        next_plot = ""
        next_url = urlparse.urljoin(item.url, next_link)
        next_thumb = ""
        if (DEBUG): logger.info("title=[" + next_title + "], url=[" + next_url + "], thumbnail=[" + next_thumb + "]")
        itemlist.append(
            Item(channel=item.channel, action="novedades", title=next_title, url=next_url,
                 thumbnail=next_thumb, plot=next_plot, folder=True))

    return itemlist
def unpackjs(texto):
    """Unpack javascript obfuscated with Dean Edwards' p,a,c,k,e,d packer.

    texto -- text containing an eval(function(p,a,c,k,e,d){...}(...)) call.
             The packed payload is a template whose base-36 tokens are
             substituted from a '|'-separated word table.

    Returns the decoded javascript, or "" when no packed block (or no
    word table) can be located.
    """
    logger.info("unpackjs")

    # Extract the body of the packed function call.
    patron = "eval\(function\(p\,a\,c\,k\,e\,d\)\{[^\}]+\}(.*?)\.split\('\|'\)\)\)"
    matches = re.compile(patron, re.DOTALL).findall(texto)
    scrapertools.printMatches(matches)

    # Separate the packed code from its conversion (word) table.
    if len(matches) > 0:
        data = matches[0]
        logger.info("[unpackerjs.py] bloque funcion=" + data)
    else:
        return ""

    patron = "(.*)'([^']+)'"
    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    # FIX: the original indexed matches[0] unconditionally and raised
    # IndexError when the block carried no quoted word table; return ""
    # like the failure branch above.
    if len(matches) == 0:
        return ""

    cifrado = matches[0][0]
    logger.info("[unpackerjs.py] cifrado=" + cifrado)
    logger.info("[unpackerjs.py] palabras=" + matches[0][1])
    descifrado = ""

    # Build the conversion-table keys: the packer numbers its words in
    # base 36 (0-9, a-z, then 10-1z, 20-2z, 30-3z), in this exact order.
    claves = []
    claves.extend([
        "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e", "f", "g", "h", "i",
        "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z"
    ])
    claves.extend([
        "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "1a", "1b", "1c", "1d", "1e", "1f",
        "1g", "1h", "1i", "1j", "1k", "1l", "1m", "1n", "1o", "1p", "1q", "1r", "1s", "1t", "1u", "1v",
        "1w", "1x", "1y", "1z"
    ])
    claves.extend([
        "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "2a", "2b", "2c", "2d", "2e", "2f",
        "2g", "2h", "2i", "2j", "2k", "2l", "2m", "2n", "2o", "2p", "2q", "2r", "2s", "2t", "2u", "2v",
        "2w", "2x", "2y", "2z"
    ])
    claves.extend([
        "30", "31", "32", "33", "34", "35", "36", "37", "38", "39", "3a", "3b", "3c", "3d", "3e", "3f",
        "3g", "3h", "3i", "3j", "3k", "3l", "3m", "3n", "3o", "3p", "3q", "3r", "3s", "3t", "3u", "3v",
        "3w", "3x", "3y", "3z"
    ])

    # Map each key to its word; an empty word means the token maps to itself.
    palabras = matches[0][1].split("|")
    diccionario = {}
    i = 0
    for palabra in palabras:
        if palabra != "":
            diccionario[claves[i]] = palabra
        else:
            diccionario[claves[i]] = claves[i]
        logger.info(claves[i] + "=" + palabra)
        i = i + 1

    # Substitute the table words back into the packed code.
    # Taken from http://rc98.net/multiple_replace
    def lookup(match):
        try:
            return diccionario[match.group(0)]
        except:
            logger.info("[unpackerjs.py] Error al encontrar la clave " + match.group(0))
            return ""

    #lista = map(re.escape, diccionario)

    # Reverse the keys so the longer two-character tokens take precedence
    # over single characters in the regex alternation.
    claves.reverse()
    cadenapatron = '|'.join(claves)
    #logger.info("[unpackerjs.py] cadenapatron="+cadenapatron)
    compiled = re.compile(cadenapatron)
    descifrado = compiled.sub(lookup, cifrado)
    logger.info("descifrado=" + descifrado)
    return descifrado
def novedades(item):
    """Latest documentaries on documentalesatonline2 (bizzentte.com).

    Scrapes one Item per post (permalink, title, first paragraph as the
    plot) and appends a "!Página siguiente" paging Item per next-page
    link found on the page.
    """
    logger.info("[documentalesatonline2.py] novedades")
    itemlist = []

    # Fetch the listing page.
    data = scrapertools.cache_page(item.url)

    # Post entries: the <h2 id="post-..."> heading holds link and title,
    # the first <p> of the following "main" block holds the blurb.
    regex = (
        '<h2 id="post-[^"]+"><a href="([^"]+)"[^>]+>([^<]+)</a></h2>[^<]+'
        '<div class="main">.*?'
        '<p>([^<]+)</p>'
    )
    posts = re.compile(regex, re.DOTALL).findall(data)
    if DEBUG: scrapertools.printMatches(posts)

    for post_link, post_title, post_plot in posts:
        scrapedtitle = post_title
        scrapedurl = urlparse.urljoin(item.url, post_link)
        scrapedthumbnail = ""
        scrapedplot = post_plot
        if (DEBUG): logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")
        itemlist.append(
            Item(channel=item.channel, action="findvideos", title=scrapedtitle, url=scrapedurl,
                 thumbnail=scrapedthumbnail, plot=scrapedplot, folder=True))

    # Next page link.
    regex = '<a href="([^"]+)" >P..gina siguiente \»\;</a>'
    pager = re.compile(regex, re.DOTALL).findall(data)
    if DEBUG: scrapertools.printMatches(pager)

    for next_link in pager:
        itemlist.append(
            Item(channel=item.channel, action="novedades", title="!Página siguiente",
                 url=urlparse.urljoin(item.url, next_link), folder=True))

    return itemlist
def lista(item):
    """List movies/series from a "pelilist"/"buscar-list" page.

    Scrapes url, thumbnail, title and the quality <span> of each entry.
    For series entries (url under host + "/serie") the quality block is
    further split into quality / language / season-episode and the Item
    gets action="episodios"; everything else gets action="findvideos".
    A paging Item is appended for either pager style (Next link, or the
    "Siguiente" button that posts a page number through item.extra).
    """
    logger.info()
    itemlist = []

    # Download the page (item.extra carries the POST body when paging)
    data = httptools.downloadpage(item.url, post=item.extra).data
    # logger.info("data="+data)

    bloque = scrapertools.find_single_match(data, '(?:<ul class="pelilist">|<ul class="buscar-list">)(.*?)</ul>')

    patron = '<a href="([^"]+).*?'  # the url
    patron += '<img src="([^"]+)"[^>]+>.*?'  # the thumbnail
    patron += '<h2[^>]*>(.*?)</h2.*?'  # the title
    patron += '<span>([^<].*?)<'  # the quality
    matches = re.compile(patron, re.DOTALL).findall(bloque)
    scrapertools.printMatches(matches)

    for scrapedurl, scrapedthumbnail, scrapedtitle, calidad in matches:
        scrapedtitle = scrapertools.htmlclean(scrapedtitle)
        title = scrapedtitle.strip()
        if scrapertools.htmlclean(calidad):
            title += " (" + scrapertools.htmlclean(calidad) + ")"
        url = urlparse.urljoin(item.url, scrapedurl)
        thumbnail = urlparse.urljoin(item.url, scrapedthumbnail)
        plot = ""
        logger.debug("title=[" + title + "], url=[" + url + "], thumbnail=[" + thumbnail + "]")
        # NOTE(review): year is extracted but never used below.
        year = scrapertools.find_single_match(scrapedthumbnail, r'-(\d{4})')
        contentTitle = scrapertools.htmlclean(scrapedtitle).strip()
        # Split the quality block on <br> (a trailing <br> is appended so
        # the last fragment is captured too).  Rebinding "matches" here is
        # safe: the for-loop above iterates the original list object.
        patron = '([^<]+)<br>'
        matches = re.compile(patron, re.DOTALL).findall(calidad + '<br>')
        idioma = ''
        if host+"/serie" in url:
            # Series entry: strip trailing " -" / "..." from the title and
            # decode "Temp N Cap M" into season/episode numbers.
            contentTitle = re.sub('\s+-|\.{3}$', '', contentTitle)
            capitulo = ''
            temporada = 0
            episodio = 0
            if len(matches) == 3:
                calidad = matches[0].strip()
                idioma = matches[1].strip()
                # e.g. "Temp 2 Cap 5" -> "2x5"
                capitulo = matches[2].replace('Cap', 'x').replace('Temp', '').replace(' ', '')
                temporada, episodio = capitulo.strip().split('x')
            itemlist.append(Item(channel=item.channel, action="episodios", title=title, fulltitle=title,
                                 url=url, thumbnail=thumbnail, plot=plot, folder=True,
                                 contentTitle=contentTitle, language=idioma, contentSeason=int(temporada),
                                 contentEpisodeNumber=int(episodio), quality=calidad))
        else:
            # Movie entry: quality block is "<quality><br><language>"
            if len(matches) == 2:
                calidad = matches[0].strip()
                idioma = matches[1].strip()
            itemlist.append(Item(channel=item.channel, action="findvideos", title=title, fulltitle=title,
                                 url=url, thumbnail=thumbnail, plot=plot, folder=True,
                                 contentTitle=contentTitle, language=idioma, contentThumbnail=thumbnail,
                                 quality=calidad))

    # Pager style 1: a plain "Next" link
    next_page_url = scrapertools.find_single_match(data, '<li><a href="([^"]+)">Next</a></li>')
    if next_page_url != "":
        itemlist.append(Item(channel=item.channel, action="lista", title=">> Página siguiente",
                             url=urlparse.urljoin(item.url, next_page_url), folder=True))
    else:
        # Pager style 2: a "Siguiente" button that posts the page number
        next_page_url = scrapertools.find_single_match(data, '<li><input type="button" class="btn-submit" value="Siguiente" onClick="paginar..(\d+)')
        if next_page_url != "":
            itemlist.append(Item(channel=item.channel, action="lista", title=">> Página siguiente",
                                 url=item.url, extra=item.extra + "&pg=" + next_page_url, folder=True))

    return itemlist
def geturl(urlvideo):
    """Resolve the direct video URL behind a zshare.net download page.

    Mimics a browser session with cookie support: fetches the page once,
    then re-requests it POSTing the "Continue" download form, and finally
    reassembles the direct link from the javascript ``link_enc`` array
    embedded in the resulting page.
    """
    logger.info("[zshare.py] url=" + urlvideo)

    # ---------------------------------------
    # Cookie handling setup
    # ---------------------------------------
    ficherocookies = COOKIEFILE
    # Start from a clean cookie file each time.
    try:
        os.remove(ficherocookies)
    except:
        pass

    cj = None
    ClientCookie = None
    cookielib = None

    # Prefer cookielib; fall back to ClientCookie; else plain urllib2
    # without cookie support.
    try:
        import cookielib
    except ImportError:
        try:
            import ClientCookie
        except ImportError:
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()
    else:
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.LWPCookieJar()

    # ---------------------------------
    # Install the cookie jar
    # ---------------------------------
    if cj is not None:
        # Reload any previously saved cookies, then install the matching
        # HTTPCookieProcessor-based opener.
        if os.path.isfile(ficherocookies):
            cj.load(ficherocookies)
        if cookielib is not None:
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    # First request: plain GET of the download page.
    url = urlvideo
    theurl = url
    txdata = None
    # Fake a user agent; some sites reject automated clients.
    txheaders = {'User-Agent':'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)'}

    req = Request(theurl, txdata, txheaders)
    handle = urlopen(req)
    cj.save(ficherocookies)
    data = handle.read()
    handle.close()
    #print data

    # Second request, as if the download banner had been clicked:
    # extract the file code from the URL and POST the "Continue" form.
    patron = 'http\:\/\/www\.zshare\.net/download/([^\/]+)\/'
    matches = re.compile(patron, re.DOTALL).findall(url)
    logger.info("[zshare.py] fragmentos de la URL")
    scrapertools.printMatches(matches)

    codigo = ""
    nombre = ""
    if len(matches) > 0:
        codigo = matches[0]

    txdata = "op=download1&download=1&usr_login=&id=" + codigo + "&fname=&referer2=&method_free=Continue"
    logger.info(txdata)

    req = Request(theurl, txdata, txheaders)
    handle = urlopen(req)
    cj.save(ficherocookies)
    data = handle.read()
    handle.close()
    #print data

    # The page now carries "var link_enc=new Array('h','t','t','p',...)"
    # whose quoted fragments spell out the direct video URL.
    patron = 'var link_enc=new Array\(([^\)]+)\)'
    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    logger.info("[zshare.py] bloque packed")
    if len(matches) > 0:
        logger.info(matches[0])

    # The page also embeds an eval(function(p,a,c,k,e,d)...) packed block
    # whose word table contains the same final URL; unpack it for logging.
    from core import jsunpack
    descifrado = jsunpack.unpack(data)
    logger.info("descifrado=" + descifrado)

    # Extract the URL: concatenate every quoted fragment of the array.
    url = ""
    patron = "'([^']+)'"
    new_array = re.compile(patron, re.DOTALL).findall(matches[0])
    logger.info(new_array)
    url = "".join(new_array)

    logger.info("[zshare.py] url=" + url)
    return url
def _plot_episodios(data):
    """Build the colored "score + synopsis" plot text shown on every episode.

    Hoisted out of the episode loop because it depends only on the page
    ``data``; the original recomputed it (several regex passes over the whole
    page) once per episode.
    """
    # Score, wrapped in yellow, prefixed with a pink label.
    puntuacion = scrapertools.get_match(data, '<li><div class="num" id="val-score">(.*?)</div>')
    puntuacion = bbcode_kodi2html("[COLOR yellow]" + puntuacion + "[/COLOR]")
    puntuacion_title = bbcode_kodi2html("[COLOR pink]" + "Puntuación :" + "[/COLOR]")
    puntuacion = puntuacion_title + " " + puntuacion + "[CR]"

    # Synopsis block: paragraphs are re-colored individually, then the
    # leftover HTML separators are turned into line breaks / dropped.
    scrapedplot = scrapertools.get_match(data, '<h2>(.*?)<div class="card media-chapters">')
    plotformat = re.compile('<p>(.*?)</p>', re.DOTALL).findall(scrapedplot)
    scrapedplot = bbcode_kodi2html("[COLOR white]" + scrapedplot + "[/COLOR]")
    for plot in plotformat:
        scrapedplot = scrapedplot.replace(plot, bbcode_kodi2html("[COLOR skyblue][B]" + plot + "[/B][/COLOR]"))
    scrapedplot = scrapedplot.replace("</h2><p>", "[CR]")
    scrapedplot = scrapedplot.replace("</p></div>", "")
    return puntuacion + scrapedplot


def episodios(item):
    """List the episodes of a series.mu show, one Item per episode.

    Also prepends a follow/abandon toggle entry (unless the title ends with
    "XBMC") and appends an "add to library" entry when library support is on.

    :param item: Item whose ``url`` points at the show page.
    :return: list of Item objects.
    """
    logger.info("pelisalacarta.seriesmu episodios")
    itemlist = []

    # Download the page and flatten whitespace so the regexes stay simple.
    data = scrapertools.cache_page(item.url)
    data = re.sub(r"\n|\r|\t|\s{2}| ", "", data)

    # Follow / abandon action URLs and the show fanart, all page-level data.
    seguir = scrapertools.get_match(data, '<ul><li text="Siguiendo" color="green" class="([^"]+)"')
    abandonar = scrapertools.get_match(data, '<li text="Abandonada" color="red" class="([^"]+)">')
    fanart = scrapertools.get_match(data, '<div class="media-cover" style="background-image: url\(http://series.mu([^"]+)\)')
    fanart = urlparse.urljoin(host, fanart)
    seguir = urlparse.urljoin(host, seguir)
    abandonar = urlparse.urljoin(host, abandonar)

    if not item.title.endswith("XBMC"):
        # The empty marker div is only present when the show is not followed.
        if '<div class=""></div>' in data:
            url = seguir
            title = bbcode_kodi2html("[COLOR yellow]Seguir[/COLOR]")
            thumbnail = "http://s14.postimg.org/ca5boj275/smseguir.png"
        else:
            url = abandonar
            title = bbcode_kodi2html("[COLOR green]Siguiendo[/COLOR]: [COLOR red]Abandonar[/COLOR]")
            thumbnail = "http://s18.postimg.org/hh4l8hj1l/smabandonar2.png"
        itemlist.append(Item(channel=item.channel, title=title, url=url, fanart=fanart,
                             thumbnail=thumbnail, action="cambiar_estado", extra=item.url,
                             folder=False))

    # The plot text is identical for every episode; computed lazily on the
    # first episode found so pages without episodes behave exactly as before.
    scrapedplot = None

    patrontemporada = '<ul (temp[^<]+)>(.*?)</ul>'
    matchestemporadas = re.compile(patrontemporada, re.DOTALL).findall(data)

    for nombre_temporada, bloque_episodios in matchestemporadas:
        if (DEBUG):
            logger.info("nombre_temporada=" + nombre_temporada)
            logger.info("bloque_episodios=" + bloque_episodios)

        # Extract the episodes: number, title, watched-eye icon class, URL.
        patron = '<span>(.*?)'
        patron += '</span>([^<]+).*?'
        patron += '<i class="(.*?)".*?'
        patron += '<i class="icon-play".*?'
        patron += 'href="([^"]+)"'
        matches = re.compile(patron, re.DOTALL).findall(bloque_episodios)
        scrapertools.printMatches(matches)

        for scrapednumber, scrapedtitle, scrapedeyes, scrapedurl in matches:
            # Turn the eye icon class into a colored [Visto]/[Pendiente] tag.
            if "open" in scrapedeyes:
                scrapedeyes = re.sub(r"eye-w icon-eye-open",
                                     bbcode_kodi2html("[COLOR salmon]" + " [Visto]" + "[/COLOR]"),
                                     scrapedeyes)
            if "close" in scrapedeyes:
                scrapedeyes = re.sub(r"eye-w icon-eye-close",
                                     bbcode_kodi2html("[COLOR chartreuse]" + " [Pendiente]" + "[/COLOR]"),
                                     scrapedeyes)

            title = nombre_temporada + "X" + scrapednumber + scrapedtitle + scrapedeyes
            title = title.replace("temp=", "Temporada ")
            title = title.replace(scrapedtitle, bbcode_kodi2html("[COLOR white]" + scrapedtitle + "[/COLOR]"))

            if scrapedplot is None:
                scrapedplot = _plot_episodios(data)

            scrapedurl = urlparse.urljoin(host, scrapedurl)
            if scrapedtitle != " ":
                itemlist.append(Item(channel=__channel__, title=title, url=scrapedurl,
                                     action="findvideos", thumbnail=item.thumbnail,
                                     plot=scrapedplot, fanart=fanart,
                                     show=item.show.strip(), folder=True))

    if config.get_library_support() and len(itemlist) > 0:
        itemlist.append(Item(channel=__channel__, title="Añadir esta serie a la biblioteca de XBMC",
                             url=item.url, action="add_serie_to_library", extra="episodios",
                             show=item.show))

    return itemlist
def _sin_phpsessid(url):
    """Strip a trailing "?PHPSESSID=..." session token from *url*, if present.

    Extracted helper: the same snippet was copy-pasted three times in foro().
    """
    # e.g. http://mocosoftx.com/foro/fotos-hentai/?PHPSESSID=nflddqf9nvbm2dd92
    if "PHPSESSID" in url:
        url = scrapertools.get_match(url, "(.*?)\?PHPSESSID=")
    return url


def foro(item):
    """List a mocosoftx forum page: sub-forums, threads, and the next page.

    :param item: Item whose ``url`` points at a forum listing.
    :return: list of Item objects (sub-forums recurse into "foro",
             threads go to "findvideos").
    """
    logger.info("pelisalacarta.channels.mocosoftx foro")
    itemlist = []

    # Download the page
    data = scrapertools.cache_page(item.url, headers=MAIN_HEADERS)

    # Extract forums and sub-forums
    patron = '<h4><a href="([^"]+)"[^>]+>([^<]+)</a>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for scrapedurl, scrapedtitle in matches:
        # Page is iso-8859-1; normalize the title to utf-8 for display.
        scrapedtitle = unicode(scrapedtitle, "iso-8859-1", errors="replace").encode("utf-8")
        title = ">> Foro " + scrapedtitle
        url = _sin_phpsessid(urlparse.urljoin(item.url, scrapedurl))
        itemlist.append(Item(channel=__channel__, title=title, action="foro", url=url,
                             plot="", thumbnail="", folder=True))

    # Extract individual threads
    patron = '<td class="icon2 windowbgb">[^<]+'
    patron += '<img src="([^"]+)"[^<]+'
    patron += '</td>[^<]+'
    patron += '<td class="subject windowbgb2">[^<]+'
    patron += '<div >[^<]+'
    patron += '<span id="msg_\d+"><a href="([^"]+)">([^>]+)</a>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for scrapedthumbnail, scrapedurl, scrapedtitle in matches:
        url = _sin_phpsessid(urlparse.urljoin(item.url, scrapedurl))
        itemlist.append(Item(channel=__channel__, title=scrapedtitle, action="findvideos",
                             url=url, plot="", thumbnail=scrapedthumbnail, folder=True))

    # Next-page marker, e.g.
    # <a class="navPages" href="http://mocosoftx.com/foro/.../20/?PHPSESSID=...">2</a>
    patronvideos = '<strong>\d+</strong[^<]+<a class="navPages" href="([^"]+)">'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    if len(matches) > 0:
        scrapedurl = _sin_phpsessid(urlparse.urljoin(item.url, matches[0]))
        itemlist.append(Item(channel=__channel__, title=">> Página siguiente", action="foro",
                             url=scrapedurl, plot="", thumbnail="", folder=True))

    return itemlist