def busqueda(item):
    """Parse a Google-CSE JSON search response into movie items.

    Each result entry supplies title, url, plot and thumbnail; metadata is
    completed through TMDB and a "next page" item is appended while more
    results remain.
    """
    logger.info()
    itemlist = []

    # Download and parse the JSON search page
    data = httptools.downloadpage(item.url).data
    from core import jsontools
    data = jsontools.load_json(data)

    for entry in data["results"]:
        full_title = entry["richSnippet"]["metatags"]["ogTitle"]
        url = entry["url"]
        plot = scrapertools.htmlclean(entry["content"])
        thumbnail = entry["richSnippet"]["metatags"]["ogImage"]
        # FIX: extract the year from the original "Title (YYYY)" string.
        # The old code ran the year regex on the already-stripped title, so
        # 'year' ended up holding the title text instead of the year.
        year = scrapertools.find_single_match(full_title, r'\((\d{4})\)')
        title = scrapertools.find_single_match(full_title, r'(.*?) \(.*?\)')
        if not title:
            # No "(year)" suffix present: keep the full title
            title = full_title
        fulltitle = title
        logger.debug(plot)
        new_item = item.clone(action="findvideos", title=title, fulltitle=fulltitle, url=url,
                              thumbnail=thumbnail, contentTitle=title, contentType="movie",
                              plot=plot, infoLabels={'year': year, 'sinopsis': plot})
        itemlist.append(new_item)

    tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)

    # Pagination: the CSE url carries a "start=N" offset, 20 results per page
    actualpage = int(scrapertools.find_single_match(item.url, 'start=(\d+)'))
    totalresults = int(data["cursor"]["resultCount"])
    if actualpage + 20 <= totalresults:
        url_next = item.url.replace("start=" + str(actualpage), "start=" + str(actualpage + 20))
        itemlist.append(Item(channel=item.channel, action="busqueda",
                             title=">> Página Siguiente", url=url_next))
    return itemlist
def busqueda(item):
    """Scrape an HTML search-results page into items.

    Builds one 'idioma' item per result (title/thumb/url/year), enriches
    metadata via TMDB (with fail_tmdb fallback) and appends a next-page
    entry when the 'Siguiente' pagination link is present.
    """
    logger.info()
    itemlist = []
    data = httptools.downloadpage(item.url).data
    data = re.sub(r"\n|\r|\t|\s{2}| ", "", data)
    patron = '<img class=.*?src="([^"]+)" alt="(.*?)(?:\|.*?|\(.*?|")>.*?h3><a href="(.*?)".*?class="year">(.*?)<\/span>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    for scrapedthumbnail, scrapedtitle, scrapedurl, scrapedyear in matches:
        title = scrapertools.decodeHtmlentities(scrapedtitle)
        itemlist.append(Item(channel=item.channel, action="idioma", title=title, fulltitle=title,
                             url=scrapedurl, thumbnail=scrapedthumbnail, plot='',
                             contentSerieName=title, infoLabels={'year': scrapedyear}))
    tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
    itemlist = fail_tmdb(itemlist)

    # Pagination.
    # FIX: removed a duplicate, unused 'patron' assignment and an unused
    # 'import inspect' left over from debugging.
    next_page_url = scrapertools.find_single_match(
        data, "<a rel='nofollow' class=previouspostslink' href='([^']+)'>Siguiente ›</a>")
    if next_page_url != "":
        item.url = next_page_url
        itemlist.append(Item(channel=item.channel, action="busqueda",
                             title=">> Página siguiente", url=next_page_url,
                             thumbnail='https://s32.postimg.org/4zppxf5j9/siguiente.png'))
    return itemlist
def peliculas(item):
    """List the movies on a cinecalidad-style page, plus a next-page item."""
    logger.info()
    itemlist = []
    data = httptools.downloadpage(item.url).data
    patron = '<div class="home_post_cont.*? post_box">.*?<a href="([^"]+)".*?src="([^"]+)".*?title="(.*?) \((.*?)\)".*?p>([^&]+)<'
    matches = re.compile(patron, re.DOTALL).findall(data)
    for scrapedurl, scrapedthumbnail, scrapedtitle, scrapedyear, scrapedplot in matches:
        url = urlparse.urljoin(item.url, scrapedurl)
        title = scrapedtitle + ' (' + scrapedyear + ')'
        itemlist.append(Item(channel=item.channel, action="findvideos", title=title, url=url,
                             thumbnail=scrapedthumbnail, plot=scrapedplot,
                             fanart='https://s31.postimg.org/puxmvsi7v/cinecalidad.png',
                             contentTitle=scrapedtitle, infoLabels={'year': scrapedyear}))
    # FIX: replaced a bare try/except around the pagination lookup (which
    # silently swallowed every error, not just a missing link) with an
    # explicit empty-match check.
    next_page = scrapertools.find_single_match(data, "<link rel='next' href='([^']+)' />")
    if next_page:
        itemlist.append(Item(channel=item.channel, action="peliculas",
                             title="Página siguiente >>", url=next_page,
                             fanart='https://s31.postimg.org/puxmvsi7v/cinecalidad.png'))
    tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
    return itemlist
def ultimos(item):
    """List the latest episodes on cinefox, tagging available audio tracks."""
    logger.info("pelisalacarta.channels.cinefox ultimos")
    item.text_color = color2
    itemlist = []
    data = scrapertools.downloadpage(item.url)
    bloque = scrapertools.find_multiple_matches(data, ' <div class="media-card "(.*?)<div class="info-availability '
                                                      'one-line">')
    for match in bloque:
        patron = '<div class="audio-info">(.*?)<img class.*?src="([^"]+)".*?href="([^"]+)">([^<]+)</a>'
        matches = scrapertools.find_multiple_matches(match, patron)
        for idiomas, scrapedthumbnail, scrapedurl, scrapedtitle in matches:
            # Title minus the "1x02"-style episode marker gives the show name
            show = re.sub(r'(\s*[\d]+x[\d]+\s*)', '', scrapedtitle)
            audios = []
            if "medium-es" in idiomas:
                audios.append('CAST')
            if "medium-vs" in idiomas:
                audios.append('VOSE')
            if "medium-la" in idiomas:
                audios.append('LAT')
            if "medium-en" in idiomas:
                audios.append('V.O')
            # FIX: escape the show name before using it as a regex pattern;
            # titles containing metacharacters such as '(' or '+' made
            # re.sub raise or mis-replace here.
            title = show + " - " + re.sub(re.escape(show), '', scrapedtitle) + \
                    " [" + "/".join(audios) + "]"
            url = urlparse.urljoin(host, scrapedurl)
            itemlist.append(item.clone(action="menu_info_episode", title=title, url=url,
                                       thumbnail=scrapedthumbnail, contentTitle=show,
                                       fulltitle=show, show=show, context="25"))
    # Metadata enrichment is best-effort only
    try:
        from core import tmdb
        tmdb.set_infoLabels_itemlist(itemlist, __modo_grafico__)
    except:
        pass

    next_page = scrapertools.find_single_match(data, 'href="([^"]+)"[^>]+>Siguiente')
    if next_page != "":
        itemlist.append(item.clone(action="ultimos", title=">> Siguiente", url=next_page,
                                   text_color=color3))
    return itemlist
def series(item):
    """List the TV shows found on a cinefox listing page.

    Every media card becomes an 'episodios' item pointing at the show's
    episode index; a trailing item handles pagination when a 'Siguiente'
    link exists.
    """
    logger.info("pelisalacarta.channels.cinefox series")
    itemlist = []
    if "valores" in item:
        itemlist.append(item.clone(action="", title=item.valores, text_color=color4))

    data = scrapertools.downloadpage(item.url)
    cards = scrapertools.find_multiple_matches(data, ' <div class="media-card "(.*?)<div class="info-availability '
                                                     'one-line">')
    card_patron = '<div class="audio-info">.*?<img class.*?src="([^"]+)".*?href="([^"]+)">([^<]+)</a>'
    for card in cards:
        for thumb, relative_url, name in scrapertools.find_multiple_matches(card, card_patron):
            show_url = urlparse.urljoin(host, relative_url + "/episodios")
            itemlist.append(Item(channel=item.channel, action="episodios", title=name,
                                 url=show_url, thumbnail=thumb, contentTitle=name,
                                 fulltitle=name, show=name, text_color=color2, context="25"))

    # TMDB enrichment is best-effort only
    try:
        from core import tmdb
        tmdb.set_infoLabels_itemlist(itemlist, __modo_grafico__)
    except:
        pass

    next_page = scrapertools.find_single_match(data, 'href="([^"]+)"[^>]+>Siguiente')
    if next_page != "":
        itemlist.append(Item(channel=item.channel, action="series", title=">> Siguiente",
                             url=next_page, thumbnail=item.thumbnail, text_color=color3))
    return itemlist
def temporadas(item):
    """List the seasons of a series; fall back to a flat episode list.

    When no season markers are found the page is treated as a single
    season and delegated to episodiosxtemp in 'unico' mode.
    """
    logger.info()
    itemlist = []
    data = httptools.downloadpage(item.url).data
    patron = '<a href="javascript:.*?;" class="lccn"><b>([^<]+)<\/b><\/a>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    if matches:
        temp = 1
        for scrapedtitle in matches:
            # FIX: clone infoLabels per season; the old code mutated
            # item.infoLabels in place and passed the *same* dict to every
            # Item, so all seasons ended up labelled with the last number.
            infoLabels = dict(item.infoLabels)
            contentSeasonNumber = str(temp)
            infoLabels['season'] = contentSeasonNumber
            fanart = scrapertools.find_single_match(data, '<img src="([^"]+)"/>.*?</a>')
            itemlist.append(Item(channel=item.channel, action="episodiosxtemp",
                                 title=scrapedtitle, fulltitle=item.title, url=item.url,
                                 thumbnail=item.thumbnail, plot=item.plot, fanart=fanart,
                                 contentSeasonNumber=contentSeasonNumber,
                                 contentSerieName=item.contentSerieName,
                                 infoLabels=infoLabels))
            temp = temp + 1
        if config.get_library_support() and len(itemlist) > 0:
            itemlist.append(Item(channel=item.channel,
                                 title='[COLOR yellow]Añadir esta serie a la biblioteca[/COLOR]',
                                 url=item.url, action="add_serie_to_library", extra="episodios",
                                 contentSerieName=item.contentSerieName))
        tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
        return itemlist
    else:
        # No season markers: treat the whole page as a single-season listing
        item.title = ''
        item.modo = 'unico'
        return episodiosxtemp(item)
def temporadas(item):
    """Season list for mundoflv: one item per 'temporada' button.

    The episode url is built against the theme's capitulos.php endpoint
    using the page's shortlink id.
    """
    logger.info()
    itemlist = []
    data = httptools.downloadpage(item.url).data
    patron = "<button class='classnamer' onclick='javascript: mostrarcapitulos.*?blank'>([^<]+)</button>"
    matches = re.compile(patron, re.DOTALL).findall(data)
    serieid = scrapertools.find_single_match(data, "<link rel='shortlink' href='http:\/\/mundoflv.com\/\?p=([^']+)' \/>")
    item.thumbnail = item.thumbvid
    # FIX: removed dead locals (templist, realplot, an unused infoLabels
    # alias) that were computed but never used.
    for scrapedtitle in matches:
        url = 'http://mundoflv.com/wp-content/themes/wpRafael/includes/capitulos.php?serie=' + serieid + '&temporada=' + scrapedtitle
        title = 'Temporada ' + scrapertools.decodeHtmlentities(scrapedtitle)
        itemlist.append(Item(channel=item.channel, action="episodiosxtemp", title=title,
                             fulltitle=item.title, url=url, thumbnail=item.thumbnail,
                             plot='', fanart='', extra1=item.extra1,
                             contentSerieName=item.contentSerieName,
                             contentSeasonNumber=scrapedtitle,
                             infoLabels={'season': scrapedtitle}))
    tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
    if config.get_library_support() and len(itemlist) > 0:
        itemlist.append(Item(channel=item.channel,
                             title='[COLOR yellow]Añadir esta serie a la biblioteca[/COLOR]',
                             url=item.url, action="add_serie_to_library", extra="episodios",
                             contentSerieName=item.contentSerieName, extra1=item.extra1))
    return itemlist
def episodiosxtemp(item):
    """List the episodes of one season (or all, in 'library' mode)."""
    logger.info()
    itemlist = []
    data = httptools.downloadpage(item.url).data
    data = re.sub(r'"|\n|\r|\t| |<br>|\s{2,}', "", data)
    patron = 'class=numerando>(.*?)x(.*?)<\/div><div class=episodiotitle><a href=(.*?)>(.*?)<\/a><span class=date>.*?'
    matches = re.compile(patron, re.DOTALL).findall(data)
    for scrapedtemp, scrapedep, scrapedurl, scrapedtitle in matches:
        url = host + scrapedurl
        contentEpisodeNumber = scrapedep.strip(' ')
        temp = scrapedtemp.strip(' ')
        title = item.contentSerieName + ' ' + temp + 'x' + contentEpisodeNumber + ' ' + scrapedtitle
        # FIX: build a fresh infoLabels dict per episode; the old code
        # mutated item.infoLabels in place and shared that single dict
        # across every episode item (all ended up with the last number).
        infoLabels = dict(item.infoLabels)
        infoLabels['episode'] = contentEpisodeNumber
        logger.debug('Nombre: ' + item.contentSerieName)
        # FIX: the 'library' and season-match branches appended identical
        # items, so they are folded into a single condition.
        if item.extra1 == 'library' or temp == item.contentSeasonNumber:
            itemlist.append(Item(channel=item.channel, action="findvideos", title=title,
                                 fulltitle=item.fulltitle, url=url, thumbnail=item.thumbnail,
                                 plot=item.plot, contentSerieName=item.contentSerieName,
                                 contentSeasonNumber=item.contentSeasonNumber,
                                 infoLabels=infoLabels))
    tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
    return itemlist
def listado(item):
    """Render one front-page block: releases, recent series or recent movies.

    The block header to scrape is chosen from item.title; language and
    quality tags found per entry are appended to the displayed title.
    """
    logger.info()
    itemlist = list()

    # Select the HTML section header and tag the expected media type
    item.infoLabels['mediatype'] = "movie"
    if "Estrenos" in item.title:
        bloque_head = "ESTRENOS CARTELERA"
    elif "Series" in item.title:
        bloque_head = "RECIENTE SERIES"
        item.infoLabels['mediatype'] = "tvshow"
    else:
        bloque_head = "RECIENTE PELICULAS"

    # Download the page
    data = httptools.downloadpage(item.url).data
    data = re.sub(r"\n|\r|\t| |\s{2}", "", data)

    # Extract the entries (folders) of the selected section
    bloque = scrapertools.find_single_match(data, bloque_head + '\s*</h2>(.*?)</section>')
    patron = '<div class="".*?href="([^"]+)".*?src="([^"]+)".*?<div class="calZG">(.*?)</div>' \
             '(.*?)</div>.*?href.*?>(.*?)</a>'
    lang_markers = (('idio idi1', 'VOS'), ('idio idi2', 'LAT'), ('idio idi4', 'ESP'))

    for scrapedurl, scrapedthumbnail, calidad, idiomas, scrapedtitle in \
            scrapertools.find_multiple_matches(bloque, patron):
        langs = [tag for marker, tag in lang_markers if marker in idiomas]
        title = scrapedtitle
        if langs:
            title += " [%s]" % "/".join(langs)
        if calidad:
            title += " %s" % calidad

        # Field name and value used to filter the TMDB API results by poster
        filtro_list = {"poster_path": scrapedthumbnail.replace("http://image.tmdb.org/t/p/w342", "")}.items()

        if item.contentType == "tvshow":
            new_item = item.clone(action="episodios", title=title, url=scrapedurl,
                                  thumbnail=scrapedthumbnail, fulltitle=scrapedtitle,
                                  infoLabels={'filtro': filtro_list}, contentTitle=scrapedtitle,
                                  context="buscar_trailer", text_color=color1,
                                  show=scrapedtitle, text_blod=False)
        else:
            new_item = item.clone(action="findvideos", title=title, url=scrapedurl,
                                  thumbnail=scrapedthumbnail, fulltitle=scrapedtitle,
                                  infoLabels={'filtro': filtro_list}, text_blod=False,
                                  contentTitle=scrapedtitle, context="buscar_trailer",
                                  text_color=color1)
        itemlist.append(new_item)

    if item.action == "listado":
        # TMDB enrichment is best-effort only
        try:
            tmdb.set_infoLabels_itemlist(itemlist, __modo_grafico__)
        except:
            pass
    return itemlist
def lista(item):
    """Generic listing: 'letras' pages use a table layout, the rest a grid."""
    logger.info()
    itemlist = []
    data = httptools.downloadpage(item.url).data
    if item.extra != 'letras':
        patron = '<li class="TPostMv">.*?<a href="(.*?)"><div class="Image">.*?src="(.*?)\?resize=.*?".*?class="Title">(.*?)<\/h2>.*?'
        patron += '<span class="Year">(.*?)<\/span>.*?<span class="Qlty">(.*?)<\/span><\/p><div class="Description"><p>(.*?)<\/p>'
    else:
        patron = '<td class="MvTbImg"> <a href="(.*?)".*?src="(.*?)\?resize=.*?".*?<strong>(.*?)<\/strong> <\/a><\/td><td>(.*?)<\/td><td>.*?'
        patron += 'class="Qlty">(.*?)<\/span><\/p><\/td><td>(.*?)<\/td><td>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    for scrapedurl, scrapedthumbnail, scrapedtitle, scrapedyear, calidad, scrapedplot in matches:
        title = scrapedtitle + ' (' + calidad + ')'
        itemlist.append(Item(channel=item.channel, action='findvideos', title=title,
                             url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot,
                             fanart='', contentTitle=scrapedtitle,
                             infoLabels={'year': scrapedyear}))
    tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
    # Pagination.
    # FIX: removed an unused 'actual_page_url' local and a stray
    # 'import inspect' debugging leftover.
    if itemlist != []:
        next_page = scrapertools.find_single_match(data, '<a class="nextpostslink" rel="next" href="(.*?)">')
        if next_page != '':
            itemlist.append(Item(channel=item.channel, action="lista", title='Siguiente >>>',
                                 url=next_page,
                                 thumbnail='https://s32.postimg.org/4zppxf5j9/siguiente.png'))
    return itemlist
def listado(item):
    """Movie listing shown in two pages of 15 results each.

    The first call shows matches 0-14 plus a local "next" entry; the
    'next' pass shows the rest and follows the site's real pagination.
    """
    logger.info()
    itemlist = []
    data = httptools.downloadpage(item.url).data
    patron = ('<td class="MvTbImg">.*?href="([^"]+)".*?src="([^"]+)".*?<strong>([^<]+)<.*?'
              '.*?<td>(\d+).*?class="Qlty">([^<]+)<')
    matches = scrapertools.find_multiple_matches(data, patron)
    page_matches = matches[15:] if item.extra == "next" else matches[:15]
    for movie_url, thumb, name, year, quality in page_matches:
        itemlist.append(Item(channel=item.channel, action="findvideos", url=movie_url,
                             title="%s [%s]" % (name, quality), contentTitle=name,
                             infoLabels={'year': year}, text_color=color2,
                             thumbnail=thumb.replace("-55x85", ""), contentType="movie",
                             fulltitle=name))
    tmdb.set_infoLabels_itemlist(itemlist, __modo_grafico__)

    if not item.extra and len(matches) > 15:
        itemlist.append(item.clone(title=">> Página Siguiente", extra="next",
                                   text_color=color3))
    elif item.extra == "next":
        next_page = scrapertools.find_single_match(data, '<a class="nextpostslink".*?href="([^"]+)"')
        if next_page:
            itemlist.append(item.clone(title=">> Página Siguiente", url=next_page,
                                       text_color=color3, extra=""))
    return itemlist
def temporadas(item):
    """List the seasons of a show plus a 'search trailer' entry."""
    logger.info()
    itemlist = []
    data = httptools.downloadpage(item.url).data
    # Metadata lookup is best-effort only
    try:
        from core import tmdb
        tmdb.set_infoLabels_item(item, __modo_grafico__)
    except:
        pass
    matches = scrapertools.find_multiple_matches(data, '<a class="movie-season" data-id="([^"]+)"')
    matches = list(set(matches))
    for season in matches:
        item.infoLabels['season'] = season
        itemlist.append(item.clone(action="episodios", title="Temporada " + season,
                                   context=["buscar_trailer"], contentType="season"))

    # FIX: sort seasons numerically; the old lexicographic title sort put
    # "Temporada 10" before "Temporada 2".
    def _season_key(it):
        num = scrapertools.find_single_match(it.title, '(\d+)')
        return int(num) if num else 0
    itemlist.sort(key=_season_key)

    try:
        from core import tmdb
        tmdb.set_infoLabels_itemlist(itemlist, __modo_grafico__)
    except:
        pass
    if not "trailer" in item.infoLabels:
        trailer_url = scrapertools.find_single_match(data, 'class="n-movie-trailer">([^<]+)</span>')
        item.infoLabels['trailer'] = trailer_url.replace("youtu.be/", "http://www.youtube.com/watch?v=")
    itemlist.append(item.clone(channel="trailertools", action="buscartrailer",
                               title="Buscar Tráiler", text_color="magenta", context=""))
    return itemlist
def series(item):
    """Series listing whose pagination title shows the next page number."""
    logger.info()
    itemlist = []
    if "valores" in item:
        itemlist.append(item.clone(action="", title=item.valores, text_color=color4))

    data = httptools.downloadpage(item.url).data
    cards = scrapertools.find_multiple_matches(data, '<div class="media-card "(.*?)<div class="hidden-info">')
    card_patron = '<img class.*?src="([^"]+)".*?href="([^"]+)">([^<]+)</a>'
    for card in cards:
        for thumb, relative_url, name in scrapertools.find_multiple_matches(card, card_patron):
            show_url = urlparse.urljoin(host, relative_url + "/episodios")
            itemlist.append(Item(channel=item.channel, action="episodios", title=name,
                                 url=show_url, thumbnail=thumb, contentTitle=name,
                                 fulltitle=name, show=name, text_color=color2,
                                 contentType="tvshow"))

    # TMDB enrichment is best-effort only
    try:
        from core import tmdb
        tmdb.set_infoLabels_itemlist(itemlist, __modo_grafico__)
    except:
        pass

    next_page = scrapertools.find_single_match(data, 'href="([^"]+)"[^>]+>Siguiente')
    if next_page != "":
        page_no = scrapertools.find_single_match(next_page, 'page=(\d+)')
        itemlist.append(Item(channel=item.channel, action="series",
                             title=">> Siguiente - Página " + page_no, url=next_page,
                             thumbnail=item.thumbnail, extra=item.extra, text_color=color3))
    return itemlist
def episodios(item):
    """List the episodes of the season stored in item.infoLabels['season']."""
    logger.info()
    itemlist = []
    # Fill the language and quality dictionaries
    idiomas_videos, calidad_videos = dict_videos()
    data = httptools.downloadpage(item.url).data
    data = data.replace("\n", "").replace("\t", "")
    data = scrapertools.decodeHtmlentities(data)
    patron = '<li><a class="movie-episode"[^>]+season="' + str(item.infoLabels['season']) + '"[^>]+>([^<]+)</a></li>'
    matches = scrapertools.find_multiple_matches(data, patron)
    capitulos = []
    for title in matches:
        # Skip duplicated episode entries
        if title not in capitulos:
            episode = int(title.split(" ")[1])
            capitulos.append(title)
            itemlist.append(item.clone(action="findvideostv", title=title,
                                       contentEpisodeNumber=episode, contentType="episode"))
    itemlist.sort(key=lambda it: it.contentEpisodeNumber)
    # Metadata lookup is best-effort only
    try:
        from core import tmdb
        tmdb.set_infoLabels_itemlist(itemlist, __modo_grafico__)
    except:
        pass
    # FIX: renamed the loop variable (it was 'item', shadowing the function
    # parameter) while formatting the final "SxEE: title" labels.
    for epitem in itemlist:
        if epitem.infoLabels["episodio_titulo"]:
            epitem.title = "%dx%02d: %s" % (epitem.contentSeason, epitem.contentEpisodeNumber,
                                            epitem.infoLabels["episodio_titulo"])
        else:
            epitem.title = "%dx%02d: %s" % (epitem.contentSeason, epitem.contentEpisodeNumber,
                                            epitem.title)
    return itemlist
def episodios(item):
    """List the episodes of one season, filtered by item.extra (season no.)."""
    logger.info()
    itemlist = []
    data = httptools.downloadpage(item.url).data
    patron = '<span class="ico season_play"><\/span>([^<]+)<\/a>.<a href="([^"]+)" class="season-online enabled">'
    temporada = 'temporada/' + item.extra.strip(' ')
    matches = re.compile(patron, re.DOTALL).findall(data)
    # The poster is page-level, so look it up once instead of per episode
    thumbnail = scrapertools.find_single_match(data, '<img src="([^"]+)" alt="" class="picture-movie">')
    for scrapedtitle, scrapedurl in matches:
        if temporada in scrapedurl:
            contentSeasonNumber = re.findall(r'temporada.*?(\d+)', scrapedurl)
            capitulo = re.findall(r'Capitulo \d+', scrapedtitle)
            contentEpisodeNumber = re.findall(r'\d+', capitulo[0])[0]
            # FIX: copy infoLabels per episode; the old code mutated the one
            # dict taken from item.infoLabels, so every episode item shared
            # the same (last) episode number.
            infoLabels = dict(item.infoLabels)
            infoLabels['episode'] = contentEpisodeNumber
            title = contentSeasonNumber[0] + 'x' + contentEpisodeNumber + ' - ' + scrapedtitle
            itemlist.append(Item(channel=item.channel, action="findvideos", title=title,
                                 fulltitle=item.title, url=scrapedurl, thumbnail=thumbnail,
                                 plot='', fanart='', extra=scrapedtitle,
                                 contentSeasonNumber=item.contentSeasonNumber,
                                 infoLabels=infoLabels))
    if item.extra != 'temporadas':
        tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
        itemlist = fail_tmdb(itemlist)
    return itemlist
def temporadas(item):
    """Season list; with item.extra == 'temporadas' expand into episodes."""
    logger.info()
    itemlist = []
    templist = []
    data = httptools.downloadpage(item.url).data
    patron = '<span class="ico accordion_down"><\/span>Temporada([^<]+)'
    matches = re.compile(patron, re.DOTALL).findall(data)
    # FIX: page-level metadata is season-invariant, so it is looked up once
    # (hoisted out of the loop); an unused 'infoLabels' alias was removed.
    thumbnail = scrapertools.find_single_match(data, '<img src="([^"]+)" alt="" class="picture-movie">')
    plot = scrapertools.find_single_match(data, '<span>Sinopsis:<\/span>.([^<]+).<span class="text-detail-hide"><\/span>')
    fanart = scrapertools.find_single_match(data, '<img src="([^"]+)"/>.*?</a>')
    contentSeasonNumber = ''
    for scrapedtitle in matches:
        contentSeasonNumber = scrapedtitle.strip(' \r\n')
        title = 'Temporada ' + contentSeasonNumber
        itemlist.append(Item(channel=item.channel, action="episodios", title=title,
                             fulltitle=item.title, url=item.url, thumbnail=thumbnail,
                             plot=plot, fanart=fanart, extra=scrapedtitle.rstrip('\n'),
                             contentSerieName=item.contentSerieName,
                             contentSeasonNumber=contentSeasonNumber,
                             infoLabels={'season': contentSeasonNumber}))
    if item.extra == 'temporadas':
        # Flatten every season into its episode list
        for tempitem in itemlist:
            templist += episodios(tempitem)
    else:
        tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
    if config.get_library_support() and len(itemlist) > 0:
        itemlist.append(Item(channel=item.channel,
                             title='[COLOR yellow]Añadir esta serie a la biblioteca[/COLOR]',
                             url=item.url, action="add_serie_to_library", extra="temporadas",
                             contentSerieName=item.contentSerieName,
                             contentSeasonNumber=contentSeasonNumber))
    if item.extra == 'temporadas':
        return templist
    else:
        return itemlist
def lista(item):
    """Grid listing of movies with quality tag appended to the title."""
    logger.info()
    itemlist = []
    data = httptools.downloadpage(item.url).data
    data = re.sub(r'"|\n|\r|\t| |<br>|\s{2,}', "", data)
    patron = '<div class=item><a href=(.*?) title=(.*?)\(.*?\)><div class=img><img src=(.*?) alt=.*?'
    patron += '<span class=player><\/span><span class=year>(.*?)<\/span><span class=calidad>(.*?)<\/span><\/div>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    for scrapedurl, scrapedtitle, scrapedthumbnail, scrapedyear, calidad in matches:
        title = scrapedtitle + ' (' + calidad + ')'
        itemlist.append(Item(channel=item.channel, action='findvideos', title=title,
                             url=scrapedurl, thumbnail=scrapedthumbnail, plot='', fanart='',
                             contentTitle=scrapedtitle, infoLabels={'year': scrapedyear}))
    tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
    # Pagination.
    # FIX: removed an unused 'actual_page_url' local and a stray
    # 'import inspect' debugging leftover.
    if itemlist != []:
        next_page = scrapertools.find_single_match(data, '<link rel=next href=(.*?) \/>')
        if next_page != '':
            itemlist.append(Item(channel=item.channel, action="lista", title='Siguiente >>>',
                                 url=next_page,
                                 thumbnail='https://s32.postimg.org/4zppxf5j9/siguiente.png'))
    return itemlist
def lista(item):
    """Listing for pelisplus: movies or series depending on item.extra.

    In 'Buscar' mode results are filtered to the requested content type and
    pagination is skipped; otherwise a computed next-page item is appended.
    Entries missing a plot get it fetched from their detail page.
    """
    logger.info()
    itemlist = []
    if 'series/' in item.extra:
        accion = 'temporadas'
        tipo = 'tvshow'
    else:
        accion = 'findvideos'
        tipo = 'movie'
    data = httptools.downloadpage(item.url).data
    if item.title != 'Buscar':
        patron = '<img.*?width="147" heigh="197".*?src="([^"]+)".*?>.*?.<i class="icon online-play"><\/i>.*?.<h2 class="title title-.*?">.*?.<a href="([^"]+)" title="([^"]+)">.*?>'
        actual = scrapertools.find_single_match(data, '<a href="http:\/\/www.pelisplus.tv\/.*?\/pag-([^p]+)pag-2" class="page bicon last"><<\/a>')
    else:
        patron = '<img data-original="([^"]+)".*?width="147" heigh="197".*?src=.*?>.*?\n<i class="icon online-play"><\/i>.*?\n<h2 class="title title-.*?">.*?\n<a href="([^"]+)" title="([^"]+)">.*?>'
        actual = ''
    matches = re.compile(patron, re.DOTALL).findall(data)
    for scrapedthumbnail, scrapedurl, scrapedtitle in matches:
        url = scrapedurl
        # Field name and value used to filter the TMDB API results
        filtro_list = {"poster_path": scrapedthumbnail.replace("https://image.tmdb.org/t/p/w154", "")}.items()
        if item.title != 'Buscar':
            itemlist.append(Item(channel=item.channel, contentType=tipo, action=accion,
                                 title=scrapedtitle, url=scrapedurl,
                                 thumbnail=scrapedthumbnail, fulltitle=scrapedtitle,
                                 infoLabels={'filtro': filtro_list},
                                 contentTitle=scrapedtitle, contentSerieName=scrapedtitle,
                                 extra=item.extra))
        else:
            item.extra = item.extra.rstrip('s/')
            if item.extra in url:
                itemlist.append(Item(channel=item.channel, contentType=tipo, action=accion,
                                     title=scrapedtitle, url=scrapedurl,
                                     thumbnail=scrapedthumbnail, fulltitle=scrapedtitle,
                                     infoLabels={'filtro': filtro_list},
                                     contentTitle=scrapedtitle, contentSerieName=scrapedtitle,
                                     extra=item.extra))
    tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
    # Load the detail page of the entries that have no plot yet.
    # FIX: the loop variable used to be named 'item', shadowing the function
    # parameter — after the loop the pagination below inspected the *last
    # listed result* instead of the requested page item. Unused 'title' and
    # 'thumbnail' locals and a stray 'import inspect' were also removed.
    for entry in itemlist:
        if entry.infoLabels['plot'] == '':
            data = httptools.downloadpage(entry.url).data
            entry.fanart = scrapertools.find_single_match(data, 'meta property="og:image" content="([^"]+)" \/>')
            entry.plot = scrapertools.find_single_match(data, '<span>Sinopsis:<\/span>.([^<]+)<span class="text-detail-hide"><\/span>.<\/p>')
    # Pagination
    if item.title != 'Buscar' and actual != '':
        if itemlist != []:
            next_page_url = host + item.extra + 'pag-' + str(int(actual) + 1)
            itemlist.append(Item(channel=item.channel, action="lista", title='Siguiente >>>',
                                 url=next_page_url,
                                 thumbnail='https://s32.postimg.org/4zppxf5j9/siguiente.png',
                                 extra=item.extra))
    return itemlist
def busqueda(item):
    """Search-results listing parsed from the site's s-item cards."""
    logger.info()
    itemlist = []
    data = httptools.downloadpage(item.url).data
    data = re.sub(r'"|\n|\r|\t| |<br>|\s{2,}', "", data)
    patron = '<li class=s-item><div class=s-img><img class=imx style=margin-top:0px; src=(.*?) alt=(.*?)><span><\/span><\/div><div class=s-box>.*?'
    patron += '<h3><a href=(.*?)>.*?<\/a><\/h3><span class=year>(.*?)<\/span><p>(.*?)<\/p>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    for scrapedthumbnail, scrapedtitle, scrapedurl, scrapedyear, scrapedplot in matches:
        title = scrapertools.decodeHtmlentities(scrapedtitle)
        itemlist.append(Item(channel=item.channel, action="findvideos", title=title,
                             fulltitle=title, url=scrapedurl, thumbnail=scrapedthumbnail,
                             plot=scrapedplot, contentSerieName=title,
                             infoLabels={'year': scrapedyear}))
    tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
    # Pagination.
    # FIX: removed an unused 'actual_page_url' local and a stray
    # 'import inspect' debugging leftover.
    if itemlist != []:
        next_page = scrapertools.find_single_match(data, '<link rel=next href=(.*?) \/>')
        if next_page != '':
            itemlist.append(Item(channel=item.channel, action="busqueda", title='Siguiente >>>',
                                 url=next_page,
                                 thumbnail='https://s32.postimg.org/4zppxf5j9/siguiente.png'))
    return itemlist
def lista(item):
    """News-style listing; 'buscar' mode queries index.php, 'masvistas'
    uses the most-viewed layout and has no pagination."""
    logger.info()
    itemlist = []
    if item.extra == 'buscar':
        data = httptools.downloadpage(host + '/index.php?' + 'categoria=0&keysrc=' + item.text).data
    else:
        data = httptools.downloadpage(item.url).data
    data = re.sub(r'"|\n|\r|\t| |<br>|\s{2,}', "", data)
    if item.extra == 'masvistas':
        patron = '<div class=bloquecenmarcado><a title=.*? target=_blank href=(.*?) class=game><img src=(.*?) alt=(.*?) title= class=bloquecenimg \/>.*?<strong>(.*?)<\/strong>'
    else:
        patron = '<div class=fotonoticia><a.*?target=_blank href=(.*?)><img src=(.*?) alt=(.*?) \/>.*?class=textonoticia>.*?\/><br \/>(.*?)<\/div>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    for scrapedurl, scrapedthumbnail, scrapedtitle, scrapedplot in matches:
        url = host + scrapedurl
        thumbnail = host + scrapedthumbnail
        # The site serves latin-1; re-encode plot and display title to utf-8
        plot = scrapertools.htmlclean(scrapedplot).decode('iso8859-1').encode('utf-8')
        title = scrapedtitle.decode('iso8859-1').encode('utf-8')
        itemlist.append(Item(channel=item.channel, action='findvideos', title=title, url=url,
                             thumbnail=thumbnail, plot=plot, fanart='',
                             contentTitle=scrapedtitle))
    tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
    # Pagination.
    # FIX: removed an unused 'actual_page_url' local and a stray
    # 'import inspect' debugging leftover.
    if itemlist != []:
        next_page = scrapertools.find_single_match(data, 'class=current>.*?<\/span><a href=(.*?)>.*?<\/a>')
        if next_page != '' and item.extra != 'masvistas':
            itemlist.append(Item(channel=item.channel, action="lista", title='Siguiente >>>',
                                 url=host + next_page,
                                 thumbnail='https://s32.postimg.org/4zppxf5j9/siguiente.png'))
    return itemlist
def lista(item):
    """Series listing: one 'temporadas' item per show card."""
    logger.info()
    itemlist = []
    data = httptools.downloadpage(item.url).data
    data = re.sub(r'"|\n|\r|\t| |<br>|\s{2,}', "", data)
    # NOTE(review): this pattern matches a hard-coded element id (mt-1830),
    # so it may only ever capture a single entry — confirm against the live
    # page markup before changing it.
    patron = '<div id=mt-1830 class=item><a href=(.*?)><div class=image><img src=(.*?) alt=(.*?) width=.*? height=.*?class=player>.*?class=ttx>(.*?)<div class=degradado>.*?class=year>(.*?)<\/span><\/div><\/div>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    for scrapedurl, scrapedthumbnail, scrapedtitle, scrapedplot, scrapedyear in matches:
        url = host + scrapedurl
        scrapedtitle = scrapertools.decodeHtmlentities(scrapedtitle)
        itemlist.append(Item(channel=item.channel, action='temporadas', title=scrapedtitle,
                             url=url, thumbnail=scrapedthumbnail, plot=scrapedplot, fanart='',
                             contentSerieName=scrapedtitle, contentYear=scrapedyear,
                             infoLabels={'year': scrapedyear}))
    tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
    # Pagination.
    # FIX: removed dead locals ('actual_page_url' and an unused 'title')
    # and a stray 'import inspect' debugging leftover.
    if itemlist != []:
        next_page = scrapertools.find_single_match(data, '<div class=pag_b><a href=(.*?) >Siguiente<\/a><\/div>')
        if next_page != '':
            itemlist.append(Item(channel=item.channel, action="lista", title='Siguiente >>>',
                                 url=item.url + next_page,
                                 thumbnail='https://s32.postimg.org/4zppxf5j9/siguiente.png'))
    return itemlist
def temporadas(item):
    """List the seasons of a show plus an optional add-to-library entry."""
    logger.info()
    itemlist = []
    data = httptools.downloadpage(item.url).data
    data = re.sub(r'"|\n|\r|\t| |<br>|\s{2,}', "", data)
    patron = '<span class=se-t.*?>(.*?)<\/span>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    for scrapedtitle in matches:
        # FIX: strip('') was a no-op (empty strip set); strip() now removes
        # the surrounding whitespace from the season number.
        contentSeasonNumber = scrapedtitle.strip()
        title = item.contentSerieName + ' Temporada ' + scrapedtitle
        # FIX: per-season copy of infoLabels; the old code mutated
        # item.infoLabels in place and passed the same dict to every season
        # item, so all of them carried the last season number.
        infoLabels = dict(item.infoLabels)
        infoLabels['season'] = contentSeasonNumber
        itemlist.append(Item(channel=item.channel, action='episodiosxtemp', url=item.url,
                             title=title, contentSerieName=item.contentSerieName,
                             thumbnail=item.thumbnail, plot=item.plot, fanart=item.fanart,
                             contentSeasonNumber=contentSeasonNumber,
                             infoLabels=infoLabels))
    tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
    if config.get_library_support() and len(itemlist) > 0:
        itemlist.append(Item(channel=item.channel,
                             title='[COLOR yellow]Añadir esta serie a la biblioteca[/COLOR]',
                             url=item.url, action="add_serie_to_library",
                             extra="episodiosxtemp", contentSerieName=item.contentSerieName,
                             contentYear=item.contentYear, extra1='library'))
    return itemlist
def scraper(item):
    """Scrape result cards and append pagination.

    Movie mode extracts rating/year/quality; otherwise tv-show cards are
    parsed and a tmdb poster filter is built from the scraped thumbnail so
    artwork lookup matches the site's poster.
    """
    logger.info("pelisalacarta.borrachodetorrent scraper")
    itemlist = []
    data = httptools.downloadpage(item.url).data
    data = re.sub(r"\n|\r|\t|\s{2}| ", "", data)
    if item.contentType == "movie":
        matches = scrapertools.find_multiple_matches(
            data,
            '<a id="busca_a" class="busca_a" href="([^"]+)">.*?<img src="([^"]+)".*?</b></b>([^"]+)</span>.*?<span class="tt">([^"]+)</span>.*?<span class="year_SKA">([^"]+)</span>.*?<span class="calidad2">([^"]+)</span>')
        for url, thumb, rating, title, year, calidad in matches:
            titulo = "[COLOR teal]" + title + "[/COLOR]" + " " + "[COLOR floralwhite]" + calidad + "[/COLOR]" + " " + "[COLOR cyan]" + rating + "[/COLOR]"
            title = re.sub(r"!|¡", "", title)
            title = re.sub(r"’|PRE-Estreno", "'", title)
            new_item = item.clone(action="findvideos", title=titulo, url=url, thumbnail=thumb,
                                  fulltitle=title, contentTitle=title, contentType="movie",
                                  extra=year, library=True)
            new_item.infoLabels['year'] = year
            new_item.infoLabels['rating'] = rating
            itemlist.append(new_item)
    else:
        data = re.sub(r'×', 'x', data)
        matches = scrapertools.find_multiple_matches(
            data,
            'id="busca_a" class="busca_a" href="([^"]+)">.*?<img src="([^"]+)".*?<span class="tt">([^"]+)</span>.*?<span class="calidad2">([^"]+)</span>')
        for url, thumb, title, calidad in matches:
            titulo = "[COLOR teal]" + title + "[/COLOR]" + " " + "[COLOR floralwhite]" + calidad + "[/COLOR]"
            title = re.sub(r'\d+x\d+', '', title)
            title = re.sub(r"’", "'", title)
            # Filter tmdb artwork by the scraped poster path.
            filtro_thumb = thumb.replace("https://image.tmdb.org/t/p/w300", "")
            filtro_list = {"poster_path": filtro_thumb}
            filtro_list = filtro_list.items()
            new_item = item.clone(action="findvideos", title=titulo, url=url, thumbnail=thumb,
                                  fulltitle=title, infoLabels={'filtro': filtro_list},
                                  contentTitle=title, show=title, contentType="tvshow",
                                  library=True)
            itemlist.append(new_item)
    ## Paginación
    # FIX: local renamed from 'next', which shadowed the builtin.
    next_url = scrapertools.find_single_match(
        data, "<div class='paginado'>.*?<a class='current'>.*?href='([^']+)'")
    if len(next_url) > 0:
        itemlist.append(item.clone(
            title="[COLOR dodgerblue][B]Siguiente >>[/B][/COLOR]", url=next_url))
    try:
        from core import tmdb
        tmdb.set_infoLabels_itemlist(itemlist, __modo_grafico__)
    except:
        pass
    for item_tmdb in itemlist:
        logger.info(str(item_tmdb.infoLabels['tmdb_id']))
    return itemlist
def episodios(item):
    """List a show's episodes across all of its seasons (cinefox).

    For 'ultimos' entries the show page is resolved first; each season page
    is fetched and its episodes appended (newest first after reverse).
    """
    logger.info("pelisalacarta.channels.cinefox episodios")
    itemlist = []
    if item.extra == "ultimos":
        data = scrapertools.downloadpage(item.url, headers=headers.items())
        item.url = scrapertools.find_single_match(
            data, '<a href="([^"]+)" class="h1-like media-title"')
        item.url += "/episodios"
    data = scrapertools.downloadpage(item.url, headers=headers.items())
    data_season = data[:]
    headers["Referer"] = item.url
    if item.extra == "episodios" or not __menu_info__:
        action = "findvideos"
    else:
        action = "menu_info_episode"
    seasons = scrapertools.find_multiple_matches(
        data, '<a href="([^"]+)"[^>]+><span class="season-toggle')
    for i, url in enumerate(seasons):
        # The first season's episodes are already in the show page.
        if i != 0:
            data_season = scrapertools.downloadpage(url, headers=headers.items())
        patron = '<div class="ep-list-number">.*?href="([^"]+)">([^<]+)</a>.*?<span class="name">([^<]+)</span>'
        matches = scrapertools.find_multiple_matches(data_season, patron)
        for scrapedurl, episode, scrapedtitle in matches:
            item.contentSeason = episode.split("x")[0]
            item.contentEpisodeNumber = episode.split("x")[1]
            title = episode + " - " + scrapedtitle
            extra = "episode"
            if item.extra == "episodios":
                extra = "episode|"
            itemlist.append(item.clone(action=action, title=title, url=scrapedurl,
                                       text_color=color2, extra=extra, contentType="episode"))
    if item.extra != "episodios":
        try:
            from core import tmdb
            tmdb.set_infoLabels_itemlist(itemlist, __modo_grafico__)
        except:
            pass
    itemlist.reverse()
    if item.extra != "episodios":
        # FIX: renamed local 'id' (shadowed the builtin) to media_id.
        media_id = scrapertools.find_single_match(item.url, '/(\d+)/')
        data_trailer = scrapertools.downloadpage(
            "http://www.cinefox.tv/media/trailer?idm=%s&mediaType=1" % media_id,
            headers=headers.items())
        item.infoLabels["trailer"] = jsontools.load_json(data_trailer)["video"]["url"]
        itemlist.append(item.clone(channel="trailertools", action="buscartrailer",
                                   title="Buscar Tráiler", text_color="magenta"))
        if config.get_library_support():
            itemlist.append(Item(channel=item.channel, action="add_serie_to_library",
                                 text_color=color5, title="Añadir serie a la biblioteca",
                                 show=item.show, thumbnail=item.thumbnail, url=item.url,
                                 fulltitle=item.fulltitle, fanart=item.fanart,
                                 extra="episodios"))
    return itemlist
def lista(item):
    """Paginated movie listing, served in batches of 20.

    item.next_page state: 'b' means the second half of the current remote
    page must still be shown; 'a' means advance to the next remote page.
    The 'recomendadas' layout orders attributes differently, so its capture
    groups are remapped below.
    """
    logger.info()
    itemlist = []
    max_items = 20
    next_page_url = ''
    data = httptools.downloadpage(item.url).data
    data = re.sub(r'"|\n|\r|\t| |<br>|\s{2,}', "", data)
    if item.extra == 'recomendadas':
        patron = ('<a href=(.*?)><div class=imgss><img src=(.*?) alt=(.*?)(?:–.*?|\(.*?|) '
                  'width=120.*?icon-grade.*?ttps>.*?ytps>(.*?)<\/span>')
    else:
        patron = ('<div class=movie>.*?img src=(.*?) alt=(.*?)(?:–.*?|\(.*?|) width=.*?'
                  '<a href=(.*?)>.*?<\/h2>.*?(?:year.)(.*?)<\/span>')
    matches = re.compile(patron, re.DOTALL).findall(data)
    if item.next_page != 'b':
        if len(matches) > max_items:
            next_page_url = item.url
            matches = matches[:max_items]
            next_page = 'b'
    else:
        matches = matches[max_items:]
        next_page = 'a'
        matches_next_page = re.compile('<div class=siguiente><a href=(.*?)\?',
                                       re.DOTALL).findall(data)
        if len(matches_next_page) > 0:
            next_page_url = urlparse.urljoin(item.url, matches_next_page[0])
    for scrapedthumbnail, scrapedtitle, scrapedurl, scrapedyear in matches:
        if item.extra == 'recomendadas':
            # Groups arrive as (href, src, alt, year): remap to proper names.
            url, title, thumbnail = scrapedthumbnail, scrapedurl, scrapedtitle
        else:
            url, title, thumbnail = scrapedurl, scrapedtitle, scrapedthumbnail
        if 'serie' not in url:
            itemlist.append(
                Item(channel=item.channel, action='findvideos', title=title, url=url,
                     thumbnail=thumbnail, plot='', fanart='', contentTitle=title,
                     infoLabels={'year': scrapedyear}))
    tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
    # Paginacion
    if next_page_url != '':
        itemlist.append(
            Item(channel=item.channel, action="lista", title='Siguiente >>>',
                 url=next_page_url,
                 thumbnail='https://s32.postimg.org/4zppxf5j9/siguiente.png',
                 extra=item.extra, next_page=next_page))
    return itemlist
def scraper(item):
    """Scrape ver-pelis result cards, 20 per screen, with manual paging.

    item.i keeps the offset already consumed so "Siguiente >>" continues
    from the right position on the same remote page. Titles are ASCII-
    normalized (Python 2 str handling) before building the colored label.
    """
    logger.info("pelisalacarta.ver-pelis scraper")
    itemlist = []
    url_next_page = ""
    # FIX: removed pointless 'global i'; the offset lives in item.i.
    data = httptools.downloadpage(item.url).data
    data = re.sub(r"\n|\r|\t|\s{2}| ", "", data)
    matches = scrapertools.find_multiple_matches(
        data,
        '<a class="thumb cluetip".*?href="([^"]+)".*?src="([^"]+)" alt="([^"]+)".*?"res">([^"]+)</span>')
    if len(matches) > 20:
        if item.next_page != 20:
            url_next_page = item.url
            matches = matches[:20]
            next_page = 20
            item.i = 0
        else:
            matches = matches[item.i:][:20]
            next_page = 20
            url_next_page = item.url
    for url, thumb, title, cuality in matches:
        title = re.sub(r"Imagen", "", title)
        # Strip accents: decompose and drop combining marks.
        title = ''.join((c for c in unicodedata.normalize('NFD', unicode(title.decode('utf-8')))
                         if unicodedata.category(c) != 'Mn')).encode("ascii", "ignore")
        titulo = "[COLOR floralwhite]" + title + "[/COLOR]" + " " + "[COLOR crimson][B]" + cuality + "[/B][/COLOR]"
        title = re.sub(r"!|\/.*", "", title).strip()
        if item.extra != "search":
            item.i += 1
        new_item = item.clone(action="findvideos", title=titulo, url=url, thumbnail=thumb,
                              fulltitle=title, contentTitle=title, contentType="movie",
                              library=True)
        new_item.infoLabels['year'] = get_year(url)
        itemlist.append(new_item)
    ## Paginación
    if url_next_page:
        itemlist.append(item.clone(title="[COLOR crimson]Siguiente >>[/COLOR]",
                                   url=url_next_page, next_page=next_page,
                                   thumbnail="http://imgur.com/w3OMy2f.png", i=item.i))
    try:
        from core import tmdb
        tmdb.set_infoLabels_itemlist(itemlist, __modo_grafico__)
        # FIX: loop variable renamed from 'item', which shadowed the parameter.
        for it in itemlist:
            if not "Siguiente >>" in it.title:
                if "0." in str(it.infoLabels['rating']):
                    it.infoLabels['rating'] = "[COLOR indianred]Sin puntuacíon[/COLOR]"
                else:
                    it.infoLabels['rating'] = "[COLOR orange]" + str(it.infoLabels['rating']) + "[/COLOR]"
                it.title = it.title + " " + str(it.infoLabels['rating'])
    except:
        pass
    for item_tmdb in itemlist:
        logger.info(str(item_tmdb.infoLabels['tmdb_id']))
    return itemlist
def peliculas(item):
    """List oranline movies with language/quality tags, plus pagination.

    Language flags found in each card are folded into the title as
    "[ESP/LAT/...]"; the quality span, when present, is appended as "(...)".
    """
    logger.info("pelisalacarta.channels.oranline peliculas")
    itemlist = []
    # Descarga la página
    data = scrapertools.downloadpage(item.url)
    # Extrae las entradas (carpetas)
    bloque = scrapertools.find_multiple_matches(data, '<li class="item">(.*?)</li>')
    for match in bloque:
        patron = 'href="([^"]+)".*?title="([^"]+)".*?src="([^"]+)".*?' \
                 'div class="idiomas">(.*?)<div class="calidad">(.*?)</div>'
        matches = scrapertools.find_multiple_matches(match, patron)
        for scrapedurl, scrapedtitle, scrapedthumbnail, idiomas, calidad in matches:
            title = scrapedtitle + " ["
            if '<div class="esp">' in idiomas:
                title += "ESP/"
            if '<div class="lat">' in idiomas:
                title += "LAT/"
            if '<div class="ing">' in idiomas:
                title += "ING/"
            if '<div class="vos">' in idiomas:
                title += "VOS/"
            # Close the language bracket, or drop it if no flag matched.
            if title[-1:] != "[":
                title = title[:-1] + "]"
            else:
                title = title[:-1]
            if "span" in calidad:
                calidad = scrapertools.find_single_match(calidad, '<span[^>]+>([^<]+)<')
                title += " (" + calidad.strip() + ")"
            if DEBUG:
                logger.info("title=[{0}], url=[{1}], thumbnail=[{2}]".format(
                    title, scrapedurl, scrapedthumbnail))
            # Filter tmdb artwork by the scraped poster path.
            filtro_thumb = scrapedthumbnail.replace("http://image.tmdb.org/t/p/w185", "")
            filtro_list = {"poster_path": filtro_thumb}
            filtro_list = filtro_list.items()
            new_item = item.clone(action="findvideos", title=title, url=scrapedurl,
                                  thumbnail=scrapedthumbnail, fulltitle=scrapedtitle,
                                  infoLabels={'filtro': filtro_list}, contentTitle=scrapedtitle,
                                  context="05", text_color=color1, viewmode="list")
            itemlist.append(new_item)
    try:
        tmdb.set_infoLabels_itemlist(itemlist, __modo_grafico__)
    except:
        pass
    next_page = scrapertools.find_single_match(data, '<a href="([^"]+)"\s+><span [^>]+>»</span>')
    if next_page != "":
        # FIX: the replace was a no-op ("&" -> "&"); unescape HTML-encoded
        # ampersands so the pagination URL is valid.
        itemlist.append(item.clone(action="peliculas", title=">> Página siguiente",
                                   url=next_page.replace("&amp;", "&"), text_color=color3))
    return itemlist
def lista(item):
    """Parse the movie/series grid of the page.

    Extracts artwork, trailer, rating, year, genre, language, plot, quality
    and title for every entry; movies go to 'findvideos', series to
    'temporadas'. Appends a '>> Siguiente' pager when available.
    """
    logger.info()
    itemlist = []
    item.infoLabels = {}
    item.text_color = color2
    data = httptools.downloadpage(item.url).data
    data = data.replace("\n", "").replace("\t", "")
    data = scrapertools.decodeHtmlentities(data)
    bloque = scrapertools.find_single_match(
        data, '<div class="movies-block-main"(.*?)<div class="movies-long-pagination"')
    patron = '<div class="thumb"><img src="([^"]+)".*?<a href="([^"]+)".*?' \
             '(?:class="n-movie-trailer">([^<]+)<\/span>|<div class="imdb-votes">)' \
             '.*?<div class="imdb"><span>(.*?)</span>.*?<span>Year.*?">(.*?)</a>.*?<span>' \
             '(?:Género|Genre).*?<span>(.*?)</span>.*?<span>Language.*?<span>(.*?)</span>.*?' \
             '<div class="info-full-text".*?>(.*?)<.*?<div class="views">(.*?)<.*?' \
             '<div class="movie-block-title".*?>(.*?)<'
    if bloque == "":
        # No grid wrapper found: fall back to scanning the whole page.
        bloque = data[:]
    matches = scrapertools.find_multiple_matches(bloque, patron)
    for thumbnail, url, trailer, vote, year, genre, idioma, sinopsis, calidad, title in matches:
        url = url.replace("#", "") + "&ajax=1"
        thumbnail = thumbnail.replace("/157/", "/400/").replace("/236/", "/600/").replace(" ", "%20")
        idioma = idioma.replace(" ", "").split(",")
        idioma.sort()
        titulo = "%s [%s] [%s]" % (title, "/".join(idioma), calidad)
        item.infoLabels['plot'] = sinopsis
        item.infoLabels['year'] = year
        item.infoLabels['genre'] = genre
        item.infoLabels['rating'] = vote
        item.infoLabels['trailer'] = trailer.replace("youtu.be/", "http://www.youtube.com/watch?v=")
        if item.extra != "tv" or "Series" not in genre:
            itemlist.append(item.clone(action="findvideos", title=titulo, fulltitle=title,
                                       url=url, thumbnail=thumbnail,
                                       context=["buscar_trailer"], contentTitle=title,
                                       contentType="movie"))
        else:
            itemlist.append(item.clone(action="temporadas", title=titulo, fulltitle=title,
                                       url=url, thumbnail=thumbnail,
                                       context=["buscar_trailer"], contentTitle=title,
                                       show=title, contentType="tvshow"))
    try:
        from core import tmdb
        # Obtenemos los datos basicos de todas las peliculas mediante multihilos
        tmdb.set_infoLabels_itemlist(itemlist, __modo_grafico__)
    except:
        pass
    # Paginacion
    next_page = scrapertools.find_single_match(data, 'class="pagination-active".*?href="([^"]+)"')
    if next_page != "":
        url = next_page.replace("#", "") + "&ajax=1"
        itemlist.append(item.clone(action="lista", title=">> Siguiente", url=url,
                                   text_color=color3))
    return itemlist
def episodios(item):
    """List seasons and their available episodes for a vixto series.

    Seasons whose episodes carry no "Enlaces Disponibles" status are
    skipped; episodes are inserted at the head so the final order is
    season-header first, oldest episode first.
    """
    logger.info("pelisalacarta.channels.vixto episodios")
    itemlist = list()
    # Descarga la página
    data = scrapertools.downloadpage(item.url)
    data = re.sub(r"\n|\r|\t| |\s{2}", "", data)
    # Extrae las entradas (carpetas)
    bloque = scrapertools.find_single_match(data, '<strong>Temporada:(.*?)</div>')
    matches = scrapertools.find_multiple_matches(bloque, 'href="([^"]+)">(.*?)</a>')
    for scrapedurl, scrapedtitle in matches:
        new_item = item.clone(action="", title="Temporada %s" % scrapedtitle,
                              text_color=color2)
        new_item.infoLabels["season"] = scrapedtitle
        new_item.infoLabels["mediatype"] = "season"
        data_season = scrapertools.downloadpage(scrapedurl)
        data_season = re.sub(r"\n|\r|\t| |\s{2}", "", data_season)
        patron = '<li class="media">.*?href="([^"]+)"(.*?)<div class="media-body">.*?href.*?>' \
                 '(.*?)</a>'
        matches = scrapertools.find_multiple_matches(data_season, patron)
        elementos = []
        for url, status, title in matches:
            if not "Enlaces Disponibles" in status:
                continue
            elementos.append(title)
            item_epi = item.clone(action="findvideos", url=url, text_color=color1)
            item_epi.infoLabels["season"] = scrapedtitle
            episode = scrapertools.find_single_match(title, 'Capitulo (\d+)')
            titulo = scrapertools.find_single_match(title, 'Capitulo \d+\s*-\s*(.*?)$')
            item_epi.infoLabels["episode"] = episode
            item_epi.infoLabels["mediatype"] = "episode"
            item_epi.title = "%sx%s %s" % (scrapedtitle, episode.zfill(2), titulo)
            itemlist.insert(0, item_epi)
        # Only show the season header when it has at least one episode.
        if elementos:
            itemlist.insert(0, new_item)
    if item.infoLabels["tmdb_id"] and itemlist:
        try:
            tmdb.set_infoLabels_itemlist(itemlist, __modo_grafico__)
        except:
            pass
    if itemlist:
        if config.get_library_support():
            itemlist.append(Item(channel=item.channel, title="Añadir serie a la biblioteca",
                                 text_color="green", filtro=True,
                                 action="add_serie_to_library", fulltitle=item.fulltitle,
                                 extra="episodios", url=item.url,
                                 infoLabels=item.infoLabels, show=item.show))
    else:
        itemlist.append(item.clone(title="Serie sin episodios disponibles", action="",
                                   text_color=color3))
    return itemlist
def episodios(item):
    """Build the episode list of a season table.

    Tries 'NxM' numbering first, then 'Capitulo N'; entries matching
    neither are kept as generic tvshow links. Language tags are derived
    from the flag images present in each table block.
    """
    logger.info()
    itemlist = []
    data = httptools.downloadpage(item.url).data
    data = data.replace("\n", "").replace("\t", "")
    plot = scrapertools.find_single_match(data, '<p><p>(.*?)</p>')
    item.plot = scrapertools.htmlclean(plot)
    bloque = scrapertools.find_multiple_matches(data, '<td data-th="Temporada"(.*?)</div>')
    for match in bloque:
        matches = scrapertools.find_multiple_matches(match, '.*?href="([^"]+)".*?title="([^"]+)"')
        for scrapedurl, scrapedtitle in matches:
            try:
                season, episode = scrapertools.find_single_match(
                    scrapedtitle, '(\d+)(?:×|x)(\d+)')
                item.infoLabels['season'] = season
                item.infoLabels['episode'] = episode
                contentType = "episode"
            except:
                try:
                    episode = scrapertools.find_single_match(
                        scrapedtitle, '(?i)(?:Capitulo|Capítulo|Episodio)\s*(\d+)')
                    item.infoLabels['season'] = "1"
                    item.infoLabels['episode'] = episode
                    contentType = "episode"
                except:
                    contentType = "tvshow"
            scrapedtitle = scrapertools.decodeHtmlentities(scrapedtitle) + " "
            scrapedtitle = scrapedtitle.replace('Temporada', '')
            # Language flags present in the block, in display order.
            for flag, tag in (("ES.png", "[CAST]"), ("SUB.png", "[VOSE]"),
                              ("LA.png", "[LAT]"), ("EN.png", "[V.O]")):
                if flag in match:
                    scrapedtitle += tag
            itemlist.append(item.clone(action="findvideos", title=scrapedtitle,
                                       url=scrapedurl, fulltitle=scrapedtitle,
                                       contentType=contentType))
    itemlist.reverse()
    if itemlist and item.extra != "episodios":
        try:
            from core import tmdb
            tmdb.set_infoLabels_itemlist(itemlist, __modo_grafico__)
        except:
            pass
        itemlist.append(item.clone(channel="trailertools", title="Buscar Tráiler",
                                   action="buscartrailer", context="",
                                   text_color="magenta"))
    if item.category != "" and config.get_library_support():
        itemlist.append(Item(channel=item.channel,
                             title="Añadir esta temporada a la biblioteca", url=item.url,
                             action="add_serie_to_library", extra="episodios",
                             text_color="green", show=item.show))
    return itemlist
def episodios(item):
    """List the episodes of a cine24h series, filtered by season.

    When item.infoLabels carries a 'season', only that season's episodes
    are kept. Unless building the videolibrary (item.extra set), episode
    names and thumbs are refreshed from tmdb.
    """
    logger.info()
    itemlist = []
    data = httptools.downloadpage(item.url).data
    data = re.sub(r"\n|\r|\t| |<br>", "", data)
    patron = '<td class="MvTbImg B"><a href="([^"]+)".*?'  # url
    patron += '<td class="MvTbTtl"><a href="https://cine24h.net/episode/(.*?)/">([^<]+)</a>'  # title de episodios
    matches = scrapertools.find_multiple_matches(data, patron)
    for scrapedurl, scrapedtitle, scrapedname in matches:
        scrapedtitle = scrapedtitle.replace('--', '0')
        match = re.compile('(\d+)x(\d+)', re.DOTALL).findall(scrapedtitle)
        season, episode = match[0]
        if 'season' in item.infoLabels and int(item.infoLabels['season']) != int(season):
            continue
        title = "%sx%s: %s" % (season, episode.zfill(2), scrapedname)
        new_item = item.clone(title=title, url=scrapedurl, action="findvideos",
                              text_color=color3, fulltitle=title, contentType="episode")
        if 'infoLabels' not in new_item:
            new_item.infoLabels = {}
        new_item.infoLabels['season'] = season
        new_item.infoLabels['episode'] = episode.zfill(2)
        itemlist.append(new_item)
    # TODO no hacer esto si estamos añadiendo a la videoteca
    if not item.extra:
        # Obtenemos los datos de todos los capitulos de la temporada mediante multihilos
        tmdb.set_infoLabels(itemlist, __modo_grafico__)
        for i in itemlist:
            if i.infoLabels['title']:
                # Si el capitulo tiene nombre propio añadirselo al titulo del item
                i.title = "%sx%s %s" % (i.infoLabels['season'], i.infoLabels['episode'],
                                        i.infoLabels['title'])
            # FIX: dict.has_key() is deprecated; 'in' works on Python 2 and 3.
            if 'poster_path' in i.infoLabels:
                # Si el capitulo tiene imagen propia remplazar al poster
                i.thumbnail = i.infoLabels['poster_path']
    itemlist.sort(key=lambda it: int(it.infoLabels['episode']),
                  reverse=config.get_setting('orden_episodios', __channel__))
    tmdb.set_infoLabels_itemlist(itemlist, __modo_grafico__)
    # Opción "Añadir esta serie a la videoteca"
    if config.get_videolibrary_support() and len(itemlist) > 0:
        itemlist.append(
            Item(channel=__channel__, title="Añadir esta serie a la videoteca",
                 url=item.url, action="add_serie_to_library", extra="episodios",
                 show=item.show, category="Series", text_color=color1,
                 thumbnail=thumbnail_host, fanart=fanart_host))
    return itemlist
def episodios(item):
    """Collect every episode of a torrent series across all listing pages.

    Walks the site's pagination (when present), parses each page's episode
    list in either the "new" (<span>-based) or "old" ([Cap.NNN]) title
    format, and emits one Item per episode (range entries set multi=True).
    """
    logger.info()
    itemlist = []
    infoLabels = item.infoLabels
    data = re.sub(r"\n|\r|\t|\s{2,}", "", httptools.downloadpage(item.url).data)
    data = unicode(data, "iso-8859-1", errors="replace").encode("utf-8")
    calidad = item.quality
    # Discover all listing pages via the "Last" pagination link.
    pattern = '<ul class="%s">(.*?)</ul>' % "pagination"  # item.pattern
    pagination = scrapertools.find_single_match(data, pattern)
    if pagination:
        pattern = '<li><a href="([^"]+)">Last<\/a>'
        full_url = scrapertools.find_single_match(pagination, pattern)
        url, last_page = scrapertools.find_single_match(full_url, r'(.*?\/pg\/)(\d+)')
        list_pages = [item.url]
        for x in range(2, int(last_page) + 1):
            response = httptools.downloadpage('%s%s' % (url, x))
            if response.sucess:
                list_pages.append("%s%s" % (url, x))
    else:
        list_pages = [item.url]
    for index, page in enumerate(list_pages):
        data = re.sub(r"\n|\r|\t|\s{2,}", "", httptools.downloadpage(page).data)
        data = unicode(data, "iso-8859-1", errors="replace").encode("utf-8")
        data = data.replace("chapters", "buscar-list")  # Compatibilidad con mispelisy.series.com
        pattern = '<ul class="%s">(.*?)</ul>' % "buscar-list"  # item.pattern
        if scrapertools.find_single_match(data, pattern):
            data = scrapertools.get_match(data, pattern)
        else:
            logger.debug(item)
            logger.debug("data: " + data)
            return itemlist
        if "pelisyseries.com" in host:
            pattern = '<li[^>]*><div class.*?src="(?P<thumb>[^"]+)?".*?<a class.*?href="(?P<url>[^"]+).*?<h3[^>]+>(?P<info>.*?)?<\/h3>.*?<\/li>'
        else:
            pattern = '<li[^>]*><a href="(?P<url>[^"]+).*?<img.*?src="(?P<thumb>[^"]+)?".*?<h2[^>]+>(?P<info>.*?)?<\/h2>'
        matches = re.compile(pattern, re.DOTALL).findall(data)
        season = "1"
        for url, thumb, info in matches:
            if "pelisyseries.com" in host:
                # This layout swaps the url/thumb capture order.
                interm = url
                url = thumb
                thumb = interm
            if "<span" in info:  # new style
                pattern = ".*?[^>]+>.*?Temporada\s*(?P<season>\d+)?.*?Capitulo(?:s)?\s*(?P<episode>\d+)?" \
                          "(?:.*?(?P<episode2>\d+)?)<.+?<span[^>]+>(?P<lang>.*?)?<\/span>\s*Calidad\s*<span[^>]+>" \
                          "[\[]\s*(?P<quality>.*?)?\s*[\]]<\/span>"
                if "Especial" in info:  # Capitulos Especiales
                    pattern = ".*?[^>]+>.*?Temporada.*?\[.*?(?P<season>\d+).*?\].*?Capitulo.*?\[\s*(?P<episode>\d+).*?\]?(?:.*?(?P<episode2>\d+)?)<.+?<span[^>]+>(?P<lang>.*?)?<\/span>\s*Calidad\s*<span[^>]+>[\[]\s*(?P<quality>.*?)?\s*[\]]<\/span>"
                if not scrapertools.find_single_match(info, pattern):
                    # en caso de error de formato, creo uno básico
                    logger.debug("patron episodioNEW: " + pattern)
                    logger.debug(info)
                    info = '><strong>%sTemporada %s Capitulo 0</strong> - <span >Español Castellano</span> Calidad <span >[%s]</span>' % (
                        item.contentTitle, season, item.infoLabels['quality'])
                r = re.compile(pattern)
                match = [m.groupdict() for m in r.finditer(info)][0]
                if match['season'] is None:
                    match['season'] = season
                if match['episode'] is None:
                    match['episode'] = "0"
                if match['quality']:
                    item.quality = match['quality']
                if match["episode2"]:
                    multi = True
                    title = "%s (%sx%s-%s) [%s]" % (item.show, match["season"],
                                                    str(match["episode"]).zfill(2),
                                                    str(match["episode2"]).zfill(2),
                                                    match["lang"])
                    if not config.get_setting("unify") and match["quality"]:
                        # Si Titulos Inteligentes NO seleccionados:
                        title = "%s[%s]" % (title, match["quality"])
                else:
                    multi = False
                    title = "%s (%sx%s) [%s]" % (item.show, match["season"],
                                                 str(match["episode"]).zfill(2),
                                                 match["lang"])
                    if not config.get_setting("unify") and match["quality"]:
                        # Si Titulos Inteligentes NO seleccionados:
                        title = "%s[%s]" % (title, match["quality"])
            else:  # old style
                # Normalize the several historical [Cap.] formats first.
                if scrapertools.find_single_match(info, '\[\d{3}\]'):
                    info = re.sub(r'\[(\d{3}\])', r'[Cap.\1', info)
                elif scrapertools.find_single_match(info, '\[Cap.\d{2}_\d{2}\]'):
                    info = re.sub(r'\[Cap.(\d{2})_(\d{2})\]', r'[Cap.1\1_1\2]', info)
                elif scrapertools.find_single_match(info, '\[Cap.([A-Za-z]+)\]'):
                    info = re.sub(r'\[Cap.([A-Za-z]+)\]', '[Cap.100]', info)
                if scrapertools.find_single_match(info, '\[Cap.\d{2,3}'):
                    pattern = "\[(?P<quality>.*?)\].*?\[Cap.(?P<season>\d).*?(?P<episode>\d{2})(?:_(?P<season2>\d+)" \
                              "(?P<episode2>\d{2}))?.*?\].*?(?:\[(?P<lang>.*?)\])?"
                elif scrapertools.find_single_match(info, 'Cap.\d{2,3}'):
                    pattern = ".*?Temp.*?\s(?P<quality>.*?)\s.*?Cap.(?P<season>\d).*?(?P<episode>\d{2})(?:_(?P<season2>\d+)(?P<episode2>\d{2}))?.*?\s(?P<lang>.*)?"
                if not scrapertools.find_single_match(info, pattern):
                    # en caso de error de formato, creo uno básico
                    logger.debug("patron episodioOLD: " + pattern)
                    logger.debug(info)
                    info = '%s [%s][Cap.%s00][Español]' % (
                        item.contentTitle, item.infoLabels['quality'], season)
                r = re.compile(pattern)
                match = [m.groupdict() for m in r.finditer(info)][0]
                str_lang = ""
                if match['quality']:
                    item.quality = match['quality']
                if match["lang"] is not None:
                    str_lang = "[%s]" % match["lang"]
                    item.quality = "%s %s" % (item.quality, match['lang'])
                if match["season2"] and match["episode2"]:
                    multi = True
                    if match["season"] == match["season2"]:
                        title = "%s (%sx%s-%s) %s" % (item.show, match["season"],
                                                      match["episode"],
                                                      match["episode2"], str_lang)
                        if not config.get_setting("unify") and match["quality"]:
                            # Si Titulos Inteligentes NO seleccionados:
                            title = "%s[%s]" % (title, match["quality"])
                    else:
                        title = "%s (%sx%s-%sx%s) %s" % (item.show, match["season"],
                                                         match["episode"],
                                                         match["season2"],
                                                         match["episode2"], str_lang)
                        if not config.get_setting("unify") and match["quality"]:
                            # Si Titulos Inteligentes NO seleccionados:
                            title = "%s[%s]" % (title, match["quality"])
                else:
                    title = "%s (%sx%s) %s" % (item.show, match["season"],
                                               match["episode"], str_lang)
                    if not config.get_setting("unify") and match["quality"]:
                        # Si Titulos Inteligentes NO seleccionados:
                        title = "%s[%s]" % (title, match["quality"])
                    multi = False
            season = match['season']
            episode = match['episode']
            logger.debug("title: " + title + " / url: " + url + " / calidad: " +
                         item.quality + " / multi: " + str(multi) + " / Season: " +
                         str(season) + " / EpisodeNumber: " + str(episode))
            itemlist.append(
                Item(channel=item.channel, action="findvideos", title=title, url=url,
                     thumbnail=thumb, quality=item.quality, multi=multi,
                     contentSeason=season, contentEpisodeNumber=episode,
                     infoLabels=infoLabels))
    # order list
    tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
    if len(itemlist) > 1:
        itemlist = sorted(itemlist, key=lambda it: (int(it.contentSeason),
                                                    int(it.contentEpisodeNumber)))
    if config.get_videolibrary_support() and len(itemlist) > 0:
        itemlist.append(
            item.clone(title="Añadir esta serie a la videoteca",
                       action="add_serie_to_library", extra="episodios",
                       quality=calidad))
    return itemlist
def list_all(item):
    """Show one 25-item batch of the series grid, with '>>' pagination.

    item.first is the offset into the page's matches already consumed; when
    the batch exhausts the current page, the next remote page is resolved
    from the "nav-links" block.
    """
    logger.info()
    itemlist = list()
    # FIX: renamed from 'next', which shadowed the builtin.
    reached_end = False
    soup = create_soup(item.url)
    matches = soup.find(
        "ul", class_=re.compile(r"MovieList Rows AX A04 B03 C20 D03 E20 Alt"))
    if not matches:
        return itemlist
    matches = matches.find_all("article")
    first = item.first
    last = first + 25
    if last >= len(matches):
        last = len(matches)
        reached_end = True
    for elem in matches[first:last]:
        url = elem.a["href"]
        title = elem.find(["div", "ul"], class_="Title").text
        thumb = elem.img["src"]
        itemlist.append(
            Item(channel=item.channel, url=url, title=title, thumbnail=thumb,
                 action="seasons", contentSerieName=title,
                 context=filtertools.context(item, list_language, list_quality)))
    tmdb.set_infoLabels_itemlist(itemlist, True)
    if not reached_end:
        # More results remain on this page: point the pager back at it.
        url_next_page = item.url
        first = last
    else:
        try:
            url_next_page = soup.find("div", class_="nav-links").find_all("a")[-1]
            # Anchors with visible text are page numbers, not the next arrow.
            if url_next_page.text:
                url_next_page = ''
            else:
                url_next_page = url_next_page["href"]
        except:
            return itemlist
        url_next_page = '%s' % url_next_page
        first = 0
    if url_next_page and len(matches) > 26:
        itemlist.append(
            Item(channel=item.channel, title="Siguiente >>", url=url_next_page,
                 action='list_all', first=first))
    return itemlist
def peliculas(item):
    """List movie cards; 'mapa' mode uses a simplified card layout.

    Builds a tmdb poster filter from each thumbnail so artwork matches,
    tags titles with the available audio languages and quality, and
    appends the pagination / page-picker entries.
    """
    logger.info()
    itemlist = []
    if "valores" in item and item.valores:
        itemlist.append(item.clone(action="", title=item.valores, text_color=color4))
    action = "findvideos"
    data = httptools.downloadpage(item.url).data
    bloque = scrapertools.find_multiple_matches(
        data, '<div class="media-card "(.*?)<div class="hidden-info">')
    for match in bloque:
        if item.extra == "mapa":
            patron = '.*?src="([^"]+)".*?href="([^"]+)">([^<]+)</a>'
            matches = scrapertools.find_multiple_matches(match, patron)
            for scrapedthumbnail, scrapedurl, scrapedtitle in matches:
                url = urlparse.urljoin(host, scrapedurl)
                filter_thumb = scrapedthumbnail.replace(
                    "https://image.tmdb.org/t/p/w200_and_h300_bestv2", "")
                filter_list = {"poster_path": filter_thumb}.items()
                itemlist.append(
                    Item(channel=item.channel, action=action, title=scrapedtitle,
                         url=url, extra="media", thumbnail=scrapedthumbnail,
                         contentTitle=scrapedtitle, fulltitle=scrapedtitle,
                         text_color=color2, contentType="movie",
                         infoLabels={'filtro': filter_list}))
        else:
            patron = '<div class="audio-info">(.*?)<div (class="quality.*?)src="([^"]+)".*?href="([^"]+)">([^<]+)</a>'
            matches = scrapertools.find_multiple_matches(match, patron)
            for idiomas, calidad, scrapedthumbnail, scrapedurl, scrapedtitle in matches:
                calidad = scrapertools.find_single_match(
                    calidad, '<div class="quality-info".*?>([^<]+)</div>')
                if calidad:
                    calidad = calidad.capitalize().replace("Hd", "HD")
                audios = []
                for token, tag in (("medium-es", 'CAST'), ("medium-vs", 'VOSE'),
                                   ("medium-la", 'LAT')):
                    if token in idiomas:
                        audios.append(tag)
                if "medium-en" in idiomas or 'medium-"' in idiomas:
                    audios.append('VO')
                title = "%s [%s]" % (scrapedtitle, "/".join(audios))
                if calidad:
                    title += " (%s)" % calidad
                url = urlparse.urljoin(host, scrapedurl)
                filter_thumb = scrapedthumbnail.replace(
                    "https://image.tmdb.org/t/p/w200_and_h300_bestv2", "")
                filter_list = {"poster_path": filter_thumb}.items()
                itemlist.append(
                    Item(channel=item.channel, action=action, title=title, url=url,
                         extra="media", thumbnail=scrapedthumbnail,
                         contentTitle=scrapedtitle, fulltitle=scrapedtitle,
                         text_color=color2, contentType="movie", quality=calidad,
                         language=audios, infoLabels={'filtro': filter_list}))
    next_page = scrapertools.find_single_match(data, 'href="([^"]+)"[^>]+>Siguiente')
    tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
    if next_page != "" and item.title != "":
        itemlist.append(
            Item(channel=item.channel, action="peliculas", title=">> Siguiente",
                 url=next_page, thumbnail=item.thumbnail, extra=item.extra,
                 text_color=color3))
    if not config.get_setting("last_page", item.channel) and config.is_xbmc():
        itemlist.append(
            Item(channel=item.channel, action="select_page", title="Ir a página...",
                 url=next_page, thumbnail=item.thumbnail, text_color=color5))
    return itemlist
def lista(item):
    """List pelisplus movies or series; search mode filters by item.extra.

    Missing plots/fanart are completed by fetching each detail page. When
    not searching, an explicit page counter drives the 'Siguiente >>>'
    entry.
    """
    logger.info()
    itemlist = []
    if 'series/' in item.extra:
        accion = 'temporadas'
        tipo = 'tvshow'
    else:
        accion = 'findvideos'
        tipo = 'movie'
    data = httptools.downloadpage(item.url).data
    if item.title != 'Buscar':
        patron = '<img.*?width="147" heigh="197".*?src="([^"]+)".*?>.*?.<i class="icon online-play"><\/i>.*?.<h2 ' \
                 'class="title title-.*?">.*?.<a href="([^"]+)" title="([^"]+)">.*?>'
        actual = scrapertools.find_single_match(
            data, '<a href="http:\/\/www.pelisplus.tv\/.*?\/pag-([^p]+)pag-2" '
                  'class="page bicon last"><<\/a>')
    else:
        patron = '<img data-original="([^"]+)".*?width="147" heigh="197".*?src=.*?>.*?\n<i class="icon ' \
                 'online-play"><\/i>.*?\n<h2 class="title title-.*?">.*?\n<a href="([^"]+)" title="([^"]+)">.*?>'
        actual = ''
    matches = re.compile(patron, re.DOTALL).findall(data)
    for scrapedthumbnail, scrapedurl, scrapedtitle in matches:
        url = scrapedurl
        title = scrapertools.decodeHtmlentities(scrapedtitle)
        thumbnail = scrapedthumbnail
        filtro_thumb = scrapedthumbnail.replace("https://image.tmdb.org/t/p/w154", "")
        # Nombre del campo a filtrar y valor en los resultados de la api de tmdb
        filtro_list = {"poster_path": filtro_thumb}
        filtro_list = filtro_list.items()
        if item.title != 'Buscar':
            itemlist.append(
                Item(channel=item.channel, contentType=tipo, action=accion, title=title,
                     url=scrapedurl, thumbnail=thumbnail, fulltitle=scrapedtitle,
                     infoLabels={'filtro': filtro_list}, contentTitle=scrapedtitle,
                     contentSerieName=scrapedtitle, extra=item.extra,
                     context=autoplay.context))
        else:
            item.extra = item.extra.rstrip('s/')
            if item.extra in url:
                itemlist.append(
                    Item(channel=item.channel, contentType=tipo, action=accion,
                         title=scrapedtitle, url=scrapedurl, thumbnail=scrapedthumbnail,
                         fulltitle=scrapedtitle, infoLabels={'filtro': filtro_list},
                         contentTitle=scrapedtitle, contentSerieName=scrapedtitle,
                         extra=item.extra, context=autoplay.context))
    tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
    # Encuentra los elementos que no tienen plot y carga las paginas
    # correspondientes para obtenerlo.
    # FIX: loop variable renamed from 'item', which shadowed the parameter and
    # made the pagination block below read the last list element instead.
    for it in itemlist:
        if it.infoLabels['plot'] == '':
            data = httptools.downloadpage(it.url).data
            it.fanart = scrapertools.find_single_match(
                data, 'meta property="og:image" content="([^"]+)" \/>')
            it.plot = scrapertools.find_single_match(
                data, '<span>Sinopsis:<\/span>.([^<]+)<span '
                      'class="text-detail-hide"><\/span>.<\/p>')
    # Paginacion
    if item.title != 'Buscar' and actual != '':
        if itemlist != []:
            next_page = str(int(actual) + 1)
            next_page_url = host + item.extra + 'pag-' + next_page
            itemlist.append(
                Item(channel=item.channel, action="lista", title='Siguiente >>>',
                     url=next_page_url,
                     thumbnail='https://s16.postimg.org/9okdu7hhx/siguiente.png',
                     extra=item.extra))
    return itemlist
def episodios(item):
    """Episode list for a playmax ficha.

    Fills in missing infoLabels (tmdb_id, year, genre, plot) scraped from
    the page, builds one Item per unique SxE episode, offers to add the
    show to the library (marking it if already there), and appends the
    per-ficha actions when not in menu-info mode.
    """
    logger.info()
    itemlist = []
    # Download the page
    data = httptools.downloadpage(item.url).data
    data = re.sub(r"\n|\r|\t|\s{2}| |<br>", "", data)
    # Complete missing metadata directly from the page markup.
    if not item.infoLabels["tmdb_id"]:
        item.infoLabels["tmdb_id"] = scrapertools.find_single_match(
            data, '<a href="https://www.themoviedb.org/[^/]+/(\d+)')
        item.infoLabels["year"] = scrapertools.find_single_match(
            data, 'class="e_new">(\d{4})')
    if not item.infoLabels["genre"]:
        item.infoLabels["genre"] = ", ".join(
            scrapertools.find_multiple_matches(
                data, '<a itemprop="genre"[^>]+>([^<]+)</a>'))
    if not item.infoLabels["plot"]:
        item.infoLabels["plot"] = scrapertools.find_single_match(
            data, 'itemprop="description">([^<]+)</div>')
    # Session token required by the c_enlaces_n.php endpoint.
    dc = scrapertools.find_single_match(data, "var dc_ic = '\?dc=([^']+)'")
    # NOTE(review): '<divd' looks like a typo for '<div' — confirm against the
    # live site's markup before changing the pattern.
    patron = '<divd class="capitulo puntossuspensivos.*?c_name="([^"]+)" c_num="([^"]+)"' \
             '.*?load_f_links\((\d+)\s*,\s*(\d+)'
    matches = scrapertools.find_multiple_matches(data, patron)
    lista_epis = []
    for title, episodio, c_id, ficha in matches:
        episodio = episodio.replace("X", "x")
        # Skip duplicate SxE entries.
        if episodio in lista_epis:
            continue
        lista_epis.append(episodio)
        url = "http://playmax.mx/c_enlaces_n.php?ficha=%s&c_id=%s&dc=%s" % (
            ficha, c_id, dc)
        title = "%s - %s" % (episodio, title)
        new_item = Item(channel=item.channel, action="findvideos", title=title,
                        url=url, thumbnail=item.thumbnail, fanart=item.fanart,
                        show=item.show, infoLabels=item.infoLabels,
                        text_color=color2, referer=item.url,
                        contentType="episode")
        # episodio is "SxE"; tolerate malformed values.
        try:
            new_item.infoLabels["season"], new_item.infoLabels[
                "episode"] = episodio.split('x', 1)
        except:
            pass
        itemlist.append(new_item)
    # Newest episodes first.
    itemlist.sort(key=lambda it: (it.infoLabels["season"],
                                  it.infoLabels["episode"]),
                  reverse=True)
    if __modo_grafico__:
        tmdb.set_infoLabels_itemlist(itemlist, __modo_grafico__)
    library_path = config.get_library_path()
    if config.get_library_support() and not item.extra:
        title = "Añadir serie a la biblioteca"
        # Scan the local library (not on SMB shares) to detect whether this
        # show is already present, and with which channels.
        if item.infoLabels["imdb_id"] and not library_path.lower().startswith(
                "smb://"):
            try:
                from core import filetools
                path = filetools.join(library_path, "SERIES")
                files = filetools.walk(path)
                for dirpath, dirname, filename in files:
                    if item.infoLabels["imdb_id"] in dirpath:
                        for f in filename:
                            if f != "tvshow.nfo":
                                continue
                            from core import library
                            head_nfo, it = library.read_nfo(
                                filetools.join(dirpath, dirname, f))
                            canales = it.library_urls.keys()
                            canales.sort()
                            # Highlight playmax if it is already a source.
                            if "playmax" in canales:
                                canales.pop(canales.index("playmax"))
                                canales.insert(0, "[COLOR red]playmax[/COLOR]")
                            title = "Serie ya en tu biblioteca. [%s] ¿Añadir?" % ",".join(
                                canales)
                            break
            except:
                import traceback
                logger.info(traceback.format_exc())
                pass
        itemlist.append(
            item.clone(action="add_serie_to_library", title=title,
                       text_color=color5, extra="episodios###library"))
    if itemlist and not __menu_info__:
        ficha = scrapertools.find_single_match(item.url, '-f(\d+)-')
        itemlist.extend(acciones_fichas(item, sid, ficha))
    return itemlist
def list_all(item):
    """List movies/episodes from a "spotlight" page, 19 entries at a time.

    item.first is the index of the first scraped match to render; the
    function paginates client-side over the already-scraped matches and
    only asks the site for its real next page once they are exhausted.

    Fix: the flag was named `next`, shadowing the builtin; renamed to
    `exhausted` (behavior unchanged).
    """
    logger.info()
    itemlist = []
    # True once the current page's matches are exhausted and the real
    # "next page" URL must be scraped from the site's paginator.
    exhausted = False
    data = get_source(item.url)
    patron = 'spotlight_container>.*?image lazy data-original=(.*?)>.*?<div class=spotlight_title>(.*?)<'
    patron += '(.*?) sres>(\d{4})<.*?playLink href=(.*?)>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    first = item.first  # assumes item.first is an int set by the caller — TODO confirm
    last = first + 19
    if last > len(matches):
        last = len(matches)
        exhausted = True
    for scrapedthumbnail, scrapedtitle, type_data, year, scrapedurl in matches[first:last]:
        url = scrapedurl
        title = scrapedtitle
        season = scrapertools.find_single_match(
            type_data, 'class=title-season>Temporada<.*?> (\d+) <')
        episode = scrapertools.find_single_match(
            type_data, 'class=title-season>Episodio<.*?> (\d+) <')
        # A season or episode marker means the entry is a tv show.
        if season != '' or episode != '':
            item.type = 'tvshow'
        else:
            item.type = 'movie'
        new_item = Item(channel=item.channel, title=title, url=url,
                        thumbnail=scrapedthumbnail, type=item.type,
                        infoLabels={'year': year})
        if item.type == 'tvshow':
            new_item.action = 'episodios'
            new_item.contentSerieName = scrapedtitle
            season = season.strip()
            episode = episode.strip()
            if season == '':
                # NOTE(review): anime entries get int 1 while scraped seasons
                # are strings — confirm downstream consumers accept both.
                if 'Anime' in item.title:
                    season = 1
                else:
                    season = scrapertools.find_single_match(url, '.*?temp-(\d+)')
                new_item.contentSeasonNumber = season
            else:
                new_item.contentSeasonNumber = season
            if episode != '':
                new_item.contentEpisodeNumber = episode
            if season != '' and episode != '':
                new_item.title = '%s %sx%s' % (new_item.title, season, episode)
            elif episode == '':
                new_item.title = '%s Temporada %s' % (new_item.title, season)
        else:
            new_item.action = 'findvideos'
            new_item.contentTitle = scrapedtitle
        itemlist.append(new_item)
    tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
    # Pagination: keep slicing the same page until exhausted, then follow
    # the site's "Next" link.
    if not exhausted:
        url_next_page = item.url
        first = last
    else:
        url_next_page = scrapertools.find_single_match(
            data, "<a href=([^ ]+) class=page-link aria-label=Next>")
        first = 0
    if url_next_page:
        itemlist.append(item.clone(title="Siguiente >>", url=url_next_page,
                                   action='list_all', first=first))
    return itemlist
def lista(item):
    """Paginated movie listing, 24 entries per screen.

    Each site page holds up to two screens' worth of entries.
    item.next_page toggles the halves: anything but 'b' serves the first
    24 matches and re-queues the same URL marked 'b'; 'b' serves the rest
    and scrapes the real next-page URL from the paginator ("PROXIMO").
    """
    logger.info()
    itemlist = []
    duplicado = []
    max_items = 24
    next_page_url = ''
    data = httptools.downloadpage(item.url).data
    data = re.sub(r"\n|\r|\t| |<br>", "", data)
    data = scrapertools.decodeHtmlentities(data)
    patron = '"box_image_b.*?"><a href="([^"]+)" title=".*?><img src="([^"]+)" alt="(.*?)(\d{4}).*?"'
    matches = re.compile(patron, re.DOTALL).findall(data)
    if item.next_page != 'b':
        # First half: re-use the same URL so the next call serves the rest.
        if len(matches) > max_items:
            next_page_url = item.url
            matches = matches[:max_items]
            next_page = 'b'
    else:
        # Second half: get the real next-page URL from the site's paginator.
        matches = matches[max_items:]
        next_page = 'a'
        patron_next_page = '<a class="page dark gradient" href="([^"]+)">PROXIMO'
        matches_next_page = re.compile(patron_next_page, re.DOTALL).findall(data)
        if len(matches_next_page) > 0:
            next_page_url = urlparse.urljoin(item.url, matches_next_page[0])
    for scrapedurl, scrapedthumbnail, scrapedtitle, scrapedyear in matches:
        url = scrapedurl
        thumbnail = scrapedthumbnail
        # Strip parenthesised notes, slashes and '!' from the raw title.
        contentTitle = re.sub(r"\(.*?\)|\/.*?|\(|\)|.*?\/|!", "", scrapedtitle)
        title = scrapertools.decodeHtmlentities(
            contentTitle) + '(' + scrapedyear + ')'
        fanart = ''
        plot = ''
        # Skip entries whose url was already listed.
        if url not in duplicado:
            itemlist.append(
                Item(channel=item.channel,
                     action='findvideos',
                     title=title,
                     url=url,
                     thumbnail=thumbnail,
                     plot=plot,
                     fanart=fanart,
                     contentTitle=contentTitle,
                     infoLabels={'year': scrapedyear}))
            duplicado.append(url)
    tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
    # next_page is always bound when next_page_url is non-empty (it is set
    # in the same branches that set next_page_url).
    if next_page_url != '':
        itemlist.append(
            Item(channel=item.channel,
                 action="lista",
                 title='Siguiente >>>',
                 url=next_page_url,
                 thumbnail='https://s16.postimg.org/9okdu7hhx/siguiente.png',
                 extra=item.extra,
                 next_page=next_page))
    return itemlist
def episodios(item):
    """Episode list with per-episode language flags.

    Normalises the page markup so each flag image becomes an <idioma> tag,
    then builds one Item per episode carrying its languages.

    Fix: the original built ONE infoLabels dict outside the loop and
    mutated it for every episode while passing the same reference to each
    Item, so all episodes could end up sharing the last episode's
    season/episode.  A fresh copy is now made per episode.
    """
    logger.info()
    itemlist = []
    data = httptools.downloadpage(item.url).data
    data = re.sub(r"\n|\r|\t|\s{2}| |<Br>|<BR>|<br>|<br/>|<br />|-\s", "", data)
    data = re.sub(r"<!--.*?-->", "", data)
    # Turn the flag <img> runs into a parseable <idioma>...</idioma> span.
    data = re.sub(r"a> <img src=/assets/img/banderas/", "a><idioma>", data)
    data = re.sub(r"<img src=/assets/img/banderas/", "|", data)
    data = re.sub(r"\.png border='\d+' height='\d+' width='\d+'[^>]+>\s+<",
                  "</idioma><", data)
    data = re.sub(r"\.png border='\d+' height='\d+' width='\d+'[^>]+>", "", data)
    # Show thumbnail from the T1 tab, falling back to the item's own.
    patron = '<div id="T1".*?'
    patron += "<img src='([^']+)'"
    matches = re.compile(patron, re.DOTALL).findall(data)
    if len(matches) > 0:
        thumbnail = matches[0]
    else:
        thumbnail = item.thumbnail
    patron = "<a href='([^']+)'>(.*?)</a><idioma>(.*?)</idioma>"
    matches = re.compile(patron, re.DOTALL).findall(data)
    for scrapedurl, scrapedtitle, scrapedidioma in matches:
        idioma = ""
        filter_langs = []
        for i in scrapedidioma.split("|"):
            # Unknown flags map to "OVOS" (original audio, original subs).
            idioma += " [" + IDIOMAS.get(i, "OVOS") + "]"
            filter_langs.append(IDIOMAS.get(i, "OVOS"))
        season_episode = scrapertools.get_season_and_episode(scrapedtitle)
        title = '%s %s %s' % (season_episode, scrapedtitle, idioma)
        season_episode = season_episode.split('x')
        # Copy per episode so every Item owns its own season/episode values.
        infoLabels = dict(item.infoLabels)
        infoLabels['season'] = season_episode[0]
        infoLabels['episode'] = season_episode[1]
        itemlist.append(
            Item(channel=item.channel,
                 title=title,
                 url=urlparse.urljoin(HOST, scrapedurl),
                 action="findvideos",
                 show=item.show,
                 thumbnail=thumbnail,
                 plot="",
                 language=filter_langs,
                 infoLabels=infoLabels))
    itemlist = filtertools.get_links(itemlist, item, list_idiomas, list_quality)
    tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
    # "Add this series to the XBMC videolibrary" option
    if config.get_videolibrary_support() and len(itemlist) > 0:
        itemlist.append(
            item.clone(title="Añadir esta serie a la videoteca",
                       action="add_serie_to_library",
                       extra="episodios"))
    return itemlist
def wrapper(*args):
    """Generic scraper driver built around the decorated channel function.

    Calls the decorated `func` to get a dict of scraping parameters
    (item, patron, patronBlock, patronNext, hooks, ...), downloads and
    normalises the page, scrapes either per-block or flat, then applies
    pagination, tmdb info, renumbering, videolibrary and download
    entries, and language filtering.

    Fix: the download-section guard compared `function` against the typo
    'finvideos'; corrected to 'findvideos' so movie listings also get
    the download entries.
    """
    function = func.__name__
    itemlist = []
    args = func(*args)
    item = args['item']
    # Optional parameters with their defaults.
    action = args['action'] if 'action' in args else 'findvideos'
    anime = args['anime'] if 'anime' in args else ''
    addVideolibrary = args[
        'addVideolibrary'] if 'addVideolibrary' in args else True
    search = args['search'] if 'search' in args else ''
    blacklist = args['blacklist'] if 'blacklist' in args else []
    data = args['data'] if 'data' in args else ''
    patron = args['patron'] if 'patron' in args else args[
        'patronMenu'] if 'patronMenu' in args else ''
    # Headers: explicit arg > channel-module global > none.
    if 'headers' in args:
        headers = args['headers']
    elif 'headers' in func.__globals__:
        headers = func.__globals__['headers']
    else:
        headers = ''
    patronNext = args['patronNext'] if 'patronNext' in args else ''
    patronBlock = args['patronBlock'] if 'patronBlock' in args else ''
    typeActionDict = args[
        'type_action_dict'] if 'type_action_dict' in args else {}
    typeContentDict = args[
        'type_content_dict'] if 'type_content_dict' in args else {}
    debug = args['debug'] if 'debug' in args else False
    if 'pagination' in args:
        pagination = args['pagination'] if args['pagination'] else 20
    else:
        pagination = ''
    lang = args['deflang'] if 'deflang' in args else ''
    pag = item.page if item.page else 1  # pagination
    matches = []
    log('PATRON= ', patron)
    if not data:
        data = httptools.downloadpage(
            item.url, headers=headers,
            ignore_response_code=True).data.replace("'", '"')
        data = re.sub('\n|\t', ' ', data)
        data = re.sub(r'>\s+<', '> <', data)
        # replace all ' with " and eliminate newline, so we don't need to worry about
        log('DATA =', data)
    if patronBlock:
        # Scrape block by block, applying per-block language/quality info.
        blocks = scrapertoolsV2.find_multiple_matches_groups(data, patronBlock)
        block = ""
        for bl in blocks:
            blockItemlist, blockMatches = scrapeBlock(
                item, args, bl['block'], patron, headers, action, pagination,
                debug, typeContentDict, typeActionDict, blacklist, search, pag,
                function, lang)
            for it in blockItemlist:
                if 'lang' in bl:
                    it.contentLanguage, it.title = scrapeLang(
                        bl, it.contentLanguage, it.title)
                if 'quality' in bl and bl['quality']:
                    it.quality = bl['quality'].strip()
                    it.title = it.title + typo(bl['quality'].strip(),
                                               '_ [] color kod')
            log('BLOCK ', '=', block)
            itemlist.extend(blockItemlist)
            matches.extend(blockMatches)
    elif patron:
        itemlist, matches = scrapeBlock(item, args, data, patron, headers,
                                        action, pagination, debug,
                                        typeContentDict, typeActionDict,
                                        blacklist, search, pag, function, lang)
    checkHost(item, itemlist)
    if 'itemlistHook' in args:
        itemlist = args['itemlistHook'](itemlist)
    if patronNext:
        nextPage(itemlist, item, data, patronNext, 2)
    # next page for pagination
    if pagination and len(matches) >= pag * pagination:
        itemlist.append(
            Item(channel=item.channel,
                 action=item.action,
                 contentType=item.contentType,
                 title=typo(config.get_localized_string(30992),
                            'color kod bold'),
                 url=item.url,
                 args=item.args,
                 page=pag + 1,
                 thumbnail=thumb()))
    if action != 'play' and function != 'episodios' and 'patronMenu' not in args:
        tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
    if anime:
        from specials import autorenumber
        if function == 'episodios' or item.action == 'episodios':
            autorenumber.renumber(itemlist, item, 'bold')
        else:
            autorenumber.renumber(itemlist)
    if addVideolibrary and (item.infoLabels["title"] or item.fulltitle):
        # item.fulltitle = item.infoLabels["title"]
        videolibrary(itemlist, item, function=function)
    # Fixed typo: was 'finvideos', which never matched.
    if config.get_setting('downloadenabled') and (
            function == 'episodios' or function == 'findvideos'):
        download(itemlist, item, function=function)
    if 'patronMenu' in args:
        itemlist = thumb(itemlist, genre=True)
    if 'fullItemlistHook' in args:
        itemlist = args['fullItemlistHook'](itemlist)
    itemlist = filterLang(item, itemlist)
    return itemlist
def list_all(item):
    """List movies/series; dynamic types POST to the pagination endpoint.

    item.type selects both the scraping source (static page vs POSTed
    pagination endpoint) and the resulting action/contentType.

    Fix: the pagination locals were named `type` and `id`, shadowing the
    builtins; renamed to `page_type`/`page_id` (the `type=`/`id=` keyword
    arguments passed to item.clone are unchanged).
    """
    logger.info()
    itemlist = []
    if item.type in ['serie', 'recents']:
        contentType = 'serie'
        action = 'seasons'
    else:
        contentType = 'pelicula'
        action = 'findvideos'
    if item.type not in ['normal', 'seccion', 'serie']:
        # Dynamic listing: fetched by POSTing to the pagination endpoint.
        post = {
            'page': item.page,
            'type': item.type,
            'slug': item.slug,
            'id': item.id
        }
        post = urllib.urlencode(post)
        data = httptools.downloadpage(item.url, post=post,
                                      headers=CHANNEL_HEADERS).data
        data = re.sub(r'"|\n|\r|\t| |<br>|\s{2,}', "", data)
        patron = '<a href=(.*?)><figure><img.*?src=(.*?) alt=.*?<p>(.*?)<\/p><span>(\d{4})<\/span>'
    else:
        data = get_source(item.url)
        patron = 'item-%s><a href=(.*?)><figure><img.*?data-src=(.*?) alt=.*?<p>(.*?)<\/p><span>(\d{4})</span>' % contentType
    matches = scrapertools.find_multiple_matches(data, patron)
    for scrapedurl, scrapedthumbnail, scrapedtitle, scrapedyear in matches:
        url = host + scrapedurl + 'p001/'
        thumbnail = scrapedthumbnail
        contentTitle = scrapedtitle
        title = contentTitle
        year = scrapedyear
        new_item = item.clone(action=action, title=title, url=url,
                              thumbnail=thumbnail, plot="", fanart="",
                              infoLabels={'year': year})
        if contentType == 'serie':
            new_item.contentSerieName = title
        else:
            new_item.contentTitle = title
        itemlist.append(new_item)
    tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
    # Pagination: the "butmore" button carries page/id/type for the endpoint.
    next_page_valid = scrapertools.find_single_match(
        data, '<div class=butmore(?: site=series|) page=(.*?) id=(.*?) '
              'type=(.*?) limit=.*?>')
    if item.type != 'normal' and (len(itemlist) > 19 or next_page_valid):
        page_type = item.type
        if item.type == 'serie':
            page_type = 'recents'
        if next_page_valid:
            page = str(int(next_page_valid[0]) + 1)
            if item.type != 'recents':
                page_id = next_page_valid[1]
                page_type = next_page_valid[2]
            else:
                page_id = ''
        else:
            if not item.page:
                item.page = "1"
            page = str(int(item.page) + 1)
            page_id = item.id
        if page_type == 'recents':
            type_pagination = '/series/pagination/'
        else:
            type_pagination = '/pagination/'
        url = host + type_pagination
        itemlist.append(
            item.clone(action="list_all", title='Siguiente >>>', page=page,
                       url=url, id=page_id, type=page_type))
    return itemlist
def recientes(item):
    """List recently-aired anime from the "emision" block.

    Derives a clean show title from the episode title, decides the action
    (episodes, direct findvideos for 'recientes', peliculas for movies)
    and tags every entry with a Japanese-language tmdb filter.
    """
    logger.info()
    item.contentType = "tvshow"
    itemlist = []
    data = httptools.downloadpage(item.url).data
    bloque = scrapertools.find_single_match(data,
                                            '<ul class="emision"(.*?)</ul>')
    patron = '<li><a href="([^"]+)" title="([^"]+)".*?src="([^"]+)"'
    matches = scrapertools.find_multiple_matches(bloque, patron)
    for url, title, thumb in matches:
        url = host + url
        # Cut the title at the episode-number / language suffix.
        try:
            contentTitle = re.split(
                r"(?i) \d+ (?:Sub Español|Audio Español|Español Latino)",
                title)[0]
        except:
            contentTitle = ""
        # Strip format/language tags left in the show name.
        contentTitle = re.sub(
            r"(?i) Ova| Especiales| \(Pelicula[s]*\)| \(Película[s]*\)| Sub| Español| Peliculas| Audio| Latino",
            "", contentTitle)
        tipo = "tvshow"
        show = contentTitle
        action = "episodios"
        context = renumbertools.context(item)
        if item.extra == "recientes":
            action = "findvideos"
            context = ""
        if not item.extra and (url.endswith("-pelicula/")
                               or url.endswith("-pelicula")):
            tipo = "movie"
            show = ""
            action = "peliculas"
        if not thumb.startswith("http"):
            # Protocol-relative thumbnail.
            thumb = "http:%s" % thumb
            # NOTE(review): forcing findvideos here (only for non-http thumbs)
            # looks accidental — confirm intended behavior.
            action = "findvideos"
        # tmdb filter: restrict matches to Japanese-language originals.
        infoLabels = {'filtro': {"original_language": "ja"}.items()}
        itemlist.append(
            item.clone(action=action,
                       title=title,
                       url=url,
                       thumbnail=thumb,
                       text_color=color3,
                       contentTitle=contentTitle,
                       contentSerieName=show,
                       infoLabels=infoLabels,
                       thumb_=thumb,
                       contentType=tipo,
                       context=context))
    # Best-effort tmdb enrichment; keep the scraped thumbs in 'extra' mode.
    try:
        from core import tmdb
        tmdb.set_infoLabels_itemlist(itemlist, __modo_grafico__)
        if item.extra and itemlist:
            for it in itemlist:
                it.thumbnail = it.thumb_
    except:
        pass
    return itemlist
def pelis(item):
    """List torrent movies fetched as JSON through the kproxy proxy.

    Warms up a kproxy session, resolves the proxied URL, loads the JSON
    movie list and renders 25 entries per page, preferring the 1080p
    magnet over the 720p one.

    Fix: the rating guard read `if punt and not 0:` — `not 0` is always
    True, so the test reduces to the truthiness of `punt`; simplified.
    """
    logger.info()
    itemlist = []
    # Establish the proxy session, then resolve the real (proxied) URL
    # from the meta-refresh redirect.
    httptools.downloadpage("https://kproxy.com/")
    url = "https://kproxy.com/doproxy.jsp"
    post = "page=%s&x=34&y=14" % urllib.quote(item.url)
    response = httptools.downloadpage(url, post, follow_redirects=False).data
    url = scrapertools.find_single_match(
        response, '<meta http-equiv="refresh".*?url=([^"]+)"')
    data = httptools.downloadpage(url).data
    lista = jsontools.load(data)
    # 25 entries per screen; extra == "next" serves the second half.
    if item.extra == "next":
        lista_ = lista[25:]
    else:
        lista_ = lista[:25]
    for i in lista_:
        punt = i.get("puntuacio", "")
        valoracion = ""
        if punt:
            valoracion = " (Val: %s)" % punt
        # Prefer the 1080p magnet, falling back to 720p.
        if i.get("magnets", {}).get("M1080", {}).get("magnet", ""):
            url = i.get("magnets", {}).get("M1080", {}).get("magnet", "")
            calidad = "%s" % i.get("magnets", {}).get("M1080", {}).get(
                "quality", "")
        else:
            url = i.get("magnets", {}).get("M720", {}).get("magnet", "")
            calidad = "%s" % (i.get("magnets", {}).get("M720", {}).get(
                "quality", ""))
        if not url:
            continue
        title = "%s %s%s" % (i.get("nom", ""), valoracion, calidad)
        thumbnail = ""
        fanart = ""
        if i.get("posterurl", ""):
            thumbnail = "http://image.tmdb.org/t/p/w342%s" % i.get(
                "posterurl", "")
        if i.get("backurl", ""):
            fanart = "http://image.tmdb.org/t/p/w1280%s" % i.get("backurl", "")
        plot = i.get("info", "")
        if plot is None:
            plot = ""
        infoLabels = {
            'plot': plot,
            'year': i.get("year"),
            'tmdb_id': i.get("id")
        }
        itemlist.append(
            Item(channel=item.channel,
                 action="findvideos",
                 title=title,
                 url=url,
                 server="torrent",
                 contentType="movie",
                 thumbnail=thumbnail,
                 fanart=fanart,
                 infoLabels=infoLabels,
                 contentTitle=i.get("nom"),
                 quality=calidad))
    from core import tmdb
    tmdb.set_infoLabels_itemlist(itemlist, __modo_grafico__)
    # Pagination: a full second half advances the page counter in the URL;
    # a full first half re-queues the same URL with extra="next".
    if len(lista_) == 25 and item.extra == "next":
        url = re.sub(
            r'page=(\d+)',
            r'page=' + str(int(re.search('\d+', item.url).group()) + 1),
            item.url)
        itemlist.append(
            Item(channel=item.channel,
                 action="pelis",
                 title=">> Página siguiente",
                 url=url))
    elif len(lista_) == 25:
        itemlist.append(
            Item(channel=item.channel,
                 action="pelis",
                 title=">> Página siguiente",
                 url=item.url,
                 extra="next"))
    return itemlist
def scraper(item):
    """List alltorrent movies, colorising titles and appending tmdb ratings.

    Fixes: the decoration loop used `item` as its loop variable, clobbering
    the function argument (renamed to `it`), and the pagination variable
    `next` shadowed the builtin (renamed to `next_url`).  Behavior is
    otherwise unchanged.
    """
    logger.info("pelisalacarta.alltorrent scraper")
    itemlist = []
    data = httptools.downloadpage(item.url).data
    data = re.sub(r"\n|\r|\t|\s{2}| ", "", data)
    patron = scrapertools.find_multiple_matches(
        data,
        '<div class="browse-movie-wrap col-xs-10 col-sm-4 col-md-5 col-lg-4"><a href="([^"]+)".*?src="([^"]+)".*?alt="([^"]+)".*?rel="tag">([^"]+)</a> '
    )
    for url, thumb, title, year in patron:
        title = re.sub(r"\(\d+\)", "", title)
        # Strip accents (py2: title is a utf-8 byte string).
        title = ''.join(
            (c for c in unicodedata.normalize('NFD',
                                              unicode(title.decode('utf-8')))
             if unicodedata.category(c) != 'Mn')).encode("ascii", "ignore")
        titulo = "[COLOR lime]" + title + "[/COLOR]"
        title = re.sub(r"!|\/.*", "", title).strip()
        new_item = item.clone(action="findvideos",
                              title=titulo,
                              url=url,
                              thumbnail=thumb,
                              fulltitle=title,
                              contentTitle=title,
                              contentType="movie",
                              library=True)
        new_item.infoLabels['year'] = year
        itemlist.append(new_item)
    ## Pagination
    next_url = scrapertools.find_single_match(
        data, '<li><a href="([^"]+)" rel="nofollow">Next Page')
    if len(next_url) > 0:
        url = next_url
        itemlist.append(
            item.clone(title="[COLOR olivedrab][B]Siguiente >>[/B][/COLOR]",
                       action="scraper",
                       url=url,
                       thumbnail="http://imgur.com/TExhOJE.png"))
    # Best-effort: decorate titles with coloured tmdb ratings.
    try:
        from core import tmdb
        tmdb.set_infoLabels_itemlist(itemlist, __modo_grafico__)
        for it in itemlist:
            if not "Siguiente >>" in it.title:
                if "0." in str(it.infoLabels['rating']):
                    it.infoLabels[
                        'rating'] = "[COLOR olive]Sin puntuacíon[/COLOR]"
                else:
                    it.infoLabels['rating'] = "[COLOR yellow]" + str(
                        it.infoLabels['rating']) + "[/COLOR]"
                it.title = it.title + " " + str(it.infoLabels['rating'])
    except:
        pass
    for item_tmdb in itemlist:
        logger.info(str(item_tmdb.infoLabels['tmdb_id']))
    return itemlist
def findvideos(item):
    """Collect torrent, online and extra-season links for an episode page.

    Scrapes three page sections: the torrent download table, the online
    viewing table (skipping trailers), and the "additional seasons" panel.
    Also appends a trailer-search entry.

    Fix: the trailer filter used the bitwise `&` operator on boolean
    comparisons; replaced with the logical `and` (same result here, but
    `&` does not short-circuit and binds surprisingly).
    """
    logger.info()
    itemlist = []
    item.text_color = color3
    data = scrapertools.downloadpage(item.url)
    data = scrapertools.decodeHtmlentities(data)
    # Search the download/torrent section
    data_download = scrapertools.find_single_match(
        data, '<th>Episodio - Enlaces de Descarga</th>(.*?)</table>')
    patron = '<p class="item_name".*?<a href="([^"]+)".*?>([^"]+)</a>'
    matches = scrapertools.find_multiple_matches(data_download, patron)
    for scrapedurl, scrapedepi in matches:
        new_item = item.clone()
        if "Episodio" not in scrapedepi:
            scrapedtitle = "[Torrent] Episodio " + scrapedepi
        else:
            scrapedtitle = "[Torrent] " + scrapedepi
        scrapedtitle = scrapertools.htmlclean(scrapedtitle)
        new_item.infoLabels['episode'] = scrapertools.find_single_match(
            scrapedtitle, "Episodio (\d+)")
        logger.debug("title=[" + scrapedtitle + "], url=[" + scrapedurl + "]")
        itemlist.append(
            new_item.clone(action="play",
                           title=scrapedtitle,
                           url=scrapedurl,
                           server="torrent",
                           contentType="episode"))
    # Search the online section
    data_online = scrapertools.find_single_match(
        data, "<th>Enlaces de Visionado Online</th>(.*?)</table>")
    patron = '<a href="([^"]+)\\n.*?src="([^"]+)".*?' \
             'title="Enlace de Visionado Online">([^"]+)</a>'
    matches = scrapertools.find_multiple_matches(data_online, patron)
    for scrapedurl, scrapedthumb, scrapedtitle in matches:
        # Discard trailer links (was `&`; logical `and` is the right operator).
        scrapedtitle = scrapertools.htmlclean(scrapedtitle)
        if (scrapedthumb != "images/series/youtube.png") and (scrapedtitle !=
                                                              "Trailer"):
            new_item = item.clone()
            # The server name is encoded in the thumbnail filename.
            server = scrapertools.find_single_match(scrapedthumb,
                                                    "images/series/(.*?).png")
            title = "[" + server.capitalize() + "]" + " " + scrapedtitle
            new_item.infoLabels['episode'] = scrapertools.find_single_match(
                scrapedtitle, "Episodio (\d+)")
            itemlist.append(
                new_item.clone(action="play",
                               title=title,
                               url=scrapedurl,
                               contentType="episode"))
    # Check whether there are other seasons
    if not "No hay disponible ninguna Temporada adicional" in data:
        data_temp = scrapertools.find_single_match(
            data, '<div class="panel panel-success">(.*?)</table>')
        data_temp = re.sub(r"\n|\r|\t|\s{2}| ", "", data_temp)
        patron = '<tr><td><p class="item_name"><a href="([^"]+)".*?' \
                 '<p class="text-success"><strong>([^"]+)</strong>'
        matches = scrapertools.find_multiple_matches(data_temp, patron)
        for scrapedurl, scrapedtitle in matches:
            new_item = item.clone()
            url = urlparse.urljoin(URL_BASE, scrapedurl)
            scrapedtitle = scrapedtitle.capitalize()
            temporada = scrapertools.find_single_match(scrapedtitle,
                                                       "Temporada (\d+)")
            if temporada != "":
                new_item.infoLabels['season'] = temporada
                new_item.infoLabels['episode'] = ""
            itemlist.append(
                new_item.clone(action="findvideos",
                               title=scrapedtitle,
                               url=url,
                               text_color="red",
                               contentType="season"))
    # Best-effort tmdb enrichment.
    try:
        from core import tmdb
        tmdb.set_infoLabels_itemlist(itemlist, __modo_grafico__)
    except:
        pass
    new_item = item.clone()
    if config.is_xbmc():
        new_item.contextual = True
    itemlist.append(
        new_item.clone(channel="trailertools",
                       title="Buscar Tráiler",
                       action="buscartrailer",
                       context="",
                       text_color="magenta"))
    return itemlist
def lista_anime(item):
    """Anime listing for animeworld: one Item per show, plus a next-page link.

    The decorated title keeps the year and language markers found in the
    scraped title and, when different, the original (Japanese) title.
    """
    logger.info("[animeworld.py] lista_anime")
    itemlist = []

    # Fetch and flatten the page markup.
    page = httptools.downloadpage(item.url).data
    page = re.sub(r'\n|\t', '', page)
    page = re.sub(r'>\s*<', '><', page)

    show_pattern = r'<div class="item"><a href="([^"]+)".*?src="([^"]+)".*?data-jtitle="([^"]+)".*?>([^<]+)<\/a><p>(.*?)<\/p>'
    for show_url, poster, jtitle, raw_title, plot in re.findall(show_pattern, page, re.DOTALL):
        # Show the original (Japanese) title only when it differs.
        alt_title = '' if jtitle == raw_title else ' - [ ' + jtitle + ' ]'

        # Year and language markers, if present, e.g. "(2019)" / "(ITA)".
        year = lang = ''
        if '(' in raw_title:
            year = scrapertoolsV2.find_single_match(raw_title, r'(\([0-9]+\))')
            lang = scrapertoolsV2.find_single_match(raw_title, r'(\([a-zA-Z]+\))')

        base = raw_title.replace(year, '').replace(lang, '')
        alt_title = alt_title.replace(year, '').replace(lang, '')
        label = '[B]' + base + '[/B]' + year + lang + alt_title

        itemlist.append(
            Item(channel=item.channel,
                 extra=item.extra,
                 contentType="tvshow",
                 action="episodios",
                 text_color="azure",
                 title=label,
                 url=show_url,
                 thumbnail=poster,
                 fulltitle=label,
                 show=label,
                 plot=plot,
                 folder=True))

    tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
    autorenumber.renumber(itemlist)

    # Next page
    following = scrapertoolsV2.find_single_match(
        page, '<a class="page-link" href="([^"]+)" rel="next"')
    if following != '':
        itemlist.append(
            Item(channel=item.channel,
                 action='lista_anime',
                 title='[B]' + config.get_localized_string(30992) + ' >[/B]',
                 url=following,
                 contentType=item.contentType,
                 thumbnail=thumb()))
    return itemlist
def entradas(item):
    """List movie entries, in search ("Buscar") or browse mode.

    Browse mode extracts language/quality markers, filters adult entries
    and attaches a tmdb poster filter; search mode uses a simpler pattern.
    A "Siguiente" entry is appended when the paginator shows a next page.

    Fix: both Item(...) constructors passed `contentTitle=scrapedtitle`
    TWICE — a duplicate keyword argument is a SyntaxError; the duplicate
    is removed.
    """
    logger.info()
    itemlist = []
    item.text_color = color2
    # Download the page
    data = httptools.downloadpage(item.url).data
    data = re.sub("\n", "", data)
    if "valores" in item and item.valores:
        # Header line describing the active filter values.
        itemlist.append(
            item.clone(action="", title=item.valores, text_color=color4))
    # Search branch
    if item.extra == "Buscar":
        # Extract the entries
        entradas = scrapertools.find_multiple_matches(
            data, '<div class="col-mt-5 postsh">(.*?)</div></div></div>')
        patron = '<div class="poster-media-card([^"]+)">.*?<a href="([^"]+)" title="([^"]+)">' \
                 '.*?<img.*?src="([^"]+)"'
        for match in entradas:
            matches = scrapertools.find_multiple_matches(match, patron)
            for calidad, scrapedurl, scrapedtitle, scrapedthumbnail in matches:
                thumbnail = scrapedthumbnail.replace("w185", "original")
                title = scrapedtitle
                calidad = calidad.strip()
                itemlist.append(
                    Item(channel=item.channel,
                         action="findvideos",
                         title=title,
                         url=scrapedurl,
                         thumbnail=thumbnail,
                         contentTitle=scrapedtitle,
                         context=["buscar_trailer"],
                         contentType="movie"))
    else:
        # Extract the entries
        if item.extra == "Novedades":
            data2 = data.split(">Últimas Películas Agregadas y Actualizadas<",
                               1)[1]
            entradas = scrapertools.find_multiple_matches(
                data2, '<div class="col-mt-5 postsh">(.*?)</div></div></div>')
        else:
            entradas = scrapertools.find_multiple_matches(
                data, '<div class="col-mt-5 postsh">(.*?)</div></div></div>')
        patron = '<div class="poster-media-card([^"]+)">.*?<a href="([^"]+)" title="([^"]+)">' \
                 '.*?<div class="idiomes"><div class="(.*?)">.*?' \
                 '<img.*?src="([^"]+)".*?<span class="under-title">(.*?)</span>'
        for match in entradas:
            matches = scrapertools.find_multiple_matches(match, patron)
            for calidad, url, scrapedtitle, idioma, scrapedthumbnail, category in matches:
                # Skip adult entries
                if category == "Eroticas +18":
                    continue
                idioma = idioma.strip()
                if idioma in IDIOMAS:
                    idioma = IDIOMAS[idioma]
                else:
                    idioma = IDIOMAS['Subtitulado']
                calidad = calidad.strip()
                scrapedtitle = scrapedtitle.replace("Ver Pelicula ", "")
                title = scrapedtitle
                if idioma:
                    title += " [" + idioma + "]"
                if calidad:
                    title += " [" + calidad + "]"
                if 'class="proximamente"' in match:
                    title += " [Próximamente]"
                thumbnail = scrapedthumbnail.replace("w185", "original")
                # Poster-based filter for the tmdb lookup.
                filtro_thumb = scrapedthumbnail.replace(
                    "https://image.tmdb.org/t/p/w185", "")
                filtro_list = {"poster_path": filtro_thumb}
                filtro_list = filtro_list.items()
                itemlist.append(
                    Item(channel=item.channel,
                         action="findvideos",
                         title=title,
                         url=url,
                         contentTitle=scrapedtitle,
                         thumbnail=thumbnail,
                         context=["buscar_trailer"],
                         contentType="movie",
                         infoLabels={'filtro': filtro_list}))
    tmdb.set_infoLabels_itemlist(itemlist, __modo_grafico__)
    # Extract the next-page marker
    next_page = scrapertools.find_single_match(
        data, '<span class="current">.*?<\/span><a href="([^"]+)"')
    if next_page:
        if item.extra == "Buscar":
            # NOTE(review): this replace is a no-op as written; it was likely
            # meant to decode '&amp;' to '&' — confirm before changing.
            next_page = next_page.replace('&', '&')
        itemlist.append(
            Item(channel=item.channel,
                 action="entradas",
                 title="Siguiente",
                 url=next_page,
                 text_color=color3))
    return itemlist
def video(item):
    """Search/listing results for animeworld posters.

    Builds a decorated title from the scraped title (year, language,
    episode/OVA/ONA/movie/special markers, original title) and decides
    whether each entry is a tv show (episodios) or a movie (findvideos).
    """
    logger.info("[animeworld.py] video")
    itemlist = []
    data = httptools.downloadpage(item.url).data
    data = re.sub(r'\n|\t', '', data)
    data = re.sub(r'>\s*<', '><', data)
    patron = r'<a href="([^"]+)" class="poster.*?><img src="([^"]+)"(.*?)data-jtitle="([^"]+)" .*?>(.*?)<\/a>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    for scrapedurl, scrapedthumb, scrapedinfo, scrapedoriginal, scrapedtitle in matches:
        # Look for info such as year or language in the title
        year = ''
        lang = ''
        if '(' in scrapedtitle:
            year = scrapertoolsV2.find_single_match(scrapedtitle,
                                                    r'( \([0-9]+\))')
            lang = scrapertoolsV2.find_single_match(scrapedtitle,
                                                    r'( \([a-zA-Z]+\))')
        # Remove year and language from the title
        title = scrapedtitle.replace(year, '').replace(lang, '')
        original = scrapedoriginal.replace(year, '').replace(lang, '')
        # Compare the title with the original one
        if original == title:
            original = ''
        else:
            original = ' - [ ' + scrapedoriginal + ' ]'
        # Look for supplementary info in the poster markup
        ep = ''
        ep = scrapertoolsV2.find_single_match(scrapedinfo,
                                              '<div class="ep">(.*?)<')
        if ep != '':
            ep = ' - ' + ep
        ova = ''
        ova = scrapertoolsV2.find_single_match(scrapedinfo,
                                               '<div class="ova">(.*?)<')
        if ova != '':
            ova = ' - (' + ova + ')'
        ona = ''
        ona = scrapertoolsV2.find_single_match(scrapedinfo,
                                               '<div class="ona">(.*?)<')
        if ona != '':
            ona = ' - (' + ona + ')'
        movie = ''
        movie = scrapertoolsV2.find_single_match(scrapedinfo,
                                                 '<div class="movie">(.*?)<')
        if movie != '':
            movie = ' - (' + movie + ')'
        special = ''
        special = scrapertoolsV2.find_single_match(
            scrapedinfo, '<div class="special">(.*?)<')
        if special != '':
            special = ' - (' + special + ')'
        # Concatenate the info markers
        info = ep + lang + year + ova + ona + movie + special
        # Build the display title
        long_title = '[B]' + title + '[/B]' + info + original
        # Decide whether these are episodes or a movie
        if movie == '':
            contentType = 'tvshow'
            action = 'episodios'
        else:
            contentType = 'movie'
            action = 'findvideos'
        itemlist.append(
            Item(channel=item.channel,
                 contentType=contentType,
                 action=action,
                 title=long_title,
                 url=scrapedurl,
                 fulltitle=title,
                 show=title,
                 thumbnail=scrapedthumb,
                 context=autoplay.context))
    tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
    autorenumber.renumber(itemlist)
    # Next page
    next_page = scrapertoolsV2.find_single_match(
        data, '<a class="page-link" href=".*?page=([^"]+)" rel="next"')
    if next_page != '':
        itemlist.append(
            Item(channel=item.channel,
                 action='video',
                 title='[B]' + config.get_localized_string(30992) + ' >[/B]',
                 url=re.sub('&page=([^"]+)', '', item.url) + '&page=' +
                     next_page,
                 contentType=item.contentType,
                 thumbnail=thumb()))
    return itemlist
def episodios(item):
    """Episode list for a show page.

    When reached from the "ultimos" listing, first resolves the show page
    and appends "/episodios".  Episodes arrive as "SxE" markers; for
    first-level calls (not from the videolibrary) a trailer-search entry
    and an "add to videolibrary" entry are appended.
    """
    logger.info()
    itemlist = []
    if item.extra == "ultimos":
        # Resolve the show page from the episode page, then list episodes.
        data = httptools.downloadpage(item.url).data
        item.url = scrapertools.find_single_match(
            data, '<a href="([^"]+)" class="h1-like media-title"')
        item.url += "/episodios"
    data = httptools.downloadpage(item.url).data
    action = "findvideos"
    patron = '<div class="ep-list-number">.*?href="([^"]+)">([^<]+)</a>.*?<span class="name">([^<]+)</span>'
    matches = scrapertools.find_multiple_matches(data, patron)
    for scrapedurl, episode, scrapedtitle in matches:
        new_item = item.clone(action=action, url=scrapedurl,
                              text_color=color2, contentType="episode")
        # episode comes as "SxE".
        new_item.contentSeason = episode.split("x")[0]
        new_item.contentEpisodeNumber = episode.split("x")[1]
        new_item.title = episode + " - " + scrapedtitle
        new_item.extra = "episode"
        if "episodios" in item.extra or item.path:
            new_item.extra = "episode|"
        itemlist.append(new_item)
    tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
    itemlist.reverse()
    # Extras only for first-level calls (not from the videolibrary).
    if "episodios" not in item.extra and not item.path:
        id = scrapertools.find_single_match(item.url, '/(\d+)/')
        data_trailer = httptools.downloadpage(
            host + "/media/trailer?idm=%s&mediaType=1" % id,
            ignore_response_code=True).data
        # Best-effort: the trailer endpoint may return no/invalid JSON.
        try:
            trailer_url = jsontools.load(data_trailer)["video"]["url"]
            if trailer_url != "":
                item.infoLabels["trailer"] = trailer_url
        except:
            pass
        itemlist.append(
            item.clone(channel="trailertools", action="buscartrailer",
                       title="Buscar Tráiler", text_color="magenta"))
        # NOTE(review): nesting of this block inside the first-level guard is
        # inferred from the usual channel pattern — confirm against upstream.
        if config.get_videolibrary_support():
            itemlist.append(
                Item(channel=item.channel,
                     action="add_serie_to_library",
                     text_color=color5,
                     title="Añadir serie a la videoteca",
                     show=item.show,
                     thumbnail=item.thumbnail,
                     url=item.url,
                     fulltitle=item.fulltitle,
                     fanart=item.fanart,
                     extra="episodios###episodios",
                     contentTitle=item.fulltitle))
    return itemlist
def lista(item):
    """List movies or episodes depending on item.extra.

    Movies use one pattern; series entries carry season/episode markers.
    Both attach a tmdb poster filter.  A "Siguiente >>>" entry is added
    when the paginator exposes a next page.

    Fix: removed the unused `import inspect` and the unused local
    `actual_page_url` (dead code; behavior unchanged).
    """
    logger.info()
    itemlist = []
    data = get_source(item.url)
    if item.extra == 'movie':
        patron = "<div class=moviesbox.*?><a href=(.*?)>.*?image:url\('(.*?)'\)>.*?<b>.*?>(.*?)<"
        matches = re.compile(patron, re.DOTALL).findall(data)
        for scrapedurl, scrapedthumbnail, scrapedtitle in matches:
            url = scrapedurl
            thumbnail = scrapedthumbnail
            # Titles arrive latin1-encoded (py2 byte strings).
            contentTitle = scrapedtitle.decode('latin1').encode('utf8')
            title = contentTitle
            # Poster-based filter for the tmdb lookup.
            filtro_thumb = scrapedthumbnail.replace(
                "https://image.tmdb.org/t/p/w396", "")
            filtro_list = {"poster_path": filtro_thumb}
            filtro_list = filtro_list.items()
            itemlist.append(
                item.clone(action='findvideos',
                           title=title,
                           url=url,
                           thumbnail=thumbnail,
                           contentTitle=contentTitle,
                           infoLabels={'filtro': filtro_list},
                           extra=item.extra))
    else:
        patron = "<div class=moviesbox.*?>.*?episode>(.*?)x(.*?)<.*?href=(.*?)>.*?image:url\('(.*?)'.*?href.*?>(.*?)<"
        matches = re.compile(patron, re.DOTALL).findall(data)
        for season, episode, scrapedurl, scrapedthumbnail, scrapedtitle in matches:
            url = scrapedurl
            thumbnail = scrapedthumbnail
            contentSerieName = scrapedtitle
            title = '%s' % contentSerieName
            filtro_thumb = scrapedthumbnail.replace(
                "https://image.tmdb.org/t/p/w396", "")
            filtro_list = {"poster_path": filtro_thumb}
            filtro_list = filtro_list.items()
            contentSeason = season
            contentEpisode = episode
            itemlist.append(
                item.clone(action='seasons',
                           title=title,
                           url=url,
                           thumbnail=thumbnail,
                           contentSerieName=contentSerieName,
                           contentSeason=contentSeason,
                           contentEpisode=contentEpisode,
                           infoLabels={'filtro': filtro_list},
                           extra=item.extra))
    tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
    # Pagination
    if itemlist != []:
        next_page = scrapertools.find_single_match(
            data, '<span class=pagination_next><a href=(.*?)>')
        if next_page != '':
            itemlist.append(
                item.clone(
                    action="lista",
                    title='Siguiente >>>',
                    url=next_page,
                    thumbnail='https://s32.postimg.cc/4zppxf5j9/siguiente.png')
            )
    return itemlist
def lista_anime(item, nextpage=True, show_lang=True):
    """Scrape the anime list page into Items pointing at 'episodi'.

    @param item: item with the listing url
    @param nextpage: when True, append a "next page" item if the page
        declares a rel="next" link
    @param show_lang: when True, append the language tag (colored red)
        to each title
    @return: list of Items
    """
    logger.info()
    itemlist = []
    data = httptools.downloadpage(item.url).data
    blocco = scrapertools.find_single_match(
        data, r'<div class="post-list group">(.*?)</nav><!--/.pagination-->')
    # patron = r'<a href="([^"]+)" title="([^"]+)">\s*<img[^s]+src="([^"]+)"[^>]+>'
    # Pattern including the thumbnail; Kodi does not download images from this site
    patron = r'<a href="([^"]+)" title="([^"]+)">'
    matches = re.compile(patron, re.DOTALL).findall(blocco)
    for scrapedurl, scrapedtitle in matches:
        scrapedtitle = scrapertools.decodeHtmlentities(scrapedtitle)
        scrapedtitle = re.sub(r'\s+', ' ', scrapedtitle)
        # Title clean-up: drop boilerplate words the site appends
        scrapedtitle = scrapedtitle.replace("Streaming", "").replace("&", "")
        scrapedtitle = scrapedtitle.replace("Download", "")
        # Detect the "Sub Ita" language tag (any casing)
        lang = scrapertools.find_single_match(
            scrapedtitle, r"([Ss][Uu][Bb]\s*[Ii][Tt][Aa])")
        scrapedtitle = scrapedtitle.replace("Sub Ita", "").strip()
        # Strip "Episodio N"/"OAV" markers to get the bare show title
        eptype = scrapertools.find_single_match(scrapedtitle, "((?:Episodio?|OAV))")
        cleantitle = re.sub(r'%s\s*\d*\s*(?:\(\d+\)|)' % eptype, '', scrapedtitle)
        cleantitle = cleantitle.replace(lang, "").strip()
        itemlist.append(
            Item(channel=item.channel,
                 action="episodi",
                 # OAVs are one-shots, treat them as movies
                 contentType="tvshow" if 'oav' not in scrapedtitle.lower() else "movie",
                 title=color(
                     scrapedtitle.replace(
                         lang,
                         "(%s)" % support.color(lang, "red") if show_lang else "").strip(),
                     'azure'),
                 fulltitle=cleantitle,
                 url=scrapedurl,
                 show=cleantitle,
                 folder=True))
    tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
    if nextpage:
        # Follow the page's own rel="next" link when present
        patronvideos = r'<link rel="next" href="([^"]+)"\s*/>'
        matches = re.compile(patronvideos, re.DOTALL).findall(data)
        if len(matches) > 0:
            scrapedurl = matches[0]
            itemlist.append(
                Item(
                    channel=item.channel,
                    action="lista_anime",
                    title="[COLOR lightgreen]" + config.get_localized_string(30992) + "[/COLOR]",
                    url=scrapedurl,
                    thumbnail=
                    "http://2.bp.blogspot.com/-fE9tzwmjaeQ/UcM2apxDtjI/AAAAAAAAeeg/WKSGM2TADLM/s1600/pager+old.png",
                    folder=True))
    return itemlist
def lista(item): logger.info() itemlist = [] data = httptools.downloadpage(item.url).data data = re.sub(r'"|\n|\r|\t| |<br>', "", data) patron = "peli><a href=([^ ]+) title=(.*?)><img src=([^ ]+) alt=.*?><div class=([^>]+)>.*?<p>.*?<\/p>.*?flags ([" \ "^']+)'" matches = re.compile(patron, re.DOTALL).findall(data) for scrapedurl, scrapedtitle, scrapedthumbnail, scrapedcalidad, scrapedidioma in matches: year = scrapertools.find_single_match(scrapedtitle, '.*?\((\d{4})\)') scrapedtitle = scrapertools.find_single_match(scrapedtitle, '(.*?)\(\.*?') url = scrapedurl thumbnail = scrapedthumbnail scrapedcalidad = scrapedcalidad.replace("'", "") scrapedcalidad = scrapedcalidad.lower() if scrapedcalidad in tcalidad: scrapedcalidad = tcalidad[scrapedcalidad] else: scrapedcalidad = '[COLOR orange]MULTI[/COLOR]' if scrapedidioma in taudio: scrapedidioma = taudio[scrapedidioma] else: scrapedidioma = '[COLOR orange]MULTI[/COLOR]' title = scrapedtitle + ' | ' + scrapedcalidad + ' | ' + scrapedidioma + ' | ' fanart = '' plot = '' itemlist.append( Item(channel=item.channel, action='findvideos', title=title, url=url, thumbnail=thumbnail, plot=plot, fanart=fanart, contentTitle=scrapedtitle, extra=item.extra, infoLabels={'year': year}, list_language=list_language, context=autoplay.context )) # #Paginacion tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True) itemlist = fail_tmdb(itemlist) if itemlist != []: actual_page_url = item.url next_page = scrapertools.find_single_match(data, "class=previouspostslink' href='([^']+)'>Siguiente ›<\/a>") if next_page != '': itemlist.append( Item(channel=item.channel, action="lista", title='Siguiente >>>', url=next_page, thumbnail='https://s16.postimg.org/9okdu7hhx/siguiente.png', extra=item.extra )) return itemlist
def series(item):
    """List series from a JSON API, tunnelled through kproxy.

    The item.url is submitted to kproxy, the refresh redirect is followed
    and the proxied JSON payload is parsed. Results are windowed 25 at a
    time (item.extra == "next" selects the second window), and a "next
    page" item is appended while windows stay full.
    """
    logger.info()
    itemlist = []
    # Prime the kproxy session, then POST the real url through it
    response = httptools.downloadpage("https://kproxy.com/")
    url = "https://kproxy.com/doproxy.jsp"
    post = "page=%s&x=34&y=14" % urllib.quote(item.url)
    response = httptools.downloadpage(url, post, follow_redirects=False).data
    # The proxied url comes back in a meta-refresh tag
    url = scrapertools.find_single_match(
        response, '<meta http-equiv="refresh".*?url=([^"]+)"')
    data = httptools.downloadpage(url).data
    lista = jsontools.load_json(data)
    if item.extra == "next":
        lista_ = lista[25:]
    else:
        lista_ = lista[:25]
    for i in lista_:
        punt = i.get("puntuacio", "")
        valoracion = ""
        # NOTE(review): "and not 0" is always True, so this is just "if punt:"
        if punt and not 0:
            valoracion = " (Val: {punt})".format(punt=punt)
        title = "{nombre}{val}".format(nombre=i.get("nom", ""), val=valoracion)
        url = "{url}?id={id}".format(url=api_temp, id=i.get("id", ""))
        thumbnail = ""
        fanart = ""
        if i.get("posterurl", ""):
            thumbnail = "http://image.tmdb.org/t/p/w342{file}".format(
                file=i.get("posterurl", ""))
        if i.get("backurl", ""):
            fanart = "http://image.tmdb.org/t/p/w1280{file}".format(
                file=i.get("backurl", ""))
        plot = i.get("info", "")
        if plot is None:
            plot = ""
        infoLabels = {
            'plot': plot,
            'year': i.get("year"),
            'tmdb_id': i.get("id"),
            'mediatype': 'tvshow'
        }
        itemlist.append(
            Item(channel=item.channel, action="episodios", title=title,
                 url=url, server="torrent", thumbnail=thumbnail,
                 fanart=fanart, infoLabels=infoLabels,
                 contentTitle=i.get("nom"), show=i.get("nom")))
    from core import tmdb
    tmdb.set_infoLabels_itemlist(itemlist, __modo_grafico__)
    # Pagination: a full second window means there may be a further page,
    # so bump the page=N counter in the url; a full first window just
    # re-enters with extra="next"
    if len(lista_) == 25 and item.extra == "next":
        url = re.sub(
            r'page=(\d+)',
            r'page=' + str(int(re.search('\d+', item.url).group()) + 1),
            item.url)
        itemlist.append(
            Item(channel=item.channel, action="series",
                 title=">> Página siguiente", url=url))
    elif len(lista_) == 25:
        itemlist.append(
            Item(channel=item.channel, action="series",
                 title=">> Página siguiente", url=item.url, extra="next"))
    return itemlist
def episodios(item):
    """Build the episode list of a series via the site's AJAX endpoint.

    Scrapes the media id from the series page, requests the episode list
    and emits one "findvideos" item per episode, each carrying the `post`
    payload for the next step.

    Fixes: each episode now gets its OWN copy of item.infoLabels — the
    original mutated one shared dict, so every Item carried the last
    episode's season/episode. Season lookup hoisted out of the loop
    (item.url is loop-invariant); locals renamed off builtins; bare
    ``except:`` narrowed.
    """
    logger.info()
    itemlist = []
    data = get_source(item.url)
    try:
        media_id, media_type = scrapertools.find_single_match(
            data, 'data-id=(\d+) data-type=(.*?) ')
        # media_type is scraped but the endpoint is always queried with '1'
        post = {
            'set': 'LoadOptionsEpisode',
            'action': 'EpisodeList',
            'id': media_id,
            'type': '1'
        }
        data = get_data(post)
        patron = 'data-episode="(\d+)".*?title="(.*?)"'
        matches = re.compile(patron, re.DOTALL).findall(data)
        # Season number is encoded in the url ("...temp-N"); default 1
        season = scrapertools.find_single_match(item.url, '.*?temp-(\d+)')
        if season == '':
            season = 1
        for episode, title in matches:
            post = {
                'set': 'LoadOptionsEpisode',
                'action': 'Step1',
                'id': media_id,
                'type': '1',
                'episode': episode
            }
            # Per-episode copy; a shared dict would make every item show the
            # last episode's labels
            infoLabels = dict(item.infoLabels)
            infoLabels['season'] = season
            infoLabels['episode'] = episode
            if title[0].isdigit():
                # Title already starts with the episode number
                title = '%sx%s' % (season, title)
            else:
                title = '%sx%s - %s' % (season, episode, title)
            itemlist.append(
                Item(channel=item.channel, title=title,
                     contentSeasonNumber=season,
                     contentEpisodeNumber=episode, action='findvideos',
                     post=post, type=item.type, infoLabels=infoLabels))
        tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
        if config.get_videolibrary_support() and len(itemlist) > 0:
            itemlist.append(
                Item(
                    channel=item.channel,
                    title='[COLOR yellow]Añadir esta serie a la videoteca[/COLOR]',
                    url=item.url,
                    action="add_serie_to_library",
                    extra="episodios",
                    contentSerieName=item.contentSerieName,
                ))
    except Exception:
        # Best effort: a site layout change must not break the whole menu;
        # an empty list is returned instead
        pass
    return itemlist
def list_all(item):
    """List titles from the grid page, 20 at a time.

    Results are windowed with item.first/last over the full soup match
    list; when the window is exhausted, the real next-page link is read
    from the "filter" pagination widget.

    @param item: item with url, optional post payload and window offset
    @return: list of Items (series -> 'seasons', movies -> 'findvideos')
    """
    logger.info()
    itemlist = list()
    next = True
    if not item.url.startswith(host):
        item.url = host + item.url
    if item.post:
        # POST-driven listings (e.g. filtered searches)
        soup = BeautifulSoup(get_source(item.url, post=item.post),
                             "html5lib", from_encoding="utf-8")
    else:
        soup = create_soup(item.url, referer=host)
    matches = soup.find_all("div", class_="span-6 inner-6 tt view")
    # Window of up to 20 results starting at item.first
    first = item.first
    last = first + 20
    if last > len(matches):
        last = len(matches)
        next = False
    for elem in matches[first:last]:
        lang = list()
        url = elem.a["href"]
        title = elem.find("a", class_="link")["title"]
        # Drop everything after the first "." in the title
        title = re.sub(r"\..*", "", title)
        thumb = elem.img["src"]
        new_item = Item(channel=item.channel, title=title, url=host + url,
                        thumbnail=thumb, infoLabels={})
        if '/show/' in url:
            new_item.contentSerieName = title
            new_item.action = 'seasons'
            new_item.context = filtertools.context(item, list_language,
                                                   list_quality)
        else:
            # Movies: languages are encoded as flag images
            lang_data = elem.find("div", class_="left").find_all("img")
            for l in lang_data:
                if l["src"]:
                    lang.append(
                        IDIOMAS.get(
                            lang_from_flag(l["src"], "/static/style/images/",
                                           "png"), ''))
            new_item.language = lang
            new_item.contentTitle = title
            new_item.infoLabels["year"] = "-"
            new_item.action = 'findvideos'
        itemlist.append(new_item)
    tmdb.set_infoLabels_itemlist(itemlist, True)
    # Pagination: same url with a bumped window while matches remain,
    # otherwise the widget's real next-page link (reset window to 0)
    if next:
        url_next_page = item.url
        first = last
    else:
        try:
            url_next_page = host + soup.find("ul", id="filter").find(
                "a", class_="current").next_sibling["href"]
        except:
            url_next_page = False
            pass
        first = 0
    if url_next_page and len(matches) > 20:
        itemlist.append(
            Item(channel=item.channel, title="Siguiente >>",
                 url=url_next_page, action='list_all', first=first))
    return itemlist
def scrape(item, patron='', listGroups=[], headers="", blacklist="", data="",
           patron_block="", patronNext="", action="findvideos", url_host="",
           addVideolibrary=True):
    """Generic channel scraper: match `patron` against the page (or
    `patron_block` sub-blocks of it) and turn each match into an Item.

    # patron: the patron to use for scraping page, all capturing group must match with listGroups
    # listGroups: a list containing the scraping info obtained by your patron, in order
    #   accepted values are: url, title, thumb, quality, year, plot, duration, genre, rating
    # headers: values to pass to request header
    # blacklist: titles that you want to exclude (service articles for example)
    # data: if you want to pass data manually, maybe because you need some custom replacement
    # patron_block: patron to get parts of the page (to scrape with patron attribute),
    #   if you need a "block inside another block" you can create a list; note that all matches
    #   will be packed as string
    # patronNext: patron for scraping next page link
    # action: if you want results to perform an action different from "findvideos",
    #   useful when scraping film by genres
    # url_host: string to prepend to scrapedurl, useful when url doesn't contain host
    # example usage:
    #   import support
    #   patron = 'blablabla'
    #   headers = [['Referer', host]]
    #   blacklist = 'Request a TV serie!'
    #   return support.scrape(item, patron, ['thumb', 'quality', 'url', 'title', 'year', 'plot'],
    #                         headers=headers, blacklist=blacklist)
    """
    itemlist = []
    if not data:
        # replace all ' with " and eliminate newline/tabs, so we don't need
        # to worry about quote style in the patterns
        data = httptools.downloadpage(item.url,
                                      headers=headers).data.replace("'", '"')
        data = re.sub('\n|\t', ' ', data)
        log('DATA =', data)
        block = data
        if patron_block:
            if type(patron_block) == str:
                patron_block = [patron_block]
            # Each successive block-pattern narrows the previous result;
            # multiple matches are concatenated into one string
            for n, regex in enumerate(patron_block):
                blocks = scrapertoolsV2.find_multiple_matches(block, regex)
                block = ""
                for b in blocks:
                    block += "\n" + b
                log('BLOCK ', n, '=', block)
    else:
        block = data
    if patron and listGroups:
        matches = scrapertoolsV2.find_multiple_matches(block, patron)
        log('MATCHES =', matches)
        for match in matches:
            if len(listGroups) > len(match):  # to fix a bug
                match = list(match)
                match.extend([''] * (len(listGroups) - len(match)))
            # Pull each named field out of the match by its listGroups position
            scrapedurl = url_host + match[listGroups.index(
                'url')] if 'url' in listGroups else ''
            scrapedtitle = match[listGroups.index(
                'title')] if 'title' in listGroups else ''
            scrapedthumb = match[listGroups.index(
                'thumb')] if 'thumb' in listGroups else ''
            scrapedquality = match[listGroups.index(
                'quality')] if 'quality' in listGroups else ''
            scrapedyear = match[listGroups.index(
                'year')] if 'year' in listGroups else ''
            scrapedplot = match[listGroups.index(
                'plot')] if 'plot' in listGroups else ''
            scrapedduration = match[listGroups.index(
                'duration')] if 'duration' in listGroups else ''
            scrapedgenre = match[listGroups.index(
                'genre')] if 'genre' in listGroups else ''
            scrapedrating = match[listGroups.index(
                'rating')] if 'rating' in listGroups else ''
            title = scrapertoolsV2.decodeHtmlentities(scrapedtitle)
            plot = scrapertoolsV2.decodeHtmlentities(scrapedplot)
            if scrapedquality:
                longtitle = '[B]' + title + '[/B] [COLOR blue][' + scrapedquality + '][/COLOR]'
            else:
                longtitle = '[B]' + title + '[/B]'
            if item.infoLabels[
                    "title"] or item.fulltitle:
                # if title is set, probably this is a list of episodes or
                # video sources, so inherit the parent's infoLabels
                infolabels = item.infoLabels
            else:
                infolabels = {}
            if scrapedyear:
                infolabels['year'] = scrapedyear
            if scrapedplot:
                infolabels['plot'] = plot
            if scrapedduration:
                # Parse "HhMM"-style durations into minutes, then store seconds
                matches = scrapertoolsV2.find_multiple_matches(
                    scrapedduration,
                    r'([0-9])\s*?(?:[hH]|:|\.|,|\\|\/|\||\s)\s*?([0-9]+)')
                for h, m in matches:
                    scrapedduration = int(h) * 60 + int(m)
                infolabels['duration'] = int(scrapedduration) * 60
            if scrapedgenre:
                genres = scrapertoolsV2.find_multiple_matches(
                    scrapedgenre, '[A-Za-z]+')
                infolabels['genre'] = ", ".join(genres)
            if scrapedrating:
                infolabels['rating'] = scrapertoolsV2.decodeHtmlentities(
                    scrapedrating)
            if not scrapedtitle in blacklist:
                itemlist.append(
                    Item(channel=item.channel,
                         action=action,
                         contentType=item.contentType,
                         title=longtitle,
                         fulltitle=title,
                         show=title,
                         quality=scrapedquality,
                         url=scrapedurl,
                         infoLabels=infolabels,
                         thumbnail=scrapedthumb))
    tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
    if patronNext:
        nextPage(itemlist, item, data, patronNext, 2)
    if addVideolibrary and (item.infoLabels["title"] or item.fulltitle):
        # Inside a title: offer "add to videolibrary"
        item.fulltitle = item.infoLabels["title"]
        videolibrary(itemlist, item)
    return itemlist
def list_all(self, item, postprocess=None):
    """List titles from a MovieList grid page.

    @param item: item carrying the listing url
    @param postprocess: optional callable (soup, elem, new_item, item) ->
        Item, applied to each scraped item before it is appended
    @return: list of Items; a "Siguiente >>" item is appended when a
        next page exists
    """
    logger.info()
    itemlist = list()
    soup = self.create_soup(item.url)
    matches = soup.find("ul", class_="MovieList").find_all(
        "article", class_=re.compile("TPost C"))
    if not matches:
        return itemlist
    # limit_results may window the matches and supply pagination info
    matches, next_limit, next_page = self.limit_results(item, matches)
    if not next_page:
        # Fall back to the wp-pagenavi widget's link after "current"
        try:
            next_page = soup.find(class_="wp-pagenavi").find(
                class_="current").find_next_sibling()["href"]
            if next_page and not next_page.startswith("http"):
                next_page = item.url + next_page
        except:
            pass
    for elem in matches:
        url = elem.a["href"] if elem.a["href"].startswith(
            "http") else self.host + elem.a["href"]
        title = elem.a.h3.text
        thumb = elem.find("img")
        # Lazy-loaded images keep the real url in data-src
        thumb = thumb["data-src"] if thumb.has_attr(
            "data-src") else thumb["src"]
        year = scrapertools.find_single_match(title, r'\((\d{4})\)')
        if not year:
            try:
                year = elem.find("span", class_="Year").text
            except:
                pass
        if not year:
            year = "-"
        new_item = Item(channel=item.channel, title=title, url=url,
                        thumbnail=thumb, infoLabels={"year": year})
        # Decide movie vs. series (delegated to the class helper)
        new_item = self.define_content_type(new_item)
        if postprocess:
            new_item = postprocess(soup, elem, new_item, item)
        itemlist.append(new_item)
    tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
    if next_page:
        itemlist.append(
            Item(channel=item.channel, title="Siguiente >>", url=next_page,
                 action='list_all', next_limit=next_limit))
    return itemlist
def episodios(item):
    """Two-phase episode listing.

    First call (item.lang unset): resolve redirect pages ("clicca qui"),
    count how many accordion seasons exist per language (ITA / SUB ITA)
    and emit one re-entrant item per available language, stashing the
    page html in item.data.

    Second call (item.lang set): parse the stashed html and emit one
    "findvideos" item per SxE entry of the selected language.
    """
    #logger.info("%s episodios log: %s" % (__channel__, item))
    itemlist = []
    if not (item.lang):
        lang_season = {'ITA': 0, 'SUB ITA': 0}
        # Download the page
        data = httptools.downloadpage(item.url).data
        #========
        # Some entries hide the episode list behind a redirect page
        if 'clicca qui per aprire' in data.lower():
            logger.info("%s CLICCA QUI PER APRIRE GLI EPISODI log: %s" %
                        (__channel__, item))
            item.url = scrapertoolsV2.find_single_match(
                data, '"go_to":"(.*?)"')
            item.url = item.url.replace("\\", "")
            # Load the target page
            data = httptools.downloadpage(item.url).data
            #logger.info("%s END CLICCA QUI PER APRIRE GLI EPISODI log: %s" % (__channel__, item))
        elif 'clicca qui</span>' in data.lower():
            logger.info("%s begin CLICCA QUI</span> log: %s" %
                        (__channel__, item))
            item.url = scrapertoolsV2.find_single_match(
                data, '<h2 style="text-align: center;"><a href="(.*?)">')
            data = httptools.downloadpage(item.url).data
            #logger.info("%s end CLICCA QUI</span> log: %s" % (__channel__, item))
        #=========
        data = scrapertoolsV2.decodeHtmlentities(data)
        bloque = scrapertoolsV2.find_single_match(
            data, '<div class="su-accordion">(.*?)<div class="clear"></div>')
        patron = '<span class="su-spoiler-icon"></span>(.*?)</div>'
        matches = scrapertoolsV2.find_multiple_matches(bloque, patron)
        # Count seasons per language
        for scrapedseason in matches:
            #logger.info("%s scrapedseason log: %s" % (__channel__, scrapedseason))
            if "(SUB ITA)" in scrapedseason.upper():
                lang = "SUB ITA"
                lang_season['SUB ITA'] += 1
            else:
                lang = "ITA"
                lang_season['ITA'] += 1
            #logger.info("%s lang_dict log: %s" % (__channel__, lang_season))
        # One re-entrant item per language that has at least one season
        for lang in sorted(lang_season):
            if lang_season[lang] > 0:
                itemlist.append(
                    Item(
                        channel=item.channel,
                        action="episodios",
                        #contentType = "episode",
                        contentSerieName=item.title,
                        title='%s (%s)' % (item.title, lang),
                        url=item.url,
                        fulltitle=item.title,
                        data=data,
                        lang=lang,
                        show=item.show,
                        folder=True,
                    ))
        # Posters/plot etc. from tmdb; having the year improves the lookup
        tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True,
                                     idioma_busqueda='it')
        return itemlist
    else:
        # Second phase: the actual season episodes
        html = item.data
        logger.info("%s else log: [%s]" % (__channel__, item))
        if item.lang == 'SUB ITA':
            # Escape parentheses: item.lang is interpolated into a regex below
            item.lang = '\(SUB ITA\)'
        logger.info("%s item.lang log: %s" % (__channel__, item.lang))
        bloque = scrapertoolsV2.find_single_match(
            html, '<div class="su-accordion">(.*?)<div class="clear"></div>')
        # Read all seasons of the selected language
        patron = '<span class="su-spoiler-icon"></span>.*?' + item.lang + '</div>(.*?)</div>'
        #logger.info("%s patron log: %s" % (__channel__, patron))
        matches = scrapertoolsV2.find_multiple_matches(bloque, patron)
        for scrapedseason in matches:
            #logger.info("%s scrapedseason log: %s" % (__channel__, scrapedseason))
            scrapedseason = scrapedseason.replace('<strong>',
                                                  '').replace('</strong>', '')
            # season - episode - title - link group
            patron = '(\d+)×(\d+)(.*?)<(.*?)<br />'
            matches = scrapertoolsV2.find_multiple_matches(
                scrapedseason, patron)
            for scrapedseason, scrapedpuntata, scrapedtitolo, scrapedgroupurl in matches:
                #logger.info("%s final log: %s" % (__channel__, patron))
                scrapedtitolo = scrapedtitolo.replace('–', '')
                itemlist.append(
                    Item(
                        channel=item.channel,
                        action="findvideos",
                        contentType="episode",
                        #contentSerieName = item.contentSerieName,
                        contentTitle=scrapedtitolo,
                        title='%sx%s %s' % (scrapedseason, scrapedpuntata,
                                            scrapedtitolo),
                        url=scrapedgroupurl,
                        fulltitle=item.fulltitle,
                        #show = item.show,
                        #folder = True,
                    ))
        logger.info("%s itemlist log: %s" % (__channel__, itemlist))
        # Option "Add this series to the KODI library"
        if item.extra != "library":
            if config.get_videolibrary_support(
            ) and len(itemlist) > 0 and item.extra != 'findvideos':
                itemlist.append(
                    Item(channel=item.channel,
                         title="%s" % config.get_localized_string(30161),
                         text_color="green",
                         extra="episodios",
                         action="add_serie_to_library",
                         url=item.url,
                         thumbnail=get_thumb('videolibrary', auto=True),
                         contentTitle=item.contentSerieName,
                         lang=item.lang,
                         show=item.show,
                         data=html  #, infoLabels = item.infoLabels
                         ))
        return itemlist
def lista(item):
    """List titles from a Blogger-style grid, 19 at a time.

    Year and quality labels are not inline: they are encoded as CSS
    classes whose ``:after {content:'...'}`` rules live in the page's
    style block, so both are resolved against `css_data`.
    """
    logger.info()
    next = True
    itemlist = []
    data = httptools.downloadpage(item.url).data
    data = re.sub(r"\n|\r|\t|\s{2}| ", "", data)
    # Keep the inline stylesheet: year/quality texts are CSS :after content
    css_data = scrapertools.find_single_match(
        data, "<style id='page-skin-1' type='text/css'>(.*?)</style>")
    data = scrapertools.find_single_match(
        data, "itemprop='headline'>.*?</h2>.*?</ul>")
    patron = '<span class="([^"]+)">.*?<figure class="poster-bg">(.*?)<img src="([^"]+)" />'
    patron += '(.*?)</figure><h6>([^<]+)</h6><a href="([^"]+)"></a>'
    matches = scrapertools.find_multiple_matches(data, patron)
    # Window of up to 19 results starting at item.first
    first = int(item.first)
    last = first + 19
    if last > len(matches):
        last = len(matches)
        next = False
    for scrapedtype, scrapedyear, scrapedthumbnail, scrapedquality, scrapedtitle, scrapedurl in matches[
            first:last]:
        year = scrapertools.find_single_match(scrapedyear,
                                              '<span>(\d{4})</span>')
        if not year:
            # Year hidden in a CSS class -> look up its :after content
            class_year = scrapertools.find_single_match(
                scrapedyear, 'class="([^\"]+)"')
            year = scrapertools.find_single_match(
                css_data,
                "\." + class_year + ":after {content:'(\d{4})';}")
        if not year:
            # Last resort: the page headline year
            year = scrapertools.find_single_match(
                data, "headline'>(\d{4})</h2>")
        qual = ""
        if scrapedquality:
            # Each quality flag is an <i> whose class maps to CSS content
            patron_qualities = '<i class="([^"]+)"></i>'
            qualities = scrapertools.find_multiple_matches(
                scrapedquality, patron_qualities)
            for quality in qualities:
                patron_desc = "\." + quality + ":after {content:'([^\']+)';}"
                quality_desc = scrapertools.find_single_match(
                    css_data, patron_desc)
                qual = qual + "[" + quality_desc + "] "
        title = "%s [%s] %s" % (scrapedtitle, year, qual)
        new_item = Item(channel=item.channel, title=title,
                        url=host + scrapedurl, thumbnail=scrapedthumbnail,
                        type=scrapedtype, infoLabels={'year': year})
        # "sr" entries are series, everything else is a movie
        if scrapedtype.strip() == 'sr':
            new_item.contentSerieName = scrapedtitle
            new_item.action = 'episodios'
        else:
            new_item.contentTitle = scrapedtitle
            new_item.action = 'findvideos'
        if scrapedtype == item.type or item.type == 'cat':
            itemlist.append(new_item)
    tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
    # pagination: same url, next window
    url_next_page = item.url
    first = last
    if next:
        itemlist.append(
            item.clone(title="Siguiente >>", url=url_next_page,
                       action='lista', first=first))
    return itemlist
def episodesxseason(item):
    """List the episodes of a single season.

    Cuts the season's block out of the page and emits one item per
    episode (number, title, languages, urls, thumb, plot). With
    `direct_play` enabled, the url of the preferred language
    (es > es_la > vose) is resolved immediately and played on openload.

    Fixes: each episode now gets its OWN infoLabels dict — the original
    mutated the single shared item.infoLabels (and then pointlessly
    rebound the local to the same object), so every appended Item carried
    the last episode's number.
    """
    logger.info()
    itemlist = []
    season = item.contentSeasonNumber
    action = "findvideos"
    server = ''
    data = get_source(item.url, ctype=item.tmod)
    data = scrapertools.find_single_match(
        data,
        'Temporada %s(.*?)<div class="clear"></div></div></div>' % season)
    patron = 'data-numcap="(\d+)".*?data-titulo="([^"]+)".*?'  # episode, title
    patron += 'data-idiomas="([^"]+)" (.*?)><i.*?'  # langs, url-langs
    patron += '<img src="([^"]+)".*?<p>(.*?)</p>'  # thumb, plot
    matches = re.compile(patron, re.DOTALL).findall(data)
    for scrapedep, title, scrapedlangs, scrapedurls, thumb, plot in matches:
        language = ""
        contentEpisodeNumber = scrapedep
        if not ',' in scrapedlangs:
            # Single-language episode
            language = " (%s)" % IDIOMAS.get(scrapedlangs, scrapedlangs)
            if direct_play:
                scrapedurls = scrapertools.find_single_match(
                    scrapedurls, 'data-url-%s="([^"]+)"' % scrapedlangs)
                action = 'play'
                server = 'openload'
                if item.extra:
                    action = 'findvideos'
        else:
            # Several languages: label them all, choose es > es_la > vose
            listl = scrapedlangs.split(",")
            prio = ""
            for lang in listl:
                lang = lang.strip()
                language += " (%s)" % IDIOMAS.get(lang, lang)
                if lang == 'es':
                    prio = lang
                elif lang == 'es_la' and not prio:
                    prio = 'es-la'
                elif lang == 'vose' and not prio:
                    prio = lang
            if direct_play:
                scrapedurls = scrapertools.find_single_match(
                    scrapedurls, 'data-url-%s="([^"]+)"' % prio)
                action = 'play'
                server = 'openload'
                if item.extra:
                    action = 'findvideos'
        # Per-episode copy: mutating item.infoLabels in place made every
        # appended item share the last episode's number
        infoLabels = dict(item.infoLabels)
        infoLabels['episode'] = scrapedep
        title += '[COLOR springgreen]%s[/COLOR]' % language
        itemlist.append(
            Item(channel=item.channel, action=action, title=title,
                 url=scrapedurls, plot=plot, thumbnail=thumb,
                 contentEpisodeNumber=contentEpisodeNumber,
                 infoLabels=infoLabels, server=server,
                 direct_play=direct_play))
    tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
    return itemlist