def novedades(params,url,category):
    """Build the listing of entries scraped from *url*, plus next-page links.

    Args:
        params: Not used inside this function.
        url: Address of the page to download and scrape.
        category: Value forwarded to ``xbmctools.addnewfolder`` and set as
            the plugin category of the directory.
    """
    logger.info("[pelispekes.py] novedades")

    # ------------------------------------------------------
    # Download the page
    # ------------------------------------------------------
    data = scrapertools.cachePage(url)

    # ------------------------------------------------------
    # Extract the movies; every hit is a (url, cover, title, plot) tuple
    # ------------------------------------------------------
    patron  = '<div class="post-content clearfix">.+?'
    patron += '<a href="([^"]+)">'       # Url
    patron += '<img src="([^"]+)".+?'    # Cover
    patron += 'alt="([^"]+)"/></a>.+?'   # Title
    patron += '<p>([^<]+)</p>'           # Plot
    matches = re.compile(patron,re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)

    for match in matches:
        scrapedtitle = match[2]
        scrapedurl = match[0]
        # Spaces in the cover address are percent-encoded
        scrapedthumbnail = match[1].replace(" ","%20")
        scrapedplot = match[3]
        if DEBUG:
            logger.info("title=["+scrapedtitle+"], url=["+scrapedurl+"], thumbnail=["+scrapedthumbnail+"]")

        # Add to the XBMC listing
        xbmctools.addnewfolder( CHANNELNAME , "listmirrors" , category , scrapedtitle , scrapedurl , scrapedthumbnail, scrapedplot )

    # ------------------------------------------------------
    # Extract the next page: the link that follows the "current" page marker
    # ------------------------------------------------------
    patron = "class='current'>[^<]+</span><a href='([^']+)'"
    matches = re.compile(patron,re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)

    for match in matches:
        scrapedtitle = "!Pagina siguiente"
        scrapedurl = match
        scrapedthumbnail = ""
        if DEBUG:
            logger.info("title=["+scrapedtitle+"], url=["+scrapedurl+"], thumbnail=["+scrapedthumbnail+"]")

        # Add to the XBMC listing. "novedades" is this function's own name;
        # presumably it is the action run when the entry is opened (confirm
        # against xbmctools.addthumbnailfolder).
        xbmctools.addthumbnailfolder( CHANNELNAME , scrapedtitle , scrapedurl , scrapedthumbnail, "novedades" )

    # Label (top-right)...
    xbmcplugin.setPluginCategory( handle=pluginhandle, category=category )
    # Disable sorting...
    xbmcplugin.addSortMethod( handle=pluginhandle, sortMethod=xbmcplugin.SORT_METHOD_NONE )
    # End of directory...
    xbmcplugin.endOfDirectory( handle=pluginhandle, succeeded=True )
def listcat(params,url,category):
    """List the entries of a cineblog01 page and link to the following page.

    Args:
        params: Not used inside this function.
        url: Page to scrape; an empty string selects
            "http://cineblog01.com/film/".
        category: Value forwarded to ``xbmctools.addnewfolder`` and set as
            the plugin category of the directory.
    """
    xbmc.output("[cineblog01.py] listcat")

    if url =="":
        url = "http://cineblog01.com/film/"

    # Download the page
    data = scrapertools.cachePage(url)

    # Extract the entries (folders); every hit is (thumbnail, url, title)
    patronvideos  = '<div id="covershot".*?<a.*?<img src="(.*?)".*?'
    patronvideos += '<div id="post-title"><a href="(.*?)".*?'
    patronvideos += '<h3>(.*?)</h3>'
    matches = re.compile(patronvideos,re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for match in matches:
        # Title: decode the UTF-8 bytes, run them through unescape(), then
        # re-encode as latin1, dropping characters that cannot be encoded
        UnicodeDecodedTitle = match[2].decode("utf-8")
        unescapedTitle = unescape (UnicodeDecodedTitle)
        scrapedtitle = unescapedTitle.encode("latin1","ignore")

        # URL and thumbnail are resolved against the page address
        scrapedurl = urlparse.urljoin(url,match[1])
        scrapedthumbnail = urlparse.urljoin(url,match[0])

        # Debugging
        if (DEBUG):
            xbmc.output("scrapedtitle="+scrapedtitle)
            xbmc.output("scrapedurl="+scrapedurl)
            xbmc.output("scrapedthumbnail="+scrapedthumbnail)

        # Add to the XBMC listing
        xbmctools.addthumbnailfolder( CHANNELNAME , scrapedtitle , scrapedurl , scrapedthumbnail, "detail" )

    # Add the link to the next page, when the page carries an "Avanti >" link
    patronvideos  = '<a href="(http://www.cineblog01.com/film/category/[a-z]+'
    patronvideos += '/page/[0-9]+)">Avanti >'
    matches = re.compile (patronvideos, re.DOTALL).findall (data)
    scrapertools.printMatches (matches)

    if matches:
        scrapedtitle = "Pagina seguente"
        scrapedurl = matches[0]
        scrapedthumbnail = ""
        scrapedplot = ""
        if (DEBUG):
            xbmc.output("scrapedtitle="+scrapedtitle)
            xbmc.output("scrapedurl="+scrapedurl)
            xbmc.output("scrapedthumbnail="+scrapedthumbnail)
        xbmctools.addnewfolder( CHANNELNAME , "listcat" , category , scrapedtitle , scrapedurl , scrapedthumbnail, scrapedplot )

    # Label (top-right)...
    xbmcplugin.setPluginCategory( handle=pluginhandle, category=category )
    # Disable sorting...
    xbmcplugin.addSortMethod( handle=pluginhandle, sortMethod=xbmcplugin.SORT_METHOD_NONE )
    # End of directory...
    xbmcplugin.endOfDirectory( handle=pluginhandle, succeeded=True )
def movielist(params,url,category): # movies without a details card (genre listings)
    """List the movies linked from *url*, optionally dumping them to films.tab.

    When the module-level ``Generate`` flag is set, one tab-separated line per
    movie is written to ``config.DATA_PATH + '/films.tab'``. Otherwise, when
    ``Notas`` is set, the score returned by ``anotador.getscore`` is appended
    to each title.

    Args:
        params: Not used inside this function.
        url: Address of the listing page to download.
        category: Value set as the plugin category of the directory.
    """
    logger.info("[divxonline.py] movielist")

    # Download the page
    data = scrapertools.cachePage(url)
    # stepinto() is defined elsewhere; presumably it follows the pager links
    # matched by the pattern and returns the combined data - confirm.
    data = stepinto(url,data,'Ver página:(.*?)</p>')

    # Extract the entries (folders); every hit is (url, title markup)
    patronvideos = '<li><h2><a href="([^"]+?)">(.*?)</a>'
    matches = re.compile(patronvideos,re.DOTALL).findall(data)

    f = None
    if (Generate):
        f = open(config.DATA_PATH+'/films.tab', 'w') # file used to obtain the scores

    # try/finally guarantees the file is closed even if a step below raises
    try:
        for match in matches:
            # Title
            plaintitle = remove_html_tags(match[1])
            scrapedtitle = plaintitle
            if (not Generate and Notas):
                score = anotador.getscore(plaintitle)
                if (score != ""):
                    scrapedtitle += " " + score

            # URL
            scrapedurl = urlparse.urljoin(url,match[0]) # url of the divxonline card
            scrapedurl = scrapedurl.replace("pelicula","pelicula-divx") # url of the playback page

            # Thumbnail: none for this listing
            scrapedthumbnail = ""

            # Debugging
            if (DEBUG):
                logger.info("scrapedtitle="+scrapedtitle)
                logger.info("scrapedurl="+scrapedurl)
                logger.info("scrapedthumbnail="+scrapedthumbnail)

            if (Generate):
                # Split "Name (year)" into its two parts when a parenthesis is present
                sanio = re.search(r'(.*?)\((.*?)\)',scrapedtitle)
                if (sanio): # there is a year
                    fareg = sanio.group(1) + "\t" + sanio.group(2) + "\t" + scrapedtitle
                else:
                    fareg = scrapedtitle + "\t\t" + scrapedtitle
                f.write(fareg+"\n")

            # Add to the XBMC listing
            xbmctools.addthumbnailfolder( CHANNELNAME , scrapedtitle , scrapedurl , scrapedthumbnail, "listmirrors" )

        # Label (top-right)...
        xbmcplugin.setPluginCategory( handle=int( sys.argv[ 1 ] ), category=category )
        # Disable sorting...
        xbmcplugin.addSortMethod( handle=int( sys.argv[ 1 ] ), sortMethod=xbmcplugin.SORT_METHOD_NONE )
        # End of directory...
        xbmcplugin.endOfDirectory( handle=int( sys.argv[ 1 ] ), succeeded=True )
    finally:
        if f is not None:
            f.close()
def toplist(params,url,category):
    """Show the ranking table found at *url* as a list of folders.

    Args:
        params: Not used inside this function.
        url: Address of the ranking page; the "recent" ranking address
            selects its own extraction pattern.
        category: Value set as the plugin category of the directory.
    """
    logger.info("[documaniatv.py] toplist")

    # Fetch the page
    data = scrapertools.cachePage(url)

    logger.info("[documaniatv.py] toplist "+url)

    # Pick the extraction pattern; both variants capture six fields per row
    if url== "http://www.documaniatv.com/topvideos.html?do=recent":
        patron = (
            '<tr>[^<]+<td[^>]+>([^<]+)</td>[^<]+<td'
            '[^>]+><a href="([^"]+)">'
            '<img src="([^"]+)" alt=[^>]+>'
            '</a></td>[^<]+<td[^>]+>([^<]+)</td>[^<]+<td[^>]+>'
            '<a href="[^"]+">([^<]+)</a>'
            '</td>[^<]+<td[^>]+>([^<]+)</td>'
        )
    else:
        patron = (
            '<tr>[^>]+>([^<]+)</td>'
            '[^>]+><a href="([^"]+)">'
            '<img src="([^"]+)"'
            ' alt="([^"]+)"[^>]+>'
            '</a></td>[^>]+>([^<]+)</td>[^>]+>'
            '<a href="[^"]+">[^>]+></td>[^>]+>([^<]+)</td>'
        )

    filas = re.compile(patron,re.DOTALL).findall(data)
    scrapertools.printMatches(filas)

    for orden, enlace, imagen, nombre, detalle, vistas in filas:
        titulo = acentos(nombre)
        resumen = detalle+" - " + "Vistas : "+vistas+" veces"
        etiqueta = orden+") "+titulo+" - "+resumen
        xbmctools.addthumbnailfolder( CHANNELNAME , etiqueta, enlace , imagen, "detail" )

    handle = int( sys.argv[ 1 ] )
    xbmcplugin.setContent(handle,"movies")
    # Label (top-right)...
    xbmcplugin.setPluginCategory( handle=handle, category=category )
    # Disable sorting...
    xbmcplugin.addSortMethod( handle=handle, sortMethod=xbmcplugin.SORT_METHOD_NONE )
    # End of directory...
    xbmcplugin.endOfDirectory( handle=handle, succeeded=True )
def movielist(params,url,category):
    """List the linked images found at *url* as folders.

    Args:
        params: Not used inside this function.
        url: Address of the page to download; links and images are resolved
            against it.
        category: Value set as the plugin category of the directory.
    """
    logger.info("[pintadibujos.py] movielist")

    # Download the page
    data = scrapertools.cachePage(url)

    # Extract the entries (folders); every hit is (link, image, optional ALT attribute)
    patronvideos = '<td><a href="([^"]+)" target="_blank"><img SRC="([^"]+)"(| ALT=".*?") BORDER'
    matches = re.compile(patronvideos,re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for match in matches:
        # Title
        if match[2] == "":
            # No ALT attribute: derive the title from the image address by
            # cutting its last four characters and turning "_" into spaces
            longitud = len(match[1])
            scrapedtitle = match[1][:longitud-4]
            scrapedtitle = scrapedtitle.replace("_"," ")
        else:
            # Keep only the text of the ALT attribute
            scrapedtitle = match[2].replace(" ALT=","")
            scrapedtitle = scrapedtitle.replace('"','')

        # URL
        scrapedurl = urlparse.urljoin(url,match[0])

        # Thumbnail
        scrapedthumbnail = urlparse.urljoin(url,match[1])

        # Debugging
        if (DEBUG):
            logger.info("scrapedtitle="+scrapedtitle)
            logger.info("scrapedurl="+scrapedurl)
            logger.info("scrapedthumbnail="+scrapedthumbnail)

        # Add to the XBMC listing
        xbmctools.addthumbnailfolder( CHANNELNAME , scrapedtitle , scrapedurl , scrapedthumbnail, "detail" )

    # Label (top-right)...
    xbmcplugin.setPluginCategory( handle=pluginhandle, category=category )
    # Disable sorting...
    xbmcplugin.addSortMethod( handle=pluginhandle, sortMethod=xbmcplugin.SORT_METHOD_NONE )
    # End of directory...
    xbmcplugin.endOfDirectory( handle=pluginhandle, succeeded=True )
def paginasiguientes(patronvideos,data,category,cat):
    """Add the "next page" entries found in *data* and close the directory.

    Args:
        patronvideos: Not used inside this function.
        data: Page contents to search for next-page links.
        category: Value set as the plugin category of the directory.
        cat: Listing kind ('tipo', 'nuevo', 'tag' or 'busca'); any other
            value adds no entry.
    """
    # (kind, address prefix, last argument passed to addthumbnailfolder)
    destinos = (
        ('tipo', "http://www.documaniatv.com/", "listatipodocumental"),
        ('nuevo', "http://www.documaniatv.com/", "documentalesnuevoslist"),
        ('tag', "http://www.documaniatv.com", "tagdocumentaleslist"),
        ('busca', "http://www.documaniatv.com/", "searchresults"),
    )

    # ------------------------------------------------------
    # Extract the next page
    # ------------------------------------------------------
    enlaces = re.compile('</span><a href="([^"]+)"',re.DOTALL).findall(data)

    for enlace in enlaces:
        icono = os.path.join(IMAGES_PATH, 'next.png')
        for clave, prefijo, accion in destinos:
            if cat == clave:
                xbmctools.addthumbnailfolder( CHANNELNAME , "Pagina siguiente" , prefijo + enlace , icono, accion )
                break

    xbmcplugin.setContent(int( sys.argv[ 1 ] ),"movies")
    # Label (top-right)...
    xbmcplugin.setPluginCategory( handle=pluginhandle, category=category )
    # Disable sorting...
    xbmcplugin.addSortMethod( handle=pluginhandle, sortMethod=xbmcplugin.SORT_METHOD_NONE )
    # End of directory...
    xbmcplugin.endOfDirectory( handle=pluginhandle, succeeded=True )
def paginasiguientes(patronvideos,data,category,cat):
    """Add the "next page" entries found in *data* and close the directory.

    Args:
        patronvideos: Not used inside this function.
        data: Page contents to search for next-page links.
        category: Value set as the plugin category of the directory.
        cat: Listing kind ('tipo', 'nuevo', 'tag' or 'busca'); any other
            value adds no entry.
    """
    # (kind, address prefix, last argument passed to addthumbnailfolder)
    # NOTE(review): the 'tag' prefix and its "tagdocumentaleslist" target
    # differ from the other sonolatino entries - looks copied from another
    # channel; confirm before relying on it.
    destinos = (
        ('tipo', "http://www.sonolatino.com/", "listatipoVideo"),
        ('nuevo', "http://www.sonolatino.com/", "Videosnuevoslist"),
        ('tag', "http://www.sonolatino.com.es/series", "tagdocumentaleslist"),
        ('busca', "http://www.sonolatino.com/", "searchresults"),
    )

    # ------------------------------------------------------
    # Extract the next page
    # ------------------------------------------------------
    enlaces = re.compile('</span><a href="([^"]+)"',re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(enlaces)

    for enlace in enlaces:
        titulo = "Pagina siguiente"
        direccion = "http://www.sonolatino.com/" + enlace
        icono = os.path.join(IMAGES_PATH, 'next.png')

        # Debugging
        if DEBUG:
            logger.info("scrapedtitle="+titulo)
            logger.info("scrapedurl="+direccion)
            logger.info("scrapedthumbnail="+icono)

        # Add to the XBMC listing
        for clave, prefijo, accion in destinos:
            if cat == clave:
                xbmctools.addthumbnailfolder( CHANNELNAME , titulo , prefijo + enlace , icono, accion )
                break

    # Label (top-right)...
    xbmcplugin.setPluginCategory( handle=pluginhandle, category=category )
    # Disable sorting...
    xbmcplugin.addSortMethod( handle=pluginhandle, sortMethod=xbmcplugin.SORT_METHOD_NONE )
    # End of directory...
    xbmcplugin.endOfDirectory( handle=pluginhandle, succeeded=True )
def listcat(params, url, category):
    """List the entries of a cineblog01 page and link to the following page.

    Args:
        params: Not used inside this function.
        url: Page to scrape; an empty string selects
            "http://cineblog01.com/film/".
        category: Value forwarded to ``xbmctools.addnewfolder`` and set as
            the plugin category of the directory.
    """
    xbmc.output("[cineblog01.py] listcat")

    if url == "":
        url = "http://cineblog01.com/film/"

    # Download the page
    data = scrapertools.cachePage(url)

    # Extract the entries (folders); every hit is (thumbnail, url, title)
    patronvideos = '<div id="covershot".*?<a.*?<img src="(.*?)".*?'
    patronvideos += '<div id="post-title"><a href="(.*?)".*?'
    patronvideos += '<h3>(.*?)</h3>'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for match in matches:
        # Title: decode the UTF-8 bytes, run them through unescape(), then
        # re-encode as latin1, dropping characters that cannot be encoded
        UnicodeDecodedTitle = match[2].decode("utf-8")
        unescapedTitle = unescape(UnicodeDecodedTitle)
        scrapedtitle = unescapedTitle.encode("latin1", "ignore")

        # URL and thumbnail are resolved against the page address
        scrapedurl = urlparse.urljoin(url, match[1])
        scrapedthumbnail = urlparse.urljoin(url, match[0])

        # Debugging
        if (DEBUG):
            xbmc.output("scrapedtitle=" + scrapedtitle)
            xbmc.output("scrapedurl=" + scrapedurl)
            xbmc.output("scrapedthumbnail=" + scrapedthumbnail)

        # Add to the XBMC listing
        xbmctools.addthumbnailfolder(CHANNELNAME, scrapedtitle, scrapedurl, scrapedthumbnail, "detail")

    # Add the link to the next page, when the page carries an "Avanti >" link
    patronvideos = '<a href="(http://www.cineblog01.com/film/category/[a-z]+'
    patronvideos += '/page/[0-9]+)">Avanti >'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    if matches:
        scrapedtitle = "Pagina seguente"
        scrapedurl = matches[0]
        scrapedthumbnail = ""
        scrapedplot = ""
        if (DEBUG):
            xbmc.output("scrapedtitle=" + scrapedtitle)
            xbmc.output("scrapedurl=" + scrapedurl)
            xbmc.output("scrapedthumbnail=" + scrapedthumbnail)
        xbmctools.addnewfolder(CHANNELNAME, "listcat", category, scrapedtitle, scrapedurl, scrapedthumbnail, scrapedplot)

    # Label (top-right)...
    xbmcplugin.setPluginCategory(handle=pluginhandle, category=category)
    # Disable sorting...
    xbmcplugin.addSortMethod(handle=pluginhandle, sortMethod=xbmcplugin.SORT_METHOD_NONE)
    # End of directory...
    xbmcplugin.endOfDirectory(handle=pluginhandle, succeeded=True)
def toplist(params,url,category):
    """Show the ranking table found at *url* as a list of folders.

    Args:
        params: Not used inside this function.
        url: Address of the ranking page; the "recent" ranking address
            selects its own extraction pattern.
        category: Value set as the plugin category of the directory.
    """
    logger.info("[sonolatino.py] toplist")

    # Fetch the page
    data = scrapertools.cachePage(url)

    logger.info("[sonolatino.py] toplist "+url)

    # Pick the extraction pattern; both variants capture six fields per row
    if url== "http://www.sonolatino.com/topvideos.html?do=recent":
        patron = (
            '<tr>[^<]+<td[^>]+>([^<]+)</td>[^<]+<td'
            '[^>]+><a href="([^"]+)">'
            '<img src="([^"]+)" alt=[^>]+>'
            '</a></td>[^<]+<td[^>]+>([^<]+)</td>[^<]+<td[^>]+>'
            '<a href="[^"]+">([^<]+)</a>'
            '</td>[^<]+<td[^>]+>([^<]+)</td>'
        )
    else:
        patron = (
            '<tr>[^>]+>([^<]+)</td>'
            '[^>]+><a href="([^"]+)">'
            '<img src="([^"]+)"'
            ' alt="([^"]+)"[^>]+>'
            '</a></td>[^>]+>([^<]+)</td>[^>]+>'
            '<a href="[^"]+">[^>]+></td>[^>]+>([^<]+)</td>'
        )

    filas = re.compile(patron,re.DOTALL).findall(data)
    scrapertools.printMatches(filas)

    for orden, enlace, imagen, titulo, detalle, vistas in filas:
        resumen = detalle+" - " + "Vistas : "+vistas+" veces"

        # Debugging
        if (DEBUG):
            logger.info("scrapedtitle="+titulo)
            logger.info("scrapedurl="+enlace)
            logger.info("scrapedthumbnail="+imagen)

        # Add to the XBMC listing
        etiqueta = orden+") "+titulo+" - "+resumen
        xbmctools.addthumbnailfolder( CHANNELNAME , etiqueta, enlace , imagen, "detail" )

    handle = int( sys.argv[ 1 ] )
    # Label (top-right)...
    xbmcplugin.setPluginCategory( handle=handle, category=category )
    # Disable sorting...
    xbmcplugin.addSortMethod( handle=handle, sortMethod=xbmcplugin.SORT_METHOD_NONE )
    # End of directory...
    xbmcplugin.endOfDirectory( handle=handle, succeeded=True )
def pelisconfichaB(params,url,category): # formatted cards in the alphabetical listings
    """List the movie cards of *url* and of the pages that follow it.

    The page number is taken from the last digits before ".html" in *url*;
    N pages in total are downloaded and a "Siguiente" entry points at the
    page after them. When *url* carries no page number only *url* itself is
    listed and no "Siguiente" entry is added.

    Args:
        params: Not used inside this function.
        url: Address of the first page to download.
        category: Value set as the plugin category of the directory.
    """
    logger.info("[divxonline.py] pelisconfichaB")
    t0 = time.time()

    # Download the page
    data = scrapertools.cachePage(url)

    # Load N pages in total (the one above plus the N-1 that follow)
    N = 10
    nexturl = ""
    paginacion = re.search(r'(.*?)(\d+?)(\.html)',url)
    if paginacion:
        pag = int(paginacion.group(2))
        for i in range(pag+1,pag+N):
            newurl = paginacion.group(1) + str(i) + paginacion.group(3)
            data += scrapertools.cachePage(newurl)
        nexturl = paginacion.group(1) + str(pag+N) + paginacion.group(3)

    # Extract the entries, skipping everything before the listing heading.
    # A missing heading leaves the data untouched: slicing from find()'s -1
    # would keep only the last character.
    inicio = data.find('Películas online por orden alfabético')
    if inicio != -1:
        data = data[inicio:]
    if DEBUG:
        logger.info(data)

    patronvideos  = '<td class="contenido"><img src="(.*?)"' # poster
    patronvideos += '.*?alt="(.*?)"' # title
    patronvideos += '.*?<b>Sinopsis.*?<a href="(.*?)"' # url
    matches = re.compile(patronvideos,re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for match in matches:
        # Title
        scrapedtitle = removeacutes(match[1])
        if (not Generate and Notas):
            score = anotador.getscore(match[1])
            if (score != ""):
                scrapedtitle += " " + score

        # URL
        scrapedurl = urlparse.urljoin(url,match[2]) # url of the divxonline card
        scrapedurl = scrapedurl.replace("pelicula","pelicula-divx") # url of the playback page

        # Thumbnail: only when LoadThumbs is enabled
        scrapedthumbnail = ""
        if LoadThumbs:
            scrapedthumbnail = match[0]

        # Debugging
        if (DEBUG):
            logger.info("scrapedtitle="+scrapedtitle)
            logger.info("scrapedurl="+scrapedurl)
            logger.info("scrapedthumbnail="+scrapedthumbnail)

        # Add to the XBMC listing
        xbmctools.addthumbnailfolder( CHANNELNAME , scrapedtitle , scrapedurl , scrapedthumbnail, "listmirrors" )

    # Add the next page
    if nexturl:
        xbmctools.addnewfolder( CHANNELNAME , "pelisconfichaB" , CHANNELNAME , "Siguiente" , nexturl , "", "" )

    # Label (top-right)...
    xbmcplugin.setPluginCategory( handle=int( sys.argv[ 1 ] ), category=category )
    # Disable sorting...
    xbmcplugin.addSortMethod( handle=int( sys.argv[ 1 ] ), sortMethod=xbmcplugin.SORT_METHOD_NONE )
    # End of directory...
    xbmcplugin.endOfDirectory( handle=int( sys.argv[ 1 ] ), succeeded=True )

    if DEBUG:
        logger.info("Tiempo de ejecución = "+str(time.time()-t0))
def pelisconficha(params,url,category): # cards in the per-year and premiere listings
    """List the movie cards found at *url* and link to the following page.

    The "Siguiente" entry points at *url* with the last number before
    ".html" increased by one; it is omitted when *url* has no such number.

    Args:
        params: Not used inside this function.
        url: Address of the page to download.
        category: Value set as the plugin category of the directory.
    """
    logger.info("[divxonline.py] pelisconficha")

    # Download the page
    data = scrapertools.cachePage(url)

    # Skip everything before the per-year heading when the page has one
    inicio = data.find('Películas del año')
    if inicio > 0:
        data = data[inicio:]
        # Every other logger call in this file receives text, so the
        # position is converted before logging
        logger.info(str(data.find('<!-- MENU IZQUIERDO -->')))

    # Extract the entries; every hit is (link, poster, title)
    patronvideos  = '<td class="contenido"><a href="(.*?)">' # link
    patronvideos += '<img src="(.*?)"' # poster
    patronvideos += '.*?title="(.*?)"' # title
    matches = re.compile(patronvideos,re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for match in matches:
        # Title
        scrapedtitle = removeacutes(match[2])
        if (not Generate and Notas):
            score = anotador.getscore(match[2])
            if (score != ""):
                scrapedtitle += " " + score

        # URL
        scrapedurl = urlparse.urljoin(url,match[0]) # url of the divxonline card
        scrapedurl = scrapedurl.replace("pelicula","pelicula-divx") # url of the playback page

        # Thumbnail: only when LoadThumbs is enabled
        scrapedthumbnail = ""
        if LoadThumbs:
            scrapedthumbnail = match[1]

        # Debugging
        if (DEBUG):
            logger.info("scrapedtitle="+scrapedtitle)
            logger.info("scrapedurl="+scrapedurl)
            logger.info("scrapedthumbnail="+scrapedthumbnail)

        # Add to the XBMC listing
        xbmctools.addthumbnailfolder( CHANNELNAME , scrapedtitle , scrapedurl , scrapedthumbnail, "listmirrors" )

    # Add the next page
    paginacion = re.search(r'(.*?)(\d+?)(\.html)',url)
    logger.info("url="+url)
    if paginacion:
        pag = paginacion.group(2)
        newpag = paginacion.group(1) + str(int(pag)+1) + paginacion.group(3)
        logger.info("newpag="+newpag)
        xbmctools.addnewfolder( CHANNELNAME , "pelisconficha" , CHANNELNAME , "Siguiente" , newpag , "", "" )

    # Label (top-right)...
    xbmcplugin.setPluginCategory( handle=int( sys.argv[ 1 ] ), category=category )
    # Disable sorting...
    xbmcplugin.addSortMethod( handle=int( sys.argv[ 1 ] ), sortMethod=xbmcplugin.SORT_METHOD_NONE )
    # End of directory...
    xbmcplugin.endOfDirectory( handle=int( sys.argv[ 1 ] ), succeeded=True )