def listserie(item):
    logger.info("[cineblog01.py] listserie")
    itemlist = []

    # Download the page
    data = scrapertools.cache_page(item.url)
    if DEBUG: logger.info(data)

    # Extract the entries (folders)
    patronvideos = '<div id="covershot"><a[^<]+<p[^<]+<img.*?src="([^"]+)".*?'
    patronvideos += '<div id="post-title"><a href="([^"]+)"><h3>([^<]+)</h3></a></div>[^<]+'
    patronvideos += '<div id="description"><p>(.*?)</p>'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for match in matches:
        scrapedtitle = scrapertools.unescape(match[2])
        scrapedurl = urlparse.urljoin(item.url, match[1])
        scrapedthumbnail = urlparse.urljoin(item.url, match[0])
        scrapedplot = scrapertools.unescape(match[3])
        if DEBUG: logger.info("title=["+scrapedtitle+"], url=["+scrapedurl+"], thumbnail=["+scrapedthumbnail+"]")

        # Add to the XBMC listing
        itemlist.append(
            Item(channel=item.channel, action="findvideos", title=scrapedtitle, url=scrapedurl,
                 thumbnail=scrapedthumbnail, plot=scrapedplot))

    # Put the next page mark
    try:
        next_page = scrapertools.get_match(data, "<link rel='next' href='([^']+)'")
        itemlist.append(
            Item(channel=item.channel, action="listserie", title=">> Next page", url=next_page,
                 thumbnail=scrapedthumbnail, plot=scrapedplot))
    except:
        pass

    return itemlist

def listvideos(params, url, category):
    logger.info("[discoverymx.py] listvideos")
    scrapedthumbnail = ""
    scrapedplot = ""

    # Download the page
    data = scrapertools.cachePage(url)
    #logger.info(data)

    # Extract the entries (folders)
    patronvideos = "<h3 class='post-title entry-title'>[^<]+"
    patronvideos += "<a href='([^']+)'>([^<]+)</a>.*?"
    patronvideos += "<div class='post-body entry-content'>(.*?)<div class='post-footer'>"
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for match in matches:
        scrapedtitle = match[1]
        scrapedtitle = re.sub("<[^>]+>", " ", scrapedtitle)
        scrapedtitle = scrapertools.unescape(scrapedtitle)
        scrapedurl = match[0]

        regexp = re.compile(r'src="(http[^"]+)"')
        matchthumb = regexp.search(match[2])
        if matchthumb is not None:
            scrapedthumbnail = matchthumb.group(1)

        matchplot = re.compile('<div align="center">(<img.*?)</span></div>', re.DOTALL).findall(match[2])
        if len(matchplot) > 0:
            scrapedplot = matchplot[0]
            #print matchplot
        else:
            scrapedplot = ""
        scrapedplot = re.sub("<[^>]+>", " ", scrapedplot)
        scrapedplot = scrapertools.unescape(scrapedplot)
        #scrapedplot = scrapedplot.replace("…","")
        if DEBUG: logger.info("title=["+scrapedtitle+"], url=["+scrapedurl+"], thumbnail=["+scrapedthumbnail+"]")

        # Add to the XBMC listing
        xbmctools.addnewfolder(CHANNELNAME, "detail", category, scrapedtitle, scrapedurl, scrapedthumbnail, scrapedplot)

    # Extract the next page mark
    patronvideos = "<a class='blog-pager-older-link' href='([^']+)'"
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    if len(matches) > 0:
        scrapedtitle = "Página siguiente"
        scrapedurl = urlparse.urljoin(url, matches[0])
        scrapedthumbnail = ""
        scrapedplot = ""
        xbmctools.addnewfolder(CHANNELNAME, "listvideos", category, scrapedtitle, scrapedurl, scrapedthumbnail, scrapedplot)

    # Directory properties
    xbmcplugin.setPluginCategory(handle=int(sys.argv[1]), category=category)
    xbmcplugin.addSortMethod(handle=int(sys.argv[1]), sortMethod=xbmcplugin.SORT_METHOD_NONE)
    xbmcplugin.endOfDirectory(handle=int(sys.argv[1]), succeeded=True)

def peliculasrobalo(item):
    logger.info("[cineblog01.py] peliculasrobalo")
    itemlist = []

    if item.url == "":
        item.url = sito

    # Download the page
    data = scrapertools.cache_page(item.url)
    logger.info(data)

    # Extract the entries (folders)
    patronvideos = '<div class="span4".*?<a.*?<p><img src="([^"]+)".*?'
    patronvideos += '<div class="span8">.*?<a href="([^"]+)"> <h1>([^"]+)</h1></a>.*?'
    patronvideos += '<p><strong>.*?</strong>.*?<br />([^"]+)<a href'
    matches = re.compile(patronvideos, re.DOTALL).finditer(data)

    for match in matches:
        scrapedtitle = scrapertools.unescape(match.group(3))
        scrapedurl = urlparse.urljoin(item.url, match.group(2))
        scrapedthumbnail = urlparse.urljoin(item.url, match.group(1))
        scrapedthumbnail = scrapedthumbnail.replace(" ", "%20")
        scrapedplot = scrapertools.unescape(match.group(4))
        scrapedplot = scrapertools.htmlclean(scrapedplot).strip()
        if DEBUG:
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")
        itemlist.append(
            Item(channel=__channel__, action="findvid", title="[COLOR azure]" + scrapedtitle + "[/COLOR]",
                 url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot,
                 viewmode="movie_with_plot", fanart=scrapedthumbnail))

    # Next page mark
    try:
        bloque = scrapertools.get_match(data, "<div id='wp_page_numbers'>(.*?)</div>")
        patronvideos = '<a href="([^"]+)">></a></li>'
        matches = re.compile(patronvideos, re.DOTALL).findall(bloque)
        scrapertools.printMatches(matches)
        if len(matches) > 0:
            scrapedtitle = "[COLOR orange]Successivo>>[/COLOR]"
            scrapedurl = matches[0]
            scrapedthumbnail = ""
            scrapedplot = ""
            if DEBUG:
                logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")
            itemlist.append(
                Item(channel=__channel__, action="peliculasrobalo", title=scrapedtitle, url=scrapedurl,
                     thumbnail="http://2.bp.blogspot.com/-fE9tzwmjaeQ/UcM2apxDtjI/AAAAAAAAeeg/WKSGM2TADLM/s1600/pager+old.png",
                     plot=scrapedplot))
    except:
        pass

    return itemlist

def novita(item):
    logger.info("[cb01anime.py] novita")
    itemlist = []

    # Download the page
    data = scrapertools.anti_cloudflare(item.url, headers)

    # Extract the entries (folders)
    patronvideos = '<div class="span4"> <a.*?<img src="(.*?)".*?'
    patronvideos += '<div class="span8">.*?<a href="(.*?)">.*?'
    patronvideos += '<h1>(.*?)</h1></a>.*?<br />(.*?)<br>.*?'
    matches = re.compile(patronvideos, re.DOTALL).finditer(data)

    for match in matches:
        scrapedthumbnail = match.group(1)
        scrapedurl = match.group(2)
        scrapedtitle = scrapertools.unescape(match.group(3))
        scrapedplot = scrapertools.unescape(match.group(4))
        scrapedplot = scrapertools.decodeHtmlentities(scrapedplot)
        # NOTE: startswith("") is always True, so the first 64 characters are always stripped
        if scrapedplot.startswith(""):
            scrapedplot = scrapedplot[64:]
        if DEBUG:
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")

        ## ------------------------------------------------
        scrapedthumbnail = httptools.get_url_headers(scrapedthumbnail)
        ## ------------------------------------------------

        # Add to the XBMC listing
        itemlist.append(
            Item(channel=__channel__,
                 action="listacompleta" if scrapedtitle == "Lista Alfabetica Completa Anime/Cartoon" else "episodios",
                 fulltitle=scrapedtitle, show=scrapedtitle, title=scrapedtitle, url=scrapedurl,
                 thumbnail=scrapedthumbnail, viewmode="movie_with_plot", plot=scrapedplot))

    # Put the next page mark
    try:
        next_page = scrapertools.get_match(data, "<link rel='next' href='([^']+)'")
        itemlist.append(
            Item(channel=__channel__, action="HomePage", title="[COLOR yellow]Torna Home[/COLOR]", folder=True))
        itemlist.append(
            Item(channel=__channel__, action="novita", title="[COLOR orange]Successivo>>[/COLOR]", url=next_page,
                 thumbnail="http://2.bp.blogspot.com/-fE9tzwmjaeQ/UcM2apxDtjI/AAAAAAAAeeg/WKSGM2TADLM/s1600/pager+old.png"))
    except:
        pass

    return itemlist

def mainlist(item):
    logger.info("[simpsonita.py] mainlist")
    itemlist = []

    # Download the page
    data = scrapertools.cachePage(sito)
    logger.info(data)

    itemlist.append(
        Item(channel=__channel__, action="mainlist", title="[COLOR green]Ricarica...[/COLOR]"))

    patronvideos = '<div class="random-article random-k2-article ">\s*<div class="title">\s*<h4>\s*<a href="([^"]+)">([^<]+)<\/a>'
    matches = re.compile(patronvideos, re.DOTALL).finditer(data)

    for match in matches:
        scrapedtitle = scrapertools.unescape(match.group(2))
        scrapedurl = sito + match.group(1)
        if DEBUG:
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "]")
        itemlist.append(
            Item(channel=__channel__, action="play", fulltitle=scrapedtitle, show=scrapedtitle,
                 title="[COLOR green]Puntata Random - " + scrapedtitle + "[/COLOR]", url=scrapedurl))

    # Extract the entries (folders)
    patronvideos = '<li><a href="([^"]+)"><span class="catTitle">([^<]+)<\/span>'
    matches = re.compile(patronvideos, re.DOTALL).finditer(data)

    for match in matches:
        scrapedtitle = scrapertools.unescape(match.group(2))
        scrapedurl = sito + match.group(1)
        if DEBUG:
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "]")
        # Add to the XBMC listing
        itemlist.append(
            Item(channel=__channel__, action="listepisodes", fulltitle=scrapedtitle, show=scrapedtitle,
                 title="[COLOR azure]" + scrapedtitle + "[/COLOR]", url=scrapedurl))

    return itemlist

def listvideos(item):
    logger.info("[discoverymx.py] listvideos")
    itemlist = []
    scrapedthumbnail = ""
    scrapedplot = ""

    # Download the page
    data = scrapertools.cache_page(item.url)

    patronvideos = "<h3 class='post-title entry-title'[^<]+"
    patronvideos += "<a href='([^']+)'>([^<]+)</a>.*?"
    patronvideos += "<div class='post-body entry-content'(.*?)<div class='post-footer'>"
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for match in matches:
        scrapedtitle = match[1]
        scrapedtitle = re.sub("<[^>]+>", " ", scrapedtitle)
        scrapedtitle = scrapertools.unescape(scrapedtitle)
        scrapedurl = match[0]

        regexp = re.compile(r'src="(http[^"]+)"')
        matchthumb = regexp.search(match[2])
        if matchthumb is not None:
            scrapedthumbnail = matchthumb.group(1)

        matchplot = re.compile('<div align="center">(<img.*?)</span></div>', re.DOTALL).findall(match[2])
        if len(matchplot) > 0:
            scrapedplot = matchplot[0]
            #print matchplot
        else:
            scrapedplot = ""
        scrapedplot = re.sub("<[^>]+>", " ", scrapedplot)
        scrapedplot = scrapertools.unescape(scrapedplot)
        if DEBUG: logger.info("title=["+scrapedtitle+"], url=["+scrapedurl+"], thumbnail=["+scrapedthumbnail+"]")

        # Add to the XBMC listing
        #xbmctools.addnewfolder( __channel__ , "findevi" , category , scrapedtitle , scrapedurl , scrapedthumbnail, scrapedplot )
        itemlist.append(
            Item(channel=__channel__, action="findvideos", title=scrapedtitle, url=scrapedurl,
                 thumbnail=scrapedthumbnail, plot=scrapedplot, folder=True))

    # Extract the next page mark
    patronvideos = "<a class='blog-pager-older-link' href='([^']+)'"
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    if len(matches) > 0:
        scrapedtitle = "Página siguiente"
        scrapedurl = urlparse.urljoin(item.url, matches[0])
        scrapedthumbnail = ""
        scrapedplot = ""
        itemlist.append(
            Item(channel=__channel__, action="listvideos", title=scrapedtitle, url=scrapedurl,
                 thumbnail=scrapedthumbnail, plot=scrapedplot, folder=True))

    return itemlist

def programas(item):
    logger.info("[clantv.py] programas")
    itemlist = []

    # Download the page
    data = scrapertools.cache_page(item.url)

    # Extract the shows
    patron = '<div class="informacion-serie">[^<]+'
    patron += '<h3>[^<]+'
    patron += '<a href="([^"]+)">([^<]+)</a>[^<]+'
    patron += '</h3>[^<]+'
    patron += '<a[^>]+>[^<]+</a><img.*?src="([^"]+)"><div>(.*?)</div>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    if len(matches) == 0:
        patron = '<div class="informacion-serie"><h3><a href="([^"]+)">([^<]+)</a></h3><a[^>]+>[^<]+</a><img.*?src="([^"]+)"><div>(.*?)</div>'
        matches = re.compile(patron, re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)

    for match in matches:
        scrapedtitle = match[1]
        scrapedtitle = scrapertools.unescape(scrapedtitle)
        scrapedurl = urlparse.urljoin(item.url, match[0])
        scrapedurl = urlparse.urljoin(scrapedurl, "videos")
        scrapedthumbnail = urlparse.urljoin(item.url, match[2])
        scrapedplot = match[3]
        scrapedplot = scrapertools.unescape(scrapedplot).strip()
        scrapedplot = scrapertools.htmlclean(scrapedplot).strip()
        scrapedpage = urlparse.urljoin(item.url, match[0])
        if DEBUG:
            logger.info("scraped title=["+scrapedtitle+"], url=["+scrapedurl+"], thumbnail=["+scrapedthumbnail+"] plot=["+scrapedplot+"]")
        #logger.info(scrapedplot)

        # Add to the listing
        itemlist.append(
            Item(channel=CHANNELNAME, title=scrapedtitle, action="episodios", url=scrapedurl,
                 thumbnail=scrapedthumbnail, plot=scrapedplot, page=scrapedpage, show=scrapedtitle,
                 folder=True))

    # Add the remaining pages
    patron = '<li class="siguiente">[^<]+<a rel="next" title="Ir a la página siguiente" href="([^"]+)">Siguiente'
    matches = re.compile(patron, re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)
    if len(matches) > 0:
        match = matches[0]
        newitem = Item(channel=CHANNELNAME, url=urlparse.urljoin(item.url, match))
        itemlist.extend(programas(newitem))

    return itemlist

def listserie(item):
    logger.info("[cineblog01.py] listserie")
    itemlist = []

    # Download the page
    data = anti_cloudflare(item.url)
    logger.info(data)

    # Extract the entries (folders)
    patronvideos = '<div class="span4">\s*<a href="([^"]+)"><img src="([^"]+)".*?<div class="span8">.*?<h1>([^<]+)</h1></a>(.*?)<br><a'
    matches = re.compile(patronvideos, re.DOTALL).finditer(data)

    for match in matches:
        scrapedtitle = scrapertools.unescape(match.group(3))
        scrapedurl = match.group(1)
        scrapedthumbnail = match.group(2)
        scrapedplot = scrapertools.unescape(match.group(4))
        scrapedplot = scrapertools.htmlclean(scrapedplot).strip()
        if DEBUG:
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")
        # Add to the XBMC listing
        itemlist.append(
            Item(channel=__channel__, action="episodios", fulltitle=scrapedtitle, show=scrapedtitle,
                 title="[COLOR azure]" + scrapedtitle + "[/COLOR]", url=scrapedurl,
                 thumbnail=scrapedthumbnail, plot=scrapedplot))

    # Put the next page mark
    try:
        next_page = scrapertools.get_match(data, "<link rel='next' href='([^']+)'")
        itemlist.append(
            Item(channel=__channel__, action="HomePage", title="[COLOR yellow]Torna Home[/COLOR]", folder=True))
        itemlist.append(
            Item(channel=__channel__, action="listserie", title="[COLOR orange]Successivo>>[/COLOR]",
                 url=next_page,
                 thumbnail="http://2.bp.blogspot.com/-fE9tzwmjaeQ/UcM2apxDtjI/AAAAAAAAeeg/WKSGM2TADLM/s1600/pager+old.png"))
    except:
        pass

    return itemlist

def peliculas(item):
    logger.info("[cineblog01.py] peliculas")
    itemlist = []

    if item.url == "":
        item.url = sito

    # Download the page
    data = scrapertools.cache_page(item.url)
    logger.info(data)

    # Extract the entries (folders)
    patronvideos = '<div id="covershot".*?<a.*?<img src="(.*?)".*?'
    patronvideos += '<div id="post-title"><a href="(.*?)".*?'
    patronvideos += '<h3>(.*?)</h3>(.*?)</p>'
    #patronvideos += '<div id="description"><p>(.?*)</div>'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for match in matches:
        scrapedtitle = scrapertools.unescape(match[2])
        scrapedurl = urlparse.urljoin(item.url, match[1])
        scrapedthumbnail = urlparse.urljoin(item.url, match[0])
        scrapedthumbnail = scrapedthumbnail.replace(" ", "%20")
        scrapedplot = scrapertools.unescape(match[3])
        scrapedplot = scrapertools.htmlclean(scrapedplot).strip()
        if DEBUG:
            logger.info("title=["+scrapedtitle+"], url=["+scrapedurl+"], thumbnail=["+scrapedthumbnail+"]")
        itemlist.append(
            Item(channel=__channel__, action="findvideos", title=scrapedtitle, url=scrapedurl,
                 thumbnail=scrapedthumbnail, plot=scrapedplot, viewmode="movie_with_plot",
                 fanart=scrapedthumbnail))

    # Next page mark
    try:
        bloque = scrapertools.get_match(data, "<div id='wp_page_numbers'>(.*?)</div>")
        # <a href="http://cineblog01.com/page/2/">Avanti
        # <a href="http://www.cineblog01.com/category/streaming/vk/animazione-vk/page/2/">Avanti > </a></li>
        patronvideos = '<a href="([^"]+)">Avanti'
        matches = re.compile(patronvideos, re.DOTALL).findall(data)
        scrapertools.printMatches(matches)
        if len(matches) > 0:
            scrapedtitle = ">> Avanti"
            scrapedurl = matches[0]
            scrapedthumbnail = ""
            scrapedplot = ""
            if DEBUG:
                logger.info("title=["+scrapedtitle+"], url=["+scrapedurl+"], thumbnail=["+scrapedthumbnail+"]")
            itemlist.append(
                Item(channel=__channel__, action="peliculas", title=scrapedtitle, url=scrapedurl,
                     thumbnail=scrapedthumbnail, plot=scrapedplot))
    except:
        pass

    return itemlist

def listserie(item):
    logger.info("[cineblog01.py] listserie")
    itemlist = []

    # Download the page
    data = scrapertools.anti_cloudflare(item.url, headers)

    # Extract the entries (folders)
    patronvideos = '<div class="span4">\s*<a href="([^"]+)"><img src="([^"]+)".*?<div class="span8">.*?<h1>([^<]+)</h1></a>(.*?)<br><a'
    matches = re.compile(patronvideos, re.DOTALL).finditer(data)

    for match in matches:
        scrapedtitle = scrapertools.unescape(match.group(3))
        scrapedurl = match.group(1)
        scrapedthumbnail = match.group(2)
        scrapedplot = scrapertools.unescape(match.group(4))
        scrapedplot = scrapertools.htmlclean(scrapedplot).strip()
        if DEBUG:
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")
        itemlist.append(infoSod(
            Item(channel=__channel__, action="episodios", fulltitle=scrapedtitle, show=scrapedtitle,
                 title="[COLOR azure]" + scrapedtitle + "[/COLOR]", url=scrapedurl,
                 thumbnail=scrapedthumbnail, extra=item.extra, plot=scrapedplot),
            tipo='tv'))

    # Put the next page mark
    try:
        next_page = scrapertools.get_match(data, "<link rel='next' href='([^']+)'")
        itemlist.append(
            Item(channel=__channel__, action="HomePage", title="[COLOR yellow]Torna Home[/COLOR]", folder=True))
        itemlist.append(
            Item(channel=__channel__, action="listserie", title="[COLOR orange]Successivo>>[/COLOR]",
                 url=next_page, extra=item.extra,
                 thumbnail="https://raw.githubusercontent.com/orione7/Pelis_images/master/vari/successivo_P.png"))
    except:
        pass

    return itemlist

def trailer(item):
    logger.info("pelisalacarta.bricocine trailer")
    itemlist = []

    data = get_page(item.url)

    # Trailer iframe pattern (defined but not used below)
    patron = "<iframe width='570' height='400' src='//([^']+)"

    # Look for the links to the videos
    listavideos = servertools.findvideos(data)
    if len(listavideos) == 0:
        itemlist.append(
            Item(channel=__channel__,
                 title="[COLOR gold][B]Esta pelicula no tiene trailer,lo sentimos...[/B][/COLOR]",
                 thumbnail="http://s6.postimg.org/fay99h9ox/briconoisethumb.png",
                 fanart="http://s6.postimg.org/uie8tu1jl/briconoisefan.jpg", folder=False))

    for video in listavideos:
        videotitle = scrapertools.unescape(video[0])
        url = video[1]
        server = video[2]
        #xbmctools.addnewvideo( __channel__ , "play" , category , server , , url , thumbnail , plot )
        title = "[COLOR crimson]Trailer - [/COLOR]"
        itemlist.append(
            Item(channel=__channel__, action="play", server=server, title=title + videotitle, url=url,
                 thumbnail=item.extra, plot=item.plot, fulltitle=item.title,
                 fanart="http://s23.postimg.org/84vkeq863/movietrailers.jpg", folder=False))

    return itemlist

def Videosnuevoslist(params, url, category):
    logger.info("[sonolatino.py] VideosNuevos")

    # Download the page
    data = scrapertools.cachePage(url)
    #logger.info(data)

    # Extract the entries (folders)
    patronvideos = '<tr><td.*?<a href="([^"]+)">'
    patronvideos += '<img src="([^"]+)" '
    patronvideos += 'alt="[^"]+".*?'
    patronvideos += 'width="250">([^<]+)<'
    patronvideos += 'td class.*?<a href="[^>]+>([^<]+)</a></td><td class.*?>([^<]+)</td></tr>'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    #logger.info("matches = "+str(matches))
    if DEBUG:
        scrapertools.printMatches(matches)

    for match in matches:
        # Title
        scrapedtitle = match[2] + " - " + match[3] + " - " + match[4].replace('í', 'i')
        scrapedtitle = scrapertools.unescape(scrapedtitle)
        # URL
        scrapedurl = match[0]
        # Thumbnail
        scrapedthumbnail = match[1]
        imagen = ""
        # Process the rest
        scrapedplot = match[3]
        tipo = match[3]
        # Debugging
        if DEBUG:
            logger.info("scrapedtitle="+scrapedtitle)
            logger.info("scrapedurl="+scrapedurl)
            logger.info("scrapedthumbnail="+scrapedthumbnail)

        # Add to the XBMC listing
        #xbmctools.addthumbnailfolder( __channel__ , scrapedtitle, scrapedurl , scrapedthumbnail, "detail" )
        xbmctools.addnewfolder(__channel__, "detail", category, scrapedtitle, scrapedurl, scrapedthumbnail, scrapedplot)

    # Look for links to the following pages...
    cat = "nuevo"
    patronvideo = patronvideos
    paginasiguientes(patronvideo, data, category, cat)

    # Label (top-right)...
    xbmcplugin.setPluginCategory(handle=int(sys.argv[1]), category=category)
    # Disable sorting...
    xbmcplugin.addSortMethod(handle=int(sys.argv[1]), sortMethod=xbmcplugin.SORT_METHOD_NONE)
    # End of directory...
    xbmcplugin.endOfDirectory(handle=int(sys.argv[1]), succeeded=True)

def pagina_(item):
    logger.info("pelisalacarta.channels.daramatv pagina_ " + item.url)
    itemlist = []
    headers = DEFAULT_HEADERS[:]

    data = scrapertools.cache_page(item.url, headers=headers)
    data1 = scrapertools.get_match(data, '<div class="animes-bot">(.+?)<!-- fin -->')
    data1 = data1.replace('\n', '')
    data1 = data1.replace('\r', '')

    patron = 'href="(\/drama.+?)".+?<\/div>(.+?)<\/div>.+?src="(.+?)".+?titulo">(.+?)<'
    matches = re.compile(patron, re.DOTALL).findall(data1)
    for scrapedurl, scrapedplot, scrapedthumbnail, scrapedtitle in matches:
        title = scrapertools.unescape(scrapedtitle).strip()
        url = urlparse.urljoin(item.url, scrapedurl)
        thumbnail = urlparse.urljoin(host, scrapedthumbnail)
        plot = scrapertools.decodeHtmlentities(scrapedplot)
        itemlist.append(
            Item(channel=__channel__, action="episodios", title=title, url=url,
                 thumbnail=thumbnail, plot=plot, show=title))

    patron = 'href="([^"]+)" class="next"'
    matches = re.compile(patron, re.DOTALL).findall(data)
    for match in matches:
        scrapedurl = urlparse.urljoin(item.url, match)
        scrapedtitle = "Pagina Siguiente >>"
        scrapedthumbnail = ""
        scrapedplot = ""
        itemlist.append(
            Item(channel=__channel__, action="pagina_", title=scrapedtitle, url=scrapedurl,
                 thumbnail=scrapedthumbnail, plot=scrapedplot, folder=True))

    return itemlist

def topVideos(params, url, category):
    url2 = url
    data = scrapertools.cachePage(url)

    patron = '<option value="([^"]+)" >([^<]+)</option>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    if len(matches) > 0:
        opciones = []
        urls = []
        opciones.append("Todo el Tiempo")
        urls.append("http://www.sonolatino.com/topvideos.html")
        opciones.append("Ultimos 2 dias")
        urls.append("http://www.sonolatino.com/topvideos.html?do=recent")
        for match in matches:
            opciones.append(scrapertools.unescape(match[1]))
            urls.append(match[0])

        # Open the selection dialog
        dia = xbmcgui.Dialog()
        seleccion = dia.select("Elige Listar Top por :", opciones)
        logger.info("seleccion=%d" % seleccion)
        if seleccion == -1:
            return
        url2 = urls[seleccion]

    toplist(params, url2, category)

def findvideos(item):
    logger.info("[pelisadicto.py] findvideos")
    itemlist = []

    data = scrapertools.cache_page(item.url)
    data = scrapertools.unescape(data)

    titulo = item.title
    titulo_tmdb = re.sub("([0-9+])", "", titulo.strip())
    oTmdb = Tmdb(texto_buscado=titulo_tmdb, idioma_busqueda="es")
    item.fanart = oTmdb.get_backdrop()

    # Download the page
    # data = scrapertools.cache_page(item.url)

    # Pattern includes the quality
    patron = '#div_\d_\D.+?<img id="([^"]+).*?<span>.*?</span>.*?<span>(.*?)</span>.*?imgdes.*?imgdes/([^\.]+).*?<a href=([^\s]+)'
    matches = re.compile(patron, re.DOTALL).findall(data)

    for scrapedidioma, scrapedcalidad, scrapedserver, scrapedurl in matches:
        title = titulo + "_" + scrapedidioma + "_" + scrapedserver + "_" + scrapedcalidad
        itemlist.append(
            Item(channel=__channel__, action="play", title=title, fulltitle=title, url=scrapedurl,
                 thumbnail=item.thumbnail, plot=item.plot, show=item.show, fanart=item.fanart))

    return itemlist

def novedades_series(item):
    logger.info("[shurweb.py] novedades_series")

    data = scrapertools.cachePage(item.url)
    data = scrapertools.unescape(data)
    data = scrapertools.get_match(
        data, '<div class="tab-pane fade in active" id="series">(.*?)<div class="tab-pane fade" id="pelis">')

    patron = '<a class="video_thumb" href="([^"]+)" rel="bookmark" title="([^"]+)">.*?<img.*?src="([^"]+)"'
    matches = re.compile(patron, re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)

    itemlist = []
    for url, title, thumbnail in matches:
        if DEBUG:
            logger.info("title=[" + title + "], url=[" + url + "], thumbnail=[" + thumbnail + "]")
        itemlist.append(
            Item(channel=__channel__, action="findvideos", title=title, url=url, thumbnail=thumbnail,
                 fanart="http://pelisalacarta.mimediacenter.info/fanart/shurweb.jpg"))

    return itemlist

def play(item):
    logger.info("pelisalacarta.bricocine play")

    itemlist = servertools.find_video_items(data=item.url)

    data = scrapertools.cache_page(item.url)
    listavideos = servertools.findvideos(data)

    for video in listavideos:
        videotitle = scrapertools.unescape(video[0])
        url = item.url
        server = video[2]
        # xbmctools.addnewvideo( __channel__ , "play" , category , server , , url , thumbnail , plot )
        itemlist.append(
            Item(channel=__channel__, action="play", server=server, title="Trailer - " + videotitle,
                 url=url, thumbnail=item.thumbnail, plot=item.plot, fulltitle=item.title,
                 fanart="http://s23.postimg.org/84vkeq863/movietrailers.jpg", folder=False))

    return itemlist

def mainlist(item):
    logger.info("[southparkita.py] mainlist")
    itemlist = []

    # Download the page
    data = scrapertools.cache_page(host)
    logger.info(data)

    itemlist.append(
        Item(channel=__channel__, action="mainlist", title="[COLOR green]Ricarica...[/COLOR]"))

    # Extract the entries (folders)
    patronvideos = '<li id="menu-item-\d{4}.*?\d{4}"><a href="([^"]+)">([^<]+)<\/a><\/li>'
    matches = re.compile(patronvideos, re.DOTALL).finditer(data)

    for match in matches:
        scrapedtitle = scrapertools.unescape(match.group(2))
        scrapedurl = match.group(1)
        if DEBUG:
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "]")
        # Add to the XBMC listing
        itemlist.append(
            Item(channel=__channel__, action="listepisodes", fulltitle=scrapedtitle, show=scrapedtitle,
                 title="[COLOR azure]" + scrapedtitle + "[/COLOR]", url=scrapedurl))

    return itemlist

def findvideos(item):
    logger.info("[cinevos.py] findvideos")

    # Download the page
    data = scrapertools.cachePage(item.url)
    logger.info(data)

    # Check whether there is a subtitle
    patronvideos = '<a href="(http://www.cinevos.com/sub/[^"]+)"'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    sub = ""
    if len(matches):
        sub = matches[0]
        logger.info("con subtitulo :%s" % sub)

    # Look for the description
    patronvideos = '<p>(<div.*?</div>) </p>'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    plot = ""
    if len(matches):
        plot = re.sub("<[^>]+>", "", matches[0])

    # Look for the links to the videos
    listavideos = servertools.findvideos(data)
    itemlist = []
    for video in listavideos:
        videotitle = scrapertools.unescape(video[0])
        #print videotitle
        url = video[1]
        server = video[2]
        if "Megaupload" in videotitle:
            videotitle = item.title + " - [Megaupload]"
        else:
            videotitle = item.title + " - " + videotitle
        itemlist.append(
            Item(channel=CHANNELNAME, action="play", server=server, title=videotitle, url=url,
                 thumbnail=item.thumbnail, plot=plot, subtitle=sub, folder=False))

    return itemlist

def mainlist(item):
    logger.info("[griffinita.py] mainlist")
    itemlist = []

    # Download the page
    data = scrapertools.cache_page(host)

    patronvideos = '<li><a href="([^"]+)"><span class="catTitle">([^<]+)<\/span>'
    matches = re.compile(patronvideos, re.DOTALL).finditer(data)

    for match in matches:
        scrapedtitle = scrapertools.unescape(match.group(2))
        scrapedurl = host + match.group(1)
        if DEBUG:
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "]")
        # Add to the XBMC listing
        itemlist.append(
            Item(channel=__channel__, action="listepisodes", fulltitle=scrapedtitle, show=scrapedtitle,
                 title="[COLOR azure]" + scrapedtitle + "[/COLOR]", url=scrapedurl))

    return itemlist

def listepisodes(item):
    logger.info("[simpsonita.py] listepisodes")
    itemlist = []

    # Download the page
    data = scrapertools.cachePage(item.url)
    logger.info(data)

    # Extract the entries (folders)
    patronvideos = '<h3 class="catItemTitle">\s*<a href="([^"]+)">([^<]+)<\/a>'
    matches = re.compile(patronvideos, re.DOTALL).finditer(data)

    for match in matches:
        scrapedtitle = scrapertools.unescape(match.group(2)).strip()
        scrapedurl = sito + match.group(1)
        if DEBUG:
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "]")
        # Add to the XBMC listing
        itemlist.append(
            Item(channel=__channel__, action="play", fulltitle=scrapedtitle, show=scrapedtitle,
                 title="[COLOR azure]" + scrapedtitle + "[/COLOR]", url=scrapedurl))

    return itemlist

def peliculas(item):
    logger.info("[pornhub.py] peliculas")
    itemlist = []

    # Download the page
    data = scrapertools.downloadpage(item.url)
    data = scrapertools.unescape(data)
    data = scrapertools.find_single_match(data, '<ul class="nf-videos videos row-4-thumbs">(.*?)<div class="pre-footer">')

    # Extract the movies
    patron = '<div class="phimage">.*?'
    patron += '<a href="/view_video.php\?viewkey=([^"]+)" title="([^"]+).*?'
    patron += '<var class="duration">([^<]+)</var>(.*?)</div>.*?'
    patron += 'data-smallthumb="([^"]+)"'
    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for viewkey, scrapedtitle, duration, scrapedhd, thumbnail in matches:
        title = scrapedtitle
        scrapedhd = scrapertools.find_single_match(scrapedhd, '<span class="hd-thumbnail">(.*?)</span>')
        url = 'http://es.pornhub.com/embed/' + urllib.quote(viewkey, safe="%/:&?")
        thumbnail = urllib.quote(thumbnail, safe="%/:&?")
        if DEBUG:
            logger.info("title=["+title+"], url=["+url+"], thumbnail=["+thumbnail+"]")
        itemlist.append(
            Item(channel=__channel__, action="play", title=title, url=url, duration=duration,
                 quality=scrapedhd, fanart=__fanart__, thumbnail=thumbnail))

    # Paginator
    patron = '<li class="page_next"><a href="([^"]+)"'
    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    if len(matches) > 0:
        url = urlparse.urljoin("http://es.pornhub.com", matches[0])
        itemlist.append(
            Item(channel=__channel__, action="peliculas", title="Página siguiente >", fanart=__fanart__, url=url))

    return itemlist

def series(item):
    logger.info()

    data = scrapertools.cache_page(item.url)

    patron = '<div class="post" id="post"[^<]+<center><h1 class="post-title entry-title"[^<]+<a href="([^"]+)">' \
             '(.*?)</a>[^<]+</h1></center>[^<]+<div[^<]+</div>[^<]+<div[^<]+<div.+?<img src="([^"]+)"'
    matches = re.compile(patron, re.DOTALL).findall(data)
    itemlist = []

    for scrapedurl, scrapedtitle, scrapedthumbnail in matches:
        title = scrapertools.unescape(scrapedtitle)
        fulltitle = title
        url = urlparse.urljoin(item.url, scrapedurl)
        thumbnail = urlparse.urljoin(item.url, scrapedthumbnail)
        show = title
        logger.debug("title=[{0}], url=[{1}], thumbnail=[{2}]".format(title, url, thumbnail))
        itemlist.append(
            Item(channel=item.channel, action="episodios", title=title, url=url, thumbnail=thumbnail,
                 show=show, fulltitle=fulltitle, fanart=thumbnail, folder=True))

    patron = '</span><a class="page larger" href="([^"]+)"'
    matches = re.compile(patron, re.DOTALL).findall(data)
    for match in matches:
        scrapedurl = match
        scrapedtitle = ">> Pagina Siguiente"
        itemlist.append(
            Item(channel=item.channel, action="series", title=scrapedtitle, url=scrapedurl,
                 folder=True, viewmode="movies_with_plot"))

    return itemlist

def generos(item):
    logger.info("[pornhub.py] generos")
    itemlist = []

    # Download the page
    data = scrapertools.downloadpage(item.url)
    data = scrapertools.unescape(data)
    data = scrapertools.find_single_match(data, '<div id="categoriesStraightImages">(.*?)</ul>')

    # Extract the categories
    patron = '<li class="cat_pic" data-category="\d+">.*?'
    patron += '<a href="([^"]+)">'
    patron += '<img src="([^"]+)" '
    patron += 'alt="([^"]+)"'
    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for scrapedurl, scrapedthumbnail, scrapedtitle in matches:
        title = scrapedtitle
        if "?" in scrapedurl:
            url = urlparse.urljoin(item.url, scrapedurl + "&o=cm")
        else:
            url = urlparse.urljoin(item.url, scrapedurl + "?o=cm")
        thumbnail = urllib.quote(scrapedthumbnail, safe="%/:&?")
        itemlist.append(
            Item(channel=__channel__, action="peliculas", title=title, url=url,
                 fanart=__fanart__, thumbnail=thumbnail))

    itemlist.sort(key=lambda x: x.title)
    return itemlist

def menuaz(item):
    logger.info("[shurweb.py] menuaz")

    data = scrapertools.cachePage(item.url)
    data = scrapertools.unescape(data)
    data = scrapertools.get_match(data, '<ul class="pagination pagination-lg">(.*?)</div>')

    patron = '<li><a href="(.*?)" rel="nofollow">(.*?)</a></li>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)

    itemlist = []
    for url, letra in matches:
        if "peliculas" in item.url or "documentales" in item.url:
            itemlist.append(
                Item(channel=__channel__, title=letra, action="peliculas", url=url,
                     fanart="http://pelisalacarta.mimediacenter.info/fanart/shurweb.jpg"))
        else:
            itemlist.append(
                Item(channel=__channel__, title=letra, action="series", url=url,
                     fanart="http://pelisalacarta.mimediacenter.info/fanart/shurweb.jpg"))

    return itemlist

def novedades_peliculas(item):
    logger.info("[shurweb.py] novedades_peliculas")

    data = scrapertools.cachePage(item.url)
    data = scrapertools.unescape(data)
    data = scrapertools.get_match(
        data, '<div class="tab-pane fade" id="pelis">(.*?)<div class="tab-pane fade" id="docus"')

    return peliculas(item, data=data)

def listanime(item):
    logger.info("[cineblog01.py] listanime")
    itemlist = []

    # Download the page
    data = scrapertools.cache_page(item.url)
    logger.info(data)

    # Extract the entries (folders)
    patronvideos = '<div class="span4"> <a.*?<img.*?src="(.*?)".*?'
    patronvideos += '<div class="span8">.*?<a href="(.*?)">.*?'
    patronvideos += '<h1>(.*?)</h1></a>.*?<br />(.*?)<br>.*?'
    matches = re.compile(patronvideos, re.DOTALL).finditer(data)

    for match in matches:
        scrapedthumbnail = match.group(1)
        scrapedurl = match.group(2)
        scrapedtitle = scrapertools.unescape(match.group(3))
        scrapedplot = scrapertools.unescape(match.group(4))
        # NOTE: startswith("") is always True, so the first 149 characters are always stripped
        if scrapedplot.startswith(""):
            scrapedplot = scrapedplot[149:]
        if DEBUG:
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")
        # Add to the XBMC listing
        itemlist.append(
            Item(channel=__channel__, action="findvid_anime", title="[COLOR azure]" + scrapedtitle + "[/COLOR]",
                 url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot))

    # Put the next page mark
    try:
        next_page = scrapertools.get_match(data, "<link rel='next' href='([^']+)'")
        itemlist.append(
            Item(channel=__channel__, action="listanime", title="[COLOR orange]Successivo>>[/COLOR]",
                 url=next_page,
                 thumbnail="http://2.bp.blogspot.com/-fE9tzwmjaeQ/UcM2apxDtjI/AAAAAAAAeeg/WKSGM2TADLM/s1600/pager+old.png"))
    except:
        pass

    return itemlist

def listepisodes(item):
    logger.info("[southparkita.py] listepisodes")
    logger.info(item.url)
    itemlist = []

    # Download the page
    data = scrapertools.cache_page(item.url)

    # Keep appending numbered pages until a "Pagina non trovata" page comes back
    cicla = True
    cnt = 2
    while cicla:
        page_data = scrapertools.cache_page(item.url + 'page/' + str(cnt) + '/')
        data = data + page_data
        logger.info(item.url + 'page/' + str(cnt) + '/')
        patronvideos = '<title>Pagina non trovata.*?<\/title>'
        cnt += 1
        logger.info(str(cnt))
        # finditer() always returns a (truthy) iterator, so test for an actual
        # match on the page just fetched instead
        if re.search(patronvideos, page_data, re.DOTALL):
            cicla = False

    logger.info(data)

    # Extract the entries (folders)
    patronvideos = '<h1 class="entry-title noMarginTop"><a href="([^"]+)".*?>([^<]+)<\/a><\/h1>'
    matches = re.compile(patronvideos, re.DOTALL).finditer(data)

    for match in matches:
        scrapedtitle = scrapertools.unescape(match.group(2)).strip()
        scrapedurl = match.group(1)
        if DEBUG:
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "]")
        # Add to the XBMC listing
        itemlist.append(
            Item(channel=__channel__, action="findvideos", fulltitle=item.fulltitle, show=item.show,
                 title="[COLOR azure]" + scrapedtitle + "[/COLOR]", url=scrapedurl))

    if config.get_library_support() and len(itemlist) != 0:
        itemlist.append(
            Item(channel=__channel__, title=item.title, url=item.url, action="add_serie_to_library",
                 extra="listepisodes", show=item.show))
        itemlist.append(
            Item(channel=item.channel, title="Scarica tutti gli episodi della serie", url=item.url,
                 action="download_all_episodes", extra="listepisodes", show=item.show))

    return itemlist

def devuelveListaEpisodios(params, url, category):
    """Scrapes the episodes page and returns it as a list of dictionaries

    UPDATE 25-02-2011: The latest changes on the tvshack site made this procedure
    needlessly complex. Redesigned --> now follows the usual pelisalacarta channel conventions. <--

    [{Episodio}] List of dictionaries with the data for each episode

    Returning the episodes as dictionaries lets us add or remove keys at will,
    leaving the design open to extra information in the channels where it exists.
    The basic keys at the time this channel was written are:
        'title'     : Episode title - for the library, preferably in the format
                      SHOW_NAME - SEASONxEPISODE EPISODE_TITLE WHATEVER
        'url'       : Episode URL
        'plot'      : Episode summary (or the series summary if the episode has none on this channel)
        'thumbnail' : Episode still or series cover art
    """
    if params.has_key("Serie"):
        serie = params.get("Serie")
    else:
        serie = ""

    # Download the page
    data = scrapertools.cachePage(url)
    temporada = '0'

    # Extract the episodes with regular expressions (pattern)
    # e.g. Series: <li class="listm"><a href="/tv/Family_Guy/season_1/episode_1/">ep1. Death Has a Shadow</a><a href=""><span>31/1/1999</span></a></li>
    # e.g. Anime:  DEPRECATED <li><a href="http://tvshack.bz/anime/07_Ghost/season_1/episode_5/">ep5. Episode 5</a><a href=""><span>??/??/????</span></a></li>
    # e.g. Music:  DEPRECATED <li><a href="http://tvshack.bz/music/Michael_Jackson/C85E8225E45E/">Black Or White<span>2,301 views</span></a></li><li><a
    patronepisodios = '''(?x)                 # Enable VERBOSE mode.
        <li\ class="listm"><a\ href="         # Noise
        (?:http://tvshack\.bz)?([^"]+)"       # \g1 = (relative) path of the episode/video
        [^>]*>                                # Noise
        ([0-9]+)                              # \g2 = Season
        x([0-9]+)                             # \g3 = Episode number
        \ ([^<]+)                             # \g4 = Episode name
        <\/a><\/li>                           # Noise
        '''
    episodiosREO = re.compile(patronepisodios)  # Regular Expression Object (REO)

    listaEp = []  # Episode list
    Ep = {}       # Dictionary with each episode's info

    # UPDATE 25-2-2011: The new site has no series info or air dates either
    Ep['thumbnail'] = ""
    Ep['plot'] = ""

    for match in episodiosREO.finditer(data):
        if category != 'Musica':
            title = match.expand(serie + ' - \g<2>x\g<3> - \g<4>')  # with expand, referenced groups start at 1
        else:
            # legacy branch for the old (deprecated) Musica pattern; the current pattern has no group 5
            title = match.expand('\g<3> (visto \g<5> veces)')
        # Episode URL
        Ep['title'] = scrapertools.unescape(title)
        Ep['url'] = TVSHACK_URL + match.group(1)
        listaEp.append(Ep.copy())  # Add the episode to the list (it must be copied)

    return listaEp

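# For reference, a minimal sketch of the list devuelveListaEpisodios() returns,
# following the key set described in its docstring. The titles and URLs below are
# illustrative values built from the docstring's own example markup, not real
# scraped data:
#
#     [
#         {'title': 'Family Guy - 1x01 - Death Has a Shadow',            # SHOW - SEASONxEPISODE TITLE
#          'url': 'http://tvshack.bz/tv/Family_Guy/season_1/episode_1/',
#          'plot': '',          # the new site exposes no summaries...
#          'thumbnail': ''},    # ...or artwork, so these stay empty
#     ]
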
def listvideos(item):
    logger.info("[peliculasid.py] listvideos")

    # Download the page
    data = scrapertools.cachePage(item.url)
    logger.info(data)

    # Extract the entries (folders)
    patronvideos = 'id="filmga[^"]+" class="filmgal">(.*?<strong>Duraci[^<]+</strong>[^<]+</div>)'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    itemlist = []

    for match in matches:
        # URL
        try:
            scrapedurl = re.compile(r'href="(.+?)"').findall(match)[0]
        except:
            continue
        # Title
        try:
            scrapedtitle = re.compile(r'<span class="titulotool">(.+?)</div>').findall(match.replace("\n", ""))[0]
            scrapedtitle = re.sub("<[^>]+>", "", scrapedtitle)
            try:
                scrapedtitle = scrapertools.unescape(scrapedtitle)
            except:
                pass
        except:
            scrapedtitle = "sin titulo"
        # Thumbnail
        try:
            scrapedthumbnail = re.compile(r'src="(.+?)"').findall(match)[0]
        except:
            scrapedthumbnail = ""
        # Plot
        try:
            scrapedplot = re.compile(r'<div class="sinopsis">(.+?)</div>').findall(match)[0]
            scrapedplot = re.sub("<[^>]+>", " ", scrapedplot).strip()
        except:
            scrapedplot = ""

        # Add to the XBMC listing
        itemlist.append(
            Item(channel=__channel__, action="play", title=scrapedtitle.strip(), url=scrapedurl,
                 thumbnail=scrapedthumbnail, plot=scrapedplot, extra=scrapedplot, context="4|5",
                 viewmode="movie", folder=False))

    # Extract the next page mark
    patronvideos = "<span class='current'>[^<]+</span><a href='(.+?)' class='page larger'>"
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    if len(matches) > 0:
        scrapedtitle = "Página siguiente"
        scrapedurl = urlparse.urljoin(item.url, matches[0])
        scrapedthumbnail = ""
        scrapedplot = ""
        itemlist.append(
            Item(channel=__channel__, action="listvideos", title=scrapedtitle, url=scrapedurl,
                 thumbnail=scrapedthumbnail, plot=scrapedplot))

    return itemlist

def peliculas(item):
    logger.info("streamondemand.hdstreamingit peliculas")
    itemlist = []

    # Download the page
    data = scrapertools.cache_page(item.url, timeout=5)

    # Extract the entries (folders)
    patronvideos = '<a class="link_image" href="[^"]+" title="Permalink to (.*?)">.*?'
    patronvideos += '<img src="([^"]+)" alt="">.*?'
    patronvideos += '<div class="button_yellow"><a(?: target="_blank")? href="([^"]+)"'
    matches = re.compile(patronvideos, re.DOTALL).finditer(data)

    for match in matches:
        scrapedurl = urlparse.urljoin(item.url, match.group(3))
        scrapedthumbnail = urlparse.urljoin(item.url, match.group(2))
        scrapedtitle = scrapertools.unescape(match.group(1))
        scrapedplot = ""
        if DEBUG:
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")
        if 'adfoc.us' in scrapedurl:
            scrapedurl = importio_url + scrapedurl
        if 'adf.ly' in scrapedurl:
            scrapedurl = dec_fly + scrapedurl
        itemlist.append(
            infoSod(Item(channel=__channel__,
                         action="episodios" if item.extra == "serie" else "findvideos",
                         fulltitle=scrapedtitle, show=scrapedtitle,
                         title="[COLOR azure]" + scrapedtitle + "[/COLOR]",
                         url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot,
                         extra=item.extra, folder=True), tipo='movie'))

    # Extract the paginator
    patronvideos = "<link rel='next' href='([^']+)' />"
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    if len(matches) > 0:
        scrapedurl = urlparse.urljoin(item.url, matches[0])
        itemlist.append(
            Item(channel=__channel__, action="HomePage", title="[COLOR yellow]Torna Home[/COLOR]", folder=True))
        itemlist.append(
            Item(channel=__channel__, action="peliculas", extra=item.extra,
                 title="[COLOR orange]Successivo >>[/COLOR]", url=scrapedurl,
                 thumbnail="http://2.bp.blogspot.com/-fE9tzwmjaeQ/UcM2apxDtjI/AAAAAAAAeeg/WKSGM2TADLM/s1600/pager+old.png",
                 folder=True))

    return itemlist

def animestream(item):
    logger.info("[cineblog01.py] animestream")
    itemlist = []

    if item.url == "":
        item.url = sito

    # Download the page
    data = scrapertools.cache_page(item.url)
    logger.info(data)

    # Extract the entries (folders)
    patronvideos = '<div class="span4"> <a.*?<p><img.*?src="(.*?)".*?'
    patronvideos += '<div class="span8">.*?<a href="(.*?)">.*?'
    patronvideos += '<h1>(.*?)</h1></a>.*?<br>-->(.*?)<br>.*?'
    matches = re.compile(patronvideos, re.DOTALL).finditer(data)

    for match in matches:
        scrapedtitle = scrapertools.unescape(match.group(3))
        scrapedurl = urlparse.urljoin(item.url, match.group(2))
        scrapedthumbnail = urlparse.urljoin(item.url, match.group(1))
        scrapedthumbnail = scrapedthumbnail.replace(" ", "%20")
        scrapedplot = scrapertools.unescape(match.group(4))
        scrapedplot = scrapertools.htmlclean(scrapedplot).strip()
        if DEBUG:
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")
        itemlist.append(
            Item(channel=__channel__, action="findvid_anime", title="[COLOR azure]" + scrapedtitle + "[/COLOR]",
                 url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot,
                 viewmode="movie_with_plot", fanart=scrapedthumbnail))

    # Next page mark
    try:
        bloque = scrapertools.get_match(data, "<div id='wp_page_numbers'>(.*?)</div>")
        patronvideos = '<a href="([^"]+)">></a></li>'
        matches = re.compile(patronvideos, re.DOTALL).findall(bloque)
        scrapertools.printMatches(matches)
        if len(matches) > 0:
            scrapedtitle = "[COLOR orange]Successivo>>[/COLOR]"
            scrapedurl = matches[0]
            scrapedthumbnail = ""
            scrapedplot = ""
            if DEBUG:
                logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")
            itemlist.append(
                Item(channel=__channel__, action="animestream", title=scrapedtitle, url=scrapedurl,
                     thumbnail="http://2.bp.blogspot.com/-fE9tzwmjaeQ/UcM2apxDtjI/AAAAAAAAeeg/WKSGM2TADLM/s1600/pager+old.png",
                     plot=scrapedplot))
    except:
        pass

    return itemlist

def peliculas_tmdb(item):
    logger.info("streamondemand-pureita streaminghd peliculas")
    itemlist = []

    # Download the page
    data = httptools.downloadpage(item.url).data

    # Extract the entries (folders)
    patron = '<img src="([^"]+)" alt="([^<]+)">\s*<div class="mepo"><span class="quality">\s*([^<]+).*?'
    patron += '</div><div class="rating"><span class="icon-star2"></span>\s*(.*?)</div>\s*'
    patron += '<a href="([^"]+)">'
    matches = re.compile(patron, re.DOTALL).finditer(data)

    for match in matches:
        scrapedurl = urlparse.urljoin(item.url, match.group(5))
        votes = scrapertools.unescape(match.group(4))
        quality = scrapertools.unescape(match.group(3))
        scrapedtitle = scrapertools.unescape(match.group(2))
        scrapedthumbnail = urlparse.urljoin(item.url, match.group(1))
        scrapedthumbnail = httptools.get_url_headers(scrapedthumbnail)

        if "SUB-ITA" in scrapedtitle or "sub-ita" in scrapedtitle:
            lang = " ([COLOR yellow]Sub-Ita[/COLOR])"
        else:
            lang = ""
        if votes:
            votes = " ([COLOR yellow]" + votes.strip() + "[/COLOR])"
        if quality:
            quality = " ([COLOR yellow]" + quality.lower().strip() + "[/COLOR])"

        scrapedtitle = scrapedtitle.replace("’", "'").replace(" & ", " ").replace("[SUN-ITA]", "").strip()

        itemlist.append(
            infoSod(Item(channel=__channel__, action="findvideos", contentType="movie",
                         fulltitle=scrapedtitle, show=scrapedtitle,
                         title=scrapedtitle + quality + votes,
                         url=scrapedurl, thumbnail=scrapedthumbnail, plot="", folder=True),
                    tipo='movie'))

    # Extract the paginator
    patronvideos = '<a href="([^"]+)"><span class="icon-chevron-right">'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    if len(matches) > 0:
        scrapedurl = urlparse.urljoin(item.url, matches[0])
        itemlist.append(
            Item(channel=__channel__, action="peliculas",
                 title="[COLOR orange]Successivi >>[/COLOR]", url=scrapedurl,
                 thumbnail="https://raw.githubusercontent.com/orione7/Pelis_images/master/channels_icon_pureita/next_1.png",
                 folder=True))

    return itemlist

def listatipoVideo(params, url, category):
    logger.info("[sonolatino.py] listatipoVideo")

    # Searches for the listing type: FECHA | VISTAS | RATING
    # url = listarpor(params,url,category)
    if len(url) == 0:
        return

    # Download the page
    data = scrapertools.cachePage(url)

    # Extract the entries (folders)
    if url == "http://www.sonolatino.com/index.html":
        patronvideos = '<li class="item">[^<]+<a href="([^"]+)"><img src="([^"]+)" alt="([^"]+)" class="imag".*?/></a>'
        cat = "viendose"
    else:
        patronvideos = '<li class="video">[^<]+<div class="video_i">[^<]+<a href="([^"]+)"'
        patronvideos += '>[^<]+<img src="([^"]+)" alt="([^"]+)".*?<span class="artist_name">([^<]+)</span>'
        cat = "tipo"

    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    #logger.info("matches = "+matches[0])

    scrapedplot = ""
    for match in matches:
        # Title
        scrapedtitle = scrapertools.unescape(match[2])
        # URL
        scrapedurl = match[0]
        # Thumbnail
        scrapedthumbnail = match[1]
        # Process the rest
        if cat == "tipo":
            scrapedplot = scrapertools.unescape(match[3])
        else:
            for campo in re.findall("/(.*?)/", match[0]):
                scrapedplot = campo
        # Debugging
        if DEBUG:
            logger.info("scrapedtitle="+scrapedtitle)
            logger.info("scrapedurl="+scrapedurl)
            logger.info("scrapedthumbnail="+scrapedthumbnail)

        # Add to the XBMC listing
        xbmctools.addnewfolder(__channel__, "detail", category, scrapedtitle + " - " + scrapedplot,
                               scrapedurl, scrapedthumbnail, scrapedplot)

    # -------------------------------------------
    # Search for the next page
    if cat == "tipo":
        patron_pagina_sgte = '</span><a href="([^"]+)"'
        paginasiguientes(patron_pagina_sgte, data, category, cat)

    # Label (top-right)...
    xbmcplugin.setPluginCategory(handle=int(sys.argv[1]), category=category)
    # Disable sorting...
    xbmcplugin.addSortMethod(handle=int(sys.argv[1]), sortMethod=xbmcplugin.SORT_METHOD_NONE)
    # End of directory...
    xbmcplugin.endOfDirectory(handle=int(sys.argv[1]), succeeded=True)

def series(item):
    logger.info("pelisalacarta.channels.animeflv series")

    data = scrapertools.anti_cloudflare(item.url, headers=CHANNEL_DEFAULT_HEADERS, host=CHANNEL_HOST)
    '''
    <div class="aboxy_lista">
        <a href="/ova/nurarihyon-no-mago-ova.html" title="Nurarihyon no Mago OVA">
            <img class="lazy portada" src="/img/blank.gif" data-original="http://cdn.animeflv.net/img/portada/1026.jpg" alt="Nurarihyon no Mago OVA"/>
        </a>
        <span style="float: right; margin-top: 0px;" class="tipo_1"></span>
        <a href="/ova/nurarihyon-no-mago-ova.html" title="Nurarihyon no Mago OVA" class="titulo">
            Nurarihyon no Mago OVA
        </a>
        <div class="generos_links">
            <b>Generos:</b>
            <a href="/animes/genero/accion/">Acción</a>,
            <a href="/animes/genero/shonen/">Shonen</a>,
            <a href="/animes/genero/sobrenatural/">Sobrenatural</a>
        </div>
        <div class="sinopsis">
            La historia empieza en alrededor de 100 años después de la desaparición de Yamabuki Otome,
            la primera esposa Rihan Nura. Rihan por fin recobró la compostura y la vida vuelve a la
            normalidad. A medida que la cabeza del Clan Nura, está ocupado trabajando en la construcción
            de un mundo armonioso para los seres humanos y youkai. Un día, él ve a Setsura molesta por
            lo que decide animarla tomando el clan para ir a disfrutar de las aguas termales …
        </div>
    </div>
    '''
    patron = '<div class="aboxy_lista"[^<]+'
    patron += '<a href="([^"]+)"[^<]+<img class="[^"]+" src="[^"]+" data-original="([^"]+)"[^<]+</a[^<]+'
    patron += '<span[^<]+</span[^<]+'
    patron += '<a[^>]+>([^<]+)</a.*?'
    patron += '<div class="sinopsis">(.*?)</div'
    matches = re.compile(patron, re.DOTALL).findall(data)
    itemlist = []

    for scrapedurl, scrapedthumbnail, scrapedtitle, scrapedplot in matches:
        title = scrapertools.unescape(scrapedtitle)
        fulltitle = title
        url = urlparse.urljoin(item.url, scrapedurl)
        thumbnail = urlparse.urljoin(item.url, scrapedthumbnail)
        plot = scrapertools.htmlclean(scrapedplot)
        show = title
        #if DEBUG: logger.info("title=[{0}], url=[{1}], thumbnail=[{2}]".format(title, url, thumbnail))
        itemlist.append(
            Item(channel=item.channel, action="episodios", title=title, url=url, thumbnail=thumbnail,
                 plot=plot, show=show, fulltitle=fulltitle, fanart=thumbnail, folder=True))

    patron = '<a href="([^"]+)">\»\;</a>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    for match in matches:
        scrapedurl = urlparse.urljoin(item.url, match)
        scrapedtitle = ">> Pagina Siguiente"
        scrapedthumbnail = ""
        scrapedplot = ""
        itemlist.append(
            Item(channel=item.channel, action="series", title=scrapedtitle, url=scrapedurl,
                 thumbnail=scrapedthumbnail, plot=scrapedplot, folder=True, viewmode="movies_with_plot"))

    return itemlist

def findvideos(item):
    logger.info()
    itemlist = list()

    data = httptools.downloadpage(item.url).data
    data = scrapertools.unescape(data)
    soup = BeautifulSoup(data, "html5lib", from_encoding="utf-8")

    lang = soup.find_all("b")
    lang_list = get_langs(lang)
    lang_count = 0

    for tab_soup in soup.find_all("div", class_="contenedor_tab"):
        lang = lang_list[lang_count]
        for elem in tab_soup.find_all("iframe"):
            title = ""
            # The "data-data" attribute holds a base64 chunk plus a plain suffix
            enc_url = scrapertools.find_single_match(elem["data-data"], '([^\+]+)\+(.+)?')
            s = base64.b64decode(enc_url[0]).decode('utf-8')
            i = enc_url[1]
            hidden_url = "https://encriptando.com" + s + i
            hidden_data = httptools.downloadpage(hidden_url, follow_redirects=False,
                                                 headers={'Referer': host}).data
            var, val = scrapertools.find_single_match(hidden_data.replace("'", '"'),
                                                      'var (k|s)="([^"]+)";')
            url = decrypt(var, val)
            if var == "k":
                url += "|%s" % item.url
            if not config.get_setting('unify'):
                title = ' [%s]' % lang
            # the '%s' placeholder is filled with the server name below
            itemlist.append(
                Item(channel=item.channel, title='%s' + title, url=url, action='play',
                     language=lang, infoLabels=item.infoLabels))
        lang_count += 1

    itemlist = servertools.get_servers_itemlist(itemlist, lambda x: x.title % x.server.capitalize())

    # Required by FilterTools
    itemlist = filtertools.get_links(itemlist, item, list_language)

    # Required by AutoPlay
    autoplay.start(itemlist, item)

    if config.get_videolibrary_support() and len(itemlist) > 0 and item.extra != 'findvideos':
        itemlist.append(
            Item(channel=item.channel,
                 title='[COLOR yellow]Añadir esta pelicula a la videoteca[/COLOR]',
                 url=item.url, action="add_pelicula_to_library", extra="findvideos",
                 contentTitle=item.contentTitle))

    return itemlist

def peliculas_list(item):
    logger.info("[streamondemand-pureita altadefinizione01_zone] peliculas_list")
    itemlist = []

    # Download the page
    data = httptools.downloadpage(item.url, headers=headers).data

    # Extract the entries (folders)
    patron = '<img\s*[^>]+src="([^"]+)[^>]+>\s*</a>\s*</td>\s*[^>]+>'
    patron += '<h2>\s*<a href="([^"]+)"\s*title=".*?">([^<]+)</a>\s*</h2></td>.*?'
    patron += '<td class="mlnh-3">(.*?)</td>.*?<td class="mlnh-4">(.*?)</td>'
    matches = re.compile(patron, re.DOTALL).finditer(data)

    for match in matches:
        scrapedplot = ""
        quality = scrapertools.unescape(match.group(5))
        year = scrapertools.unescape(match.group(4))
        scrapedtitle = scrapertools.unescape(match.group(3))
        scrapedurl = scrapertools.unescape(match.group(2))
        scrapedthumbnail = urlparse.urljoin(item.url, match.group(1))

        if year:
            fulltitle = scrapedtitle.strip() + " (" + year + ")"
        else:
            fulltitle = scrapedtitle
        if quality:
            quality = " ([COLOR yellow]" + quality + "[/COLOR])"
        if year:
            year = " ([COLOR yellow]" + year + "[/COLOR])"

        itemlist.append(
            infoSod(Item(channel=__channel__, action="findvideos", contentType="movie",
                         fulltitle=fulltitle, show=fulltitle,
                         title="[COLOR azure]" + scrapedtitle + "[/COLOR]" + year + quality,
                         url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot,
                         folder=True), tipo='movie'))

    # Extract the paginator
    patronvideos = 'href="([^"]+)">»</a></i>'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    if len(matches) > 0:
        scrapedurl = urlparse.urljoin(item.url, matches[0])
        itemlist.append(
            Item(channel=__channel__, action="peliculas_list",
                 title="[COLOR orange]Successivi >>[/COLOR]", url=scrapedurl,
                 thumbnail="https://raw.githubusercontent.com/orione7/Pelis_images/master/channels_icon_pureita/next_1.png",
                 folder=True))

    return itemlist

def episodios(item):
    logger.info()
    itemlist = []

    data = httptools.downloadpage(item.url).data
    datas = re.sub(r"\n|\r|\t|&nbsp;|<br>", "", data)
    # logger.info(datas)

    patron = '<div class="imagen"><a href="([^"]+)">.*?'   # episode url, img
    patron += '<div class="numerando">(.*?)</div>.*?'      # episode numbering
    patron += '<a href="[^"]+">([^<]+)</a>'                # episode title
    matches = scrapertools.find_multiple_matches(datas, patron)

    for scrapedurl, scrapedtitle, scrapedname in matches:
        scrapedtitle = scrapedtitle.replace('--', '0')
        patron = '(\d+) - (\d+)'
        match = re.compile(patron, re.DOTALL).findall(scrapedtitle)
        season, episode = match[0]

        if 'season' in item.infoLabels and int(item.infoLabels['season']) != int(season):
            continue

        title = "%sx%s: %s" % (season, episode.zfill(2), scrapertools.unescape(scrapedname))
        new_item = item.clone(title=title, url=scrapedurl, action="findvideos", text_color=color3,
                              fulltitle=title, contentType="episode")
        if 'infoLabels' not in new_item:
            new_item.infoLabels = {}
        new_item.infoLabels['season'] = season
        new_item.infoLabels['episode'] = episode.zfill(2)
        itemlist.append(new_item)

    # TODO: skip this when adding to the video library
    if not item.extra:
        # Fetch the data for every episode of the season using multiple threads
        tmdb.set_infoLabels(itemlist, __modo_grafico__)
        for i in itemlist:
            if i.infoLabels['title']:
                # If the episode has its own name, add it to the item title
                i.title = "%sx%s %s" % (i.infoLabels['season'], i.infoLabels['episode'], i.infoLabels['title'])
            if i.infoLabels.has_key('poster_path'):
                # If the episode has its own image, use it instead of the poster
                i.thumbnail = i.infoLabels['poster_path']
        itemlist.sort(key=lambda it: int(it.infoLabels['episode']),
                      reverse=config.get_setting('orden_episodios', __channel__))

    tmdb.set_infoLabels_itemlist(itemlist, __modo_grafico__)

    # "Añadir esta serie a la videoteca" option
    if config.get_videolibrary_support() and len(itemlist) > 0:
        itemlist.append(
            Item(channel=__channel__, title="Añadir esta serie a la videoteca", url=item.url,
                 action="add_serie_to_library", extra="episodios", show=item.show,
                 category="Series", text_color=color1, thumbnail=thumbnail_host, fanart=fanart_host))

    return itemlist

def findvid_film(item):
    def load_links(itemlist, re_txt, color, desc_txt):
        streaming = scrapertools.find_single_match(data, re_txt)
        patron = '<td><a[^h]href="([^"]+)"[^>]+>([^<]+)<'
        matches = re.compile(patron, re.DOTALL).findall(streaming)
        for scrapedurl, scrapedtitle in matches:
            logger.debug("##### findvideos %s ## %s ## %s ##" % (desc_txt, scrapedurl, scrapedtitle))
            title = "[COLOR " + color + "]" + desc_txt + ":[/COLOR] " + item.title + " [COLOR grey]" + QualityStr + "[/COLOR] [COLOR blue][" + scrapedtitle + "][/COLOR]"
            itemlist.append(
                Item(channel=__channel__, action="play", title=title, url=scrapedurl,
                     fulltitle=item.fulltitle, thumbnail=item.thumbnail, show=item.show, folder=False))

    logger.info("[cineblog01.py] findvid_film")
    itemlist = []

    # Load the page
    data = httptools.downloadpage(item.url, headers=headers).data
    data = scrapertools.decodeHtmlentities(data)

    # Extract the quality format
    patronvideos = '>([^<]+)</strong></div>'
    matches = re.compile(patronvideos, re.DOTALL).finditer(data)
    QualityStr = ""
    for match in matches:
        QualityStr = scrapertools.unescape(match.group(1))[6:]

    # STREAMANGO
    # matches = []
    # u = scrapertools.find_single_match(data, '(?://|\.)streamango\.com/(?:f/|embed/)?[0-9a-zA-Z]+')
    # if u: matches.append((u, 'Streamango'))

    # Extract the contents - Streaming
    load_links(itemlist, '<strong>Streaming:</strong>(.*?)<table height="30">', "orange", "Streaming")
    # Extract the contents - Streaming HD
    load_links(itemlist, '<strong>Streaming HD[^<]+</strong>(.*?)<table height="30">', "yellow", "Streaming HD")
    # Extract the contents - Streaming 3D
    load_links(itemlist, '<strong>Streaming 3D[^<]+</strong>(.*?)<table height="30">', "pink", "Streaming 3D")
    # Extract the contents - Download
    load_links(itemlist, '<strong>Download:</strong>(.*?)<table height="30">', "aqua", "Download")
    # Extract the contents - Download HD
    load_links(itemlist, '<strong>Download HD[^<]+</strong>(.*?)<table width="100%" height="20">', "azure", "Download HD")

    if len(itemlist) == 0:
        itemlist = servertools.find_video_items(item=item)

    return itemlist

def videos(item):
    logger.info("[pornoactricesx.py] videos")
    itemlist = []
    mas = True
    data = ""
    url = item.url

    while len(itemlist) < 25 and mas:
        data = scrapertools.cachePage(url)
        data = scrapertools.unescape(data)

        patron = '<div class="field field-name-title field-type-ds field-label-hidden view-mode-teaser"><div class="field-items"><div class="field-item even"><h1><a href="([^"]+)">([^"]+)</a></h1></div></div></div> </div>'
        patron += '[^<]{4}<div class="group-left">[^<]{5}<div class="field field-name-field-imagen-del-video field-type-image field-label-hidden view-mode-teaser"><div class="field-items">'
        patron += '<figure class="clearfix field-item even"><a href="([^"]+)"><img class="image-style-medium" src="([^"]+)"'
        matches = re.compile(patron, re.DOTALL).findall(data)

        for url, title, url2, thumbnail in matches:
            scrapedtitle = title.replace(" Vídeo porno completo.", "")
            scrapedurl = urlparse.urljoin("http://www.pornoactricesx.com", url)
            scrapedthumbnail = thumbnail
            scrapedplot = ""
            # Debug
            if DEBUG:
                logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")
            itemlist.append(Item(channel=__channel__, action='play', title=scrapedtitle,
                                 url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot))

        # Pattern 2 for search results
        patron = '<div class="field field-name-title field-type-ds field-label-hidden view-mode-search_result">'
        patron += '<div class="field-items"><div class="field-item even"><h1><a href="([^"]+)">([^"]+)</a></h1></div></div></div> </div>'
        patron += '[^<]{4}<div class="group-left">[^<]{5}<div class="field field-name-field-imagen-del-video field-type-image field-label-hidden view-mode-search_result"><div class="field-items"><figure class="clearfix field-item even"><a href="([^"]+)"><img class="image-style-medium" src="([^"]+)" width='
        matches = re.compile(patron, re.DOTALL).findall(data)

        for url, title, url2, thumbnail in matches:
            scrapedtitle = title.replace(" Vídeo porno completo.", "")
            scrapedurl = urlparse.urljoin("http://www.pornoactricesx.com", url)
            scrapedthumbnail = thumbnail
            scrapedplot = ""
            # Debug
            if DEBUG:
                logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")
            itemlist.append(Item(channel=__channel__, action='play', title=scrapedtitle,
                                 url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot))

        patron = '<a title="Ir a la página siguiente" href="([^<]+)">siguiente ›</a>'
        matches = re.compile(patron, re.DOTALL).findall(data)
        if len(matches) > 0:
            url = "http://www.pornoactricesx.com" + matches[0]
            mas = True
        else:
            mas = False

    # Pager
    patron = '<a title="Ir a la página siguiente" href="([^<]+)">siguiente ›</a>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    if len(matches) > 0:
        scrapedurl = "http://www.pornoactricesx.com" + matches[0]
        itemlist.append(Item(channel=__channel__, action="videos", title="Página Siguiente",
                             url=scrapedurl, thumbnail="", folder=True))

    return itemlist
def findvideos(item):
    logger.info()
    itemlist = []

    data = get_source(item.url)
    data = scrapertools.find_single_match(data, '<div id="marco-post">.*?<div id="sidebar">')
    data = scrapertools.unescape(data)
    data = scrapertools.decodeHtmlentities(data)

    options_regex = '<a href="#tab.*?">.*?<b>(.*?)</b>'
    option_matches = re.compile(options_regex, re.DOTALL).findall(data)
    video_regex = '<iframe.*?src="(.*?)".*?</iframe>'
    video_matches = re.compile(video_regex, re.DOTALL).findall(data)

    # for option, scrapedurl in matches:
    # map(None, ...) pairs both lists, padding the shorter one with None (Python 2)
    for option, scrapedurl in map(None, option_matches, video_matches):
        if scrapedurl is None:
            continue

        scrapedurl = scrapedurl.replace('"', '').replace('&amp;', '&')

        try:
            data_video = get_source(scrapedurl)
        except Exception as e:
            logger.info('Error in url: ' + scrapedurl)
            continue

        # logger.info(data_video)

        # This site uses several intermediate pages, each with its own rules.
        source_headers = dict()
        source_headers["Content-Type"] = "application/x-www-form-urlencoded; charset=UTF-8"
        source_headers["X-Requested-With"] = "XMLHttpRequest"

        if scrapedurl.find("https://repro") != 0:
            logger.info("Case 0: external url")
            url = scrapedurl
            itemlist.append(Item(channel=item.channel, title=option, url=url, action='play',
                                 language=IDIOMA))
        elif scrapedurl.find("pi76823.php") > 0:
            logger.info("Case 1")
            source_data = get_source(scrapedurl)
            source_regex = 'post\( "(.*?)", { acc: "(.*?)", id: \'(.*?)\', tk: \'(.*?)\' }'
            source_matches = re.compile(source_regex, re.DOTALL).findall(source_data)
            for source_page, source_acc, source_id, source_tk in source_matches:
                source_url = scrapedurl[0:scrapedurl.find("pi76823.php")] + source_page
                source_result = httptools.downloadpage(source_url,
                                                       post='acc=' + source_acc + '&id=' + source_id + '&tk=' + source_tk,
                                                       headers=source_headers)
                if source_result.code == 200:
                    source_json = jsontools.load(source_result.data)
                    itemlist.append(Item(channel=item.channel, title=option,
                                         url=source_json['urlremoto'], action='play',
                                         language=IDIOMA))
        elif scrapedurl.find("pi7.php") > 0:
            logger.info("Case 2")
            source_data = get_source(scrapedurl)
            source_regex = 'post\( "(.*?)", { acc: "(.*?)", id: \'(.*?)\', tk: \'(.*?)\' }'
            source_matches = re.compile(source_regex, re.DOTALL).findall(source_data)
            for source_page, source_acc, source_id, source_tk in source_matches:
                source_url = scrapedurl[0:scrapedurl.find("pi7.php")] + source_page
                source_result = httptools.downloadpage(source_url,
                                                       post='acc=' + source_acc + '&id=' + source_id + '&tk=' + source_tk,
                                                       headers=source_headers)
                if source_result.code == 200:
                    source_json = jsontools.load(source_result.data)
                    itemlist.append(Item(channel=item.channel, title=option,
                                         url=source_json['urlremoto'], action='play',
                                         language=IDIOMA))
        elif scrapedurl.find("reproducir120.php") > 0:
            logger.info("Case 3")
            source_data = get_source(scrapedurl)
            videoidn = scrapertools.find_single_match(source_data, 'var videoidn = \'(.*?)\';')
            tokensn = scrapertools.find_single_match(source_data, 'var tokensn = \'(.*?)\';')
            source_regex = 'post\( "(.*?)", { acc: "(.*?)"'
            source_matches = re.compile(source_regex, re.DOTALL).findall(source_data)
            for source_page, source_acc in source_matches:
                source_url = scrapedurl[0:scrapedurl.find("reproducir120.php")] + source_page
                source_result = httptools.downloadpage(source_url,
                                                       post='acc=' + source_acc + '&id=' + videoidn + '&tk=' + tokensn,
                                                       headers=source_headers)
                if source_result.code == 200:
                    source_json = jsontools.load(source_result.data)
                    urlremoto_regex = "file:'(.*?)'"
                    urlremoto_matches = re.compile(urlremoto_regex, re.DOTALL).findall(source_json['urlremoto'])
                    if len(urlremoto_matches) == 1:
                        itemlist.append(Item(channel=item.channel, title=option,
                                             url=urlremoto_matches[0], action='play',
                                             language=IDIOMA))
        elif scrapedurl.find("reproducir14.php") > 0:
            logger.info("Case 4")
            source_data = get_source(scrapedurl)
            source_regex = '<div id="player-contenido" vid="(.*?)" name="(.*?)"'
            source_matches = re.compile(source_regex, re.DOTALL).findall(source_data)
            videoidn = source_matches[0][0]
            tokensn = source_matches[0][1]
            source_regex = 'post\( "(.*?)", { acc: "(.*?)"'
            source_matches = re.compile(source_regex, re.DOTALL).findall(source_data)
            for source_page, source_acc in source_matches:
                source_url = scrapedurl[0:scrapedurl.find("reproducir14.php")] + source_page
                source_result = httptools.downloadpage(source_url,
                                                       post='acc=' + source_acc + '&id=' + videoidn + '&tk=' + tokensn,
                                                       headers=source_headers)
                if source_result.code == 200:
                    source_json = jsontools.load(source_result.data)
                    itemlist.append(Item(channel=item.channel, title=option,
                                         url=source_json['urlremoto'], action='play',
                                         language=IDIOMA))
        else:
            logger.info("New case")

    itemlist = servertools.get_servers_itemlist(itemlist)

    # Required by FilterTools
    itemlist = filtertools.get_links(itemlist, item, list_language)

    # Required by AutoPlay
    autoplay.start(itemlist, item)

    if config.get_videolibrary_support() and len(itemlist) > 0 and item.extra != 'findvideos':
        itemlist.append(Item(channel=item.channel,
                             title='[COLOR yellow]Añadir esta pelicula a la videoteca[/COLOR]',
                             url=item.url, action="add_pelicula_to_library", extra="findvideos",
                             contentTitle=item.contentTitle))

    return itemlist
def peliculas(item): logger.info("streamondemand-pureita.cineblogrun peliculas") itemlist = [] # Descarga la pagina data = httptools.downloadpage(item.url, headers=headers).data # Extrae las entradas (carpetas) patron = r'<a href="([^"]+)">\s*<div class="Image">\s*<figure clas[^>]+><img[^>]+src="([^"]+)"\s*' patron += r'class[^>]+><\/figure>\s*<\/div>\s*<h3 class="Title">(.*?)<\/h3>.*?' patron += r'<span[^>]+>([^<]+)</span><span class="Qlty">([^<]+)</span>.*?' patron += r'.*?<p>(.*?)</p>' matches = re.compile(patron, re.DOTALL).finditer(data) for match in matches: scrapedplot = scrapertools.unescape(match.group(6)) quality = scrapertools.unescape(match.group(5)) year = scrapertools.unescape(match.group(4)) scrapedtitle = scrapertools.unescape(match.group(3)) scrapedthumbnail = scrapertools.unescape(match.group(2)) scrapedurl = urlparse.urljoin(item.url, match.group(1)) scrapedtitle = scrapedtitle.replace("&", "e") if "." in year or "h" in year: year = "" else: year = " ([COLOR yellow]" + year + "[/COLOR])" if "1080" in quality or "720" in quality: quality = " ([COLOR yellow]HD[/COLOR])" else: if "Unknown" in quality: quality = " ([COLOR yellow]NA[/COLOR])" else: quality = " ([COLOR yellow]LQ[/COLOR])" itemlist.append( infoSod(Item(channel=__channel__, action="findvideos", contentType="movie", fulltitle=scrapedtitle, show=scrapedtitle, title="[COLOR azure]" + scrapedtitle + "[/COLOR]" + year + quality, url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot, folder=True), tipo='movie')) patronvideos = '<a class="next page-numbers" href="([^"]+)">' matches = re.compile(patronvideos, re.DOTALL).findall(data) if len(matches) > 0: scrapedurl = urlparse.urljoin(item.url, matches[0]) itemlist.append( Item( channel=__channel__, action="peliculas", title="[COLOR orange]Successivi >>[/COLOR]", url=scrapedurl, thumbnail= "https://raw.githubusercontent.com/orione7/Pelis_images/master/channels_icon_pureita/next_1.png", folder=True)) return itemlist
def registerOrLogin(page_url):
    if config.get_setting('username', server='hdmario') and config.get_setting('password', server='hdmario'):
        if login():
            return True
    if platformtools.dialog_yesno('HDmario',
                                  'Questo server necessita di un account, ne hai già uno oppure vuoi tentare una registrazione automatica?',
                                  yeslabel='Accedi', nolabel='Tenta registrazione'):
        from specials import setting
        from core.item import Item
        user_pre = config.get_setting('username', server='hdmario')
        password_pre = config.get_setting('password', server='hdmario')
        setting.server_config(Item(config='hdmario'))
        user_post = config.get_setting('username', server='hdmario')
        password_post = config.get_setting('password', server='hdmario')

        if user_pre != user_post or password_pre != password_post:
            return registerOrLogin(page_url)
        else:
            return False
    else:
        import random
        import string

        logger.debug('Automatic registration in progress')
        mailbox = Gmailnator()
        randPsw = ''.join(random.choice(string.ascii_letters + string.digits) for i in range(10))

        captcha = httptools.downloadpage(baseUrl + '/captchaInfo').json
        logger.debug('email: ' + mailbox.address)
        # NOTE: the next two statements were masked ("******") in the source; they are
        # restored from context: the generated password is logged, then the registration
        # dialog is opened with the temporary mailbox and password prefilled.
        logger.debug('pass: ' + randPsw)
        reg = platformtools.dialog_register(baseUrl + '/register/', email=True, password=True,
                                            email_default=mailbox.address, password_default=randPsw,
                                            captcha_img=captcha['captchaUrl'])
        if not reg:
            return False

        regPost = httptools.downloadpage(baseUrl + '/register/',
                                         post={'email': reg['email'],
                                               'email_confirmation': reg['email'],
                                               'password': reg['password'],
                                               'password_confirmation': reg['password'],
                                               'captchaUuid': captcha['captchaUuid'],
                                               'captcha': reg['captcha']})

        if '/register' in regPost.url:
            error = scrapertools.htmlclean(scrapertools.find_single_match(regPost.data, 'Impossibile proseguire.*?</div>'))
            error = scrapertools.unescape(scrapertools.re.sub('\n\s+', ' ', error))
            platformtools.dialog_ok('HDmario', error)
            return False

        if reg['email'] == mailbox.address:
            mail = mailbox.waitForMail()
            if mail:
                checkUrl = scrapertools.find_single_match(mail.body, 'href="([^"]+)">Premi qui').replace(r'\/', '/')
                logger.debug('CheckURL: ' + checkUrl)
                httptools.downloadpage(checkUrl)
                config.set_setting('username', mailbox.address, server='hdmario')
                config.set_setting('password', randPsw, server='hdmario')
                # NOTE: part of this dialog call was masked in the source; the password
                # argument is restored from context.
                platformtools.dialog_ok('HDmario',
                                        'Registrato automaticamente con queste credenziali:\nemail:' + mailbox.address + '\npass: ' + randPsw)
            else:
                platformtools.dialog_ok('HDmario', 'Impossibile registrarsi automaticamente')
                return False
        else:
            platformtools.dialog_ok('HDmario',
                                    'Hai modificato la mail quindi KoD non sarà in grado di effettuare la verifica in autonomia, apri la casella ' + reg['email'] + ' e clicca sul link. Premi ok quando fatto')

    logger.debug('Registration completed')
    return True
def listado(item): logger.info("pelisalacarta.channels.pelispedia listado") itemlist = [] action = "findvideos" if item.extra == 'serie': action = "episodios" data = anti_cloudflare(item.url) data = re.sub(r"\n|\r|\t|\s{2}| |<Br>|<BR>|<br>|<br/>|<br />|-\s", "", data) # logger.info("data -- {}".format(data)) patron = '<li[^>]+><a href="([^"]+)" alt="([^<]+).*?<img src="([^"]+).*?>.*?<span>\(([^)]+).*?' \ '<p class="font12">(.*?)</p>' matches = re.compile(patron, re.DOTALL).findall(data) for scrapedurl, scrapedtitle, scrapedthumbnail, scrapedyear, scrapedplot in matches: title = "{title} ({year})".format(title=scrapertools.unescape( scrapedtitle.strip()), year=scrapedyear) plot = scrapertools.entityunescape(scrapedplot) itemlist.append( Item(channel=__channel__, title=title, url=urlparse.urljoin(HOST, scrapedurl), action=action, thumbnail=scrapedthumbnail, plot=plot, context="", show=scrapertools.unescape(scrapedtitle.strip()), extra=item.extra)) # no se muestra ordenado porque la paginación de la página no se hace correctamente # itemlist.sort(key=lambda item: item.title) # numero de registros que se muestran por página, es fijo por cada paginación if len(matches) == 48: file_php = "more" tipo_serie = "" if item.extra == "movies": anio = scrapertools.find_single_match(item.url, "(?:year=)(\w+)") letra = scrapertools.find_single_match(item.url, "(?:letra=)(\w+)") genero = scrapertools.find_single_match(item.url, "(?:gender=|genre=)(\w+)") params = "letra={letra}&year={year}&genre={genero}".format( letra=letra, year=anio, genero=genero) else: tipo2 = scrapertools.find_single_match(item.url, "(?:series/|tipo2=)(\w+)") tipo_serie = "&tipo=serie" if tipo2 != "all": file_php = "letra" tipo_serie += "&tipo2=" + tipo2 genero = "" if tipo2 == "anio": genero = scrapertools.find_single_match( item.url, "(?:anio/|genre=)(\w+)") if tipo2 == "genero": genero = scrapertools.find_single_match( item.url, "(?:genero/|genre=)(\w+)") if tipo2 == "letra": genero = scrapertools.find_single_match( item.url, "(?:letra/|genre=)(\w+)") params = "genre={genero}".format(genero=genero) url = "http://www.pelispedia.tv/api/{file}.php?rangeStart=48&rangeEnd=48{tipo_serie}&{params}".\ format(file=file_php, tipo_serie=tipo_serie, params=params) if "rangeStart" in item.url: ant_inicio = scrapertools.find_single_match( item.url, "rangeStart=(\d+)&") inicio = str(int(ant_inicio) + 48) url = item.url.replace("rangeStart=" + ant_inicio, "rangeStart=" + inicio) itemlist.append( Item(channel=__channel__, action="listado", title=">> Página siguiente", extra=item.extra, url=url)) return itemlist
def peliculas(item): logger.info("[streamondemand-pureita altadefinizione01_zone] peliculas") itemlist = [] # Descarga la pagina data = httptools.downloadpage(item.url, headers=headers).data # Extrae las entradas (carpetas) patron = '<h2>\s*<a href="([^"]+)">([^"]+)<\/a>\s*<\/h2>\s*[^>]+>[^>]+.*?\s*' patron += '</div>\s*<a href[^>]+>[^>]+src="([^"]+)"[^>]+>\s*</a>\s*' patron += '<div class="trdublaj">\s*(.*?)</div>\s*[^>]+>(.*?)\s*<' patron += '.*?<li>\s*<span class="ml[^"]+">(.*?)<\/.*?span>\s*<\/li>\s*' patron += '<li><span class="ml-label">([^<]+)</span></li>.*?<p>(.*?)</p>' matches = re.compile(patron, re.DOTALL).finditer(data) for match in matches: scrapedplot = scrapertools.unescape(match.group(8)) year = scrapertools.unescape(match.group(7)) rating = scrapertools.unescape(match.group(6)) sub = scrapertools.unescape(match.group(5)) quality = scrapertools.unescape(match.group(4)) scrapedthumbnail = urlparse.urljoin(item.url, match.group(3)) scrapedtitle = scrapertools.unescape(match.group(2)) scrapedurl = scrapertools.unescape(match.group(1)) if year: scrapetitle = scrapedtitle.strip() + " (" + year + ")" else: scrapetitle = scrapedtitle if sub: sub = " ([COLOR yellow]" + sub + "[/COLOR])" if quality: quality = " ([COLOR yellow]" + quality + "[/COLOR])" if year: year = " ([COLOR yellow]" + year + "[/COLOR])" if rating: rating = rating.replace("<b>", "") rating = " ([COLOR yellow]" + rating + "[/COLOR])" itemlist.append( infoSod(Item(channel=__channel__, action="findvideos", contentType="movie", fulltitle=scrapetitle, show=scrapetitle, title="[COLOR azure]" + scrapedtitle + "[/COLOR] " + sub + year + quality + rating, url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot, folder=True), tipo='movie')) # Extrae el paginador patronvideos = 'href="([^"]+)">»</a></i>' matches = re.compile(patronvideos, re.DOTALL).findall(data) if len(matches) > 0: scrapedurl = urlparse.urljoin(item.url, matches[0]) itemlist.append( Item( channel=__channel__, action="peliculas", title="[COLOR orange]Successivi >>[/COLOR]", url=scrapedurl, thumbnail= "https://raw.githubusercontent.com/orione7/Pelis_images/master/channels_icon_pureita/next_1.png", folder=True)) return itemlist
def novedades_tv(item):
    logger.info("[streamondemand-pureita filmsenzalimiti] novedades")
    itemlist = []

    # Download the page
    data = httptools.downloadpage(item.url, headers=headers).data

    # Extract the entries (folders)
    patron = '<li><a href="([^"]+)" data-thumbnail="([^"]+)"><div>\s*'
    patron += '<div class="title">([^<]+)</div>\s*'
    patron += '<div class="episode" title="Voto IMDb">([^<]+)</div>'
    matches = re.compile(patron, re.DOTALL).finditer(data)

    for match in matches:
        imdb = scrapertools.unescape(match.group(4))
        scrapedtitle = scrapertools.unescape(match.group(3))
        scrapedthumbnail = scrapertools.unescape(match.group(2))
        scrapedurl = urlparse.urljoin(item.url, match.group(1))

        if "[HD]" in scrapedtitle:
            quality = " ([COLOR yellow]HD[/COLOR])"
        else:
            quality = ""
        if "HD" in imdb or "N/A" in imdb or "N/D" in imdb:
            imdb = ""
        else:
            imdb = " ([COLOR yellow]" + imdb + "[/COLOR])"

        scrapedplot = ""
        scrapedtitle = scrapedtitle.replace(" [HD]", "").replace(" & ", " e ")
        scrapedtitle = scrapedtitle.replace(" – ", " - ").replace("’", "'")
        scrapedtitle = scrapedtitle.strip()
        scrapedtitle = scrapertools.decodeHtmlentities(scrapedtitle)

        itemlist.append(infoSod(Item(channel=__channel__, action="episodios", contentType="tv",
                                     fulltitle=scrapedtitle, show=scrapedtitle,
                                     title="[COLOR azure]" + scrapedtitle + "[/COLOR]" + quality + imdb,
                                     url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot,
                                     folder=True), tipo='tv'))

    # Extract the pager
    patronvideos = '<li><a href="([^"]+)" >Pagina successiva'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    if len(matches) > 0:
        scrapedurl = urlparse.urljoin(item.url, matches[0])
        itemlist.append(Item(channel=__channel__, action="novedades_tv",
                             title="[COLOR orange]Successivi >>[/COLOR]", url=scrapedurl,
                             thumbnail="https://raw.githubusercontent.com/orione7/Pelis_images/master/channels_icon_pureita/next_1.png",
                             folder=True))

    return itemlist
def peliculas_update(item):
    logger.info("[streamondemand-pureita altadefinizione01_zone] peliculas_update")
    itemlist = []

    # Download the page
    data = httptools.downloadpage(item.url, headers=headers).data
    patron = '<div class="son_eklenen_head"></div>(.*?)<div id="right_bar">'
    data = scrapertools.find_single_match(data, patron)

    # Extract the entries (folders)
    patron = '</div>\s*<a href="([^"]+)">\s*' \
             '<img width=".*?"\s*height=".*?" src="([^"]+)" [^>]+ alt="([^<]+)"\s*title="".*?/>.*?' \
             '</a>\s*<div class="trdublaj">\s*(.*?)</div>\s*[^>]+>(.*?)\s*<' \
             '.*?<li><span class="ml-label">([^<]+)</span></li>.*?<p>(.*?)</p>'
    matches = re.compile(patron, re.DOTALL).finditer(data)

    for match in matches:
        scrapedplot = scrapertools.unescape(match.group(7))
        year = scrapertools.unescape(match.group(6))
        sub = scrapertools.unescape(match.group(5))
        quality = scrapertools.unescape(match.group(4))
        scrapedtitle = scrapertools.unescape(match.group(3))
        scrapedthumbnail = urlparse.urljoin(item.url, match.group(2))
        scrapedurl = scrapertools.unescape(match.group(1))

        if sub:
            sub = " ([COLOR yellow]" + sub + "[/COLOR])"
        if quality:
            quality = " ([COLOR yellow]" + quality + "[/COLOR])"
        if year:
            year = " ([COLOR yellow]" + year + "[/COLOR])"

        itemlist.append(infoSod(Item(channel=__channel__, action="findvideos", contentType="movie",
                                     fulltitle=scrapedtitle, show=scrapedtitle,
                                     title="[COLOR azure]" + scrapedtitle + "[/COLOR] " + sub + year + quality,
                                     url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot,
                                     folder=True), tipo='movie'))

    # Extract the pager
    patronvideos = 'href="([^"]+)">»</a></i>'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    if len(matches) > 0:
        scrapedurl = urlparse.urljoin(item.url, matches[0])
        itemlist.append(Item(channel=__channel__, action="peliculas_update",
                             title="[COLOR orange]Successivi >>[/COLOR]", url=scrapedurl,
                             thumbnail="https://raw.githubusercontent.com/orione7/Pelis_images/master/channels_icon_pureita/next_1.png",
                             folder=True))

    return itemlist
def findvideos(item): logger.info("pelisalacarta.sinluces findvideos") itemlist = [] # Descarga la pagina data = scrapertools.cache_page(item.url) data = re.sub(r"\n|\r|\t|\s{2}| ", "", data) #extra enlaces patron = '<div class="play-c">(.*?)<div class="datos">' matches = re.compile(patron, re.DOTALL).findall(data) if not "hqq" in data: itemlist.append( Item( channel=__channel__, title= "[COLOR orange][B]Sin servidores para Pelisalacarta...[/B][/COLOR]", thumbnail="http://s6.postimg.org/55zljwr4h/sinnoisethumb.png", fanart="http://s6.postimg.org/avfu47xap/sinnoisefan.jpg", folder=False)) for bloque_enlaces_idiomas in matches: patronenlaces = '<div id="play-(.*?)".*?src="([^"]+)"' matchesenlaces = re.compile(patronenlaces, re.DOTALL).findall(bloque_enlaces_idiomas) patronidiomas = '<a href="#play-(.*?)">([^<]+)' matchesidiomas = re.compile(patronidiomas, re.DOTALL).findall(bloque_enlaces_idiomas) for numero, scrapedurl in matchesenlaces: url = scrapedurl for numero2, idiomas in matchesidiomas: if numero == numero2: title = idiomas idiomas = re.sub(r"[0-9]", "", idiomas) listavideos = servertools.findvideos(url) for video in listavideos: idiomas = idiomas.replace( idiomas, "[COLOR white]" + idiomas + "[/COLOR]") videotitle = scrapertools.unescape( video[0]) + "-" + idiomas url = video[1] server = video[2] videotitle = videotitle.replace( videotitle, "[COLOR skyblue]" + videotitle + "[/COLOR]") title_first = "[COLOR gold]Ver en--[/COLOR]" title = title_first + videotitle itemlist.append( Item(channel=__channel__, action="play", server=server, title=title, url=url, thumbnail=item.extra, fulltitle=item.title, fanart=item.show, folder=False)) #otro patronenlaces patronenlaces = '<div id="play-(.*?)".*?src=\'([^\']+)\'' matchesenlaces = re.compile(patronenlaces, re.DOTALL).findall(bloque_enlaces_idiomas) patronidiomas = '<a href="#play-(.*?)">([^<]+)' matchesidiomas = re.compile(patronidiomas, re.DOTALL).findall(bloque_enlaces_idiomas) for numero, url in matchesenlaces: pepe = url for numero2, idiomas in matchesidiomas: if numero == numero2: title = idiomas idiomas = re.sub(r"[0-9]", "", idiomas) listavideos = servertools.findvideos(pepe) for video in listavideos: idiomas = idiomas.replace( idiomas, "[COLOR white]" + idiomas + "[/COLOR]") videotitle = scrapertools.unescape( video[0]) + "-" + idiomas url = video[1] server = video[2] videotitle = videotitle.replace( videotitle, "[COLOR skyblue]" + videotitle + "[/COLOR]") title_first = "[COLOR gold]Ver en--[/COLOR]" title = title_first + videotitle itemlist.append( Item(channel=__channel__, action="play", server=server, title=title, url=url, thumbnail=item.extra, fulltitle=item.title, fanart=item.show, folder=False)) patron = '<em>opción \d+, ([^<]+)</em>.*?' 
# Datos que contienen los enlaces para sacarlos con servertools.findvideos patron += '<div class="contenedor_tab">(.*?)<div style="clear:both;">' matches = re.compile(patron, re.DOTALL).findall(data) for idioma, datosEnlaces in matches: listavideos = servertools.findvideos(datosEnlaces) for video in listavideos: videotitle = scrapertools.unescape(video[0]) + "-" + idioma url = video[1] server = video[2] videotitle = videotitle.replace( videotitle, "[COLOR skyblue]" + videotitle + "[/COLOR]") title_first = "[COLOR gold]Ver en--[/COLOR]" title = title_first + videotitle idioma = idioma.replace(idioma, "[COLOR white]" + idioma + "[/COLOR]") itemlist.append( Item(channel=__channel__, action="play", server=server, title=title, url=url, thumbnail=item.extra, fulltitle=item.title, fanart=item.show, folder=False)) return itemlist
def episodios(item): logger.info("pelisalacarta.channels.animeflv episodios") itemlist = [] data = scrapertools.anti_cloudflare(item.url, headers=CHANNEL_DEFAULT_HEADERS, host=CHANNEL_HOST) ''' <div class="tit">Listado de episodios <span class="fecha_pr">Fecha Próximo: 2013-06-11</span></div> <ul class="anime_episodios" id="listado_epis"> <li><a href="/ver/aiura-9.html">Aiura 9</a></li> <li><a href="/ver/aiura-8.html">Aiura 8</a></li> <li><a href="/ver/aiura-7.html">Aiura 7</a></li> <li><a href="/ver/aiura-6.html">Aiura 6</a></li> <li><a href="/ver/aiura-5.html">Aiura 5</a></li> <li><a href="/ver/aiura-4.html">Aiura 4</a></li> <li><a href="/ver/aiura-3.html">Aiura 3</a></li> <li><a href="/ver/aiura-2.html">Aiura 2</a></li> <li><a href="/ver/aiura-1.html">Aiura 1</a></li> </ul> ''' data = scrapertools.find_single_match( data, '<div class="tit">Listado de episodios.*?</div>(.*?)</ul>') patron = '<li><a href="([^"]+)">([^<]+)</a></li>' matches = re.compile(patron, re.DOTALL).findall(data) for scrapedurl, scrapedtitle in matches: title = scrapertools.unescape(scrapedtitle) url = urlparse.urljoin(item.url, scrapedurl) thumbnail = item.thumbnail plot = item.plot # TODO crear funcion que pasandole el titulo y buscando en un array de series establezca el valor el nombre # y temporada / capitulo para que funcione con trak.tv season = 1 episode = 1 patron = re.escape(item.show) + "\s+(\d+)" # logger.info("title {0}".format(title)) # logger.info("patron {0}".format(patron)) try: episode = scrapertools.get_match(title, patron) episode = int(episode) # logger.info("episode {0}".format(episode)) except IndexError: pass except ValueError: pass episode_title = scrapertools.find_single_match(title, "\d+:\s*(.*)") if episode_title == "": episode_title = "Episodio " + str(episode) season, episode = numbered_for_tratk(item.show, season, episode) if len(str(episode)) == 1: title = str(season) + "x0" + str(episode) else: title = str(season) + "x" + str(episode) title = item.show + " - " + title + " " + episode_title #if DEBUG: logger.info("title=[{0}], url=[{1}], thumbnail=[{2}]".format(title, url, thumbnail)) itemlist.append( Item(channel=item.channel, action="findvideos", title=title, url=url, thumbnail=thumbnail, plot=plot, show=item.show, fulltitle="{0} {1}".format(item.show, title), fanart=thumbnail, viewmode="movies_with_plot", folder=True)) if config.get_library_support() and len(itemlist) > 0: itemlist.append( Item(channel=item.channel, title="Añadir esta serie a la biblioteca de XBMC", url=item.url, action="add_serie_to_library", extra="episodios", show=item.show)) itemlist.append( Item(channel=item.channel, title="Descargar todos los episodios de la serie", url=item.url, action="download_all_episodes", extra="episodios", show=item.show)) return itemlist
def peliculas(item): logger.info("[cineblog01.py] mainlist") itemlist = [] if item.url == "": item.url = sito # Descarga la página data = scrapertools.anti_cloudflare(item.url, headers) # Extrae las entradas (carpetas) patronvideos = '<div class="span4".*?<a.*?<p><img src="([^"]+)".*?' patronvideos += '<div class="span8">.*?<a href="([^"]+)"> <h1>([^"]+)</h1></a>.*?' patronvideos += '<strong>([^<]*)</strong>.*?<br />([^<+]+)' matches = re.compile(patronvideos, re.DOTALL).finditer(data) for match in matches: scrapedtitle = scrapertools.unescape(match.group(3)) scrapedurl = urlparse.urljoin(item.url, match.group(2)) scrapedthumbnail = urlparse.urljoin(item.url, match.group(1)) scrapedthumbnail = scrapedthumbnail.replace(" ", "%20") scrapedplot = scrapertools.unescape("[COLOR orange]" + match.group(4) + "[/COLOR]\n" + match.group(5).strip()) scrapedplot = scrapertools.htmlclean(scrapedplot).strip() if DEBUG: logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]") itemlist.append( infoSod(Item(channel=__channel__, action="findvideos", fulltitle=scrapedtitle, show=scrapedtitle, title=scrapedtitle, url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot, extra=item.extra, viewmode="movie_with_plot"), tipo='movie')) # Next page mark try: bloque = scrapertools.get_match( data, "<div id='wp_page_numbers'>(.*?)</div>") patronvideos = '<a href="([^"]+)">></a></li>' matches = re.compile(patronvideos, re.DOTALL).findall(bloque) scrapertools.printMatches(matches) if len(matches) > 0: scrapedtitle = "[COLOR orange]Successivo>>[/COLOR]" scrapedurl = matches[0] scrapedthumbnail = "" scrapedplot = "" if (DEBUG): logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]") itemlist.append( Item(channel=__channel__, action="HomePage", title="[COLOR yellow]Torna Home[/COLOR]", folder=True)), itemlist.append( Item( channel=__channel__, action="peliculas", title=scrapedtitle, url=scrapedurl, thumbnail= "http://2.bp.blogspot.com/-fE9tzwmjaeQ/UcM2apxDtjI/AAAAAAAAeeg/WKSGM2TADLM/s1600/pager+old.png", extra=item.extra, plot=scrapedplot)) except: pass return itemlist
def listado(item):
    logger.info()
    itemlist = []

    # ~ data = httptools.downloadpage(item.url).data
    data = obtener_data(item.url)
    data = re.sub(r"\n|\r|\t|\s{2}|&nbsp;|<Br>|<BR>|<br>|<br/>|<br />|-\s", "", data)

    if item.extra == 'movies':
        action = "findvideos"
        content_type = "movie"
        patron = '<li[^>]+><a href="([^"]+)" alt="([^<|\(]+).*?<img src="([^"]+).*?>.*?<span>\(([^)]+).*?' \
                 '<p class="font12">(.*?)</p>'
        matches = re.compile(patron, re.DOTALL).findall(data)

        for scrapedurl, scrapedtitle, scrapedthumbnail, scrapedyear, scrapedplot in matches:
            title = "%s (%s)" % (scrapertools.unescape(scrapedtitle.strip()), scrapedyear)
            plot = scrapertools.entityunescape(scrapedplot)
            new_item = Item(channel=__channel__, title=title,
                            url=urlparse.urljoin(CHANNEL_HOST, scrapedurl), action=action,
                            thumbnail=scrapedthumbnail, plot=plot, context="", extra=item.extra,
                            contentType=content_type)
            new_item.fulltitle = scrapertools.unescape(scrapedtitle.strip())
            new_item.infoLabels = {'year': scrapedyear}
            itemlist.append(new_item)
    else:
        action = "temporadas"
        content_type = "tvshow"
        patron = '<li[^>]+><a href="([^"]+)" alt="([^<|\(]+).*?<img src="([^"]+)'
        matches = re.compile(patron, re.DOTALL).findall(data)

        for scrapedurl, scrapedtitle, scrapedthumbnail in matches:
            title = scrapertools.unescape(scrapedtitle.strip())
            new_item = Item(channel=__channel__, title=title,
                            url=urlparse.urljoin(CHANNEL_HOST, scrapedurl), action=action,
                            thumbnail=scrapedthumbnail, context="", extra=item.extra,
                            contentType=content_type, fulltitle=title)
            new_item.show = title
            # Fix: in some cases the url is wrong
            new_item.url = new_item.url.replace(CHANNEL_HOST + "pelicula", CHANNEL_HOST + "serie")
            itemlist.append(new_item)

    # Fetch the basic data for all movies using multiple threads
    tmdb.set_infoLabels(itemlist, __modo_grafico__)

    if '<ul class="pagination"' in data:
        url_next = scrapertools.find_single_match(data, 'href="([^"]*)" rel="next"')
        if url_next:
            url = urlparse.urljoin(CHANNEL_HOST, url_next)
            itemlist.append(Item(channel=__channel__, action="listado", title=">> Página siguiente",
                                 extra=item.extra, url=url, thumbnail=thumbnail_host,
                                 fanart=fanart_host))

    return itemlist
def findvid_film(item): logger.info("[cineblog01.py] findvideos") itemlist = [] # Descarga la página data = scrapertools.anti_cloudflare(item.url, headers) data = scrapertools.decodeHtmlentities(data).replace( 'http://cineblog01.pw', 'http://k4pp4.pw') # Extract the quality format patronvideos = '>([^<]+)</strong></div>' matches = re.compile(patronvideos, re.DOTALL).finditer(data) QualityStr = "" for match in matches: QualityStr = scrapertools.unescape(match.group(1))[6:] # Extrae las entradas streaming = scrapertools.find_single_match( data, '<strong>Streaming:</strong>(.*?)<table height="30">') patron = '<td><a href="([^"]+)" target="_blank">([^<]+)</a></td>' matches = re.compile(patron, re.DOTALL).findall(streaming) for scrapedurl, scrapedtitle in matches: print "##### findvideos Streaming ## %s ## %s ##" % (scrapedurl, scrapedtitle) title = "[COLOR orange]Streaming:[/COLOR] " + item.title + " [COLOR grey]" + QualityStr + "[/COLOR] [COLOR blue][" + scrapedtitle + "][/COLOR]" itemlist.append( Item(channel=__channel__, action="play", title=title, url=scrapedurl, fulltitle=item.fulltitle, thumbnail=item.thumbnail, show=item.show, folder=False)) streaming_hd = scrapertools.find_single_match( data, '<strong>Streaming HD[^<]+</strong>(.*?)<table height="30">') patron = '<td><a href="([^"]+)" target="_blank">([^<]+)</a></td>' matches = re.compile(patron, re.DOTALL).findall(streaming_hd) for scrapedurl, scrapedtitle in matches: print "##### findvideos Streaming HD ## %s ## %s ##" % (scrapedurl, scrapedtitle) title = "[COLOR yellow]Streaming HD:[/COLOR] " + item.title + " [COLOR grey]" + QualityStr + "[/COLOR] [COLOR blue][" + scrapedtitle + "][/COLOR]" itemlist.append( Item(channel=__channel__, action="play", title=title, url=scrapedurl, fulltitle=item.fulltitle, thumbnail=item.thumbnail, show=item.show, folder=False)) streaming_3D = scrapertools.find_single_match( data, '<strong>Streaming 3D[^<]+</strong>(.*?)<table height="30">') patron = '<td><a href="([^"]+)" target="_blank">([^<]+)</a></td>' matches = re.compile(patron, re.DOTALL).findall(streaming_3D) for scrapedurl, scrapedtitle in matches: print "##### findvideos Streaming 3D ## %s ## %s ##" % (scrapedurl, scrapedtitle) title = "[COLOR pink]Streaming 3D:[/COLOR] " + item.title + " [COLOR grey]" + QualityStr + "[/COLOR] [COLOR blue][" + scrapedtitle + "][/COLOR]" itemlist.append( Item(channel=__channel__, action="play", title=title, url=scrapedurl, fulltitle=item.fulltitle, thumbnail=item.thumbnail, show=item.show, folder=False)) download = scrapertools.find_single_match( data, '<strong>Download:</strong>(.*?)<table height="30">') patron = '<td><a href="([^"]+)" target="_blank">([^<]+)</a></td>' matches = re.compile(patron, re.DOTALL).findall(download) for scrapedurl, scrapedtitle in matches: print "##### findvideos Download ## %s ## %s ##" % (scrapedurl, scrapedtitle) title = "[COLOR aqua]Download:[/COLOR] " + item.title + " [COLOR grey]" + QualityStr + "[/COLOR] [COLOR blue][" + scrapedtitle + "][/COLOR]" itemlist.append( Item(channel=__channel__, action="play", title=title, url=scrapedurl, fulltitle=item.fulltitle, thumbnail=item.thumbnail, show=item.show, folder=False)) download_hd = scrapertools.find_single_match( data, '<strong>Download HD[^<]+</strong>(.*?)<table width="100%" height="20">' ) patron = '<td><a href="([^"]+)" target="_blank">([^<]+)</a></td>' matches = re.compile(patron, re.DOTALL).findall(download_hd) for scrapedurl, scrapedtitle in matches: print "##### findvideos Download HD ## %s ## %s ##" % (scrapedurl, 
scrapedtitle) title = "[COLOR azure]Download HD:[/COLOR] " + item.title + " [COLOR grey]" + QualityStr + "[/COLOR] [COLOR blue][" + scrapedtitle + "][/COLOR]" itemlist.append( Item(channel=__channel__, action="play", title=title, url=scrapedurl, fulltitle=item.fulltitle, thumbnail=item.thumbnail, show=item.show, folder=False)) if len(itemlist) == 0: itemlist = servertools.find_video_items(item=item) return itemlist
def novedades_documentales(item):
    logger.info("[shurweb.py] novedades_documentales")
    data = scrapertools.cachePage(item.url)
    data = scrapertools.unescape(data)
    data = scrapertools.get_match(data, '<div class="tab-pane fade" id="docus">(.*?)<div class="panel panel-primary">')
    return peliculas(item, data=data)
def listado(item):
    logger.info()
    itemlist = []

    action = "findvideos"
    content_type = "movie"
    if item.extra == 'serie':
        action = "temporadas"
        content_type = "tvshow"

    # ~ data = httptools.downloadpage(item.url).data
    data = obtener_data(item.url)
    data = re.sub(r"\n|\r|\t|\s{2}|&nbsp;|<Br>|<BR>|<br>|<br/>|<br />|-\s", "", data)
    # logger.info("data -- {}".format(data))

    patron = '<li[^>]+><a href="([^"]+)" alt="([^<|\(]+).*?<img src="([^"]+).*?>.*?<span>\(([^)]+).*?' \
             '<p class="font12">(.*?)</p>'
    matches = re.compile(patron, re.DOTALL).findall(data)

    for scrapedurl, scrapedtitle, scrapedthumbnail, scrapedyear, scrapedplot in matches[:28]:
        title = "%s (%s)" % (scrapertools.unescape(scrapedtitle.strip()), scrapedyear)
        plot = scrapertools.entityunescape(scrapedplot)
        new_item = Item(channel=__channel__, title=title,
                        url=urlparse.urljoin(CHANNEL_HOST, scrapedurl), action=action,
                        thumbnail=scrapedthumbnail, plot=plot, context="", extra=item.extra,
                        contentType=content_type, fulltitle=title)
        if item.extra == 'serie':
            new_item.show = scrapertools.unescape(scrapedtitle.strip())
            # Fix: in some cases the url is wrong
            new_item.url = new_item.url.replace(CHANNEL_HOST + "pelicula", CHANNEL_HOST + "serie")
        else:
            new_item.fulltitle = scrapertools.unescape(scrapedtitle.strip())
            new_item.infoLabels = {'year': scrapedyear}
        # logger.debug(new_item.tostring())
        itemlist.append(new_item)

    # Fetch the basic data for all movies using multiple threads
    tmdb.set_infoLabels(itemlist, __modo_grafico__)

    # Number of records shown per page; fixed at 28 for every pagination
    if len(matches) >= 28 and '/buscar/?' not in item.url:
        file_php = "666more"
        tipo_serie = ""

        if item.extra == "movies":
            anio = scrapertools.find_single_match(item.url, "(?:year=)(\w+)")
            letra = scrapertools.find_single_match(item.url, "(?:letra=)(\w+)")
            genero = scrapertools.find_single_match(item.url, "(?:gender=|genre=)(\w+)")
            params = "letra=%s&year=%s&genre=%s" % (letra, anio, genero)
        else:
            tipo2 = scrapertools.find_single_match(item.url, "(?:series/|tipo2=)(\w+)")
            tipo_serie = "&tipo=serie"
            if tipo2 != "all":
                file_php = "letra"
                tipo_serie += "&tipo2=" + tipo2
            genero = ""
            if tipo2 == "anio":
                genero = scrapertools.find_single_match(item.url, "(?:anio/|genre=)(\w+)")
            if tipo2 == "genero":
                genero = scrapertools.find_single_match(item.url, "(?:genero/|genre=)(\w+)")
            if tipo2 == "letra":
                genero = scrapertools.find_single_match(item.url, "(?:letra/|genre=)(\w+)")
            params = "genre=%s" % genero

        url = "http://www.pelispedia.tv/api/%s.php?rangeStart=28&rangeEnd=28%s&%s" % (file_php, tipo_serie, params)

        if "rangeStart" in item.url:
            ant_inicio = scrapertools.find_single_match(item.url, "rangeStart=(\d+)&")
            inicio = str(int(ant_inicio) + 28)
            url = item.url.replace("rangeStart=" + ant_inicio, "rangeStart=" + inicio)

        itemlist.append(Item(channel=__channel__, action="listado", title=">> Página siguiente",
                             extra=item.extra, url=url, thumbnail=thumbnail_host,
                             fanart=fanart_host))

    return itemlist
def episodios(item):
    logger.info()
    itemlist = []

    # ~ data = httptools.downloadpage(item.url).data
    data = obtener_data(item.url)
    data = re.sub(r"\n|\r|\t|\s{2}|&nbsp;|<Br>|<BR>|<br>|<br/>|<br />|-\s", "", data)

    patron = '<li class="clearfix gutterVertical20"><a href="([^"]+)".*?><small>(.*?)</small>.*?' \
             '<span class.+?>(.*?)</span>'
    matches = re.compile(patron, re.DOTALL).findall(data)

    for scrapedurl, scrapedtitle, scrapedname in matches:
        # logger.info("scrap {}".format(scrapedtitle))
        patron = 'Season\s+(\d),\s+Episode\s+(\d+)'
        match = re.compile(patron, re.DOTALL).findall(scrapedtitle)
        season, episode = match[0]

        if 'season' in item.infoLabels and int(item.infoLabels['season']) != int(season):
            continue

        title = "%sx%s: %s" % (season, episode.zfill(2), scrapertools.unescape(scrapedname))
        new_item = item.clone(title=title, url=scrapedurl, action="findvideos", fulltitle=title,
                              contentType="episode")
        if 'infoLabels' not in new_item:
            new_item.infoLabels = {}
        new_item.infoLabels['season'] = season
        new_item.infoLabels['episode'] = episode.zfill(2)
        itemlist.append(new_item)

    # TODO: skip this when adding to the video library
    if not item.extra:
        # Fetch the data for all episodes of the season using multiple threads
        tmdb.set_infoLabels(itemlist, __modo_grafico__)

        for i in itemlist:
            if i.infoLabels['title']:
                # If the episode has its own name, append it to the item title
                i.title = "%sx%s %s" % (i.infoLabels['season'], i.infoLabels['episode'],
                                        i.infoLabels['title'])
            if 'poster_path' in i.infoLabels:
                # If the episode has its own image, use it instead of the poster
                i.thumbnail = i.infoLabels['poster_path']

        itemlist.sort(key=lambda it: int(it.infoLabels['episode']),
                      reverse=config.get_setting('orden_episodios', __channel__))

    # "Add this series to the video library" option
    if config.get_videolibrary_support() and len(itemlist) > 0:
        itemlist.append(Item(channel=__channel__, title="Añadir esta serie a la videoteca",
                             url=item.url, action="add_serie_to_library", extra="episodios",
                             show=item.show, category="Series", thumbnail=thumbnail_host,
                             fanart=fanart_host))

    return itemlist
def novita(item): logger.info("[cb01anime.py] mainlist") itemlist = [] # Descarga la página data = scrapertools.anti_cloudflare(item.url, headers) ## ------------------------------------------------ cookies = "" matches = config.get_cookie_data(item.url).splitlines()[4:] for cookie in matches: name = cookie.split('\t')[5] value = cookie.split('\t')[6] cookies += name + "=" + value + ";" headers.append(['Cookie', cookies[:-1]]) import urllib _headers = urllib.urlencode(dict(headers)) ## ------------------------------------------------ # Extrae las entradas (carpetas) patronvideos = '<div class="span4"> <a.*?<img src="(.*?)".*?' patronvideos += '<div class="span8">.*?<a href="(.*?)">.*?' patronvideos += '<h1>(.*?)</h1></a>.*?<br />(.*?)<br>.*?' matches = re.compile(patronvideos, re.DOTALL).finditer(data) for match in matches: scrapedthumbnail = match.group(1) scrapedurl = match.group(2) scrapedtitle = scrapertools.unescape(match.group(3)) scrapedplot = scrapertools.unescape(match.group(4)) scrapedplot = scrapertools.decodeHtmlentities(scrapedplot) if scrapedplot.startswith(""): scrapedplot = scrapedplot[64:] if DEBUG: logger.info( "title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]") ## ------------------------------------------------ scrapedthumbnail += "|" + _headers ## ------------------------------------------------ # Añade al listado de XBMC itemlist.append( Item(channel=__channel__, action="listacompleta" if scrapedtitle == "Lista Alfabetica Completa Anime/Cartoon" else "episodi", fulltitle=scrapedtitle, show=scrapedtitle, title=scrapedtitle, url=scrapedurl, thumbnail=scrapedthumbnail, viewmode="movie_with_plot", plot=scrapedplot)) # Put the next page mark try: next_page = scrapertools.get_match(data, "<link rel='next' href='([^']+)'") itemlist.append( Item(channel=__channel__, action="HomePage", title="[COLOR yellow]Torna Home[/COLOR]", folder=True)), itemlist.append( Item(channel=__channel__, action="novita", title="[COLOR orange]Successivo>>[/COLOR]", url=next_page, thumbnail="http://2.bp.blogspot.com/-fE9tzwmjaeQ/UcM2apxDtjI/AAAAAAAAeeg/WKSGM2TADLM/s1600/pager+old.png")) except: pass return itemlist
def downloadpage(url, **opt):
    # logger.info()
    """
    Open a url and return the data obtained

    @param url: url to open.
    @type url: str
    @param post: If it contains any value, it is sent by POST.
    @type post: str
    @param headers: Headers for the request, if it contains nothing the default headers will be used.
    @type headers: dict, list
    @param timeout: Timeout for the request.
    @type timeout: int
    @param follow_redirects: Indicates if redirects are to be followed.
    @type follow_redirects: bool
    @param cookies: Indicates whether cookies are to be used.
    @type cookies: bool
    @param replace_headers: If True, headers passed by the "headers" parameter will completely replace the default headers.
                            If False, the headers passed by the "headers" parameter will modify the default headers.
    @type replace_headers: bool
    @param add_referer: Indicates whether to add the "Referer" header using the domain of the url as a value.
    @type add_referer: bool
    @param only_headers: If True, only headers will be downloaded, omitting the content of the url.
    @type only_headers: bool
    @param random_headers: If True, use the method of selecting random headers.
    @type random_headers: bool
    @param ignore_response_code: If True, ignore the WebErrorException for errors like 404 in veseriesonline, where the data is still usable.
    @type ignore_response_code: bool
    @param use_requests: Use requests.session()
    @type use_requests: bool
    @return: Result of the request
    @rtype: HTTPResponse

    HTTPResponse fields:
        HTTPResponse.success: bool   True: request successful | False: error when making the request
        HTTPResponse.code:    int    Server response code, or error code if an error occurs
        HTTPResponse.error:   str    Description of the error, in case of an error
        HTTPResponse.headers: dict   Dictionary with the server response headers
        HTTPResponse.data:    str    Response obtained from the server
        HTTPResponse.json:    dict   Response obtained from the server in json format
        HTTPResponse.time:    float  Time taken to make the request
    """
    url = scrapertools.unescape(url)
    parse = urlparse.urlparse(url)
    domain = parse.netloc

    if opt.get('cloudscraper'):
        from lib import cloudscraper
        session = cloudscraper.create_scraper()
    else:
        from lib import requests
        session = requests.session()

    if config.get_setting('resolver_dns') and not opt.get('use_requests', False):
        from core import resolverdns
        session.mount('https://', resolverdns.CipherSuiteAdapter(domain))

    req_headers = default_headers.copy()

    # Headers passed as parameters
    if opt.get('headers', None) is not None:
        if not opt.get('replace_headers', False):
            req_headers.update(dict(opt['headers']))
        else:
            req_headers = dict(opt['headers'])

    if domain in directIP.keys() and not opt.get('disable_directIP', False):
        req_headers['Host'] = domain
        url = urlparse.urlunparse(parse._replace(netloc=directIP.get(domain)))

    if opt.get('random_headers', False) or HTTPTOOLS_DEFAULT_RANDOM_HEADERS:
        req_headers['User-Agent'] = random_useragent()

    url = urllib.quote(url, safe="%/:=&?~#+!$,;'@()*[]")

    opt['url_save'] = url
    opt['post_save'] = opt.get('post', None)

    response = {}
    info_dict = []
    payload = dict()
    files = {}
    file_name = ''

    session.verify = opt.get('verify', True)

    if opt.get('cookies', True):
        session.cookies = cj
    session.headers.update(req_headers)

    proxy_data = {'dict': {}}

    inicio = time.time()

    if opt.get('timeout', None) is None and HTTPTOOLS_DEFAULT_DOWNLOAD_TIMEOUT is not None:
        opt['timeout'] = HTTPTOOLS_DEFAULT_DOWNLOAD_TIMEOUT
    if opt['timeout'] == 0:
        opt['timeout'] = None

    if len(url) > 0:
        try:
            if opt.get('post', None) is not None or opt.get('file', None) is not None:
                if opt.get('post', None) is not None:
                    # Convert string post in dict
                    try:
                        json.loads(opt['post'])
                        payload = opt['post']
                    except:
                        if not isinstance(opt['post'], dict):
                            post = urlparse.parse_qs(opt['post'], keep_blank_values=1)
                            payload = dict()
                            for key, value in post.items():
                                try:
                                    payload[key] = value[0]
                                except:
                                    payload[key] = ''
                        else:
                            payload = opt['post']

                # Verify 'file' and 'file_name' options to upload a buffer or file
                if opt.get('file', None) is not None:
                    if os.path.isfile(opt['file']):
                        if opt.get('file_name', None) is None:
                            path_file, opt['file_name'] = os.path.split(opt['file'])
                        files = {'file': (opt['file_name'], open(opt['file'], 'rb'))}
                        file_name = opt['file']
                    else:
                        files = {'file': (opt.get('file_name', 'Default'), opt['file'])}
                        file_name = opt.get('file_name', 'Default') + ', memory buffer'

                info_dict = fill_fields_pre(url, opt, proxy_data, file_name)
                if opt.get('only_headers', False):
                    # Makes the request with HEAD method
                    req = session.head(url, allow_redirects=opt.get('follow_redirects', True),
                                       timeout=opt['timeout'])
                else:
                    # Makes the request with POST method
                    req = session.post(url, data=payload,
                                       allow_redirects=opt.get('follow_redirects', True),
                                       files=files, timeout=opt['timeout'])
            elif opt.get('only_headers', False):
                info_dict = fill_fields_pre(url, opt, proxy_data, file_name)
                # Makes the request with HEAD method
                req = session.head(url, allow_redirects=opt.get('follow_redirects', True),
                                   timeout=opt['timeout'])
            else:
                info_dict = fill_fields_pre(url, opt, proxy_data, file_name)
                # Makes the request with GET method
                req = session.get(url, allow_redirects=opt.get('follow_redirects', True),
                                  timeout=opt['timeout'])
        except Exception as e:
            from lib import requests
            req = requests.Response()
            if not opt.get('ignore_response_code', False) and not proxy_data.get('stat', ''):
                response['data'] = ''
                response['success'] = False
                info_dict.append(('Success', 'False'))
                import traceback
                response['code'] = traceback.format_exc()
                info_dict.append(('Response code', str(e)))
                info_dict.append(('Finished in', time.time() - inicio))
                if not opt.get('alfa_s', False):
                    show_infobox(info_dict)
                return type('HTTPResponse', (), response)
            else:
                req.status_code = str(e)
    else:
        response['data'] = ''
        response['success'] = False
        response['code'] = ''
        return type('HTTPResponse', (), response)

    response_code = req.status_code
    response['url'] = req.url
    response['data'] = req.content if req.content else ''
    if type(response['data']) != str:
        try:
            response['data'] = response['data'].decode('utf-8')
        except:
            response['data'] = response['data'].decode('ISO-8859-1')

    if req.headers.get('Server', '').startswith('cloudflare') and response_code in [429, 503, 403] \
            and not opt.get('CF', False) and 'Ray ID' in response['data'] and not opt.get('post', None):
        logger.debug("CF retry... for domain: %s" % domain)
        from lib import proxytranslate
        gResp = proxytranslate.process_request_proxy(url)
        if gResp:
            req = gResp['result']
            response_code = req.status_code
            response['url'] = gResp['url']
            response['data'] = gResp['data']

    if not response['data']:
        response['data'] = ''

    try:
        response['json'] = to_utf8(req.json())
    except:
        response['json'] = dict()

    response['code'] = response_code
    response['headers'] = req.headers
    response['cookies'] = req.cookies

    info_dict, response = fill_fields_post(info_dict, req, response, req_headers, inicio)

    if opt.get('cookies', True):
        save_cookies(alfa_s=opt.get('alfa_s', False))

    if not 'api.themoviedb' in url and not opt.get('alfa_s', False):
        show_infobox(info_dict)
    if not config.get_setting("debug"):
        logger.info('Page URL:', url)

    return type('HTTPResponse', (), response)
def get_episodios(item, recursion):
    logger.info("[rtve.py] get_episodios")
    itemlist = []
    data = scrapertools.cachePage(item.url)

    # Extract the videos
    '''
    <li class="odd">
    <span class="col_tit" id="2851919" name="progname">
    <a href="/alacarta/videos/atencion-obras/atencion-obras-josep-maria-flotats-ferran-adria-sanchis-sinisterra/2851919/">Atención Obras - 07/11/14</a>
    </span>
    <span class="col_tip">
    <span>Completo</span>
    </span>
    <span class="col_dur">55:35</span>
    <span class="col_pop"><span title="32% popularidad" class="pc32"><em><strong><span>32%</span></strong></em></span></span>
    <span class="col_fec">07 nov 2014</span>
    <div id="popup2851919" class="tultip hddn">
    <span id="progToolTip" class="tooltip curved">
    <span class="pointer"></span>
    <span class="cerrar" id="close2851919"></span>
    <span class="titulo-tooltip"><a href="/alacarta/videos/atencion-obras/atencion-obras-josep-maria-flotats-ferran-adria-sanchis-sinisterra/2851919/" title="Ver Atención Obras - 07/11/14">Atención Obras - 07/11/14</a></span>
    <span class="fecha">07 nov 2014</span>
    <span class="detalle">Josep María Flotats trae al Teatro María Guerrero de Madrid “El juego del amor y del azar” de Pierre de Marivaux. Un texto que ya ha sido estrenado en el Teatre Nacional de Catalunya. C...</span>
    '''
    patron = '<li class="[^"]+">.*?'
    patron += '<span class="col_tit"[^<]+'
    patron += '<a href="([^"]+)">(.*?)</a[^<]+'
    patron += '</span>[^<]+'
    patron += '<span class="col_tip"[^<]+<span>([^<]+)</span[^<]+</span[^<]+'
    patron += '<span class="col_dur">([^<]+)</span>.*?'
    patron += '<span class="col_fec">([^<]+)</span>.*?'
    patron += '<span class="detalle">([^>]+)</span>'
    matches = re.findall(patron, data, re.DOTALL)
    if DEBUG:
        scrapertools.printMatches(matches)

    # Build a list with the entries
    for match in matches:
        if not "developer" in config.get_platform():
            scrapedtitle = match[1] + " (" + match[2].strip() + ") (" + match[3].strip() + ") (" + match[4] + ")"
        else:
            scrapedtitle = match[1]
        scrapedtitle = scrapedtitle.replace("<em>Nuevo</em> ", "")
        scrapedtitle = scrapertools.unescape(scrapedtitle)
        scrapedtitle = scrapedtitle.strip()
        scrapedurl = urlparse.urljoin(item.url, match[0])
        scrapedthumbnail = item.thumbnail
        scrapedplot = scrapertools.unescape(match[5].strip())
        scrapedplot = scrapertools.htmlclean(scrapedplot).strip()
        scrapedextra = match[2]
        if DEBUG:
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")
        itemlist.append(Item(channel=CHANNELNAME, title=scrapedtitle, action="play", server="rtve",
                             url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot,
                             show=item.show, category=item.category, extra=scrapedextra,
                             folder=False))

    # Pagination
    if len(itemlist) > 0:
        next_page_url = scrapertools.find_single_match(data, '<a name="paginaIR" href="([^"]+)"><span>Siguiente</span></a>')
        if next_page_url != "":
            next_page_url = urlparse.urljoin(item.url, next_page_url).replace("&amp;", "&")
            # http://www.rtve.es/alacarta/interno/contenttable.shtml?pbq=2&modl=TOC&locale=es&pageSize=15&ctx=36850&advSearchOpen=false
            if not next_page_url.endswith("&advSearchOpen=false"):
                next_page_url = next_page_url + "&advSearchOpen=false"

            siguiente_item = Item(channel=CHANNELNAME, action="episodios",
                                  url=urlparse.urljoin(item.url, next_page_url), title=item.title,
                                  show=item.show, category=item.category)
            logger.info("siguiente_item=" + siguiente_item.tostring())

            # To avoid endless lists: recurse up to 3 pages, then add a "next page" item instead
            if recursion <= 3:
                itemlist.extend(get_episodios(siguiente_item, recursion + 1))
            else:
                siguiente_item.title = ">> Página siguiente"
                itemlist.append(siguiente_item)

    return itemlist
def listvideos(item):
    logger.info("[discoverymx.py] listvideos")
    itemlist = []
    scrapedthumbnail = ""
    scrapedplot = ""

    # Download the page
    data = scrapertools.cache_page(item.url)

    patronvideos = "<h3 class='post-title entry-title'[^<]+"
    patronvideos += "<a href='([^']+)'>([^<]+)</a>.*?"
    patronvideos += "<div class='post-body entry-content'(.*?)<div class='post-footer'>"
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for match in matches:
        scrapedtitle = match[1]
        scrapedtitle = re.sub("<[^>]+>", " ", scrapedtitle)
        scrapedtitle = scrapertools.unescape(scrapedtitle)
        scrapedurl = match[0]
        regexp = re.compile(r'src="(http[^"]+)"')
        matchthumb = regexp.search(match[2])
        if matchthumb is not None:
            scrapedthumbnail = matchthumb.group(1)
        matchplot = re.compile('<div align="center">(<img.*?)</span></div>', re.DOTALL).findall(match[2])
        if len(matchplot) > 0:
            scrapedplot = matchplot[0]
            # print matchplot
        else:
            scrapedplot = ""
        scrapedplot = re.sub("<[^>]+>", " ", scrapedplot)
        scrapedplot = scrapertools.unescape(scrapedplot)
        if DEBUG:
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")
        # Add to the XBMC listing
        # xbmctools.addnewfolder( __channel__ , "findevi" , category , scrapedtitle , scrapedurl , scrapedthumbnail, scrapedplot )
        itemlist.append(Item(channel=__channel__, action="findvideos", title=scrapedtitle,
                             url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot,
                             folder=True))

    # Extract the next page marker
    patronvideos = "<a class='blog-pager-older-link' href='([^']+)'"
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    if len(matches) > 0:
        scrapedtitle = "Página siguiente"
        scrapedurl = urlparse.urljoin(item.url, matches[0])
        scrapedthumbnail = ""
        scrapedplot = ""
        itemlist.append(Item(channel=__channel__, action="listvideos", title=scrapedtitle,
                             url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot,
                             folder=True))

    return itemlist
def getlistchannel(item):
    logger.info("[justintv.py] getlistchannel")
    url = item.url
    title = item.title
    if "|Next Page >>" in item.title:
        item.title = item.title.split('|')[0]

    if item.title == 'favorites':
        context = '|9'  # Remove a channel from favorites; in the favorites listing, removal only
    else:
        context = '|8'  # Add a channel to favorites; in the other listings, adding only

    data = scrapertools.cache_page(url)
    logger.info(data)
    datadict = json.loads(data)
    totalItems = len(datadict)
    itemlist = []
    # print item.action
    c = 0
    try:
        datadict = sorted(datadict, key=lambda k: k['video_bitrate'], reverse=True)
    except:
        pass

    for match in datadict:
        try:
            name = match['name'].split('user_')[-1]
        except:
            try:
                name = match['channel']['login']
                if name is None or name == '':
                    raise
            except:
                name = match['login']
        try:
            title = match['channel']['title']
            if title is None or title == '':
                raise
        except:
            try:
                title = match['title']
                if title is None:
                    title = ''
            except:
                title = ''
        try:
            title = title
            if title is None or title == '':
                raise
        except:
            title = name
        try:
            tags = scrapertools.unescape(match['channel']['tags'])
            if tags is None or tags == '':
                raise
        except:
            try:
                tags = scrapertools.unescape(match['tags']).strip()
                if tags is None or tags == '':
                    raise
            except:
                tags = ''
        try:
            status = scrapertools.unescape(match['channel']['status']).strip()
            if status is None or status == '':
                raise
        except:
            try:
                status = scrapertools.unescape(match['status']).strip()
                if status is None or status == '':
                    raise
            except:
                status = ''
        try:
            subcat = match['channel']['category_title']
            if subcat is None or subcat == '':
                raise
        except:
            try:
                subcat = match['category']
                if subcat is None:
                    raise
            except:
                subcat = ''
        try:
            views = match['channel']['views_count']
        except:
            try:
                views = match['channel_view_count']
            except:
                views = ''
        try:
            bitrate = str(match['video_bitrate']).split('.')[0]
        except:
            bitrate = ''
        try:
            lang = match['language']
        except:
            lang = ''
        try:
            scrapedthumbnail = match['channel']['screen_cap_url_medium']
        except:
            scrapedthumbnail = match['screen_cap_url_medium']
        try:
            fanart_thumb = match['channel']['image_url_huge']
        except:
            try:
                fanart_thumb = match['image_url_huge']
            except:
                fanart_thumb = fanart

        scrapedurl = name
        idx = abbrev.index(lang)
        lang = languages[idx].decode('utf-8')
        # str(views) because the API may return the view count as a number
        scrapedplot = title + '\nStatus: ' + status + '\nTags: ' + tags + '\nChannel Name: ' + name + '\nBitrate: ' + bitrate + '\nLanguage: ' + lang + '\nViews: ' + str(views)

        if config.get_setting("streamlive") == "true":
            scrapedtitle = title + ' [%s] BitRate: %s (%s)' % (name, bitrate, lang)
            itemlist.append(Item(channel=item.channel, action="playVideo",
                                 title=scrapedtitle.encode("utf-8"), url=scrapedurl,
                                 thumbnail=scrapedthumbnail, plot=scrapedplot.encode("utf-8"),
                                 category=item.plot, totalItems=totalItems,
                                 fanart=scrapedthumbnail,
                                 context='7',  # 7: list archived videos
                                 folder=False))
        else:
            scrapedtitle = title + ' [%s] (%s)' % (name, lang)
            itemlist.append(Item(channel=item.channel, action="listarchives",
                                 title=scrapedtitle.encode("utf-8"), url=scrapedurl,
                                 thumbnail=scrapedthumbnail, plot=scrapedplot.encode("utf-8"),
                                 category=item.plot, totalItems=totalItems, fanart=fanart_thumb,
                                 extra=fanart_thumb,
                                 context='6',  # 6: watch the channel live
                                 folder=True))
        if DEBUG:
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")

    if totalItems >= limit:
        offset1 = re.compile('offset=(.+?)&').findall(url)[0]
        offset2 = str(int(offset1) + limit + 1)
        scrapedurl = item.url.replace("offset=" + offset1, "offset=" + offset2)
        scrapedtitle = item.title + "|Next Page >>"
        scrapedthumbnail = ''
        scrapedplot = ''
        itemlist.append(Item(channel=item.channel, action="listchannel", title=scrapedtitle,
                             url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot,
                             category=item.category, fanart=fanart))

    return itemlist