コード例 #1
0
ファイル: rtpa.py プロジェクト: Jmlaguna89/miNuevoRepo
def episodios(item):
    """Return the playable episodes listed by an RTPA JSON feed.

    The JSON document at ``item.url`` is downloaded (``&fin=1000`` is appended
    first when the URL carries no ``&fin=`` parameter; note that this updates
    ``item.url`` in place) and one ``Item`` is built for every entry of its
    ``"VOD"`` list.

    Args:
        item: Source item. ``item.url`` is the feed URL and ``item.show`` is
            copied to every episode.

    Returns:
        list: ``Item`` objects with ``action="play"`` and ``server="rtpa"``.
    """
    logger.info("tvalacarta.channels.rtpa episodios")
    itemlist = []

    if "&fin=" not in item.url:
        item.url = item.url + "&fin=1000"

    data = scrapertools.cache_page(item.url)
    json_object = jsontools.load_json(data)

    for vod in json_object["VOD"]:
        logger.info("vod="+repr(vod))

        # Title layout: "<programme>[ - <episode title>][ (<air date>)]"
        title = vod["nombre_programa"]
        if vod["titulo"]!="":
            title = title + " - " + vod["titulo"]
        if vod["fecha_emision"]!="":
            title = title + " ("+scrapertools.htmlclean(vod["fecha_emision"])+")"
        url = "http://www.rtpa.es/video:"+urllib.quote(vod["nombre_programa"])+"_"+vod["id_generado"]+".html"

        # The thumbnail is best effort: a missing or unquotable value must not
        # drop the episode. Only ordinary errors are swallowed, so
        # KeyboardInterrupt/SystemExit still propagate.
        try:
            url_imagen = vod["url_imagen"]
            # quote() also escapes the ":" of the scheme, so after collapsing
            # doubled slashes the "http://" prefix is restored by hand.
            thumbnail = urllib.quote(url_imagen).replace("//","/").replace("http%3A/","http://")
        except Exception:
            thumbnail = ""

        aired_date = scrapertools.parse_date( vod["fecha_emision"] )

        plot = scrapertools.htmlclean(vod["sinopsis"])
        itemlist.append( Item(channel=CHANNELNAME, title=title , url=url,  thumbnail=thumbnail , plot=plot, fanart=thumbnail, server="rtpa", action="play" , show = item.show , viewmode="movie_with_plot", aired_date=aired_date, folder=False) )

    return itemlist
コード例 #2
0
ファイル: extremaduratv.py プロジェクト: erral/tvalacarta
def episodios(item):
    """List the episodes found in the modal-video blocks of ``item.url``.

    Every ``<div class="modal-video-ajax ...`` block (up to its closing
    ``</blockquote>``) yields one playable ``Item``; relative links are
    resolved against ``item.url``.

    Returns:
        list: ``Item`` objects with ``action="play"``.
    """
    logger.info("extremaduratv.episodios")

    # Download the page
    page = scrapertools.cachePage(item.url)

    # Attributes read from each block (sample values):
    #   data-video-imagen-modal="http://www.canalextremadura.es/sites/default/files/.../_desdeelaire.jpg?itok=FmvSbPkH"
    #   data-video-url="/alacarta/tv/videos/extremadura-desde-el-aire-3"
    #   data-video-titulo-modal="El Reino del Pata Negra"
    #   data-video-video-modal="rtmp://canalextremadura.cdn.canalextremadura.es/canalextremadura/tv/S-B4583-009.mp4"
    blocks = re.findall('<div class="modal-video-ajax(.*?</blockquote>)', page, re.DOTALL)

    episodes = []
    for block in blocks:
        episode_title = scrapertools.find_single_match(block, 'data-video-titulo-modal="([^"]+)"')

        page_link = scrapertools.find_single_match(block, 'data-video-url="([^"]+)"')
        episode_url = urlparse.urljoin(item.url, page_link)

        image_link = scrapertools.find_single_match(block, 'data-video-imagen-modal="([^"]+)"')
        image_url = urlparse.urljoin(item.url, image_link)

        synopsis = scrapertools.find_single_match(block, '<blockquote class="nomargin">(.*?)</blockquote>')
        synopsis = synopsis.strip()

        # The air date, if any, is looked for in the title text
        episode_date = scrapertools.parse_date(episode_title)

        media_link = scrapertools.find_single_match(block, 'data-video-video-modal="([^"]+)"')
        media_url = urlparse.urljoin(item.url, media_link)

        episodes.append(
            Item(channel=CHANNELNAME,
                 title=episode_title,
                 action="play",
                 server="extremaduratv",
                 plot=synopsis,
                 url=episode_url,
                 thumbnail=image_url,
                 fanart=image_url,
                 show=item.show,
                 aired_date=episode_date,
                 extra=media_url,
                 view="videos",
                 folder=False))

    return episodes
コード例 #3
0
ファイル: xiptv.py プロジェクト: tvalacarta/tvalacarta
def detalle_episodio(item):
    """Fill *item* with the details scraped from its own page.

    Sets ``plot``, ``thumbnail``, ``aired_date``, ``duration``, ``geolocked``
    and ``media_url`` on the given item and returns that same item.
    ``media_url`` is left empty when the server module cannot resolve it.
    """

    data = scrapertools.cache_page(item.url)

    # Raw strings keep the regex escapes intact without relying on Python
    # passing unknown escape sequences through unchanged.
    scrapedplot = scrapertools.find_single_match(
        data, r'<meta content="([^"]+)" property="og\:description"')
    item.plot = scrapertools.htmlclean(scrapedplot).strip()

    scrapedthumbnail = scrapertools.find_single_match(
        data, r'<meta content="([^"]+)" property="og\:image"')
    item.thumbnail = scrapedthumbnail.strip()

    scrapeddate = scrapertools.find_single_match(
        data, r'<span class="date">([^<]+)</span>')
    item.aired_date = scrapertools.parse_date(scrapeddate.strip())

    # Stored exactly as scraped (no conversion is applied here).
    item.duration = scrapertools.find_single_match(
        data, r'<span class="duration">([^<]+)</span>')

    item.geolocked = "0"

    # Resolving the media URL is best effort: any ordinary failure is logged
    # and leaves media_url empty. KeyboardInterrupt/SystemExit are not
    # swallowed.
    try:
        from servers import xiptv as servermodule
        video_urls = servermodule.get_video_url(item.url)
        # Second field of the first entry returned by the server module.
        item.media_url = video_urls[0][1]
    except Exception:
        import traceback
        print(traceback.format_exc())
        item.media_url = ""

    return item
コード例 #4
0
ファイル: mitele.py プロジェクト: neopack1/tvalacarta
def get_episodes(episodes, category, items):
    """Append one playable ``Item`` per entry of *episodes* to *items*.

    Args:
        episodes: iterable of mappings with "subtitle", "title", "images",
            "link" and "info" keys.
        category: value stored in the ``category`` field of every item.
        items: list that receives the new items (modified in place).

    Nothing is returned.
    """
    logger.info("[mitele.py] get_episodes")
    for entry in episodes:
        full_title = "%s - %s" % (entry["subtitle"], entry["title"])
        image = entry["images"]["thumbnail"]["src"]
        page_url = "https://www.mitele.es" + entry["link"]["href"]

        # Optional metadata: a missing synopsis becomes "", the rest None.
        info = entry["info"]
        synopsis = info.get("synopsis", "")
        length = info.get("duration")
        created = info.get("creation_date")

        if created:
            aired = scrapertools.parse_date(created)
        else:
            aired = None

        new_item = Item(channel=CHANNEL,
                        server=CHANNEL,
                        action="play",
                        title=full_title,
                        url=page_url,
                        thumbnail=image,
                        category=category,
                        plot=synopsis,
                        duration=length,
                        aired_date=aired,
                        folder=False)
        items.append(new_item)
コード例 #5
0
ファイル: rtvcm.py プロジェクト: Jmlaguna89/miNuevoRepo
def detalle_episodio(item):
    """Fill *item* with details taken from the flumotion pods API.

    The numeric id at the end of ``item.url`` selects the API document. On
    success ``thumbnail``, ``geolocked``, ``duration``, ``aired_date`` and
    ``media_url`` are set; on any ordinary failure the traceback is printed
    and ``media_url`` is set to "". Returns the same item.
    """
    logger.info("tvalacarta.rtvcm.detalle_episodio")

    # Raw string so that "\d" reaches the regex engine untouched.
    idvideo = scrapertools.find_single_match(item.url,r"video-(\d+)$")
    url = "http://api.rtvcm.webtv.flumotion.com/pods/"+idvideo+"?extended=true"
    data = scrapertools.cache_page(url)

    try:
        json_object = jsontools.load_json(data)

        # Drop the query string from the image URL.
        item.thumbnail = json_object["video_image_url"].split("?")[0]
        item.geolocked = "0"
        item.duration = scrapertools.parse_duration_secs( json_object["duration"] )
        item.aired_date = scrapertools.parse_date(item.title)

        from servers import rtvcm as servermodule
        video_urls = servermodule.get_video_url(item.url)
        # Second field of the first entry returned by the server module.
        item.media_url = video_urls[0][1]

    except Exception:
        # Best effort: log and continue, but let KeyboardInterrupt/SystemExit
        # propagate.
        import traceback
        print(traceback.format_exc())
        item.media_url = ""

    return item
コード例 #6
0
def detalle_episodio(item):
    """Fill *item* with the metadata published in its page's <meta> tags.

    Sets ``plot``, ``thumbnail``, ``aired_date``, ``geolocked`` and
    ``media_url`` and returns the same item. ``media_url`` comes from the
    first result of ``play(item)`` with backslashes turned into slashes, or
    is "" when that result cannot be read.
    """

    data = scrapertools.cache_page(item.url)

    item.plot = scrapertools.htmlclean(
        scrapertools.find_single_match(
            data, '<meta content="([^"]+)" itemprop="description')).strip()
    item.thumbnail = scrapertools.find_single_match(
        data, '<meta content="([^"]+)" itemprop="thumbnailUrl')

    # Sample: <meta content="miércoles, 16 de septiembre de 2015 3:30" itemprop="datePublished"
    scrapeddate = scrapertools.find_single_match(
        data, '<meta content="([^"]+)" itemprop="datePublished')

    item.aired_date = scrapertools.parse_date(scrapeddate)

    item.geolocked = "0"

    media_item = play(item)
    try:
        item.media_url = media_item[0].url.replace("\\", "/")
    except Exception:
        # Best effort: log and fall back to an empty URL, without swallowing
        # KeyboardInterrupt/SystemExit.
        import traceback
        print(traceback.format_exc())
        item.media_url = ""

    return item
コード例 #7
0
ファイル: pakapaka.py プロジェクト: erral/tvalacarta
def detalle_episodio(item):
    """Fill *item* with date, duration and media URL.

    Date and duration come from the conectate.gob.ar search endpoint, queried
    with the numeric id found after "videos/" in ``item.url``. The media URL
    is resolved through the pakapaka server module. Returns the same item;
    ``media_url`` is "" when resolution fails.
    """

    # Take the duration and the date from conectate
    rec_id = scrapertools.find_single_match(item.url,r"videos/(\d+)")
    data = scrapertools.cache_page("http://www.conectate.gob.ar/sitios/conectate/busqueda/buscar?rec_id="+rec_id)
    scrapeddate = scrapertools.find_single_match(data,r'"fecha_creacion"\:"([^"]+)"')

    # Fall back to the plain "fecha" field when there is no creation date
    if scrapeddate=="":
        scrapeddate = scrapertools.find_single_match(data,r'"fecha"\:"([^"]+)"')

    # Slashes arrive escaped as "\/" in the response
    item.aired_date = scrapertools.parse_date(scrapeddate.replace("\\/","/"))

    scrapedduration = scrapertools.find_single_match(data,r'"duracion_segundos":"(\d+)"')
    item.duration = scrapertools.parse_duration_secs(scrapedduration)

    # Now take the URL from PakaPaka
    # NOTE(review): the downloaded page is not used below; the request is kept
    # in case something depends on it having been made - confirm and remove.
    data = scrapertools.cache_page(item.url)

    item.geolocked = "0"
    try:
        from servers import pakapaka as servermodule
        video_urls = servermodule.get_video_url(item.url)
        # Second field of the first entry returned by the server module.
        item.media_url = video_urls[0][1]
    except Exception:
        # Best effort: log and continue, but let KeyboardInterrupt/SystemExit
        # propagate.
        import traceback
        print(traceback.format_exc())
        item.media_url = ""

    return item
コード例 #8
0
ファイル: upvtv.py プロジェクト: tvalacarta/tvalacarta
def episodios(item):
    """List the episodes under the "Programas anteriores" heading.

    Only the page fragment between that heading and the next ``</ul`` is
    scanned. Titles are whitespace-normalised and re-encoded from
    iso-8859-1 to utf8.

    Returns:
        list: ``Item`` objects with ``action="play"``.
    """
    logger.info("tvalacarta.channels.upvtv episodios")

    # Download the page and keep only the list of previous programmes
    page = scrapertools.cachePage(item.url)
    section = scrapertools.find_single_match(page,'<h1>Programas anteriores(.*?)</ul')

    # Extract the episodes
    link_pattern = ('<li[^<]+'
                    '<span class="enlace"><a href="([^"]+)" >([^<]+)</a>')
    links = re.findall(link_pattern, section, re.DOTALL)

    whitespace = re.compile("\s+",re.DOTALL)
    no_thumbnail = ""
    no_plot = ""

    result = []
    for href, raw_title in links:
        clean_title = whitespace.sub(" ", raw_title.replace("\n"," "))
        clean_title = clean_title.decode('iso-8859-1').encode("utf8","ignore")
        episode_url = urlparse.urljoin(item.url,href)
        episode_date = scrapertools.parse_date(clean_title)

        if DEBUG:
            logger.info("title=["+clean_title+"], url=["+episode_url+"], thumbnail=["+no_thumbnail+"]")

        result.append(
            Item(channel=CHANNELNAME,
                 title=clean_title,
                 action="play",
                 server="upvtv",
                 url=episode_url,
                 thumbnail=no_thumbnail,
                 plot=no_plot,
                 show=item.show,
                 fanart=no_thumbnail,
                 aired_date=episode_date,
                 folder=False))

    return result
コード例 #9
0
ファイル: adn40.py プロジェクト: tvalacarta/tvalacarta
def episodios(item):
    """Return the playable episodes listed in the JSON document at ``item.url``.

    One ``Item`` is created for every entry of the ``"video"`` list, using its
    "title", "link", "image", "teaser" and "date" fields.

    Args:
        item: Source item. ``item.url`` is the JSON URL and ``item.show`` is
            copied to every episode.

    Returns:
        list: ``Item`` objects with ``action="play"`` and ``server="adn40"``.
    """
    logger.info("tvalacarta.channels.adn40 episodios")

    itemlist = []

    # Download the page
    data = scrapertools.cache_page(item.url)
    json_data = jsontools.load_json(data)

    for json_item in json_data["video"]:
        title = json_item["title"]
        url = json_item["link"]
        thumbnail = json_item["image"]
        plot = json_item["teaser"]
        aired_date = scrapertools.parse_date(json_item["date"])

        # aired_date was previously parsed but never handed to the Item, so
        # the air date was silently lost.
        itemlist.append(
            Item(channel=CHANNELNAME,
                 title=title,
                 url=url,
                 thumbnail=thumbnail,
                 plot=plot,
                 action="play",
                 server="adn40",
                 show=item.show,
                 aired_date=aired_date,
                 folder=False))

    return itemlist
コード例 #10
0
def detalle_episodio(item):
    """Complete *item* using only data it already carries.

    The air date is parsed from ``item.title``, ``geolocked`` is set to "0"
    and ``media_url`` is copied from ``item.extra``. Returns the same item.
    """
    parsed_date = scrapertools.parse_date(item.title)

    item.aired_date = parsed_date
    item.geolocked = "0"
    item.media_url = item.extra
    return item
コード例 #11
0
def detalle_episodio(item):
    """Fill *item* with date, duration and media URL.

    Date and duration come from the conectate.gob.ar search endpoint, queried
    with the numeric id found after "videos/" in ``item.url``. The media URL
    is resolved through the pakapaka server module. Returns the same item;
    ``media_url`` is "" when resolution fails.
    """

    # Take the duration and the date from conectate
    rec_id = scrapertools.find_single_match(item.url, r"videos/(\d+)")
    data = scrapertools.cache_page(
        "http://www.conectate.gob.ar/sitios/conectate/busqueda/buscar?rec_id="
        + rec_id)
    scrapeddate = scrapertools.find_single_match(
        data, r'"fecha_creacion"\:"([^"]+)"')

    # Fall back to the plain "fecha" field when there is no creation date
    if scrapeddate == "":
        scrapeddate = scrapertools.find_single_match(data,
                                                     r'"fecha"\:"([^"]+)"')

    # Slashes arrive escaped as "\/" in the response
    item.aired_date = scrapertools.parse_date(scrapeddate.replace("\\/", "/"))

    scrapedduration = scrapertools.find_single_match(
        data, r'"duracion_segundos":"(\d+)"')
    item.duration = scrapertools.parse_duration_secs(scrapedduration)

    # Now take the URL from PakaPaka
    # NOTE(review): the downloaded page is not used below; the request is kept
    # in case something depends on it having been made - confirm and remove.
    data = scrapertools.cache_page(item.url)

    item.geolocked = "0"
    try:
        from servers import pakapaka as servermodule
        video_urls = servermodule.get_video_url(item.url)
        # Second field of the first entry returned by the server module.
        item.media_url = video_urls[0][1]
    except Exception:
        # Best effort: log and continue, but let KeyboardInterrupt/SystemExit
        # propagate.
        import traceback
        print(traceback.format_exc())
        item.media_url = ""

    return item
コード例 #12
0
def detalle_episodio(item):
    """Fill *item* with details taken from the flumotion pods API.

    The numeric id at the end of ``item.url`` selects the API document. On
    success ``thumbnail``, ``geolocked``, ``duration``, ``aired_date`` and
    ``media_url`` are set; on any ordinary failure the traceback is printed
    and ``media_url`` is set to "". Returns the same item.
    """
    logger.info("tvalacarta.rtvcm.detalle_episodio")

    # Raw string so that "\d" reaches the regex engine untouched.
    idvideo = scrapertools.find_single_match(item.url, r"video-(\d+)$")
    url = "http://api.rtvcm.webtv.flumotion.com/pods/" + idvideo + "?extended=true"
    data = scrapertools.cache_page(url)

    try:
        json_object = jsontools.load_json(data)

        # Drop the query string from the image URL.
        item.thumbnail = json_object["video_image_url"].split("?")[0]
        item.geolocked = "0"
        item.duration = scrapertools.parse_duration_secs(
            json_object["duration"])
        item.aired_date = scrapertools.parse_date(item.title)

        from servers import rtvcm as servermodule
        video_urls = servermodule.get_video_url(item.url)
        # Second field of the first entry returned by the server module.
        item.media_url = video_urls[0][1]

    except Exception:
        # Best effort: log and continue, but let KeyboardInterrupt/SystemExit
        # propagate.
        import traceback
        print(traceback.format_exc())
        item.media_url = ""

    return item
コード例 #13
0
ファイル: xiptv.py プロジェクト: Jmlaguna89/miNuevoRepo
def detalle_episodio(item):
    """Fill *item* with the details scraped from its own page.

    Sets ``plot``, ``thumbnail``, ``aired_date``, ``duration``, ``geolocked``
    and ``media_url`` on the given item and returns that same item.
    ``media_url`` is left empty when the server module cannot resolve it.
    """

    data = scrapertools.cache_page(item.url)

    # Raw strings keep the regex escapes intact without relying on Python
    # passing unknown escape sequences through unchanged.
    scrapedplot = scrapertools.find_single_match(data,r'<meta content="([^"]+)" property="og\:description"')
    item.plot = scrapertools.htmlclean( scrapedplot ).strip()

    scrapedthumbnail = scrapertools.find_single_match(data,r'<meta content="([^"]+)" property="og\:image"')
    item.thumbnail = scrapedthumbnail.strip()

    scrapeddate = scrapertools.find_single_match(data,r'<span class="date">([^<]+)</span>')
    item.aired_date = scrapertools.parse_date( scrapeddate.strip() )

    # Stored exactly as scraped (no conversion is applied here).
    item.duration = scrapertools.find_single_match(data,r'<span class="duration">([^<]+)</span>')

    item.geolocked = "0"

    # Resolving the media URL is best effort: any ordinary failure is logged
    # and leaves media_url empty. KeyboardInterrupt/SystemExit are not
    # swallowed.
    try:
        from servers import xiptv as servermodule
        video_urls = servermodule.get_video_url(item.url)
        # Second field of the first entry returned by the server module.
        item.media_url = video_urls[0][1]
    except Exception:
        import traceback
        print(traceback.format_exc())
        item.media_url = ""

    return item
コード例 #14
0
ファイル: rtva.py プロジェクト: igurrutxaga/tvalacarta
def detalle_episodio(item):
    """Fill *item* with the details found in the episode's XML data.

    Sets ``plot``, ``thumbnail``, ``aired_date``, ``geolocked`` and
    ``media_url`` and returns the same item. When the XML gives no
    publication date the date is looked for in ``item.title``. ``media_url``
    is the URL of the last item returned by ``play()``, or "" when ``play()``
    returns nothing.
    """

    data = play_get_xml_data(item.url)

    item.plot = scrapertools.find_single_match(data,r"<introduction><\!\[CDATA\[(.*?)\]\]><")
    item.thumbnail = scrapertools.find_single_match(data,"<picture>([^<]+)<")
    item.aired_date = scrapertools.parse_date( scrapertools.find_single_match(data,"<publication_date>([^<]+)<") )

    if item.aired_date == "":
        item.aired_date = scrapertools.parse_date(item.title)

    item.geolocked = "0"

    items = play(item,page_data=data)
    # Guard against an empty result: indexing [-1] would raise IndexError and
    # abort the whole detail lookup instead of leaving the URL blank.
    item.media_url = items[-1].url if items else ""

    return item
コード例 #15
0
def episodios(item):
    """List the episodes of every season block found at ``item.url``.

    Each ``wrap-toggle active`` block contributes a season label (its
    ``<label>`` text without the "Listado de episodios de la " prefix,
    capitalised) and a ``<ul>`` of episode links. The label is prepended to
    every episode title of that season.

    Returns:
        list: ``Item`` objects with ``action="play"``.
    """
    logger.info("tvalacarta.channels.sietetvandalucia episodios")

    # Download the page
    page = scrapertools.cache_page(item.url)

    # Markup being parsed (abridged sample):
    #   <div class="wrap-toggle active">
    #   <div>
    #   <input type="checkbox" id="question0" name="q"  class="questions">
    #   <div class="plus">+</div>
    #   <label for="question0" class="question">
    #   Listado de episodios de la temporada 5                            </label>
    #   <div class="answers">
    #   <ul>
    #   <li><a href="https://7tvandalucia.es/andalucia/cuaderno-agrario/5-14-08122018-cuaderno-agrario/43608/" > Número 14 / 08/12/2018 Cuaderno Agrario</a></li>
    #   ...
    #   </ul>
    season_pattern = ('<div class="wrap-toggle active"[^<]+'
                      '<div[^<]+'
                      '<input type="checkbox"[^<]+'
                      '<div class="plus"[^<]+</div[^<]+'
                      '<label[^>]+>([^<]+)</label[^<]+'
                      '<div class="answers"[^<]+'
                      '<ul(.*?)</ul')
    episode_pattern = '<li><a href="([^"]+)[^>]+>([^<]+)</a>'

    result = []
    for heading, body in scrapertools.find_multiple_matches(page, season_pattern):
        label = heading.strip().replace("Listado de episodios de la ", "")
        label = label.capitalize()

        for href, link_text in scrapertools.find_multiple_matches(body, episode_pattern):
            episode_url = urlparse.urljoin(item.url, href)
            episode_title = label + " " + link_text.strip()
            episode_date = scrapertools.parse_date(episode_title)

            episode = Item(channel=CHANNELNAME,
                           action="play",
                           server="sietetvandalucia",
                           title=episode_title,
                           show=item.show,
                           url=episode_url,
                           aired_date=episode_date,
                           folder=False)
            result.append(episode)

    return result
コード例 #16
0
def episodios_bloque_izquierdo(item):
    """List the episodes found in the "contenedor-izq" part of the page.

    Only the markup between ``<div class="contenedor-izq`` and
    ``<div class="contenedor-der`` is scanned. Every ``views-row`` entry
    becomes a playable ``Item``, and each "pager-next" link found in that
    fragment adds a ">> Página siguiente" item that calls this function again.

    Returns:
        list: episode items followed by any next-page items.
    """
    logger.info("extremaduratv.episodios_bloque_izquierdo")

    # Download the page and keep the left-hand container only
    page = scrapertools.cachePage(item.url)
    left_block = scrapertools.get_match(
        page, '<div class="contenedor-izq(.*?)<div class="contenedor-der')

    row_pattern = ('<li class="views-row[^<]+'
                   '<div class="views-field views-field-title"[^<]+'
                   '<span class="field-content"[^<]+'
                   '<a href="([^"]+)">([^<]+)</a>')

    result = []
    no_thumbnail = ""
    for href, link_text in re.findall(row_pattern, left_block, re.DOTALL):
        episode_title = link_text.strip()
        episode_url = urlparse.urljoin(item.url, href)

        # Try to get the air date out of the title
        episode_date = scrapertools.parse_date(episode_title)

        if DEBUG:
            logger.info("title=[" + episode_title + "], url=[" + episode_url +
                        "], thumbnail=[" + no_thumbnail + "]")

        episode = Item(channel=CHANNELNAME,
                       title=episode_title,
                       action="play",
                       server="extremaduratv",
                       url=episode_url,
                       thumbnail=no_thumbnail,
                       show=item.show,
                       aired_date=episode_date,
                       folder=False)
        result.append(episode)

    # Sample: <li class="pager-next last"><a href="/alacarta/tv/programas/informativos/97/extremadura-noticias-1?page=1"
    pager_links = re.findall('<li class="pager-next[^<]+<a href="([^"]+)"',
                             left_block, re.DOTALL)
    for href in pager_links:
        pager = Item(channel=CHANNELNAME,
                     title=">> Página siguiente",
                     action="episodios_bloque_izquierdo",
                     url=urlparse.urljoin(item.url, href),
                     show=item.show,
                     extra=item.extra)
        result.append(pager)

    return result
コード例 #17
0
ファイル: navarratv.py プロジェクト: tvalacarta/tvalacarta
def episodios(item, load_all_pages=True):
    """List the episodes found in the "Bloque2Noticias" blocks of ``item.url``.

    Each block provides a thumbnail (from the CSS ``url('...')``), a link, a
    title and a paragraph used as plot. The episode URL is a YouTube watch
    URL built from the id that follows "/yt/" in the scraped link.

    Args:
        item: Source item; ``item.url`` is the page and ``item.show`` is
            copied to every episode.
        load_all_pages: accepted but not used by this function.

    Returns:
        list: ``Item`` objects with ``action="play"``.
    """
    logger.info("tvalacarta.channels.navarratv episodios")

    # Download the page
    page = scrapertools.cache_page(item.url)

    # Markup being parsed (sample):
    #   <div class="Bloque2Noticias">
    #   <div class="ImpactoBloque W50 H120 FranjaRoja ">
    #   <div class="ImpactoBloqueImagen W98" style="height: 150px;">
    #   <div class="ImpactoContenedorImagen" style="... background-image: url('https://i.ytimg.com/vi/RXBDpg7oduk/mqdefault.jpg');" onclick="location.href='/AlaCarta/92C813D7-1676-E17B-1AB1E57E2065947C/yt/RXBDpg7oduk/IMPLICADOS-18-DE-JUNIO-DE-2016';"/></div>
    #   </div>
    #   <div class="ImpactoBloqueContenido W98">
    #   <h2><a href="/AlaCarta/92C813D7-1676-E17B-1AB1E57E2065947C/yt/RXBDpg7oduk/IMPLICADOS-18-DE-JUNIO-DE-2016" class="TextoNeutro">IMPLICADOS 18 DE JUNIO DE 2016</a></h2>
    #   <p>IMPLICADOS 18 DE JUNIO DE 2016</p>
    block_pattern = "".join([
        '<div class="Bloque2Noticias"[^<]+',
        '<div class="ImpactoBloque W50 H120 FranjaRoja[^<]+',
        '<div class="ImpactoBloqueImagen[^<]+',
        "<div class=\"ImpactoContenedorImagen\".*?url\('([^']+)'\)[^<]+</div[^<]+",
        '</div[^<]+',
        '<div class="ImpactoBloqueContenido[^<]+',
        '<h2><a href="([^"]+)" class="TextoNeutro">([^<]+)</a></h2[^<]+',
        '<p>([^<]*)</p>',
    ])

    result = []
    for image_src, href, heading, paragraph in re.findall(block_pattern, page, re.DOTALL):
        image_url = urlparse.urljoin(item.url, image_src)

        video_id = scrapertools.find_single_match(href, "/yt/([^/]+)/")
        watch_url = "https://www.youtube.com/watch?v=" + video_id

        episode_title = scrapertools.safe_unicode(heading).encode("utf-8").strip()
        synopsis = paragraph.strip()
        episode_date = scrapertools.parse_date(episode_title)

        episode = Item(channel=__channel__,
                       action="play",
                       server="navarratv",
                       title=episode_title,
                       url=watch_url,
                       thumbnail=image_url,
                       fanart=image_url,
                       show=item.show,
                       aired_date=episode_date,
                       plot=synopsis,
                       folder=False)
        result.append(episode)

    return result
コード例 #18
0
ファイル: montecarlo.py プロジェクト: nosuko/tvalacarta
def episodios(item, load_all_pages=False):
    """List the videos of a programme page plus an optional next-page item.

    Every ``div_videos_contenedor_item`` block yields a playable ``Item``
    (link, image, date text and title). When a "siguiente" pager link is
    present, a ">> Página siguiente" item pointing at it is appended.

    Args:
        item: Source item; ``item.url`` is the page and ``item.show`` is
            copied to every result.
        load_all_pages: accepted but not used by this function.

    Returns:
        list: episode items, optionally followed by the next-page item.
    """
    logger.info("tvalacarta.channels.montecarlo episodios")

    # Download the page
    page = scrapertools.cachePage(item.url)

    # Markup being parsed (sample):
    #   <div class="div_videos_contenedor_item">
    #   <div class="div_videos_imagen">
    #   <a href="/programas/el-sult%C3%A1n/videos/cap%C3%ADtulo-2">
    #   <img class="img-responsive" typeof="foaf:Image" src="http://www.montecarlotv.com.uy/sites/default/files/styles/imagen_programa/public/Sultan_20012016.jpg?itok=0rXaCnrn" width="406" height="246" alt="" />
    #   </a>
    #   </div>
    #   <a href="/programas/el-sult%C3%A1n/videos/cap%C3%ADtulo-2">
    #   <div class="div_videos_contenedor_descripcion">
    #   <div class="div_videos_contenedor_descripcion_linea1">
    #   <div class="div_videos_fecha">
    #   <span class="date-display-single" property="dc:date" datatype="xsd:dateTime" content="2016-01-20T00:00:00-03:00">20/01/2016</span>
    #   </div></div><div class="div_videos_titulo">Capítulo 2</div></div></a>
    video_pattern = ('<div class="div_videos_contenedor_item"[^<]+'
                     '<div class="div_videos_imagen"[^<]+'
                     '<a href="([^"]+)"[^<]+'
                     '<img class="img-responsive" typeof="foaf:Image" src="([^"]+)"[^<]+'
                     '</a[^<]+'
                     '</div>[^<]+'
                     '<a[^<]+'
                     '<div class="div_videos_contenedor_descripcion[^<]+'
                     '<div class="div_videos_contenedor_descripcion[^<]+'
                     '<div class="div_videos_fecha[^<]+'
                     '<span class="date-display-single[^>]+>([^<]+)</span[^<]+'
                     '</div></div><div class="div_videos_titulo">([^<]+)<')

    result = []
    for href, image_src, date_text, video_title in re.findall(video_pattern,page,re.DOTALL):
        video_url = urlparse.urljoin(item.url,href)
        image_url = urlparse.urljoin(item.url,image_src)
        video_date = scrapertools.parse_date(date_text)

        if DEBUG:
            logger.info("title=["+video_title+"], url=["+video_url+"], thumbnail=["+image_url+"]")

        result.append(
            Item(channel=CHANNELNAME,
                 title=video_title,
                 action="play",
                 server="montecarlo",
                 url=video_url,
                 thumbnail=image_url,
                 plot="",
                 show=item.show,
                 aired_date=video_date,
                 folder=False))

    # Pager link, if there is one
    following = scrapertools.find_single_match(page,'<a title="Ir a la p[^"]+" href="([^>]+)">siguiente')
    if following == "":
        return result

    result.append(
        Item(channel=CHANNELNAME,
             title=">> Página siguiente",
             action="episodios",
             url=urlparse.urljoin(item.url,following),
             show=item.show))
    return result
コード例 #19
0
def episodios(item):
    """Return one playable ``Item`` per modal-video block of ``item.url``.

    A block runs from ``<div class="modal-video-ajax`` to the first
    ``</blockquote>`` after it. Title, page link, image, synopsis and media
    link are read from the block, with links resolved against ``item.url``.
    """
    logger.info("extremaduratv.episodios")

    def first_match(text, pattern):
        # Shorthand for the single-match helper used on every field
        return scrapertools.find_single_match(text, pattern)

    # Download the page
    html = scrapertools.cachePage(item.url)

    # Attributes read from each block (sample values):
    #   data-video-imagen-modal="http://www.canalextremadura.es/sites/default/files/.../_desdeelaire.jpg?itok=FmvSbPkH"
    #   data-video-url="/alacarta/tv/videos/extremadura-desde-el-aire-3"
    #   data-video-titulo-modal="El Reino del Pata Negra"
    #   data-video-video-modal="rtmp://canalextremadura.cdn.canalextremadura.es/canalextremadura/tv/S-B4583-009.mp4"
    found = []
    for chunk in re.findall('<div class="modal-video-ajax(.*?</blockquote>)', html, re.DOTALL):
        name = first_match(chunk, 'data-video-titulo-modal="([^"]+)"')
        link = urlparse.urljoin(
            item.url, first_match(chunk, 'data-video-url="([^"]+)"'))
        picture = urlparse.urljoin(
            item.url, first_match(chunk, 'data-video-imagen-modal="([^"]+)"'))
        summary = first_match(
            chunk, '<blockquote class="nomargin">(.*?)</blockquote>').strip()
        # The air date, if any, is looked for in the title text
        when = scrapertools.parse_date(name)
        media = urlparse.urljoin(
            item.url, first_match(chunk, 'data-video-video-modal="([^"]+)"'))

        found.append(
            Item(channel=CHANNELNAME, title=name, action="play",
                 server="extremaduratv", plot=summary, url=link,
                 thumbnail=picture, fanart=picture, show=item.show,
                 aired_date=when, extra=media, view="videos", folder=False))

    return found
コード例 #20
0
ファイル: montecarlo.py プロジェクト: igurrutxaga/tvalacarta
def episodios(item, load_all_pages=False):
    """Scrape the video list of a programme page.

    Returns a list with one playable ``Item`` per
    ``div_videos_contenedor_item`` block and, when the page has a
    "siguiente" pager link, a final ">> Página siguiente" item for it.
    ``load_all_pages`` is accepted but not used by this function.
    """
    logger.info("tvalacarta.channels.montecarlo episodios")

    # Download the page
    html = scrapertools.cachePage(item.url)

    # One entry looks like this (sample):
    #   <div class="div_videos_contenedor_item">
    #   <div class="div_videos_imagen">
    #   <a href="/programas/el-sult%C3%A1n/videos/cap%C3%ADtulo-2">
    #   <img class="img-responsive" typeof="foaf:Image" src="http://www.montecarlotv.com.uy/sites/default/files/styles/imagen_programa/public/Sultan_20012016.jpg?itok=0rXaCnrn" width="406" height="246" alt="" />
    #   </a>
    #   </div>
    #   <a href="/programas/el-sult%C3%A1n/videos/cap%C3%ADtulo-2">
    #   <div class="div_videos_contenedor_descripcion">
    #   <div class="div_videos_contenedor_descripcion_linea1">
    #   <div class="div_videos_fecha">
    #   <span class="date-display-single" property="dc:date" datatype="xsd:dateTime" content="2016-01-20T00:00:00-03:00">20/01/2016</span>
    #   </div></div><div class="div_videos_titulo">Capítulo 2</div></div></a>
    pieces = [
        '<div class="div_videos_contenedor_item"[^<]+',
        '<div class="div_videos_imagen"[^<]+',
        '<a href="([^"]+)"[^<]+',
        '<img class="img-responsive" typeof="foaf:Image" src="([^"]+)"[^<]+',
        '</a[^<]+',
        '</div>[^<]+',
        '<a[^<]+',
        '<div class="div_videos_contenedor_descripcion[^<]+',
        '<div class="div_videos_contenedor_descripcion[^<]+',
        '<div class="div_videos_fecha[^<]+',
        '<span class="date-display-single[^>]+>([^<]+)</span[^<]+',
        '</div></div><div class="div_videos_titulo">([^<]+)<',
    ]
    entries = re.findall("".join(pieces),html,re.DOTALL)

    videos = []
    empty_plot = ""
    for link, picture, shown_date, caption in entries:
        absolute_link = urlparse.urljoin(item.url,link)
        absolute_picture = urlparse.urljoin(item.url,picture)
        when = scrapertools.parse_date(shown_date)
        if DEBUG:
            logger.info("title=["+caption+"], url=["+absolute_link+"], thumbnail=["+absolute_picture+"]")

        videos.append( Item(channel=CHANNELNAME, title=caption, action="play", server="montecarlo", url=absolute_link, thumbnail=absolute_picture, plot=empty_plot, show=item.show, aired_date=when, folder=False) )

    pager_href = scrapertools.find_single_match(html,'<a title="Ir a la p[^"]+" href="([^>]+)">siguiente')
    if pager_href != "":
        pager_url = urlparse.urljoin(item.url,pager_href)
        videos.append( Item(channel=CHANNELNAME, title=">> Página siguiente", action="episodios", url=pager_url, show=item.show) )

    return videos
コード例 #21
0
ファイル: rtpa.py プロジェクト: pablobart/tvalacarta-web
def episodios(item):
    """Return the playable episodes listed by an RTPA JSON feed.

    The JSON document at ``item.url`` is downloaded (``&fin=1000`` is appended
    first when the URL carries no ``&fin=`` parameter; note that this updates
    ``item.url`` in place) and one ``Item`` is built for every entry of its
    ``"VOD"`` list.

    Args:
        item: Source item. ``item.url`` is the feed URL and ``item.show`` is
            copied to every episode.

    Returns:
        list: ``Item`` objects with ``action="play"`` and ``server="rtpa"``.
    """
    logger.info("tvalacarta.channels.rtpa episodios")
    itemlist = []

    if "&fin=" not in item.url:
        item.url = item.url + "&fin=1000"

    data = scrapertools.cache_page(item.url)
    json_object = jsontools.load_json(data)

    for vod in json_object["VOD"]:
        logger.info("vod=" + repr(vod))

        # Title layout: "<programme>[ - <episode title>][ (<air date>)]"
        title = vod["nombre_programa"]
        if vod["titulo"] != "":
            title = title + " - " + vod["titulo"]
        if vod["fecha_emision"] != "":
            title = title + " (" + scrapertools.htmlclean(
                vod["fecha_emision"]) + ")"
        url = "http://www.rtpa.es/video:" + urllib.quote(
            vod["nombre_programa"]) + "_" + vod["id_generado"] + ".html"

        # The thumbnail is best effort: a missing or unquotable value must not
        # drop the episode. Only ordinary errors are swallowed, so
        # KeyboardInterrupt/SystemExit still propagate.
        try:
            url_imagen = vod["url_imagen"]
            # quote() also escapes the ":" of the scheme, so after collapsing
            # doubled slashes the "http://" prefix is restored by hand.
            thumbnail = urllib.quote(url_imagen).replace("//", "/").replace(
                "http%3A/", "http://")
        except Exception:
            thumbnail = ""

        aired_date = scrapertools.parse_date(vod["fecha_emision"])

        plot = scrapertools.htmlclean(vod["sinopsis"])
        itemlist.append(
            Item(channel=CHANNELNAME,
                 title=title,
                 url=url,
                 thumbnail=thumbnail,
                 plot=plot,
                 fanart=thumbnail,
                 server="rtpa",
                 action="play",
                 show=item.show,
                 viewmode="movie_with_plot",
                 aired_date=aired_date,
                 folder=False))

    return itemlist
コード例 #22
0
def episodios_bloque_derecho(item, load_all_pages=False):
    """List the episodes shown as image + title entries on ``item.url``.

    Each match of link, image and title becomes a playable ``Item``. When the
    page has a "siguiente" link, a next-page item with ``action="episodios"``
    is built for it: with ``load_all_pages`` true its episodes are fetched
    through ``episodios()`` and added to the result, otherwise the next-page
    item itself is appended.

    Returns:
        list: the collected ``Item`` objects.
    """
    logger.info("extremaduratv.episodios_bloque_derecho")

    # Download the page
    page = scrapertools.cachePage(item.url)

    # Markup being parsed (sample):
    #   <a href="/alacarta/tv/videos/trastos-y-tesoros-260315">
    #   <img src="http://www.canalextremadura.es/sites/default/files/styles/alacarta_listado_programas/public/cadillac.jpg?itok=cAhwJKrp" width="225" height="140" alt="" />
    #   </a></div>  </div>
    #   <div class="views-field views-field-title">
    #   <span class="field-content">Trastos y tesoros (26/03/15)</span>
    entry_pattern = ('<a href="([^"]+)"[^<]+'
                     '<img src="([^"]+)"[^<]+'
                     '</a></div[^<]+</div[^<]+'
                     '<div class="views-field views-field-title"[^<]+'
                     '<span class="field-content">([^<]+)</span>')

    result = []
    for href, image_src, caption in re.findall(entry_pattern,page,re.DOTALL):
        episode_title = caption.strip()
        episode_url = urlparse.urljoin(item.url,href)

        # Try to get the air date out of the title
        episode_date = scrapertools.parse_date(episode_title)

        if DEBUG:
            logger.info("title=["+episode_title+"], url=["+episode_url+"], thumbnail=["+image_src+"]")

        result.append(
            Item(channel=CHANNELNAME,
                 title=episode_title,
                 action="play",
                 server="extremaduratv",
                 url=episode_url,
                 thumbnail=image_src,
                 show=item.show,
                 aired_date=episode_date,
                 folder=False))

    # Sample: <li class="pager-next last"><a href="/alacarta/tv/programas/informativos/97/extremadura-noticias-1?page=1"
    following = scrapertools.find_single_match(page,'href="([^"]+)">siguiente')
    if following == "":
        return result

    pager = Item(channel=CHANNELNAME,
                 title=">> Página siguiente",
                 action="episodios",
                 url=urlparse.urljoin(item.url,following),
                 show=item.show,
                 extra=item.extra)

    if load_all_pages:
        result.extend(episodios(pager,load_all_pages))
    else:
        result.append(pager)

    return result
コード例 #23
0
def episodios(item):
    """List the videos published on an Extremadura TV programme page.

    Downloads ``item.url``, extracts every ``modal-video-ajax`` block and
    builds one playable ``Item`` per video. When the page contains a
    "pager-next" link, a ">> Página siguiente" item pointing to it is
    appended at the end.

    Args:
        item: Item whose ``url`` is the page to scrape and whose ``show`` is
            copied to every generated item.

    Returns:
        list: episode Items, optionally followed by the next-page Item.
    """
    logger.info("tvalacarta.channels.extremaduratv.episodios")
    itemlist = []

    # Download the page
    data = scrapertools.cachePage(item.url)

    # Each video is described by data-* attributes on its modal <div>, e.g.:
    #   <div class="modal-video-ajax modal-video modal fade color-tv" id="modalTV2_33318" ...
    #   data-video-imagen-modal="http://www.canalextremadura.es/.../_desdeelaire.jpg?itok=FmvSbPkH"
    #   data-video-video-mobile="http://iphonevod.canalextremadura.es/S-B4583-009.mp4"
    #   data-video-url="/alacarta/tv/videos/extremadura-desde-el-aire-3"
    #   data-video-titulo-modal="El Reino del Pata Negra"
    #   data-video-id-nodo="33318"
    #   data-video-video-modal="rtmp://canalextremadura.cdn.canalextremadura.es/canalextremadura/tv/S-B4583-009.mp4"
    patron = '<div class="modal-video-ajax(.*?<div class="barra-cerrar-modal)'
    matches = re.findall(patron, data, re.DOTALL)

    # Pages after the first one start with two featured videos that are
    # repeated on every page, so they are skipped there
    if "?page" in item.url:
        matches = matches[2:]

    for match in matches:
        title = scrapertools.find_single_match(match, 'data-video-titulo-modal="([^"]+)"')
        url = urlparse.urljoin(item.url, scrapertools.find_single_match(match, 'data-video-url="([^"]+)"'))
        thumbnail = urlparse.urljoin(item.url, scrapertools.find_single_match(match, 'data-video-imagen-modal="([^"]+)"'))
        plot = scrapertools.find_single_match(match, '<blockquote class="nomargin">(.*?)</blockquote>').strip()
        # The air date is looked up in the title text
        aired_date = scrapertools.parse_date(title)
        # The direct media URL travels in "extra"
        extra = urlparse.urljoin(item.url, scrapertools.find_single_match(match, 'data-video-video-modal="([^"]+)"'))

        itemlist.append( Item(channel=CHANNELNAME, title=title , action="play" , server="extremaduratv" , plot=plot, url=url, thumbnail=thumbnail, fanart=thumbnail, show=item.show, aired_date=aired_date, extra=extra, view="videos", folder=False) )

    if len(itemlist) > 0:
        next_page_url = scrapertools.find_single_match(data, '<li class="pager-next"><a title="[^"]+" href="([^"]+)"')

        # Without a pager link, urljoin() would return the current URL and the
        # "next page" entry would reload this same page forever
        if next_page_url != "":
            next_page_url = urlparse.urljoin(item.url, next_page_url)
            # show is forwarded so that episodes on later pages keep it
            next_page_item = Item(channel=CHANNELNAME, title=">> Página siguiente" , action="episodios" , url=next_page_url, show=item.show)
            itemlist.append( next_page_item )

    return itemlist
コード例 #24
0
ファイル: rtve_api.py プロジェクト: codixor/maximumTv
def episodios(item):
    """List the videos of an RTVE programme using its ``videos.json`` feed.

    Downloads ``item.url + "/videos.json"`` and creates one playable ``Item``
    for each entry under ``page.items``. When running under XBMC and at least
    one video was found, an extra ">> Opciones para esta serie" item is
    appended.

    Args:
        item: programme Item; ``url``, ``show``, ``channel`` and
            ``thumbnail`` are read.

    Returns:
        list: the generated Items.
    """
    logger.info("tvalacarta.channels.rtve_api episodios")

    itemlist = []

    # Download the JSON listing
    listing_url = item.url+"/videos.json"
    data = scrapertools.cache_page(listing_url)
    json_object = jsontools.load_json(data)
    json_items = json_object["page"]["items"]

    for json_item in json_items:
        title = json_item["longTitle"]
        url = json_item["uri"]
        thumbnail = json_item["imageSEO"]
        if json_item["description"] is not None:
            plot = scrapertools.htmlclean(json_item["description"])
        else:
            plot = ""
        page = json_item["htmlUrl"]
        aired_date = scrapertools.parse_date(json_item["publicationDate"])

        # The duration is divided by 1000 to obtain seconds and is rendered
        # as [h:]mm:ss. Minutes and seconds are zero-padded so that 5 min 3 s
        # reads "5:03" rather than "5:3".
        ms = json_item["duration"]
        if ms is None:
            duration = ""
        else:
            # Explicit floor division keeps the arithmetic integral on both
            # Python 2 and Python 3
            minutes, seconds = divmod(ms // 1000, 60)
            hours, minutes = divmod(minutes, 60)
            hours = hours % 24
            if hours > 0:
                duration = "%d:%02d:%02d" % (hours, minutes, seconds)
            else:
                duration = "%d:%02d" % (minutes, seconds)

        if (DEBUG): logger.info(" title=["+repr(title)+"], url=["+repr(url)+"], thumbnail=["+repr(thumbnail)+"] plot=["+repr(plot)+"]")
        itemlist.append( Item(channel="rtve", title=title , action="play" , server="rtve", page=page, url=url, thumbnail=thumbnail, fanart=thumbnail, show=item.show , plot=plot , duration=duration, aired_date=aired_date, viewmode="movie_with_plot", folder=False) )

    from core import config
    if config.is_xbmc() and len(itemlist)>0:
        itemlist.append( Item(channel=item.channel, title=">> Opciones para esta serie", url=item.url, action="serie_options##episodios", thumbnail=item.thumbnail, show=item.show, folder=False))

    return itemlist
コード例 #25
0
ファイル: clantve.py プロジェクト: franbetis/actualizarkodi
def episodios(item):
    """List the videos of a Clan TV programme using its ``videos.json`` feed.

    Downloads ``item.url + "/videos.json"`` and creates one playable ``Item``
    (channel and server "rtve") for each entry under ``page.items``. When
    running under XBMC and at least one video was found, an extra
    ">> Opciones para esta serie" item is appended.

    Args:
        item: programme Item; ``url``, ``show``, ``channel`` and
            ``thumbnail`` are read.

    Returns:
        list: the generated Items.
    """
    logger.info("tvalacarta.channels.clantv episodios")

    itemlist = []

    # Download the JSON listing
    listing_url = item.url+"/videos.json"
    data = scrapertools.cache_page(listing_url)
    json_object = jsontools.load_json(data)
    json_items = json_object["page"]["items"]

    for json_item in json_items:
        title = json_item["longTitle"]
        url = json_item["uri"]
        thumbnail = json_item["imageSEO"]
        if json_item["description"] is not None:
            plot = scrapertools.htmlclean(json_item["description"])
        else:
            plot = ""
        page = json_item["htmlUrl"]
        aired_date = scrapertools.parse_date(json_item["publicationDate"])

        # The duration is divided by 1000 to obtain seconds and is rendered
        # as [h:]mm:ss. Minutes and seconds are zero-padded so that 5 min 3 s
        # reads "5:03" rather than "5:3".
        ms = json_item["duration"]
        if ms is None:
            duration = ""
        else:
            # Explicit floor division keeps the arithmetic integral on both
            # Python 2 and Python 3
            minutes, seconds = divmod(ms // 1000, 60)
            hours, minutes = divmod(minutes, 60)
            hours = hours % 24
            if hours > 0:
                duration = "%d:%02d:%02d" % (hours, minutes, seconds)
            else:
                duration = "%d:%02d" % (minutes, seconds)

        if (DEBUG): logger.info(" title=["+repr(title)+"], url=["+repr(url)+"], thumbnail=["+repr(thumbnail)+"] plot=["+repr(plot)+"]")
        itemlist.append( Item(channel="rtve", title=title , action="play" , server="rtve", page=page, url=url, thumbnail=thumbnail, fanart=thumbnail, show=item.show , plot=plot , duration=duration, aired_date=aired_date, viewmode="movie_with_plot", folder=False) )

    from core import config
    if config.is_xbmc() and len(itemlist)>0:
        itemlist.append( Item(channel=item.channel, title=">> Opciones para esta serie", url=item.url, action="serie_options##episodios", thumbnail=item.thumbnail, show=item.show, folder=False))

    return itemlist
コード例 #26
0
ファイル: cctvspan.py プロジェクト: Jmlaguna89/miNuevoRepo
def detalle_episodio(item):
    """Complete an episode Item with its air date and media URL.

    Sets ``item.geolocked`` to "0", fills ``item.aired_date`` from the title
    when it is empty, and resolves ``item.media_url`` through the ``cntv``
    server module.

    Args:
        item: episode Item; it is modified in place.

    Returns:
        The same ``item``. ``media_url`` is "" when it could not be resolved.
    """

    item.geolocked = "0"

    if item.aired_date == "":
        # NOTE(review): "mdy" presumably tells parse_date to read the date as
        # month/day/year - confirm against scrapertools.parse_date
        item.aired_date = scrapertools.parse_date(item.title,"mdy")

    try:
        from servers import cntv as servermodule
        video_urls = servermodule.get_video_url(item.url)
        # First returned entry, second field
        item.media_url = video_urls[0][1]
    except Exception:
        # Best effort: any failure is reported and leaves media_url empty.
        # Exception (not a bare except) lets KeyboardInterrupt and SystemExit
        # propagate.
        import traceback
        print(traceback.format_exc())
        item.media_url = ""

    return item
コード例 #27
0
ファイル: cctvspan.py プロジェクト: pablobart/tvalacarta-web
def detalle_episodio(item):
    """Complete an episode Item with its air date and media URL.

    Sets ``item.geolocked`` to "0", fills ``item.aired_date`` from the title
    when it is empty, and resolves ``item.media_url`` through the ``cntv``
    server module.

    Args:
        item: episode Item; it is modified in place.

    Returns:
        The same ``item``. ``media_url`` is "" when it could not be resolved.
    """

    item.geolocked = "0"

    if item.aired_date == "":
        # NOTE(review): "mdy" presumably tells parse_date to read the date as
        # month/day/year - confirm against scrapertools.parse_date
        item.aired_date = scrapertools.parse_date(item.title, "mdy")

    try:
        from servers import cntv as servermodule
        video_urls = servermodule.get_video_url(item.url)
        # First returned entry, second field
        item.media_url = video_urls[0][1]
    except Exception:
        # Best effort: any failure is reported and leaves media_url empty.
        # Exception (not a bare except) lets KeyboardInterrupt and SystemExit
        # propagate.
        import traceback
        print(traceback.format_exc())
        item.media_url = ""

    return item
コード例 #28
0
ファイル: rtvcm.py プロジェクト: erral/tvalacarta
def episodios(item):
    """List the videos of an RTVCM (CMMedia) programme page.

    Downloads ``item.url``, builds one playable ``Item`` per ``<article>``
    block and, when a "Siguiente" pager link exists, appends a
    ">> Página siguiente" item.

    Args:
        item: programme Item, or a next-page Item created by this function.

    Returns:
        list: episode Items, optionally followed by the next-page Item.
    """
    logger.info("tvalacarta.rtvcm.episodios")

    itemlist = []

    # The programme name is normally the title of the incoming item, but the
    # pagination items created below are titled ">> Página siguiente". They
    # carry the programme name in "show", which therefore takes precedence;
    # otherwise every episode from page 2 onwards would get the pager label
    # as its show.
    show = getattr(item, "show", "") or item.title

    # Download the page
    data = scrapertools.cache_page(item.url)

    # Sample of the markup being parsed:
    #   <article>
    #   <figure>
    #   <img src="http://api.rtvcm.webtv.flumotion.com/videos/31351/poster.jpg?w=7fe8fa22" alt="Cosecha propia">
    #   <a class="icon-play-circle" href="http://www.cmmedia.es/programas/tv/cosecha-propia/videos/31351" title="Cosecha propia"><span class="sr-only">...</span></a>
    #   </figure>
    #   <p class="date"><time>24/09/2016</time></p>
    #   <h3><a href="..." title="Cosecha propia">Cosecha propia</a></h3>
    #   <p>Venta de Don Quijote</p>
    #   </article>
    patron  = '<article[^<]+'
    patron += '<figure[^<]+'
    patron += '<img src="([^"]+)" alt="([^"]+)"[^<]+'
    patron += '<a class="icon-play-circle" href="([^"]+)"[^<]+<span[^<]+</span></a[^<]+'
    patron += '</figure[^<]+'
    patron += '<p class="date"><time>([^<]+)</time></p[^<]+'
    patron += '<h3><a[^<]+</a></h3[^<]+'
    patron += '<p>([^<]+)</p>'

    matches = re.compile(patron,re.DOTALL).findall(data)

    for scrapedthumbnail,scrapedtitle,scrapedurl,fecha,scrapedplot in matches:
        thumbnail = urlparse.urljoin(item.url,scrapedthumbnail)
        url = urlparse.urljoin(item.url,scrapedurl)
        # The date is appended to the title
        title = scrapedtitle+" "+fecha
        plot = scrapedplot
        aired_date = scrapertools.parse_date(fecha)

        itemlist.append( Item(channel=__channel__, title=title , url=url, plot=plot, thumbnail=thumbnail , fanart=thumbnail , action="play" , server="rtvcm", show = show , aired_date=aired_date, folder=False) )

    next_page_url = scrapertools.find_single_match(data,'<a href="([^"]+)" aria-label="Siguiente">')
    if next_page_url!="":
        itemlist.append( Item(channel=__channel__, action="episodios", title=">> Página siguiente" , url=urlparse.urljoin(item.url,next_page_url) , show=show , folder=True) )

    return itemlist
コード例 #29
0
ファイル: onceninos.py プロジェクト: pablobart/tvalacarta-web
def detalle_episodio(item):
    """Complete an episode Item from its own page.

    Downloads ``item.url`` and overwrites ``item.title`` (``<p id="title">``),
    ``item.aired_date`` (parsed from that title) and ``item.plot``
    (``<p id="desp">``). Sets ``item.geolocked`` to "0" and resolves
    ``item.media_url`` through the ``onceninos`` server module, reusing the
    page that was already downloaded.

    Args:
        item: episode Item; it is modified in place.

    Returns:
        The same ``item``. ``media_url`` is "" when it could not be resolved.
    """

    data = scrapertools.cache_page(item.url)
    item.title = scrapertools.find_single_match(data,'<p id="title">([^<]+)</p>')
    item.aired_date = scrapertools.parse_date(item.title)
    item.plot = scrapertools.find_single_match(data,'<p id="desp">([^<]+)</p>')

    item.geolocked = "0"
    try:
        from servers import onceninos as servermodule
        video_urls = servermodule.get_video_url(item.url,page_data=data)
        # First returned entry, second field
        item.media_url = video_urls[0][1]
    except Exception:
        # Best effort: any failure is reported and leaves media_url empty.
        # Exception (not a bare except) lets KeyboardInterrupt and SystemExit
        # propagate.
        import traceback
        print(traceback.format_exc())
        item.media_url = ""

    return item
コード例 #30
0
ファイル: adn40.py プロジェクト: nosuko/tvalacarta
def episodios(item):
    """List the videos of an ADN40 programme from its JSON feed.

    Downloads ``item.url``, parses it as JSON and creates one playable
    ``Item`` for each entry of the ``video`` list.

    Args:
        item: programme Item; ``url`` and ``show`` are read.

    Returns:
        list: the episode Items.
    """
    logger.info("tvalacarta.channels.adn40 episodios")

    itemlist = []

    # Download the page
    data = scrapertools.cache_page(item.url)
    json_data = jsontools.load_json(data)

    for json_item in json_data["video"]:
        title = json_item["title"]
        url = json_item["link"]
        thumbnail = json_item["image"]
        plot = json_item["teaser"]
        aired_date = scrapertools.parse_date(json_item["date"])

        # aired_date was parsed but never handed to the Item, so the air date
        # was silently lost; it is now passed along
        itemlist.append( Item(channel=CHANNELNAME, title=title , url=url, thumbnail=thumbnail, plot=plot, action="play", server="adn40", show=item.show, aired_date=aired_date, folder=False) )

    return itemlist
コード例 #31
0
ファイル: rtvcm.py プロジェクト: tvalacarta/tvalacarta
def detalle_episodio(item):
    """Complete an RTVCM episode Item with its air date and media URL.

    Downloads ``item.url`` and, inside a single best-effort block, parses the
    response as JSON, sets ``item.geolocked`` to "0", fills
    ``item.aired_date`` from the title and resolves ``item.media_url``
    through the ``rtvcm`` server module.

    Args:
        item: episode Item; it is modified in place.

    Returns:
        The same ``item``. On any failure ``media_url`` is "" and the fields
        not yet assigned at that point keep their previous values.
    """
    logger.info("tvalacarta.rtvcm.detalle_episodio")

    data = scrapertools.cache_page(item.url)

    try:
        # NOTE(review): the parsed object is not used afterwards; the call is
        # kept because a failure here is what sends the flow to the except
        # branch - presumably a validity check, confirm.
        jsontools.load_json(data)

        item.geolocked = "0"
        item.aired_date = scrapertools.parse_date(item.title)

        from servers import rtvcm as servermodule
        video_urls = servermodule.get_video_url(item.url)
        # First returned entry, second field
        item.media_url = video_urls[0][1]

    except Exception:
        # Exception (not a bare except) lets KeyboardInterrupt and SystemExit
        # propagate while still covering every ordinary error
        import traceback
        print(traceback.format_exc())
        item.media_url = ""

    return item
コード例 #32
0
ファイル: rtvcm.py プロジェクト: erral/tvalacarta
def detalle_episodio(item):
    """Complete an RTVCM episode Item with its air date and media URL.

    Downloads ``item.url`` and, inside a single best-effort block, parses the
    response as JSON, sets ``item.geolocked`` to "0", fills
    ``item.aired_date`` from the title and resolves ``item.media_url``
    through the ``rtvcm`` server module.

    Args:
        item: episode Item; it is modified in place.

    Returns:
        The same ``item``. On any failure ``media_url`` is "" and the fields
        not yet assigned at that point keep their previous values.
    """
    logger.info("tvalacarta.rtvcm.detalle_episodio")

    data = scrapertools.cache_page(item.url)

    try:
        # NOTE(review): the parsed object is not used afterwards; the call is
        # kept because a failure here is what sends the flow to the except
        # branch - presumably a validity check, confirm.
        jsontools.load_json(data)

        item.geolocked = "0"
        item.aired_date = scrapertools.parse_date(item.title)

        from servers import rtvcm as servermodule
        video_urls = servermodule.get_video_url(item.url)
        # First returned entry, second field
        item.media_url = video_urls[0][1]

    except Exception:
        # Exception (not a bare except) lets KeyboardInterrupt and SystemExit
        # propagate while still covering every ordinary error
        import traceback
        print(traceback.format_exc())
        item.media_url = ""

    return item
コード例 #33
0
def detalle_episodio(item):
    """Complete an episode Item from its own page.

    Downloads ``item.url`` and overwrites ``item.plot`` (the HTML-cleaned
    text after the "Resumen del v..." heading), ``item.title`` (the
    ``<span class="activo">`` label) and ``item.aired_date`` (parsed from
    that title). Sets ``item.geolocked`` to "0" and resolves
    ``item.media_url`` through the ``aragontv`` server module.

    Args:
        item: episode Item; it is modified in place.

    Returns:
        The same ``item``. ``media_url`` is "" when it could not be resolved.
    """

    data = scrapertools.cache_page(item.url)

    scrapedplot = scrapertools.find_single_match(data,'<span class="title">Resumen del v[^>]+</span>(.*?)</div>')
    item.plot = scrapertools.htmlclean( scrapedplot ).strip()
    item.title = scrapertools.find_single_match(data,'<span class="activo"><strong>([^<]+)</strong></span>')
    item.aired_date = scrapertools.parse_date( item.title )

    item.geolocked = "0"

    try:
        from servers import aragontv as servermodule
        video_urls = servermodule.get_video_url(item.url)
        # First returned entry, second field
        item.media_url = video_urls[0][1]
    except Exception:
        # Best effort: any failure is reported and leaves media_url empty.
        # Exception (not a bare except) lets KeyboardInterrupt and SystemExit
        # propagate.
        import traceback
        print(traceback.format_exc())
        item.media_url = ""

    return item
コード例 #34
0
ファイル: laredcl.py プロジェクト: nosuko/tvalacarta
def detalle_episodio(item):
    """Complete an episode Item from the metadata of its own page.

    Downloads ``item.url`` and fills ``item.plot``, ``item.thumbnail`` and
    ``item.aired_date`` from the ``itemprop`` ``<meta>`` tags (description,
    thumbnailUrl, datePublished). Sets ``item.geolocked`` to "0" and takes
    ``item.media_url`` from the first item returned by ``play(item)``, with
    backslashes turned into forward slashes.

    Args:
        item: episode Item; it is modified in place.

    Returns:
        The same ``item``. ``media_url`` is "" when it could not be read
        from the result of ``play``.
    """

    data = scrapertools.cache_page(item.url)

    item.plot = scrapertools.htmlclean(scrapertools.find_single_match(data,'<meta content="([^"]+)" itemprop="description')).strip()
    item.thumbnail = scrapertools.find_single_match(data,'<meta content="([^"]+)" itemprop="thumbnailUrl')

    # Sample: <meta content="miércoles, 16 de septiembre de 2015 3:30" itemprop="datePublished"
    scrapeddate = scrapertools.find_single_match(data,'<meta content="([^"]+)" itemprop="datePublished')

    item.aired_date = scrapertools.parse_date(scrapeddate)

    item.geolocked = "0"

    # play() is called outside the try block, as before: its own errors
    # propagate to the caller
    media_item = play(item)
    try:
        item.media_url = media_item[0].url.replace("\\","/")
    except Exception:
        # Best effort: any failure is reported and leaves media_url empty.
        # Exception (not a bare except) lets KeyboardInterrupt and SystemExit
        # propagate.
        import traceback
        print(traceback.format_exc())
        item.media_url = ""

    return item
コード例 #35
0
def episodios_bloque_izquierdo(item):
    """List the episodes linked from the left-hand container of a page.

    Only the markup between ``contenedor-izq`` and ``contenedor-der`` is
    scanned. Every title link becomes a playable ``Item`` (without thumbnail)
    and every "pager-next" link found in that same fragment becomes a
    ">> Página siguiente" item handled by this function again.

    Args:
        item: Item whose ``url`` is the page to scrape; ``show`` and
            ``extra`` are propagated.

    Returns:
        list: episode Items followed by any next-page Items.
    """
    logger.info("extremaduratv.episodios_bloque_izquierdo")

    # Download the page and keep just the left-hand block
    full_page = scrapertools.cachePage(item.url)
    data = scrapertools.get_match(full_page,'<div class="contenedor-izq(.*?)<div class="contenedor-der')

    episode_pattern = (
        '<li class="views-row[^<]+'
        '<div class="views-field views-field-title"[^<]+'
        '<span class="field-content"[^<]+'
        '<a href="([^"]+)">([^<]+)</a>'
    )

    itemlist = []
    for href, raw_title in re.findall(episode_pattern, data, re.DOTALL):
        episode_title = raw_title.strip()
        episode_url = urlparse.urljoin(item.url, href)
        episode_thumbnail = ""

        # Try to get the air date out of the title
        aired_date = scrapertools.parse_date(episode_title)

        if DEBUG:
            logger.info("title=["+episode_title+"], url=["+episode_url+"], thumbnail=["+episode_thumbnail+"]")

        itemlist.append(
            Item(channel=CHANNELNAME,
                 title=episode_title,
                 action="play",
                 server="extremaduratv",
                 url=episode_url,
                 thumbnail=episode_thumbnail,
                 show=item.show,
                 aired_date=aired_date,
                 folder=False))

    # Sample: <li class="pager-next last"><a href="/alacarta/tv/programas/informativos/97/extremadura-noticias-1?page=1"
    pager_pattern = '<li class="pager-next[^<]+<a href="([^"]+)"'
    for href in re.findall(pager_pattern, data, re.DOTALL):
        itemlist.append(
            Item(channel=CHANNELNAME,
                 title=">> Página siguiente",
                 action="episodios_bloque_izquierdo",
                 url=urlparse.urljoin(item.url, href),
                 show=item.show,
                 extra=item.extra))

    return itemlist
コード例 #36
0
ファイル: extremaduratv.py プロジェクト: neopack1/tvalacarta
def episodios(item):
    """List the videos published on an Extremadura TV programme page.

    Two kinds of entries are parsed with the same logic: the featured ones
    (``ipost clearfix`` containers, first page only) and the regular grid
    ones (``col-md-4 col-sm-4 col-xs-6`` containers). Entries without a
    title are discarded. When at least one episode was found and the page
    has a "pager-next" link, a ">> Página siguiente" item is appended.

    Args:
        item: Item whose ``url`` is the page to scrape and whose ``show`` is
            copied to every generated item.

    Returns:
        list: episode Items, optionally followed by the next-page Item.
    """
    logger.info("tvalacarta.channels.extremaduratv.episodios")
    itemlist = []

    # Download the page
    data = scrapertools.cachePage(item.url)

    # Every entry is captured from its container up to its calendar/date span
    entry_tail = '(.*?<li><i class="icon-calendar3"></i[^<]+<span class="date-display-single">[^<]+</span>)'

    # Each section: (entry pattern, title pattern, log the raw matches)
    sections = []
    if "?page" not in item.url:
        # Featured entries are only parsed on the first page
        sections.append(('<div class="ipost clearfix">' + entry_tail,
                         '<h3[^>]+>([^<]+)</h3>',
                         True))
    sections.append(('<div class="col-md-4 col-sm-4 col-xs-6">' + entry_tail,
                     '<h4[^>]+>([^<]+)</h4>',
                     False))

    for patron, title_pattern, log_matches in sections:
        matches = re.findall(patron, data, re.DOTALL)
        if log_matches:
            logger.info("matches=" + repr(matches))

        for match in matches:
            title = scrapertools.find_single_match(match, title_pattern).strip()
            if title == "":
                continue

            url = urlparse.urljoin(
                item.url,
                scrapertools.find_single_match(match, '<a href="([^"]+)"'))
            thumbnail = urlparse.urljoin(
                item.url,
                scrapertools.find_single_match(
                    match, '<img class="image_fade" src="([^"]+)"'))

            # Prefer the explicit date element; fall back to a date embedded
            # in the title
            aired_date = scrapertools.find_single_match(
                match, '<span class="date-display-single">([^<]+)</span>')
            aired_date = scrapertools.parse_date(aired_date).strip()
            if aired_date == "":
                aired_date = scrapertools.parse_date(title).strip()

            itemlist.append(
                Item(channel=CHANNELNAME,
                     title=title,
                     action="play",
                     server="extremaduratv",
                     plot="",
                     url=url,
                     thumbnail=thumbnail,
                     fanart=thumbnail,
                     show=item.show,
                     aired_date=aired_date,
                     view="videos",
                     folder=False))

    if len(itemlist) > 0:
        next_page_url = scrapertools.find_single_match(
            data, '<li class="pager-next"><a title="[^"]+" href="([^"]+)"')

        # Without a pager link, urljoin() would return the current URL and the
        # "next page" entry would reload this same page forever
        if next_page_url != "":
            next_page_url = urlparse.urljoin(item.url, next_page_url)
            # show is forwarded so that episodes on later pages keep it
            itemlist.append(
                Item(channel=CHANNELNAME,
                     title=">> Página siguiente",
                     action="episodios",
                     url=next_page_url,
                     show=item.show))

    return itemlist
コード例 #37
0
def episodios(item):
    """List the videos of a DW (Spanish) programme.

    When ``item.url`` is a programme page (no ``pagenumber=`` in it), the
    programme id is read from that page and the first "mediafilter" listing
    URL is built from it; otherwise ``item.url`` is already a listing URL.
    One playable ``Item`` is created per listed video and, if any video was
    found, a ">> Página siguiente" item pointing to the following
    ``pagenumber`` is appended.

    Args:
        item: programme Item or next-page Item; ``url`` and ``show`` are read.

    Returns:
        list: episode Items, optionally followed by the next-page Item.
    """
    logger.info("tvalacarta.channels.dwspan episodios")

    # Sample of one listing entry:
    #   <div class="col1">
    #   <div class="news searchres hov">
    #   <a href="/es/life-links-readytofight-listos-para-pelear/av-19224025">
    #   <div class="teaserImg tv">
    #   <img border="0" width="220" height="124" src="/image/18378218_301.jpg" title="..." alt="default" /> </div>
    #   <h2>Life Links - #readytofight: Listos para pelear
    #   <span class="date">30.04.2016
    #   | 26:06 Minutos
    #   </span>
    #   <span class='icon tv'></span> </h2>
    #   <p>...</p>
    #   </a>
    #   </div>
    #   </div>
    if "pagenumber=" not in item.url:
        # Programme page -> listing URL, for instance
        #   http://www.dw.com/es/multimedia/todos-los-contenidos/s-100838?type=18&programs=15535663
        #   http://www.dw.com/mediafilter/research?lang=es&type=18&programs=15535663&sort=date&results=32&showteasers=true&pagenumber=1
        programme_page = scrapertools.cache_page(item.url)
        program_id = scrapertools.find_single_match(
            programme_page,
            '<a href="http://www.dw.com/es/multimedia/todos-los-contenidos/s-100838.type=18&programs=([^"]+)"'
        )
        data_url = "http://www.dw.com/mediafilter/research?lang=es&type=18&programs=" + program_id + "&sort=date&results=32&showteasers=true&pagenumber=1"
    else:
        data_url = item.url

    data = scrapertools.cache_page(data_url)

    pattern = "".join([
        '<div class="col1"[^<]+',
        '<div class="news searchres hov"[^<]+',
        '<a href="([^"]+)"[^<]+',
        '<div class="teaserImg tv"[^<]+',
        '<img.*?src="([^"]+)"[^<]+</div>[^<]+',
        '<h2>([^<]+)',
        '<span class="date">(\d+\.\d+\.\d+)\s+\|\s+(\d+\:\d+)[^<]+',
        '</span>[^<]+',
        '<span[^<]+</span[^<]+</h2[^<]+',
        '<p>([^<]+)</p>',
    ])
    entries = re.compile(pattern, re.DOTALL).findall(data)
    logger.info(repr(entries))

    itemlist = []
    for raw_url, raw_thumbnail, raw_title, raw_date, duration, raw_plot in entries:
        title = raw_title.strip()
        thumbnail = urlparse.urljoin(item.url, raw_thumbnail)
        url = urlparse.urljoin(item.url, raw_url.strip())
        aired_date = scrapertools.parse_date(raw_date)

        itemlist.append(
            Item(channel=CHANNELNAME,
                 title=title,
                 action="play",
                 server="dwspan",
                 url=url,
                 thumbnail=thumbnail,
                 fanart=thumbnail,
                 plot=raw_plot,
                 aired_date=aired_date,
                 duration=duration,
                 show=item.show,
                 view="videos",
                 folder=False))

    if not itemlist:
        return itemlist

    # The next page is the same listing URL with pagenumber incremented
    current_page = scrapertools.find_single_match(data_url, "pagenumber=(\d+)")
    logger.info("current_page=" + current_page)
    next_page = str(int(current_page) + 1)
    logger.info("next_page=" + next_page)
    next_page_url = data_url.replace("pagenumber=" + current_page,
                                     "pagenumber=" + next_page)
    logger.info("next_page_url=" + next_page_url)

    itemlist.append(
        Item(channel=CHANNELNAME,
             title=">> Página siguiente",
             action="episodios",
             url=next_page_url,
             show=item.show))

    return itemlist
コード例 #38
0
def episodios(item):
    """List the videos shown on an À Punt "a la carta" page.

    Downloads ``item.url`` and creates one playable ``Item`` per
    ``<div class="module" id="video-...">`` block, taking the title from the
    ``newtitle`` attribute and the air date from that title. A
    ">> Página siguiente" item is appended when the pager link resolves to a
    URL that differs from the current one and does not end in "/0".

    Args:
        item: Item whose ``url`` is the page to scrape; ``show`` is
            propagated.

    Returns:
        list: episode Items, optionally followed by the next-page Item.
    """
    logger.info("tvalacarta.channels.apunt episodios")

    # Download the page
    data = scrapertools.cache_page(item.url)

    # Sample of one entry (abridged):
    #   <div class="module" id="video-19425">
    #   <a href="/va/a-la-carta/programes/vist-en-tv/el-mati-a-punt/05-12-2018-el-mati-a-punt" class="photo">
    #   <div class="imgcontainer">
    #   <img src="https://secure-cf-c.ooyala.com/..." alt="" />
    #   </div>
    #   <span class='time'>03:01:30</span>
    #   </a>...
    #   <p title="Programa complet d'El Mati ...">Programa complet d'El Mati ...</p>
    #   ...
    #   <span  newtitle="05.12.2018 | El Mati ..." destiny ="https://www.apuntmedia.es/..." class="icon fa-share-alt"></span>
    #   </div>
    patron = (
        '<div class="module" id="video-[^<]+'
        '<a href="([^"]+)"[^<]+'
        '<div class="imgcontainer"[^<]+'
        '<img src="([^"]+)"[^<]+'
        '</div[^<]+'
        "<span class='time'>([^<]+)</span.*?"
        '<p title=[^>]+>(.*?)</p>.*?'
        'newtitle="(.*?)" destiny'
    )

    itemlist = []
    for href, image, length, raw_plot, name in scrapertools.find_multiple_matches(data, patron):
        episode_url = urlparse.urljoin(item.url, href)
        clean_plot = scrapertools.htmlclean(raw_plot)
        aired_date = scrapertools.parse_date(name)
        itemlist.append(
            Item(channel=CHANNELNAME,
                 action="play",
                 server="apunt",
                 title=name,
                 plot=clean_plot,
                 show=item.show,
                 url=episode_url,
                 thumbnail=image,
                 duration=length,
                 aired_date=aired_date,
                 folder=False))

    pager_href = scrapertools.find_single_match(
        data, '<a class="flechapaginado" href="([^"]+)"')
    if pager_href == "":
        return itemlist

    next_page_url = urlparse.urljoin(item.url, pager_href)
    # A pager that points back to this page, or to ".../0", is not followed
    if next_page_url == item.url or next_page_url.endswith("/0"):
        return itemlist

    itemlist.append(
        Item(channel=CHANNELNAME,
             title=">> Página siguiente",
             url=next_page_url,
             action="episodios",
             show=item.show,
             folder=True))

    return itemlist
コード例 #39
0
ファイル: aragontv.py プロジェクト: YingYangTV/yingyang
def episodios(item,data=""):
    """List the videos of an Aragón TV programme page.

    Args:
        item: programme Item; ``url``, ``show``, ``thumbnail`` and ``plot``
            are read.
        data: HTML of the page, if the caller already has it. When empty,
            ``item.url`` is downloaded.

    Returns:
        list: one playable Item per "vid bloque" entry (with air date and
        duration taken from its "fecha" span), followed by a
        ">> Página siguiente" Item when the pager has a link after the
        active page.
    """
    logger.info("tvalacarta.channels.aragontv episodios")
    logger.info("tvalacarta.channels.aragontv programa [item="+item.tostring()+" show="+item.show+"]")

    # Download the page unless the caller supplied it
    if data=="":
        data = scrapertools.cachePage(item.url)

    # Sample of one entry (abridged):
    #   <div id="idv_1186" class="vid bloque">
    #   <div class="imagen">
    #   <img title="Malanquilla y Camarillas" alt="Malanquilla y Camarillas" src="/_archivos/imagenes/galeria_5738_thumb.jpg" />
    #   <div class="play">
    #   <a href="/programas/.../malanquilla-y-camarillas-27122011-2131" title="Ver video" rel="videoFacebox"><span>Ver video</span></a>
    #   </div>
    #   </div>
    #   ...
    #   <div class="social">
    #   <span class="fecha">
    #   27/12/2011 21:31 h<br />
    #   Duración: 00:49:38
    #   </span>
    #   </div>
    #   </div>
    entry_pattern = (
        '<div id="[^"]+" class="vid bloque[^<]+'
        '<div class="imagen[^<]+'
        '<img title="[^"]+" alt="([^"]+)" src="([^"]+)"[^<]+'
        '<div class="play">[^<]+'
        '<a href="([^"]+)".*?'
        '<span class="fecha">(.*?)</span>'
    )
    entries = re.compile(entry_pattern,re.DOTALL).findall(data)

    # Splits the "fecha" span into its date and its duration
    date_regex = re.compile("\s*([^<]+)<br />\s*Duración\: ([^\s]+)",re.DOTALL)

    itemlist = []
    for alt_text, image_src, href, date_block in entries:
        date_fields = date_regex.findall(date_block)
        fecha_string = date_fields[0][0].strip()
        duration = date_fields[0][1].strip()

        aired_date = scrapertools.parse_date(fecha_string)

        scrapedtitle = alt_text.strip()
        scrapedurl = urlparse.urljoin(item.url,href)
        scrapedthumbnail = urlparse.urljoin(item.url,image_src)
        if DEBUG:
            logger.info("title=["+scrapedtitle+"], url=["+scrapedurl+"], thumbnail=["+scrapedthumbnail+"], show=["+item.show+"]")

        # Add to the listing
        itemlist.append(
            Item(channel=CHANNELNAME,
                 title=scrapedtitle,
                 action="play",
                 server="aragontv",
                 url=scrapedurl,
                 thumbnail=scrapedthumbnail,
                 plot="",
                 show=item.show,
                 aired_date=aired_date,
                 duration=duration,
                 folder=False))

    # Pager: the link that follows the active page number
    pager_pattern = "Paginación.*?<span class='activo'>[^<]+</span>  \|  <a href='([^']+)'"
    pager_links = re.compile(pager_pattern,re.DOTALL).findall(data)
    scrapertools.printMatches(pager_links)
    if pager_links:
        itemlist.append(
            Item(channel=CHANNELNAME,
                 title=">> Página siguiente",
                 action="episodios",
                 url=urlparse.urljoin(item.url,pager_links[0]),
                 thumbnail=item.thumbnail,
                 plot=item.plot,
                 show=item.show,
                 folder=True))

    return itemlist
コード例 #40
0
ファイル: aragontv.py プロジェクト: sal666/tvalacarta
def episodios(item, data=""):
    """List the videos of an Aragón TV programme page.

    Args:
        item: programme Item; ``url``, ``show``, ``thumbnail`` and ``plot``
            are read.
        data: HTML of the page, if the caller already has it. When empty,
            ``item.url`` is downloaded.

    Returns:
        list: one playable Item per "vid bloque" entry (with air date and
        duration taken from its "fecha" span), followed by a
        ">> Página siguiente" Item when the pager has a link after the
        active page. For URLs containing "informativos" the air date is
        appended to each title.
    """
    logger.info("tvalacarta.channels.aragontv episodios")
    logger.info("tvalacarta.channels.aragontv programa [item=" +
                item.tostring() + " show=" + item.show + "]")

    # Download the page unless the caller supplied it
    if data == "":
        data = scrapertools.cachePage(item.url)

    # Sample of one entry (abridged):
    #   <div id="idv_1186" class="vid bloque">
    #   <div class="imagen">
    #   <img title="Malanquilla y Camarillas" alt="Malanquilla y Camarillas" src="/_archivos/imagenes/galeria_5738_thumb.jpg" />
    #   <div class="play">
    #   <a href="/programas/.../malanquilla-y-camarillas-27122011-2131" title="Ver video" rel="videoFacebox"><span>Ver video</span></a>
    #   </div>
    #   </div>
    #   ...
    #   <div class="social">
    #   <span class="fecha">
    #   27/12/2011 21:31 h<br />
    #   Duración: 00:49:38
    #   </span>
    #   </div>
    #   </div>
    entry_pattern = "".join([
        '<div id="[^"]+" class="vid bloque[^<]+',
        '<div class="imagen[^<]+',
        '<img title="[^"]+" alt="([^"]+)" src="([^"]+)"[^<]+',
        '<div class="play">[^<]+',
        '<a href="([^"]+)".*?',
        '<span class="fecha">(.*?)</span>',
    ])
    entries = re.compile(entry_pattern, re.DOTALL).findall(data)

    # Splits the "fecha" span into its date and its duration
    date_regex = re.compile("\s*([^<]+)<br />\s*Duración\: ([^\s]+)", re.DOTALL)

    # News programmes get the air date appended to the title
    add_date_to_title = "informativos" in item.url

    itemlist = []
    for alt_text, image_src, href, date_block in entries:
        date_fields = date_regex.findall(date_block)
        fecha_string = date_fields[0][0].strip()
        duration = date_fields[0][1].strip()

        aired_date = scrapertools.parse_date(fecha_string)

        scrapedtitle = alt_text.strip()
        if add_date_to_title:
            scrapedtitle = scrapedtitle + " (" + aired_date + ")"

        scrapedurl = urlparse.urljoin(item.url, href)
        scrapedthumbnail = urlparse.urljoin(item.url, image_src)
        if DEBUG:
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl +
                        "], thumbnail=[" + scrapedthumbnail + "], show=[" +
                        item.show + "]")

        # Add to the listing
        episode = Item(channel=CHANNELNAME,
                       title=scrapedtitle,
                       action="play",
                       server="aragontv",
                       url=scrapedurl,
                       thumbnail=scrapedthumbnail,
                       plot="",
                       show=item.show,
                       aired_date=aired_date,
                       duration=duration,
                       folder=False)
        itemlist.append(episode)

    # Pager: the link that follows the active page number
    pager_pattern = "Paginación.*?<span class='activo'>[^<]+</span>  \|  <a href='([^']+)'"
    pager_links = re.compile(pager_pattern, re.DOTALL).findall(data)
    scrapertools.printMatches(pager_links)
    if pager_links:
        itemlist.append(
            Item(channel=CHANNELNAME,
                 title=">> Página siguiente",
                 action="episodios",
                 url=urlparse.urljoin(item.url, pager_links[0]),
                 thumbnail=item.thumbnail,
                 plot=item.plot,
                 show=item.show,
                 folder=True,
                 view="videos"))

    return itemlist
コード例 #41
0
ファイル: cctvspan.py プロジェクト: igurrutxaga/tvalacarta
def episodios(item):
    """Build the list of playable videos scraped from a CCTV Spanish page.

    The page at ``item.url`` is downloaded once and scanned with three
    regular expressions, one for each listing layout handled here. Every
    match becomes a non-folder ``Item`` with ``action="play"`` and
    ``server="cntv"``. When none of the layouts yields a video, the page is
    handed over to ``episodios_serie``.

    Returns the resulting list of items.
    """
    logger.info("tvalacarta.cctvspan episodios")

    # Download the page
    html = scrapertools.cachePage(item.url)
    videos = []

    def scan(expression):
        # Run one layout expression over the whole page, echoing the
        # matches when DEBUG is on.
        found = re.compile(expression,re.DOTALL).findall(html)
        if DEBUG:
            scrapertools.printMatches(found)
        return found

    def video(link, image, heading):
        # The visible title is the cleaned heading; the air date is parsed
        # from the raw heading with the "mdy" format.
        label = scrapertools.htmlclean(heading)
        broadcast = scrapertools.parse_date(heading,"mdy")
        return Item(channel=__channel__, action="play", server="cntv", title=label, url=link, thumbnail=image, show=item.show, aired_date=broadcast, folder=False)

    # Layout 1: the video is identified by a guid, which is used as the url
    #   <div class="text_lt">
    #   <a guid="40f0..." onclick="loadvideo('40f0...')"><img src="http://.../2015031515100374890.bmp" ... /></a>
    #   <h3><a onclick="loadvideo('40f0...')" ...>EXTRANJEROS EN CHINA 03/15/2015 Liz Vargas, ...</a></h3>
    layout  = '<div class="text_lt"[^<]+'
    layout += '<a guid="([^"]+)"[^<]+<img src="([^"]+)"[^<]+</a[^<]+'
    layout += '<h3><a[^>]+>([^<]+)</a>'
    for guid, image, heading in scan(layout):
        videos.append( video(guid, image, heading) )

    # Layout 2: plain link without thumbnail
    #   <span class="text_lt">
    #   <h3><a href="http://cctv.cntv.cn/2015/03/31/VIDE1427774161717552.shtml" target="_blank">... 03/31/2015 11:00</a></h3>
    layout  = '<span class="text_lt"[^<]+'
    layout += '<h3><a href="([^"]+)"[^>]+>([^<]+)</a>'
    for link, heading in scan(layout):
        videos.append( video(link, "", heading) )

    # Layout 3: list entry with thumbnail, an empty "tp1" link and the title
    # inside the "tp2" block
    #   <li>
    #   <a href="http://cctv.cntv.cn/2015/08/21/VIDE1440121441066290.shtml" target="_blank">
    #   <img src="http://.../2015082114203738064.jpg" width="151" height="110" />
    #   </a>
    #   <div class="tp1"><a href="..." target="_blank">
    #   </a>
    #   </div>
    #   <div class="tp2">
    #   <a href="..." target="_blank">
    #   NIHAO CHINA 08/21/2015 Viajando y Aprendiendo Chino-...
    #   </a></div></li>
    layout  = '<li[^<]+'
    layout += '<a href="([^"]+)"[^<]+'
    layout += '<img src="([^"]+)"[^<]+'
    layout += '</a[^<]+'
    layout += '<div class="tp1"><a[^<]+'
    layout += '</a[^<]+'
    layout += '</div[^<]+'
    layout += '<div class="tp2"[^<]+'
    layout += '<a[^>]+>([^<]+)</a>'
    for link, image, heading in scan(layout):
        videos.append( video(link, image, heading) )

    # Nothing found: try to read it as the page of a series
    if not videos:
        videos = episodios_serie(item,html)

    return videos
コード例 #42
0
ファイル: cctvspan.py プロジェクト: pablobart/tvalacarta-web
def episodios(item):
    """Scrape the playable videos listed on a CCTV Spanish page.

    The downloaded page is matched against three listing layouts, in a fixed
    order. Each layout is described by its regular expression plus a small
    adapter that turns a regex match into ``(url, thumbnail, raw title)``.
    Every match is appended as a non-folder ``Item`` (``action="play"``,
    ``server="cntv"``). If no layout produces a video, the result of
    ``episodios_serie`` for the same page is returned instead.
    """
    logger.info("tvalacarta.cctvspan episodios")

    # Download the page
    page = scrapertools.cachePage(item.url)

    layouts = (
        # <div class="text_lt">
        # <a guid="40f0..." onclick="loadvideo('40f0...')"><img src="http://.../2015031515100374890.bmp" ... /></a>
        # <h3><a onclick="loadvideo('40f0...')" ...>EXTRANJEROS EN CHINA 03/15/2015 Liz Vargas, ...</a></h3>
        # -> the guid is used as the url
        ('<div class="text_lt"[^<]+'
         '<a guid="([^"]+)"[^<]+<img src="([^"]+)"[^<]+</a[^<]+'
         '<h3><a[^>]+>([^<]+)</a>',
         lambda hit: (hit[0], hit[1], hit[2])),

        # <span class="text_lt">
        # <h3><a href="http://cctv.cntv.cn/2015/03/31/VIDE1427774161717552.shtml" target="_blank">... 03/31/2015 11:00</a></h3>
        # -> no thumbnail available
        ('<span class="text_lt"[^<]+'
         '<h3><a href="([^"]+)"[^>]+>([^<]+)</a>',
         lambda hit: (hit[0], "", hit[1])),

        # <li>
        # <a href="http://cctv.cntv.cn/2015/08/21/VIDE1440121441066290.shtml" target="_blank">
        # <img src="http://.../2015082114203738064.jpg" width="151" height="110" />
        # </a>
        # <div class="tp1"><a href="..." target="_blank">
        # </a>
        # </div>
        # <div class="tp2">
        # <a href="..." target="_blank">
        # NIHAO CHINA 08/21/2015 Viajando y Aprendiendo Chino-...
        # </a></div></li>
        ('<li[^<]+'
         '<a href="([^"]+)"[^<]+'
         '<img src="([^"]+)"[^<]+'
         '</a[^<]+'
         '<div class="tp1"><a[^<]+'
         '</a[^<]+'
         '</div[^<]+'
         '<div class="tp2"[^<]+'
         '<a[^>]+>([^<]+)</a>',
         lambda hit: (hit[0], hit[1], hit[2])),
    )

    results = []
    for expression, to_fields in layouts:
        hits = re.compile(expression, re.DOTALL).findall(page)
        if DEBUG:
            scrapertools.printMatches(hits)

        for hit in hits:
            link, image, heading = to_fields(hit)
            # Title shown to the user is the cleaned heading; the air date
            # is parsed from the raw heading with the "mdy" format
            label = scrapertools.htmlclean(heading)
            broadcast = scrapertools.parse_date(heading, "mdy")
            entry = Item(channel=__channel__,
                         action="play",
                         server="cntv",
                         title=label,
                         url=link,
                         thumbnail=image,
                         show=item.show,
                         aired_date=broadcast,
                         folder=False)
            results.append(entry)

    # No video matched: check whether this is the page of a series
    if len(results) < 1:
        results = episodios_serie(item, page)

    return results
コード例 #43
0
ファイル: rtvcm.py プロジェクト: tvalacarta/tvalacarta
def episodios(item):
    """List the videos of an rtvcm programme, plus a next-page entry.

    For a programme URL the "/programas-completos" listing is tried first
    and "/videos" second; the first one that yields videos is used. A URL
    that already contains "?pagina=" is a pagination link and is downloaded
    as is.

    Returns a list of playable ``Item`` objects, followed by a folder item
    pointing to the next page when the downloaded page links to one.
    """
    logger.info("tvalacarta.rtvcm.episodios")

    itemlist = []

    # Decide which URLs to try and which show name to tag the videos with
    if "?pagina=" in item.url:
        prueba_urls = [item.url]
        # Pagination items carry a ">> ..." label as title, so the show name
        # has to come from item.show (set below when the pagination item is
        # built). Fall back to the title for items that lack it.
        show = getattr(item, "show", "") or item.title
    else:
        prueba_urls = [item.url + "/programas-completos", item.url + "/videos"]
        show = item.title

    # Sample of the markup being matched:
    #   <article>
    #   <figure>
    #   <img src="http://api.rtvcm.webtv.flumotion.com/videos/30531/poster.jpg?w=f720f390" alt="Promo A tu vera Mini">
    #   <a class="icon-play-circle" href="http://www.cmmedia.es/programas/tv/a-tu-vera//programas-completos/30531?pagina=2" title="Promo A tu vera Mini">
    #   <span class="sr-only">http://.../poster.jpg?w=f720f390</span></a>
    #   </figure>
    #   <p class="date"><time></time></p>
    #   <h3><a href="..." title="Promo A tu vera Mini">Promo A tu vera Mini</a></h3>
    #   <p>La novena edicion de A Tu Vera Mini ya esta en marcha. ...</p>
    #   </article>
    patron = '<article[^<]+'
    patron += '<figure[^<]+'
    patron += '<img src="([^"]+)" alt="([^"]+)"[^<]+'
    patron += '<a class="icon-play-circle" href="([^"]+)"[^<]+'
    patron += '<span[^<]+</span></a[^<]+'
    patron += '</figure[^<]+'
    patron += '<p class="date"><time>([^<]*)</time></p[^<]+'
    patron += '<h3><a[^<]+</a></h3[^<]+'
    patron += '<p>(.*?)</p>'
    # Compiled once: the expression is the same for every candidate URL
    video_re = re.compile(patron, re.DOTALL)

    for prueba_url in prueba_urls:
        data = scrapertools.cache_page(prueba_url)
        logger.info("tvalacarta.rtvcm.episodios data=" + data)

        for scrapedthumbnail, scrapedtitle, scrapedurl, fecha, scrapedplot in video_re.findall(data):
            thumbnail = urlparse.urljoin(item.url, scrapedthumbnail)
            url = urlparse.urljoin(item.url, scrapedurl)
            # <time> may be empty (see sample); strip so the title does not
            # end with a dangling space in that case
            title = (scrapedtitle + " " + fecha).strip()
            plot = scrapedplot
            aired_date = scrapertools.parse_date(fecha)

            itemlist.append(
                Item(channel=__channel__,
                     title=title,
                     url=url,
                     plot=plot,
                     thumbnail=thumbnail,
                     fanart=thumbnail,
                     action="play",
                     server="rtvcm",
                     show=show,
                     aired_date=aired_date,
                     folder=False))

        # The first candidate URL that yields videos wins
        if itemlist:
            break

    # Pagination link, looked up in the last page downloaded
    next_page_url = scrapertools.find_single_match(
        data, '<a href="([^"]+)" aria-label="Siguiente">')
    if next_page_url != "":
        itemlist.append(
            Item(channel=__channel__,
                 action="episodios",
                 title=">> Página siguiente",
                 url=urlparse.urljoin(item.url, next_page_url),
                 # Keep the show name so the next page does not tag its
                 # videos with the pagination label
                 show=show,
                 folder=True))

    return itemlist
コード例 #44
0
ファイル: telemundo.py プロジェクト: nosuko/tvalacarta
def episodios(item):
    """List the episodes of a Telemundo show, plus a next-page entry.

    ``item.url`` is either an HTML page, where the next-page URL is read
    from the ``data-feed-url-next`` attribute, or a "video_feed" URL, whose
    response is JSON with the HTML fragment under "slide" and the next-page
    URL under "nextUrl".

    Each episode is returned as a folder ``Item`` with ``action="partes"``.
    A ">> ..." pagination item with ``action="episodios"`` is appended when
    a next-page URL is available.
    """
    logger.info("tvalacarta.channels.telemundo episodios")
    itemlist = []

    # Download the page
    data = scrapertools.cachePage(item.url)

    if "video_feed" in item.url:
        json_data = jsontools.load_json(data)
        data = json_data["slide"]
        # "nextUrl" may be missing or null; normalise to "" so that no
        # pagination item is built in that case (joining None onto item.url
        # would just yield the current page again)
        next_page_url = json_data.get("nextUrl") or ""
    else:
        # Found in markup such as:
        #   <section class="video-carousel--BRAND" data-feed-url-prev=""
        #     data-feed-url-next="http://www.telemundo.com/node/947661/video_feed?group=0&sub=0&vid=1046906&page=0%2C1">
        next_page_url = scrapertools.find_single_match(data,'data-feed-url-next="([^"]+)"')

    # Sample of the markup being matched:
    #   <div class="media--SHOW-BRAND-VIDEO media--active">
    #   <div class="media--media">
    #   <a href="http://www.telemundo.com/novelas/celia/videos/.../celia-capitulo-final-...-1046906" class="media--play-button ...">
    #   <img class="media--cover-image" src="http://www.telemundo.com/sites/.../160208_2982200_....jpg?itok=DdLxQQUV" ... />            </a>
    #   </div>
    #   <div class="media--content">
    #   <h4><a class="media--title" href="...">Capitulo Final:Celia muere a causa de un tumor en el cerebro</a></h4>
    #   <p class="media--air-date">Emitido: lunes 02/8/16</p>
    #   <div class="media--description">
    #   <h3><a href="..." class="media--link">Despues de las complicaciones de salud ...</a></h3>
    #   </div>
    #   </div>
    #   </div>
    patron  = '<div class="media--SHOW-BRAND-VIDEO[^<]+'
    patron += '<div class="media--media"[^<]+'
    patron += '<a href="([^"]+)"[^<]+'
    patron += '<img class="media--cover-image" src="([^"]+)"[^<]+</a[^<]+'
    patron += '</div[^<]+'
    patron += '<div class="media--content"[^<]+'
    patron += '<h4><a class="media--title" href="[^"]+">([^<]+)</a></h4[^<]+'
    patron += '<p class="media--air-date">([^<]+)</p[^<]+'
    patron += '<div class="media--description"[^<]+'
    patron += '<h3><a href="[^"]+" class="media--link">([^<]+)</a></h3>'

    matches = re.compile(patron,re.DOTALL).findall(data)

    for scrapedurl,scrapedthumbnail,scrapedtitle,scrapeddate,scrapedplot in matches:
        # The air-date text is parsed with the "mdy" format
        aired_date = scrapertools.parse_date(scrapeddate,formato="mdy")
        itemlist.append( Item(channel=__channel__, action="partes", title=scrapedtitle, url=scrapedurl, thumbnail=scrapedthumbnail, fanart=scrapedthumbnail, plot=scrapedplot, aired_date=aired_date, show=item.show, view="videos", folder=True))

    if next_page_url:
        itemlist.append( Item(channel=__channel__, title=">> Página siguiente" , url=urlparse.urljoin(item.url,next_page_url), action="episodios", show=item.show, folder=True) )

    return itemlist
コード例 #45
0
ファイル: sieterm.py プロジェクト: YingYangTV/yingyang
def episodios(item, load_all_pages=False):
    """List the videos of a 7RM "a la carta" page.

    Each video found on the page at ``item.url`` becomes a non-folder
    ``Item`` with ``action="play"`` and ``server="sieterm"``, whose url is
    the video detail page. Scraped title and plot are decoded as
    iso-8859-1 and re-encoded as utf-8.

    If the page links to a following page: with ``load_all_pages`` true the
    videos of that page (and the ones after it) are appended recursively;
    otherwise a single next-page folder item is appended.

    Returns the list of items.
    """
    logger.info("tvalacarta.channels.sieterm episodios")

    # Download the page
    html = scrapertools.cachePage(item.url)

    # Sample of one video entry:
    #   <dt class="alacarta-video"><a href="http://..." title="...">Murcianos por el mundo: Cracovia</a> - 12/05/2010 - (5411 veces visto)</dt>
    #   <dd style="height:100%; overflow:hidden">
    #   <a href="http://www.7rm.es/servlet/rtrm.servlets.ServletLink2?METHOD=DETALLEALACARTA&amp;...&amp;mId=4182&amp;autostart=TV" title="Ver v&iacute;deo">
    #   <img src="http://mediateca.regmurcia.com/MediatecaCRM/ServletLink?METHOD=MEDIATECA&amp;accion=imagen&amp;id=4182" alt="..." title="..." style="width:95px" />
    #   </a>
    #   Esta semana nos desplazamos al sur de Polonia, ...
    #   <a href="http://ficheros.7rm.es:3025/Video/4/1/4182_BAJA.mp4">
    #   <img src="/images/bajarArchivo.gif" alt="Descargar Archivo" ... />
    #   </a>
    #   </dd>
    entry_re = re.compile(
        '<dt class="alacarta-video"><a href="([^"]+)" title="[^"]+">([^<]+)</a>.*?([0-9\/]+).*?</dt>[^<]+'
        '<dd style="[^<]+">[^<]+'
        '<a href="[^"]+" title="[^"]+">[^<]+'
        '<img src="([^"]+)"[^<]+'
        '</a>([^<]+)<a href="([^"]+)">',
        re.DOTALL)

    def absolute(reference):
        # Resolve against the page url and undo the HTML-escaped ampersands
        return urlparse.urljoin(item.url,reference).replace("&amp;","&")

    def recode(text):
        # iso-8859-1 bytes -> utf-8 bytes, dropping what cannot be decoded
        return unicode( text , "iso-8859-1" , errors="ignore").encode("utf-8")

    videos = []
    for page_ref, name, date_text, image_ref, synopsis, file_ref in entry_re.findall(html):
        # Video attributes
        label = recode( name.strip()+" ("+date_text+")" )
        file_url = absolute(file_ref)
        image = absolute(image_ref)
        summary = recode( synopsis.strip() )
        page_url = absolute(page_ref)
        if DEBUG:
            logger.info("title=["+label+"], url=["+file_url+"], page=["+page_url+"], thumbnail=["+image+"]")

        # Try to get the air date out of the title (it ends with the date)
        broadcast = scrapertools.parse_date(label)

        # The detail page, not the direct file, is what gets played
        videos.append( Item(channel=CHANNELNAME, title=label , action="play" , server="sieterm" , url=page_url, thumbnail=image, fanart=image, plot=summary , show = item.show , page=page_url, viewmode="movie_with_plot", aired_date=broadcast, folder=False) )

    # Look for the following page
    following = scrapertools.find_single_match(html,'<a class="list-siguientes" href="([^"]+)" title="Ver siguientes archivos">')
    if following=="":
        return videos

    following = urlparse.urljoin(item.url,following)
    following_item = Item(channel=CHANNELNAME, title=">> Página siguiente" , action="episodios" , url=following , show=item.show, folder=True)

    if not load_all_pages:
        videos.append( following_item )
    else:
        videos.extend(episodios(following_item,load_all_pages))

    return videos
コード例 #46
0
ファイル: tvg.py プロジェクト: erral/tvalacarta
def episodios(item, load_all_pages = False):
    """List the episodes of a TVG programme from its AJAX search listing.

    ``item.url`` is either a programme page, from which the programme id is
    read to build the url of the first listing page, or a listing url
    (containing "/ax/") produced by a previous call for pagination.

    Each episode becomes a non-folder ``Item`` with ``action="play"`` and
    ``server="tvg"``. If the listing has a following page: with
    ``load_all_pages`` true its episodes are fetched recursively and
    appended; otherwise a single next-page folder item is appended.

    Returns the list of items.
    """
    logger.info("[tvg.py] episodios")
    itemlist = []

    if "/ax/" in item.url:
        # Already a listing url (pagination)
        data = _tvg_ajax_listing(item.url, item.url)
    else:
        # Read the programme page and extract the programme id
        data = scrapertools.cache_page(item.url)
        try:
            id_programa = scrapertools.get_match(data,r"initAlaCartaBuscador.(\d+)")
        except Exception:
            id_programa = ""

        # Read the first page of episodes, e.g.
        # http://www.crtvg.es/ax/tvgalacartabuscador/programa:33517/pagina:1/seccion:294/titulo:/mes:null/ano:null/temporada:null
        logger.info("[tvg.py] videos - hay programa")
        url = "http://www.crtvg.es/ax/tvgalacartabuscador/programa:"+id_programa+"/pagina:1/seccion:294/titulo:/mes:null/ano:null/temporada:null"
        data = _tvg_ajax_listing(url, item.url)

    # Extract the videos. Sample row (before unescaping):
    #   <tr>
    #   <td class="a-carta-resultado-titulo">
    #   <a href="\/tvg\/a-carta\/rea-publica-74" title="\u00c1rea p\u00fablica">\u00c1rea p\u00fablica<\/a> <\/td>
    #   <td class="a-carta-resultado-tempada"> <\/td>
    #   <td class="a-carta-resultado-data"> 26\/01\/2016 18:30 <\/td> <\/tr>
    patron  = r'<tr[^<]+'
    patron += r'<td class="a-carta-resultado-titulo[^<]+'
    patron += r'<a href="([^"]+)"\s+title="([^"]+)".*?'
    patron += r'<td class="a-carta-resultado-data">(.*?)</td>'
    matches = re.compile(patron,re.DOTALL).findall(data)
    if DEBUG: scrapertools.printMatches(matches)

    for scrapedurl,scrapedtitle,fecha in matches:
        title = scrapedtitle.strip()
        # The title still carries JSON "\uXXXX" escapes; decode them by
        # parsing it as a JSON string value
        json_title = jsontools.load_json('{"title":"'+title+'"}')
        title = json_title["title"]
        title = scrapertools.htmlclean(title)+" - "+fecha.strip()

        url = urlparse.urljoin(item.url,scrapedurl)
        thumbnail = ""
        plot = ""
        aired_date = scrapertools.parse_date(fecha)
        if (DEBUG): logger.info("title=["+title+"], url=["+url+"], thumbnail=["+thumbnail+"]")

        itemlist.append( Item(channel=CHANNELNAME, title=title , action="play" , server="tvg", url=url, thumbnail=thumbnail, plot=plot , show=item.show , aired_date=aired_date, folder=False) )

    # Next page link, e.g.
    #   <a href=\"#\" title=\"Seguinte\" onclick=\"return posteriorpaginaclick(33517, 2, 294)
    patron  = r'<a href="\#" title="Seguinte" onclick="return posteriorpaginaclick\((\d+), (\d+), (\d+)'
    matches = re.compile(patron,re.DOTALL).findall(data)
    if DEBUG: scrapertools.printMatches(matches)

    if matches:
        # Only the first link is relevant
        programa, pagina, seccion = matches[0]
        scrapedtitle = ">>> Página siguiente"
        # http://www.crtvg.es/ax/tvgalacartabuscador/programa:33517/pagina:2/seccion:294/titulo:/mes:null/ano:null/temporada:null
        scrapedurl = "http://www.crtvg.es/ax/tvgalacartabuscador/programa:%s/pagina:%s/seccion:%s/titulo:/mes:null/ano:null/temporada:null" % (programa,pagina,seccion)
        # The link carries no image (the third captured number is the
        # section id, not a path), so reuse the thumbnail of the current item
        scrapedthumbnail = item.thumbnail
        scrapedplot = ""
        if (DEBUG): logger.info("title=["+scrapedtitle+"], url=["+scrapedurl+"], thumbnail=["+scrapedthumbnail+"]")

        next_page_item = Item(channel=CHANNELNAME, title=scrapedtitle , action="episodios" , url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot , show=item.show , category = item.category , folder=True)
        if load_all_pages:
            itemlist.extend( episodios(next_page_item, load_all_pages) )
        else:
            itemlist.append( next_page_item )

    return itemlist


def _tvg_ajax_listing(url, referer):
    """Download a TVG AJAX listing and undo the escaping of its markup."""
    headers = [
        ["User-Agent","Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:17.0) Gecko/20100101 Firefox/17.0"],
        ["X-Requested-With","XMLHttpRequest"],
        ["Referer",referer],
    ]
    data = scrapertools.cache_page(url, post="", headers=headers)
    # Literal \n, \" and \/ sequences in the response are turned back into
    # plain characters so the HTML patterns can match
    data = data.replace("\\n"," ")
    data = data.replace("\\\"","\"")
    data = data.replace("\\/","/")
    return data
コード例 #47
0
ファイル: dwspan.py プロジェクト: nosuko/tvalacarta
def episodios(item):
    """List the videos of a DW (Spanish) programme from its media filter.

    When ``item.url`` already contains "pagenumber=" it is used directly as
    the listing url. Otherwise the programme page is downloaded, the
    programme id is read from its "todos-los-contenidos" link and the url of
    the first listing page is built from it.

    Each video becomes a non-folder ``Item`` with ``action="play"`` and
    ``server="dwspan"``. When at least one video was found, a next-page item
    pointing to the same listing url with "pagenumber" increased by one is
    appended.

    Returns the list of items.
    """
    logger.info("tvalacarta.channels.dwspan episodios")

    if "pagenumber=" not in item.url:
        # Programme page, e.g.
        #   http://www.dw.com/es/multimedia/todos-los-contenidos/s-100838?type=18&programs=15535663
        # turned into the listing
        #   http://www.dw.com/mediafilter/research?lang=es&type=18&programs=15535663&sort=date&results=32&showteasers=true&pagenumber=1
        programme_page = scrapertools.cache_page(item.url)
        program_id = scrapertools.find_single_match(programme_page,'<a href="http://www.dw.com/es/multimedia/todos-los-contenidos/s-100838.type=18&programs=([^"]+)"')
        listing_url = "http://www.dw.com/mediafilter/research?lang=es&type=18&programs="+program_id+"&sort=date&results=32&showteasers=true&pagenumber=1"
    else:
        listing_url = item.url

    listing = scrapertools.cache_page(listing_url)

    # Sample of one entry:
    #   <div class="col1">
    #   <div class="news searchres hov">
    #   <a href="/es/life-links-readytofight-listos-para-pelear/av-19224025">
    #   <div class="teaserImg tv">
    #   <img border="0" width="220" height="124" src="/image/18378218_301.jpg" title="..." alt="default" /> </div>
    #   <h2>Life Links - #readytofight: Listos para pelear
    #   <span class="date">30.04.2016
    #   | 26:06 Minutos
    #   </span>
    #   <span class='icon tv'></span> </h2>
    #   <p>Un iman, un exsalafista, un ex marine de EE. UU. ...</p>
    #   </a>
    #   </div>
    #   </div>
    entry_re = re.compile(
        '<div class="col1"[^<]+'
        '<div class="news searchres hov"[^<]+'
        '<a href="([^"]+)"[^<]+'
        '<div class="teaserImg tv"[^<]+'
        '<img.*?src="([^"]+)"[^<]+</div>[^<]+'
        '<h2>([^<]+)'
        '<span class="date">(\d+\.\d+\.\d+)\s+\|\s+(\d+\:\d+)[^<]+'
        '</span>[^<]+'
        '<span[^<]+</span[^<]+</h2[^<]+'
        '<p>([^<]+)</p>',
        re.DOTALL)
    entries = entry_re.findall(listing)
    logger.info( repr(entries) )

    videos = []
    for link, image, heading, date_text, length, summary in entries:
        label = heading.strip()
        image_url = urlparse.urljoin( item.url , image )
        video_url = urlparse.urljoin( item.url , link.strip() )
        broadcast = scrapertools.parse_date(date_text)

        # Appends a new item to the xbmc item list
        videos.append( Item(channel=CHANNELNAME, title=label , action="play" , server="dwspan", url=video_url, thumbnail=image_url, fanart=image_url, plot=summary , aired_date=broadcast, duration=length, show=item.show, view="videos", folder=False) )

    # An empty page gets no pagination entry
    if not videos:
        return videos

    # Next page: same listing url with the page number increased by one
    current_page = scrapertools.find_single_match(listing_url,"pagenumber=(\d+)")
    logger.info("current_page="+current_page)
    next_page = str(1+int(current_page))
    logger.info("next_page="+next_page)
    next_page_url = listing_url.replace("pagenumber="+current_page,"pagenumber="+next_page)
    logger.info("next_page_url="+next_page_url)

    videos.append(Item(channel=CHANNELNAME, title=">> Página siguiente" , action="episodios" , url=next_page_url, show=item.show) )

    return videos
コード例 #48
0
ファイル: conectate.py プロジェクト: Jmlaguna89/miNuevoRepo
# -*- coding: utf-8 -*-
コード例 #49
0
ファイル: tvg.py プロジェクト: pablobart/tvalacarta-web
def episodios(item, load_all_pages = False):
    """List the episodes of a TVG programme from its AJAX search listing.

    ``item.url`` is either a programme page, from which the programme id is
    read to build the url of the first listing page, or a listing url
    (containing "/ax/") produced by a previous call for pagination.

    Each episode becomes a non-folder ``Item`` with ``action="play"`` and
    ``server="tvg"``. If the listing has a following page: with
    ``load_all_pages`` true its episodes are fetched recursively and
    appended; otherwise a single next-page folder item is appended.

    Returns the list of items.
    """
    logger.info("[tvg.py] episodios")
    itemlist = []

    if "/ax/" in item.url:
        # Already a listing url (pagination)
        data = _tvg_ajax_listing(item.url, item.url)
    else:
        # Read the programme page and extract the programme id
        data = scrapertools.cache_page(item.url)
        try:
            id_programa = scrapertools.get_match(data,r"initAlaCartaBuscador.(\d+)")
        except Exception:
            id_programa = ""

        # Read the first page of episodes, e.g.
        # http://www.crtvg.es/ax/tvgalacartabuscador/programa:33517/pagina:1/seccion:294/titulo:/mes:null/ano:null/temporada:null
        logger.info("[tvg.py] videos - hay programa")
        url = "http://www.crtvg.es/ax/tvgalacartabuscador/programa:"+id_programa+"/pagina:1/seccion:294/titulo:/mes:null/ano:null/temporada:null"
        data = _tvg_ajax_listing(url, item.url)

    # Extract the videos. Sample row (before unescaping):
    #   <tr>
    #   <td class="a-carta-resultado-titulo">
    #   <a href="\/tvg\/a-carta\/rea-publica-74" title="\u00c1rea p\u00fablica">\u00c1rea p\u00fablica<\/a> <\/td>
    #   <td class="a-carta-resultado-tempada"> <\/td>
    #   <td class="a-carta-resultado-data"> 26\/01\/2016 18:30 <\/td> <\/tr>
    patron  = r'<tr[^<]+'
    patron += r'<td class="a-carta-resultado-titulo[^<]+'
    patron += r'<a href="([^"]+)"\s+title="([^"]+)".*?'
    patron += r'<td class="a-carta-resultado-data">(.*?)</td>'
    matches = re.compile(patron,re.DOTALL).findall(data)
    if DEBUG: scrapertools.printMatches(matches)

    for scrapedurl,scrapedtitle,fecha in matches:
        title = scrapedtitle.strip()
        # The title still carries JSON "\uXXXX" escapes; decode them by
        # parsing it as a JSON string value
        json_title = jsontools.load_json('{"title":"'+title+'"}')
        title = json_title["title"]
        title = scrapertools.htmlclean(title)+" - "+fecha.strip()

        url = urlparse.urljoin(item.url,scrapedurl)
        thumbnail = ""
        plot = ""
        aired_date = scrapertools.parse_date(fecha)
        if (DEBUG): logger.info("title=["+title+"], url=["+url+"], thumbnail=["+thumbnail+"]")

        itemlist.append( Item(channel=CHANNELNAME, title=title , action="play" , server="tvg", url=url, thumbnail=thumbnail, plot=plot , show=item.show , aired_date=aired_date, folder=False) )

    # Next page link, e.g.
    #   <a href=\"#\" title=\"Seguinte\" onclick=\"return posteriorpaginaclick(33517, 2, 294)
    patron  = r'<a href="\#" title="Seguinte" onclick="return posteriorpaginaclick\((\d+), (\d+), (\d+)'
    matches = re.compile(patron,re.DOTALL).findall(data)
    if DEBUG: scrapertools.printMatches(matches)

    if matches:
        # Only the first link is relevant
        programa, pagina, seccion = matches[0]
        scrapedtitle = ">>> Página siguiente"
        # http://www.crtvg.es/ax/tvgalacartabuscador/programa:33517/pagina:2/seccion:294/titulo:/mes:null/ano:null/temporada:null
        scrapedurl = "http://www.crtvg.es/ax/tvgalacartabuscador/programa:%s/pagina:%s/seccion:%s/titulo:/mes:null/ano:null/temporada:null" % (programa,pagina,seccion)
        # The link carries no image (the third captured number is the
        # section id, not a path), so reuse the thumbnail of the current item
        scrapedthumbnail = item.thumbnail
        scrapedplot = ""
        if (DEBUG): logger.info("title=["+scrapedtitle+"], url=["+scrapedurl+"], thumbnail=["+scrapedthumbnail+"]")

        next_page_item = Item(channel=CHANNELNAME, title=scrapedtitle , action="episodios" , url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot , show=item.show , category = item.category , folder=True)
        if load_all_pages:
            itemlist.extend( episodios(next_page_item, load_all_pages) )
        else:
            itemlist.append( next_page_item )

    return itemlist


def _tvg_ajax_listing(url, referer):
    """Download a TVG AJAX listing and undo the escaping of its markup."""
    headers = [
        ["User-Agent","Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:17.0) Gecko/20100101 Firefox/17.0"],
        ["X-Requested-With","XMLHttpRequest"],
        ["Referer",referer],
    ]
    data = scrapertools.cache_page(url, post="", headers=headers)
    # Literal \n, \" and \/ sequences in the response are turned back into
    # plain characters so the HTML patterns can match
    data = data.replace("\\n"," ")
    data = data.replace("\\\"","\"")
    data = data.replace("\\/","/")
    return data
コード例 #50
0
ファイル: sieterm.py プロジェクト: tvalacarta/tvalacarta
def episodios(item, load_all_pages=False):
    """Build the list of playable videos found on a sieterm programme page.

    Downloads item.url, turns every "alacarta-video" entry of the page into
    an Item with action="play" and then looks for the "list-siguientes"
    pagination link.

    item -- entry whose url is downloaded; its show is copied onto every
            Item created here.
    load_all_pages -- when true, the following pages are fetched by calling
            this function again and their items are added to the result;
            otherwise a single next-page folder Item is appended instead.

    Returns the list of Item objects.
    """
    logger.info("tvalacarta.channels.sieterm episodios")

    def to_utf8(raw_text):
        # Scraped text is decoded as iso-8859-1 and re-encoded as utf-8
        return unicode( raw_text , "iso-8859-1" , errors="ignore").encode("utf-8")

    def absolute(link):
        # Resolve against the page url and undo the HTML-escaped ampersands
        return urlparse.urljoin(item.url,link).replace("&amp;","&")

    # Download the page
    data = scrapertools.cachePage(item.url)
    #logger.info(data)

    # Two samples of the markup the pattern below is written against
    # (bare string expressions, they have no effect at runtime)
    '''
    <dt class="alacarta-video"><a href="http://..." title="...">Murcianos por el mundo: Cracovia</a> · 12/05/2010 · (5411 veces visto)</dt>
    <dd style="height:100%; overflow:hidden">
    <a href="http://www.7rm.es/servlet/rtrm.servlets.ServletLink2?METHOD=DETALLEALACARTA&amp;sit=c,6,ofs,10&amp;serv=BlogPortal2&amp;orden=1&amp;idCarta=40&amp;mId=4182&amp;autostart=TV" title="Ver v&iacute;deo">
    <img src="http://mediateca.regmurcia.com/MediatecaCRM/ServletLink?METHOD=MEDIATECA&amp;accion=imagen&amp;id=4182" alt="Murcianos por el mundo: Cracovia" title="Murcianos por el mundo: Cracovia" style="width:95px" />
    </a>
    Esta semana nos desplazamos al sur de Polonia, a Cracovia y Wroclaw, para conocer cómo viven seis murcianos en una de las ciudades más importantes de Polonia y Patrimonio de la Humanidad.
    <a href="http://ficheros.7rm.es:3025/Video/4/1/4182_BAJA.mp4">
    <img src="/images/bajarArchivo.gif" alt="Descargar Archivo" title="Descargar Archivo" style="margin:0;padding:0 5px 0 0;vertical-align:middle;border:none" />
    </a>
    </dd>
    '''

    '''
    <dt class="alacarta-video"><a href="http://www.7rm.es/servlet/rtrm.servlets.ServletLink2?METHOD=DETALLEALACARTA&amp;sit=c,6,ofs,0&amp;serv=BlogPortal2&amp;orden=2&amp;idCarta=36&amp;mId=3214&amp;autostart=TV" title="Ver v&iacute;deo">De la tierra al mar</a> · 22/12/2009 · (1072 veces visto)</dt>
    <dd style="height:100%; overflow:hidden">
    <a href="http://www.7rm.es/servlet/rtrm.servlets.ServletLink2?METHOD=DETALLEALACARTA&amp;sit=c,6,ofs,0&amp;serv=BlogPortal2&amp;orden=2&amp;idCarta=36&amp;mId=3214&amp;autostart=TV" title="Ver v&iacute;deo">
    <img src="http://mediateca.regmurcia.com/MediatecaCRM/ServletLink?METHOD=MEDIATECA&amp;accion=imagen&amp;id=3214" alt="De la tierra al mar" title="De la tierra al mar" style="width:95px" />
    </a>
    En este programa conocemos a Plácido, joven agricultor que nos mostrará la mala situación en que se encuentra el sector, informamos de la campaña 'Dale vida a tu árbol', asistimos a la presentación del libro 'Gestión ambiental. Guía fácil para empresas y profesionales', y nos hacemos eco del malestar de nuestros agricultores con la nueva normativa europea en materia de fitosanitarios, que entrará en vigor en junio de 2011.
    <a href="http://ficheros.7rm.es:3025/Video/3/2/3214_BAJA.mp4">
    <img src="/images/bajarArchivo.gif" alt="Descargar Archivo" title="Descargar Archivo" style="margin:0;padding:0 5px 0 0;vertical-align:middle;border:none" />
    </a>
    </dd>
    '''

    # Groups, in order: detail page link, title, air date, thumbnail,
    # synopsis, direct file link
    video_pattern = (
        r'<dt class="alacarta-video"><a href="([^"]+)" title="[^"]+">([^<]+)</a>.*?([0-9\/]+).*?</dt>[^<]+'
        r'<dd style="[^<]+">[^<]+'
        r'<a href="[^"]+" title="[^"]+">[^<]+'
        r'<img src="([^"]+)"[^<]+'
        r'</a>([^<]+)<a href="([^"]+)">'
    )
    found_videos = re.compile(video_pattern,re.DOTALL).findall(data)
    #scrapertools.printMatches(found_videos)

    itemlist = []
    for page_href, name, date_text, image_src, synopsis, file_href in found_videos:
        # Video attributes
        video_title = to_utf8( name.strip()+" ("+date_text+")" )
        file_url = absolute(file_href)
        video_thumbnail = absolute(image_src)
        video_plot = to_utf8( synopsis.strip() )
        page_url = absolute(page_href)
        if DEBUG:
            # The direct file link only shows up in this log line; the Item
            # below points at the detail page instead
            logger.info("title=["+video_title+"], url=["+file_url+"], page=["+page_url+"], thumbnail=["+video_thumbnail+"]")

        # The title carries the date, so the air date is parsed from it
        aired_date = scrapertools.parse_date(video_title)
        #logger.info("aired_date="+aired_date)

        # Add the video to the listing
        video_item = Item(
            channel=CHANNELNAME,
            title=video_title,
            action="play",
            server="sieterm",
            url=page_url,
            thumbnail=video_thumbnail,
            fanart=video_thumbnail,
            plot=video_plot,
            show=item.show,
            page=page_url,
            aired_date=aired_date,
            folder=False,
        )
        itemlist.append(video_item)

    # Look for the next page; nothing more to do when there is none
    next_page_url = scrapertools.find_single_match(data,'<a class="list-siguientes" href="([^"]+)" title="Ver siguientes archivos">')
    if next_page_url=="":
        return itemlist

    next_page_url = urlparse.urljoin(item.url,next_page_url)
    next_page_item = Item(
        channel=CHANNELNAME,
        title=">> Página siguiente",
        action="episodios",
        url=next_page_url,
        show=item.show,
        folder=True,
    )

    if not load_all_pages:
        # Let the user navigate to the next page on demand
        itemlist.append(next_page_item)
    else:
        # Pull in the remaining pages right away
        itemlist.extend(episodios(next_page_item,load_all_pages))

    return itemlist