def get_video_url(page_url, premium=False, user="", password="", video_password="", page_data=""):
    logger.info("tvalacarta.servers.adn40 get_video_url(page_url='%s')" % page_url)

    ydl = youtube_dl.YoutubeDL({'outtmpl': u'%(id)s%(ext)s'})
    result = ydl.extract_info(page_url, download=False)
    logger.info("tvalacarta.servers.adn40 get_video_url result=" + repr(result))

    video_urls = []
    if "ext" in result and "url" in result:
        video_urls.append(["[adn40]", scrapertools.safe_unicode(result['url']).encode('utf-8')])
    elif "entries" in result:
        for entry in result["entries"]:
            logger.info("entry=" + repr(entry))
            video_urls.append([
                scrapertools.safe_unicode(entry["ext"]).encode('utf-8') + " [adn40]",
                scrapertools.safe_unicode(entry['url']).encode('utf-8')
            ])

    return video_urls
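# All of the get_video_url() variants in this section share one contract: they
# return a list of [label, media_url] pairs (the mitele variant extends each
# entry with extra fields). A minimal caller sketch; the URL and play_url()
# are hypothetical and only illustrate the shape of the result:
#
#   video_urls = get_video_url("http://example.site/some-video")
#   for label, media_url in video_urls:
#       play_url(media_url)  # hypothetical player entry point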
def get_video_url(page_url, premium=False, user="", password="", video_password="", page_data=""):
    logger.info("tvalacarta.server.populartvcantabria get_video_url page_url=" + page_url)

    data = scrapertools.cache_page(page_url)
    video_id = scrapertools.find_single_match(data, 'data-video_id="([^"]+)"')
    logger.info("tvalacarta.server.populartvcantabria video_id=" + video_id)

    youtube_url = "https://www.youtube.com/watch?v=" + video_id
    logger.info("tvalacarta.server.populartvcantabria youtube_url=" + youtube_url)

    ydl = youtube_dl.YoutubeDL({'outtmpl': u'%(id)s%(ext)s'})
    result = ydl.extract_info(youtube_url, download=False)

    video_urls = []
    if 'formats' in result:
        for entry in result['formats']:
            logger.info("entry=" + repr(entry))
            if 'http' in entry['protocol']:
                video_urls.append([
                    scrapertools.safe_unicode(entry['format']).encode('utf-8'),
                    scrapertools.safe_unicode(entry['url']).encode('utf-8')
                ])

    # Highest quality first
    video_urls.reverse()

    return video_urls
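# scrapertools.find_single_match() is used throughout these extractors; a
# minimal sketch of its assumed behavior (the real helper lives in
# core/scrapertools.py and may differ in detail):
import re

def find_single_match(data, pattern):
    # Return the first captured group, or "" when nothing matches
    match = re.search(pattern, data, re.DOTALL)
    return match.group(1) if match else ""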
def get_video_url(page_url, premium=False, user="", password="", video_password="", page_data=""):
    logger.info("tvalacarta.server.rtve get_video_url page_url=" + page_url)

    video_urls = []

    ydl = youtube_dl.YoutubeDL({"outtmpl": u"%(id)s%(ext)s"})
    result = ydl.extract_info(page_url, download=False)
    logger.info("tvalacarta.server.rtve get_video_url result=" + repr(result))

    if "ext" in result and "url" in result:
        video_urls.append(["(opción 1) [rtve]", scrapertools.safe_unicode(result["url"]).encode("utf-8")])
    elif "entries" in result:
        for entry in result["entries"]:
            video_urls.append(["(opción 1) [rtve]", scrapertools.safe_unicode(entry["url"]).encode("utf-8")])

    # Fallback extractor
    try:
        alternate_video_urls = descargavideos.get_video_url(page_url)
        for video_url in alternate_video_urls:
            video_urls.append(["(opción 2) [rtve]", video_url[1]])
    except:
        import traceback
        logger.info("tvalacarta.server.rtve get_video_url " + traceback.format_exc())

    return video_urls
def get_video_url(page_url, premium=False, user="", password="", video_password="", page_data=""):
    logger.info("tvalacarta.server.rtvcm get_video_url page_url=" + page_url)

    data = scrapertools.cache_page(page_url)

    # <script src="http://cdnapi.kaltura.com/p/2288691/sp/228869100/embedIframeJs/uiconf_id/39784151/partner_id/2288691?autoembed=true&playerId=kaltura_player_1496914486&entry_id=0_3e1eijre&flashvars[streamerType]=auto&width=640&height=360&flashvars[streamerType]=auto"></script> </div>
    partner_id = scrapertools.find_single_match(data, '<script src="http://cdnapi.kaltura.com/p/\d+/sp/\d+/embedIframeJs/uiconf_id/\d+/partner_id/(\d+)')
    logger.info("tvalacarta.server.rtvcm get_video_url partner_id=" + partner_id)

    video_id = scrapertools.find_single_match(data, '<script src="http://cdnapi.kaltura.com/p/\d+/sp/\d+/embedIframeJs/uiconf_id/\d+/partner_id/\d+.autoembed=true&playerId=kaltura_player_\d+&entry_id=([^\&]+)\&')
    logger.info("tvalacarta.server.rtvcm get_video_url video_id=" + video_id)

    media_url = "kaltura:" + partner_id + ":" + video_id
    logger.info("tvalacarta.server.rtvcm get_video_url media_url=" + media_url)

    ydl = youtube_dl.YoutubeDL({'outtmpl': u'%(id)s%(ext)s'})
    result = ydl.extract_info(media_url, download=False)
    logger.info("tvalacarta.server.rtvcm get_video_url result=" + repr(result))

    video_urls = []
    if "ext" in result and "url" in result:
        video_urls.append(["[rtvcm]", scrapertools.safe_unicode(result['url']).encode('utf-8') + "|User-Agent=Mozilla/5.0"])
    elif "entries" in result:
        for entry in result["entries"]:
            video_urls.append(["[rtvcm]", scrapertools.safe_unicode(entry['url']).encode('utf-8') + "|User-Agent=Mozilla/5.0"])

    return video_urls
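# youtube-dl resolves Kaltura media through "kaltura:<partner_id>:<entry_id>"
# pseudo-URLs, which is why the extractor above only needs to scrape the two
# IDs out of the page. A standalone sketch, reusing the IDs from the sample
# <script> tag above (network access is needed for the actual lookup):
import youtube_dl

ydl = youtube_dl.YoutubeDL({'outtmpl': u'%(id)s%(ext)s'})
info = ydl.extract_info("kaltura:2288691:0_3e1eijre", download=False)
print(info.get("url"))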
def get_video_url(page_url, premium=False, user="", password="", video_password="", page_data=""):
    logger.info("tvalacarta.server.navarratv get_video_url page_url=" + page_url)

    ydl = youtube_dl.YoutubeDL({'outtmpl': u'%(id)s%(ext)s'})
    result = ydl.extract_info(page_url, download=False)

    video_urls = []
    if 'formats' in result:
        for entry in result['formats']:
            logger.info("entry=" + repr(entry))
            if 'http' in entry['protocol']:
                video_urls.append([
                    scrapertools.safe_unicode(entry['format']).encode('utf-8'),
                    scrapertools.safe_unicode(entry['url']).encode('utf-8')
                ])

    # Highest quality first
    video_urls.reverse()

    return video_urls
def get_video_url(page_url, premium=False, user="", password="", video_password="", page_data=""):
    logger.info("tvalacarta.servers.youtube get_video_url page_url=" + page_url)

    if not page_url.startswith("http"):
        page_url = "http://www.youtube.com/watch?v=" + page_url

    ydl = youtube_dl.YoutubeDL({'outtmpl': u'%(id)s%(ext)s'})
    result = ydl.extract_info(page_url, download=False)

    video_urls = []
    if 'formats' in result:
        for entry in result['formats']:
            logger.info("entry=" + repr(entry))
            video_urls.append([
                scrapertools.safe_unicode(entry['format']).encode('utf-8'),
                scrapertools.safe_unicode(entry['url']).encode('utf-8')
            ])

    # Highest quality first
    video_urls.reverse()

    return video_urls
def get_video_url(page_url, premium=False, user="", password="", video_password="", page_data=""):
    logger.info("tvalacarta.server.rtvcm get_video_url page_url=" + page_url)

    data = scrapertools.cache_page(page_url)

    partner_id = scrapertools.find_single_match(data, '<meta property="og:image" content="https://www.kaltura.com/p/(\d+)/sp/\d+/thumbnail')
    logger.info("tvalacarta.server.rtvcm get_video_url partner_id=" + partner_id)

    video_id = scrapertools.find_single_match(data, '<meta property="og:image" content="https://www.kaltura.com/p/\d+/sp/\d+/thumbnail/entry_id/([^/]+)')
    logger.info("tvalacarta.server.rtvcm get_video_url video_id=" + video_id)

    media_url = "kaltura:" + partner_id + ":" + video_id
    logger.info("tvalacarta.server.rtvcm get_video_url media_url=" + media_url)

    ydl = youtube_dl.YoutubeDL({'outtmpl': u'%(id)s%(ext)s'})
    result = ydl.extract_info(media_url, download=False)
    logger.info("tvalacarta.server.rtvcm get_video_url result=" + repr(result))

    video_urls = []
    if "ext" in result and "url" in result:
        video_urls.append(["[rtvcm]", scrapertools.safe_unicode(result['url']).encode('utf-8') + "|User-Agent=Mozilla/5.0"])
    elif "entries" in result:
        for entry in result["entries"]:
            video_urls.append(["[rtvcm]", scrapertools.safe_unicode(entry['url']).encode('utf-8') + "|User-Agent=Mozilla/5.0"])

    return video_urls
def get_video_url(page_url, premium=False, user="", password="", video_password=""):
    logger.info("tvalacarta.servers.telemadrid get_video_url(page_url='%s')" % page_url)

    # <video id="5c04f64ee88f4" data-video-id="5971764264001" data-account="104403117001"
    #        data-player="SkevQBitbl" data-embed="default" class="video-js" controls></video>
    # -> http://players.brightcove.net/104403117001/SkevQBitbl_default/index.html?videoId=5971764264001
    data = scrapertools.cache_page(page_url)

    account = scrapertools.find_single_match(data, 'data-account="([^"]+)"')
    logger.info("account=" + account)

    player = scrapertools.find_single_match(data, 'data-player="([^"]+)"')
    logger.info("player=" + player)

    video_id = scrapertools.find_single_match(data, 'data-video-id="([^"]+)"')
    logger.info("video_id=" + video_id)

    api_url = "http://players.brightcove.net/" + account + "/" + player + "_default/index.html?videoId=" + video_id

    ydl = youtube_dl.YoutubeDL({'outtmpl': u'%(id)s%(ext)s'})
    result = ydl.extract_info(api_url, download=False)
    logger.info("tvalacarta.servers.telemadrid get_video_url result=" + repr(result))

    video_urls = []
    if "ext" in result and "url" in result:
        video_urls.append(["(.m3u8)", scrapertools.safe_unicode(result['url']).encode('utf-8')])
    elif "entries" in result:
        for entry in result["entries"]:
            video_urls.append(["(.m3u8)", scrapertools.safe_unicode(entry['url']).encode('utf-8')])

    for video_url in video_urls:
        logger.info("tvalacarta.servers.telemadrid %s - %s" % (video_url[0], video_url[1]))

    return video_urls
def get_video_url(page_url, premium=False, user="", password="", video_password="", page_data=""):
    logger.info("tvalacarta.servers.beteve get_video_url(page_url='%s')" % page_url)

    data = scrapertools.cache_page(page_url)

    # <meta property="og:image" content="https://cdnsecakmi.kaltura.com/p/2346171/thumbnail/entry_id/1_vfk43038/height/weight/76kXI.jpg" />
    thumb = scrapertools.find_single_match(data, '<meta property="og:image" content="([^"]+)"')
    p = scrapertools.find_single_match(thumb, "p\/([^\/]+)\/")
    entry_id = scrapertools.find_single_match(thumb, "entry_id\/([^\/]+)\/")
    url = "kaltura:" + p + ":" + entry_id

    ydl = youtube_dl.YoutubeDL({'outtmpl': u'%(id)s%(ext)s'})
    # Extract from the kaltura pseudo-URL built above
    result = ydl.extract_info(url, download=False)
    logger.info("tvalacarta.servers.beteve get_video_url result=" + repr(result))

    video_urls = []
    try:
        if "ext" in result and "url" in result:
            video_urls.append(["(.m3u8)", scrapertools.safe_unicode(result['url']).encode('utf-8') + "|User-Agent=Mozilla/5.0"])
        elif "entries" in result:
            for entry in result["entries"]:
                video_urls.append(["(.m3u8)", scrapertools.safe_unicode(entry['url']).encode('utf-8') + "|User-Agent=Mozilla/5.0"])
    except:
        import traceback
        logger.info("tvalacarta.servers.beteve get_video_url " + traceback.format_exc())

    for video_url in video_urls:
        logger.info("tvalacarta.servers.beteve %s - %s" % (video_url[0], video_url[1]))

    return video_urls
def episodios(item, load_all_pages=True):
    logger.info("tvalacarta.channels.navarratv episodios")
    itemlist = []

    # Download the page
    data = scrapertools.cache_page(item.url)

    patron = '<div class="Programa"[^<]+'
    patron += '<a href="([^"]+)"[^<]+'
    patron += '<img src="([^"]+)" alt="([^"]+)"'
    matches = re.compile(patron, re.DOTALL).findall(data)

    for scrapedurl, scrapedthumbnail, scrapedtitle in matches:
        thumbnail = urlparse.urljoin(item.url, scrapedthumbnail)
        yt_id = scrapertools.find_single_match(scrapedurl, "p=([A-Za-z0-9_\-]+)")
        url = "https://www.youtube.com/watch?v=" + yt_id
        title = scrapertools.safe_unicode(scrapedtitle).encode("utf-8").strip()
        plot = ""
        itemlist.append(Item(channel=__channel__, action="play", server="youtube", title=title, url=url,
                             thumbnail=thumbnail, fanart=thumbnail, show=item.show, plot=plot, folder=True))

    return itemlist
def get_video_url(page_url, premium=False, user="", password="", video_password="", page_data=""):
    logger.info("tvalacarta.servers.azteca get_video_url page_url=" + page_url)

    data = scrapertools.cache_page(page_url)
    page_url = scrapertools.find_single_match(data, '<meta property="og:video" content="([^"]+)"/>')
    logger.info("tvalacarta.servers.azteca get_video_url page_url=" + page_url)

    ydl = youtube_dl.YoutubeDL({'outtmpl': u'%(id)s%(ext)s'})
    result = ydl.extract_info(page_url, download=False)

    video_urls = []
    if 'formats' in result:
        for entry in result['formats']:
            logger.info("entry=" + repr(entry))
            '''
            Sample entry:
            {u'http_headers': {u'Accept-Charset': u'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
                               u'Accept-Language': u'en-us,en;q=0.5',
                               u'Accept-Encoding': u'gzip, deflate',
                               u'Accept': u'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                               u'User-Agent': u'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/44.0 (Chrome)'},
             u'protocol': u'm3u8_native',
             u'format': u'hls-582 - 640x360',
             u'url': u'http://tvazvod-i.akamaihd.net/i/p/459791/sp/45979100/serveFlavor/entryId/0_pcso1cnx/v/2/flavorId/0_qx9uhlyx/index_0_av.m3u8',
             u'tbr': 582, u'height': 360, u'width': 640, u'ext': u'mp4',
             u'preference': None, u'format_id': u'hls-582'}
            '''
            video_urls.append([
                scrapertools.safe_unicode(entry['format']).encode('utf-8'),
                scrapertools.safe_unicode(entry['url']).encode('utf-8')
            ])

    # Highest quality first
    video_urls.reverse()

    return video_urls
def episodios(item, load_all_pages=True):
    logger.info("tvalacarta.channels.navarratv episodios")
    itemlist = []

    # Download the page
    data = scrapertools.cache_page(item.url)

    """
    <div class="Bloque2Noticias">
      <div class="ImpactoBloque W50 H120 FranjaRoja ">
        <div class="ImpactoBloqueImagen W98" style="height: 150px;">
          <div class="ImpactoContenedorImagen" style="height: 150px; cursor: pointer; background-image: url('https://i.ytimg.com/vi/RXBDpg7oduk/mqdefault.jpg');" onclick="location.href='/AlaCarta/92C813D7-1676-E17B-1AB1E57E2065947C/yt/RXBDpg7oduk/IMPLICADOS-18-DE-JUNIO-DE-2016';"/></div>
        </div>
        <div class="ImpactoBloqueContenido W98">
          <h2><a href="/AlaCarta/92C813D7-1676-E17B-1AB1E57E2065947C/yt/RXBDpg7oduk/IMPLICADOS-18-DE-JUNIO-DE-2016" class="TextoNeutro">IMPLICADOS 18 DE JUNIO DE 2016</a></h2>
          <p>IMPLICADOS 18 DE JUNIO DE 2016</p>
        </div>
      </div>
      <div class="W3"></div>
    """
    patron = '<div class="Bloque2Noticias"[^<]+'
    patron += '<div class="ImpactoBloque W50 H120 FranjaRoja[^<]+'
    patron += '<div class="ImpactoBloqueImagen[^<]+'
    patron += "<div class=\"ImpactoContenedorImagen\".*?url\('([^']+)'\)[^<]+</div[^<]+"
    patron += "</div[^<]+"
    patron += '<div class="ImpactoBloqueContenido[^<]+'
    patron += '<h2><a href="([^"]+)" class="TextoNeutro">([^<]+)</a></h2[^<]+'
    patron += "<p>([^<]*)</p>"
    matches = re.compile(patron, re.DOTALL).findall(data)

    for scrapedthumbnail, scrapedurl, scrapedtitle, scrapedplot in matches:
        thumbnail = urlparse.urljoin(item.url, scrapedthumbnail)
        yt_id = scrapertools.find_single_match(scrapedurl, "/yt/([^/]+)/")
        url = "https://www.youtube.com/watch?v=" + yt_id
        title = scrapertools.safe_unicode(scrapedtitle).encode("utf-8").strip()
        plot = ""
        itemlist.append(Item(channel=__channel__, action="play", server="youtube", title=title, url=url,
                             thumbnail=thumbnail, fanart=thumbnail, show=item.show, plot=plot, folder=True))

    return itemlist
def get_video_url(page_url, premium=False, user="", password="", video_password="", page_data=""):
    logger.info("tvalacarta.server.rtve get_video_url page_url=" + page_url)

    ydl = youtube_dl.YoutubeDL({'outtmpl': u'%(id)s%(ext)s'})
    result = ydl.extract_info(page_url, download=False)
    logger.info("tvalacarta.server.rtve get_video_url result=" + repr(result))

    video_urls = []
    if "ext" in result and "url" in result:
        video_urls.append([
            "." + scrapertools.safe_unicode(result['ext']).encode('utf-8') + " [rtve]",
            scrapertools.safe_unicode(result['url']).encode('utf-8')
        ])
    elif "entries" in result:
        for entry in result["entries"]:
            video_urls.append([
                "." + scrapertools.safe_unicode(entry['ext']).encode('utf-8') + " [rtve]",
                scrapertools.safe_unicode(entry['url']).encode('utf-8')
            ])

    return video_urls
def episodios(item, load_all_pages=True):
    logger.info("tvalacarta.channels.navarratv episodios")
    itemlist = []

    # Download the page
    data = scrapertools.cache_page(item.url)

    '''
    <div class="Bloque2Noticias">
      <div class="ImpactoBloque W50 H120 FranjaRoja ">
        <div class="ImpactoBloqueImagen W98" style="height: 150px;">
          <div class="ImpactoContenedorImagen" style="height: 150px; cursor: pointer; background-image: url('https://i.ytimg.com/vi/RXBDpg7oduk/mqdefault.jpg');" onclick="location.href='/AlaCarta/92C813D7-1676-E17B-1AB1E57E2065947C/yt/RXBDpg7oduk/IMPLICADOS-18-DE-JUNIO-DE-2016';"/></div>
        </div>
        <div class="ImpactoBloqueContenido W98">
          <h2><a href="/AlaCarta/92C813D7-1676-E17B-1AB1E57E2065947C/yt/RXBDpg7oduk/IMPLICADOS-18-DE-JUNIO-DE-2016" class="TextoNeutro">IMPLICADOS 18 DE JUNIO DE 2016</a></h2>
          <p>IMPLICADOS 18 DE JUNIO DE 2016</p>
        </div>
      </div>
      <div class="W3"></div>
    '''
    patron = '<div class="Bloque2Noticias"[^<]+'
    patron += '<div class="ImpactoBloque W50 H120 FranjaRoja[^<]+'
    patron += '<div class="ImpactoBloqueImagen[^<]+'
    patron += "<div class=\"ImpactoContenedorImagen\".*?url\('([^']+)'\)[^<]+</div[^<]+"
    patron += '</div[^<]+'
    patron += '<div class="ImpactoBloqueContenido[^<]+'
    patron += '<h2><a href="([^"]+)" class="TextoNeutro">([^<]+)</a></h2[^<]+'
    patron += '<p>([^<]*)</p>'
    matches = re.compile(patron, re.DOTALL).findall(data)

    for scrapedthumbnail, scrapedurl, scrapedtitle, scrapedplot in matches:
        thumbnail = urlparse.urljoin(item.url, scrapedthumbnail)
        yt_id = scrapertools.find_single_match(scrapedurl, "/yt/([^/]+)/")
        url = "https://www.youtube.com/watch?v=" + yt_id
        title = scrapertools.safe_unicode(scrapedtitle).encode("utf-8").strip()
        plot = scrapedplot.strip()
        aired_date = scrapertools.parse_date(title)
        itemlist.append(Item(channel=__channel__, action="play", server="navarratv", title=title, url=url,
                             thumbnail=thumbnail, fanart=thumbnail, show=item.show, aired_date=aired_date,
                             plot=plot, folder=False))

    return itemlist
def get_video_url(page_url, premium=False, user="", password="", video_password="", page_data=""):
    logger.info("tvalacarta.server.rtve get_video_url page_url=" + page_url)

    video_urls = []

    ydl = youtube_dl.YoutubeDL({'outtmpl': u'%(id)s%(ext)s'})
    result = ydl.extract_info(page_url, download=False)
    logger.info("tvalacarta.server.rtve get_video_url result=" + repr(result))

    if "ext" in result and "url" in result:
        video_urls.append(["(opción 1) [rtve]", scrapertools.safe_unicode(result['url']).encode('utf-8') + "|User-Agent=Mozilla/5.0"])
    elif "entries" in result:
        for entry in result["entries"]:
            video_urls.append(["(opción 1) [rtve]", scrapertools.safe_unicode(entry['url']).encode('utf-8') + "|User-Agent=Mozilla/5.0"])

    # Fallback extractor
    try:
        alternate_video_urls = descargavideos.get_video_url(page_url)
        for video_url in alternate_video_urls:
            video_urls.append(["(opción 2) [rtve]", video_url[1] + "|User-Agent=Mozilla/5.0"])
    except:
        import traceback
        logger.info("tvalacarta.server.rtve get_video_url " + traceback.format_exc())

    return video_urls
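# Several extractors append "|User-Agent=Mozilla/5.0" to the media URL, the
# Kodi-style convention for smuggling HTTP headers alongside a URL. A minimal
# sketch of how a player side could split them back out (split_headers is a
# hypothetical helper, not part of tvalacarta):
def split_headers(media_url):
    # "http://host/video.m3u8|User-Agent=Mozilla/5.0" -> (url, {header: value})
    if "|" not in media_url:
        return media_url, {}
    url, raw_headers = media_url.split("|", 1)
    headers = dict(pair.split("=", 1) for pair in raw_headers.split("&"))
    return url, headers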
def get_video_url(page_url, premium=False, user="", password="", video_password=""):
    logger.info("tvalacarta.servers.telemadrid get_video_url(page_url='%s')" % page_url)

    data = scrapertools.cache_page(page_url)
    iframe = scrapertools.find_single_match(data, '<iframe id="miVideotm"[^<]+</iframe')
    media_url = scrapertools.find_single_match(iframe, 'src="([^"]+)"')
    # The iframe src is protocol-relative; prepend the scheme
    media_url = "http:" + media_url

    ydl = youtube_dl.YoutubeDL({'outtmpl': u'%(id)s%(ext)s'})
    result = ydl.extract_info(media_url, download=False)
    logger.info("tvalacarta.servers.telemadrid get_video_url result=" + repr(result))

    video_urls = []
    if "ext" in result and "url" in result:
        video_urls.append(["[telemadrid]", scrapertools.safe_unicode(result['url']).encode('utf-8')])
    elif "entries" in result:
        for entry in result["entries"]:
            video_urls.append(["[telemadrid]", scrapertools.safe_unicode(entry['url']).encode('utf-8')])

    for video_url in video_urls:
        logger.info("tvalacarta.servers.telemadrid %s - %s" % (video_url[0], video_url[1]))

    return video_urls
def get_video_url(page_url, premium=False, user="", password="", video_password=""):
    logger.info("[mitele.py] get_video_url(page_url='%s')" % page_url)

    video_urls = []

    ydl = youtube_dl.YoutubeDL({'outtmpl': u'%(id)s%(ext)s'})
    result = ydl.extract_info(page_url, download=False)
    logger.info("tvalacarta.server.mitele get_video_url result=" + repr(result))

    for entries in result["formats"]:
        if entries["ext"] != "rtmp":
            video_url = scrapertools.safe_unicode(entries['url']).encode('utf-8')
            video_url = video_url.replace("http://ignore.mediaset.es", "http://miteleooyala-a.akamaihd.net")

            if entries["ext"] != "mp4":
                title = scrapertools.safe_unicode(entries["format"]).encode('utf-8')
            elif "vbr" in entries:
                title = "mp4-" + scrapertools.safe_unicode(str(entries["vbr"])).encode('utf-8') + " " + scrapertools.safe_unicode(entries["format"]).encode('utf-8').rsplit("-", 1)[1]
            else:
                title = scrapertools.safe_unicode(entries["format"]).encode('utf-8')

            try:
                calidad = int(scrapertools.safe_unicode(str(entries["vbr"])))
            except:
                try:
                    calidad = int(title.split("-")[1].strip())
                except:
                    calidad = 3000

            video_urls.append(["%s" % title, video_url, 0, False, calidad])

    # Highest quality first
    video_urls.sort(key=lambda v: v[4], reverse=True)

    for url in video_urls:
        logger.info("[mitele.py] %s - %s" % (url[0], url[1]))

    return video_urls
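# The mitele extractor above appends a fifth "calidad" (quality) element to
# every entry purely so candidates can be ordered best-first before playback.
# The ordering step in isolation, with made-up sample data:
entries = [["mp4-600", "http://example.net/low.mp4", 0, False, 600],
           ["mp4-1200", "http://example.net/high.mp4", 0, False, 1200]]
entries.sort(key=lambda e: e[4], reverse=True)  # highest bitrate first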
def get_video_url(page_url, premium=False, user="", password="", video_password="", page_data=""):
    logger.info("[mitele.py] get_video_url(page_url='%s')" % page_url)

    ydl = youtube_dl.YoutubeDL({'outtmpl': u'%(id)s%(ext)s'})
    result = ydl.extract_info(page_url, download=False)

    video_urls = []
    if 'formats' in result:
        for entry in result['formats']:
            logger.info("entry=" + repr(entry))
            if 'hls' in entry['format']:
                video_urls.append([
                    scrapertools.safe_unicode(entry['format']).encode('utf-8'),
                    scrapertools.safe_unicode(entry['url']).encode('utf-8')
                ])

    # Highest quality first
    video_urls.reverse()

    return video_urls
def get_video_url(page_url, premium=False, user="", password="", video_password="", page_data=""):
    logger.info("tvalacarta.server.rtvcm get_video_url page_url=" + page_url)

    data = scrapertools.cache_page(page_url)

    # <iframe id="flumotion_iframe_player" name="flumotion_iframe_player" src="http://cdnapi.kaltura.com/p/2288691/sp/228869100/embedIframeJs/uiconf_id/39784151/partner_id/2288691?iframeembed=true&playerId=kaltura_player_1496914486&entry_id=0_7nkaf0ce&flashvars[streamerType]=auto"
    media_url = scrapertools.find_single_match(data, '<iframe id="flumotion_iframe_player" name="flumotion_iframe_player" src="([^"]+)"')

    video_urls = []

    ydl = youtube_dl.YoutubeDL({'outtmpl': u'%(id)s%(ext)s'})
    result = ydl.extract_info(media_url, download=False)
    logger.info("tvalacarta.server.rtvcm get_video_url result=" + repr(result))

    if "ext" in result and "url" in result:
        video_urls.append(["[rtvcm]", scrapertools.safe_unicode(result['url']).encode('utf-8') + "|User-Agent=Mozilla/5.0"])
    elif "entries" in result:
        for entry in result["entries"]:
            video_urls.append(["[rtvcm]", scrapertools.safe_unicode(entry['url']).encode('utf-8') + "|User-Agent=Mozilla/5.0"])

    return video_urls
def get_video_url(page_url, premium=False, user="", password="", video_password=""):
    logger.info("tvalacarta.servers.telemundo get_video_url page_url=" + page_url)

    ydl = youtube_dl.YoutubeDL({'outtmpl': u'%(id)s%(ext)s'})
    result = ydl.extract_info(page_url, download=False)
    logger.info("tvalacarta.servers.telemundo get_video_url result=" + repr(result))

    video_urls = []
    if "ext" in result and "url" in result:
        video_urls.append(["[telemundo]", scrapertools.safe_unicode(result['url']).encode('utf-8')])
    elif "entries" in result:
        for entry in result["entries"]:
            video_urls.append(["[telemundo]", scrapertools.safe_unicode(entry['url']).encode('utf-8')])

    for video_url in video_urls:
        logger.info("tvalacarta.servers.telemundo %s - %s" % (video_url[0], video_url[1]))

    return video_urls
def programas(item):
    logger.info("tvalacarta.channels.navarratv programas")
    itemlist = []

    # Download the page
    data = scrapertools.cache_page(item.url)

    """
    <div class="ImpactoBloque W50 H120 FranjaRoja Noticia2Col">
      <div class="ImpactoBloqueImagen W98" style="height: 150px;">
        <div class="ImpactoContenedorImagen" style="height: 150px; cursor: pointer; background-image: url('http://i.natv.es/imagenes/A82E542A-FD3D-485F-C933A8E7C294C438.JPG');" onclick="location.href='/AlaCarta/92C813D7-1676-E17B-1AB1E57E2065947C/Implicados';"/></div>
      </div>
      <div class="ImpactoBloqueContenido W98">
        <h2><a href="/AlaCarta/92C813D7-1676-E17B-1AB1E57E2065947C/Implicados" class="TextoNeutro">Implicados</a></h2>
        <p>Programa semanal que presenta el periodista Alejandro Palacios y que sirve para dar a conocer la labor de las personas que se implican por lograr una sociedad mejor.</p>
      </div>
    </div>
    """
    patron = '<div class="ImpactoBloque W50 H120 FranjaRoja[^<]+'
    patron += '<div class="ImpactoBloqueImagen[^<]+'
    patron += "<div class=\"ImpactoContenedorImagen\".*?url\('([^']+)'\)[^<]+</div[^<]+"
    patron += "</div[^<]+"
    patron += '<div class="ImpactoBloqueContenido[^<]+'
    patron += '<h2><a href="([^"]+)" class="TextoNeutro">([^<]+)</a></h2[^<]+'
    patron += "<p>([^<]*)</p>"
    matches = re.compile(patron, re.DOTALL).findall(data)

    for scrapedthumbnail, scrapedurl, scrapedtitle, scrapedplot in matches:
        thumbnail = urlparse.urljoin(item.url, scrapedthumbnail)
        url = urlparse.urljoin(item.url, scrapedurl)
        title = scrapertools.safe_unicode(scrapedtitle).encode("utf-8").strip()
        plot = scrapedplot.strip()
        itemlist.append(Item(channel=__channel__, action="episodios", title=title, show=title, url=url,
                             thumbnail=thumbnail, fanart=thumbnail, plot=plot, folder=True))

    return itemlist
def programas(item):
    logger.info("tvalacarta.channels.navarratv programas")
    itemlist = []

    item.url = "http://www.natv.es/Alacarta"
    item.view = "programs"

    # Download the page
    data = scrapertools.cache_page(item.url)

    '''
    <div class="ImpactoBloque W50 H120 FranjaRoja Noticia2Col">
      <div class="ImpactoBloqueImagen W98" style="height: 150px;">
        <div class="ImpactoContenedorImagen" style="height: 150px; cursor: pointer; background-image: url('http://i.natv.es/imagenes/A82E542A-FD3D-485F-C933A8E7C294C438.JPG');" onclick="location.href='/AlaCarta/92C813D7-1676-E17B-1AB1E57E2065947C/Implicados';"/></div>
      </div>
      <div class="ImpactoBloqueContenido W98">
        <h2><a href="/AlaCarta/92C813D7-1676-E17B-1AB1E57E2065947C/Implicados" class="TextoNeutro">Implicados</a></h2>
        <p>Programa semanal que presenta el periodista Alejandro Palacios y que sirve para dar a conocer la labor de las personas que se implican por lograr una sociedad mejor.</p>
      </div>
    </div>
    '''
    patron = '<div class="ImpactoBloque W50 H120 FranjaRoja[^<]+'
    patron += '<div class="ImpactoBloqueImagen[^<]+'
    patron += "<div class=\"ImpactoContenedorImagen\".*?url\('([^']+)'\)[^<]+</div[^<]+"
    patron += '</div[^<]+'
    patron += '<div class="ImpactoBloqueContenido[^<]+'
    patron += '<h2><a href="([^"]+)" class="TextoNeutro">([^<]+)</a></h2[^<]+'
    patron += '<p>([^<]*)</p>'
    matches = re.compile(patron, re.DOTALL).findall(data)

    for scrapedthumbnail, scrapedurl, scrapedtitle, scrapedplot in matches:
        thumbnail = urlparse.urljoin(item.url, scrapedthumbnail)
        url = urlparse.urljoin(item.url, scrapedurl)
        title = scrapertools.safe_unicode(scrapedtitle).encode("utf-8").strip()
        plot = scrapedplot.strip()
        itemlist.append(Item(channel=__channel__, action="episodios", title=title, show=title, url=url,
                             thumbnail=thumbnail, fanart=thumbnail, plot=plot, folder=True))

    return itemlist
def get_video_url(page_url, premium=False, user="", password="", video_password="", page_data=""):
    logger.info("[discoverymax.py] get_video_url(page_url='%s')" % page_url)

    ydl = youtube_dl.YoutubeDL({'outtmpl': u'%(id)s%(ext)s', 'no_color': True})
    result = ydl.extract_info(page_url, download=False)

    video_urls = []
    if 'formats' in result:
        for entry in result['formats']:
            video_urls.append([
                scrapertools.safe_unicode(entry['format']).encode('utf-8'),
                scrapertools.safe_unicode(entry['url']).encode('utf-8')
            ])

    # Put the highest quality first
    video_urls.reverse()

    return video_urls
def get_video_url(page_url, premium=False, user="", password="", video_password="", page_data=""):
    logger.info("[eitb.py] get_video_url(page_url='%s')" % page_url)

    ydl = youtube_dl.YoutubeDL({'outtmpl': u'%(id)s%(ext)s'})
    result = ydl.extract_info(page_url, download=False)

    video_urls = []
    if 'formats' in result:
        for entry in result['formats']:
            logger.info("entry=" + repr(entry))
            if 'http' in entry['protocol']:
                video_urls.append([
                    scrapertools.safe_unicode(entry['format']).encode('utf-8'),
                    scrapertools.safe_unicode(entry['url']).encode('utf-8')
                ])

    # Highest quality first
    video_urls.reverse()

    return video_urls
def get_video_url(page_url, premium=False, user="", password="", video_password=""):
    logger.info("tvalacarta.servers.disneychannel get_video_url(page_url='%s')" % page_url)

    data = scrapertools.cache_page(page_url)
    page_url = scrapertools.find_single_match(data, '"downloadUrl"\:"([^"]+)"')
    logger.info("tvalacarta.servers.disneychannel get_video_url page_url=" + page_url)

    ydl = youtube_dl.YoutubeDL({'outtmpl': u'%(id)s%(ext)s'})
    result = ydl.extract_info(page_url, download=False)

    video_urls = []
    if 'formats' in result:
        for entry in result['formats']:
            logger.info("entry=" + repr(entry))
            '''
            Sample entry:
            {u'http_headers': {u'Accept-Charset': u'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
                               u'Accept-Language': u'en-us,en;q=0.5',
                               u'Accept-Encoding': u'gzip, deflate',
                               u'Accept': u'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                               u'User-Agent': u'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/44.0 (Chrome)'},
             u'protocol': u'm3u8_native',
             u'format': u'hls-582 - 640x360',
             u'url': u'http://tvazvod-i.akamaihd.net/i/p/459791/sp/45979100/serveFlavor/entryId/0_pcso1cnx/v/2/flavorId/0_qx9uhlyx/index_0_av.m3u8',
             u'tbr': 582, u'height': 360, u'width': 640, u'ext': u'mp4',
             u'preference': None, u'format_id': u'hls-582'}
            '''
            video_urls.append([
                "." + scrapertools.safe_unicode(entry['ext']).encode('utf-8'),
                scrapertools.safe_unicode(entry['url']).encode('utf-8')
            ])

    # Highest quality first
    video_urls.reverse()

    return video_urls
def programas(item):
    logger.info("tvalacarta.channels.navarratv programas")
    itemlist = []

    # Download the page
    data = scrapertools.cache_page(item.url)

    patron = '<div class="Programa"[^<]+'
    patron += '<a href="([^"]+)"[^<]+'
    patron += '<img src="([^"]+)" alt="([^"]+)"'
    matches = re.compile(patron, re.DOTALL).findall(data)

    for scrapedurl, scrapedthumbnail, scrapedtitle in matches:
        thumbnail = urlparse.urljoin(item.url, scrapedthumbnail)
        url = urlparse.urljoin(item.url, scrapedurl)
        title = scrapertools.safe_unicode(scrapedtitle).encode("utf-8").strip()
        plot = ""
        itemlist.append(Item(channel=__channel__, action="episodios", title=title, show=title, url=url,
                             thumbnail=thumbnail, fanart=thumbnail, plot=plot, folder=True))

    return itemlist
def get_video_url(page_url, premium=False, user="", password="", video_password="", page_data=""):
    logger.info("tvalacarta.servers.youtube get_video_url page_url=" + page_url)

    if not page_url.startswith("http"):
        page_url = "http://www.youtube.com/watch?v=" + page_url
        logger.info("tvalacarta.servers.youtube get_video_url page_url=" + page_url)

    # Strip any playlist parameter from the URL
    if "&list=" in page_url:
        import re
        page_url = re.compile("\&list\=[A-Za-z0-9\-_]+", re.DOTALL).sub("", page_url)
        logger.info("tvalacarta.servers.youtube get_video_url page_url=" + page_url)

    if "?list=" in page_url:
        import re
        page_url = re.compile("\?list\=[^\&]+&", re.DOTALL).sub("?", page_url)
        logger.info("tvalacarta.servers.youtube get_video_url page_url=" + page_url)

    ydl = youtube_dl.YoutubeDL({'outtmpl': u'%(id)s%(ext)s'})
    result = ydl.extract_info(page_url, download=False)
    logger.info("result=" + repr(result))

    video_urls = []
    if 'formats' in result:
        for entry in result['formats']:
            logger.info("entry=" + repr(entry))

            extension = ""
            try:
                if entry['ext'] is not None:
                    extension = scrapertools.safe_unicode("(" + entry['ext'] + ")").encode('utf-8')
            except:
                import traceback
                logger.info(traceback.format_exc())

            resolution = ""
            try:
                if entry['width'] is not None and entry['height'] is not None:
                    resolution = scrapertools.safe_unicode(" (" + str(entry['width']) + "x" + str(entry['height']) + ")").encode('utf-8')
            except:
                import traceback
                logger.info(traceback.format_exc())

            tag = ""
            try:
                if entry['acodec'] == 'none':
                    tag = " (Solo video)"
                if entry['vcodec'] == 'none':
                    tag = " (Solo audio)"

                # Skip video-only / audio-only formats unless enabled in settings
                if config.get_setting("youtube_special_formats") == "false" and tag != "":
                    continue
            except:
                import traceback
                logger.info(traceback.format_exc())

            video_urls.append([extension + resolution + tag, scrapertools.safe_unicode(entry['url']).encode('utf-8')])

    # Highest quality first
    video_urls.reverse()

    return video_urls
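# The playlist-stripping regexes above normalize both the "&list=" and the
# "?list=" variants down to a plain watch URL. A standalone check with
# made-up video and playlist IDs:
import re

for u in ("http://www.youtube.com/watch?v=abc123&list=PLxyz",
          "http://www.youtube.com/watch?list=PLxyz&v=abc123"):
    u = re.compile("\&list\=[A-Za-z0-9\-_]+", re.DOTALL).sub("", u)
    u = re.compile("\?list\=[^\&]+&", re.DOTALL).sub("?", u)
    print(u)  # -> http://www.youtube.com/watch?v=abc123 in both cases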