def downloadMMS(self, folder, options, grabber):
    """Fetch this item's MMS stream into ``folder`` as ``<filename>.wmv``.

    The stream address is resolved through ``Utils.getMMSUrl`` using
    ``self.grabber``; the ``grabber`` argument is not used by this method.
    An existing target file is left untouched unless ``options.overwrite``
    is set.  When ``libmimms`` cannot be imported a message is printed and
    nothing is downloaded.
    """
    stream_url = Utils.getMMSUrl(self.grabber, self.mms)
    try:
        import libmimms.core

        target = os.path.join(folder, self.filename + ".wmv")
        keep_existing = not options.overwrite
        if keep_existing and os.path.exists(target):
            print("{0} already there as {1}".format(self.pid, target))
            return

        # Settings object handed over to libmimms.core.download().
        request = Utils.Obj()
        settings = (
            ("quiet", False),
            ("url", stream_url),
            ("resume", False),
            ("bandwidth", 1e6),
            ("filename", target),
            ("clobber", True),
            ("time", 0),
        )
        for key, val in settings:
            setattr(request, key, val)
        libmimms.core.download(request)
    except ImportError:
        print("\nMissing libmimms.\nCannot download: {0}.".format(stream_url))
def __init__(self, grabber, url, channel, date, pid, title, desc, h264, m3u8, wmv):
    """Populate a program from already-extracted metadata fields.

    ``date`` is parsed as ``dd/mm/YYYY``; when it is empty the current
    time is used instead.  ``m3u8`` and ``wmv`` are stored (as ``ts`` and
    ``mms``) only when they are non-empty.
    """
    super(Program, self).__init__()

    self.url = url
    self.pid = pid
    self.title = title
    self.description = desc
    self.channel = channel

    self.datetime = (
        datetime.datetime.strptime(date, "%d/%m/%Y")
        if date
        else datetime.datetime.now()
    )

    # Register the single H264 url under bandwidth key 0.
    Utils.addH264Url(self.h264, 0, h264)

    if m3u8:
        self.ts = m3u8
    if wmv:
        self.mms = wmv

    self.grabber = grabber
    self.filename = Utils.makeFilename(self.title)
    self.canFollow = True
def __init__(self, pid, grabber, data):
    """Populate the element from an XML node.

    ``data`` must offer ``findtext``; the tags read are ``titolo``,
    ``descrizione``, ``dominio``, ``datapubblicazione``, ``h264``,
    ``m3u8``, ``localid``, ``durata`` and ``web``.
    """
    super(Elem, self).__init__()

    self.pid = pid
    self.title = data.findtext("titolo")
    self.description = data.findtext("descrizione")
    self.channel = data.findtext("dominio")
    self.grabber = grabber

    # The publication date may use '-' separators; normalise before parsing.
    published = data.findtext("datapubblicazione").replace("-", "/")
    self.datetime = datetime.datetime.strptime(published, "%d/%m/%Y")

    # extra experimental data
    Utils.addH264Url(self.h264, 0, data.findtext("h264"))
    self.ts = data.findtext("m3u8")
    self.id = data.findtext("localid")
    self.length = data.findtext("durata")

    # Without an explicit web path, derive one from the local id.
    web_path = data.findtext("web") or RAIUrls.getWebFromID(self.id)
    self.url = RAIUrls.base + web_path

    self.filename = Utils.makeFilename(self.title)
    self.canFollow = True
def display(self, width):
    """Print a human-readable summary of this item to stdout.

    ``width`` is the length of the ``=`` separator line.  Optional fields
    are printed only when they are non-empty.
    """
    print("=" * width)
    for label, attr in (("PID:", "pid"), ("Channel:", "channel"), ("Title:", "title")):
        print(label, getattr(self, attr))

    # (label, attribute name, optional formatter) for the optional fields.
    optional = (
        ("Description:", "description", None),
        ("Date:", "datetime", Utils.strDate),
        ("Length:", "length", None),
        ("Filename:", "filename", None),
    )
    for label, attr, render in optional:
        value = getattr(self, attr)
        if value:
            print(label, render(value) if render else value)
    print()

    if self.canFollow:
        print("Follow: ENABLED")
        print()

    # The playlist is fetched before the stream urls are shown and printed last.
    playlist = self.getTabletPlaylist()

    Utils.displayH264(self.getH264())
    if self.getTS() or self.mms:
        if self.getTS():
            print("ts:", self.getTS())
        if self.mms:
            print("mms:", self.mms)
        print()

    Utils.displayM3U8(playlist)
def process(grabber, f, db):
    """Read a JSON array from ``f`` and add one ``Group`` per entry to ``db``.

    Each entry must provide the keys ``title``, ``linkDemand``, ``date``
    and ``editore``.
    """
    for entry in json.load(f):
        new_pid = Utils.getNewPID(db, None)
        group = Group(
            grabber,
            new_pid,
            entry["title"],
            entry["linkDemand"],
            entry["date"],
            entry["editore"],
        )
        Utils.addToDB(db, group)
def download(db, grabber, downType):
    """Download the document at ``RAIUrls.info`` into ``Config.tgFolder`` and process it."""
    progress = Utils.getProgress()
    target = os.path.join(Config.tgFolder, Utils.httpFilename(RAIUrls.info))
    listing = Utils.download(
        grabber, progress, RAIUrls.info, target, downType, "utf-8", True
    )
    process(grabber, progress, downType, listing, db)
def processItem(grabber, progress, downType, title, time, url, db):
    """Download ``url`` into ``Config.tgFolder`` and pass the result to ``processSet``.

    Nothing is processed when the download yields a falsy result.
    """
    target = os.path.join(Config.tgFolder, Utils.httpFilename(url))
    fetched = Utils.download(grabber, progress, url, target, downType, "utf-8", True)
    if not fetched:
        return
    processSet(grabber, title, time, fetched, db)
def download(db, grabber, downType):
    """Download the document at ``RAIUrls.junior`` into ``Config.juniorFolder`` and process it."""
    source = RAIUrls.junior
    folder = Config.juniorFolder
    target = os.path.join(folder, Utils.httpFilename(source))
    progress = Utils.getProgress()
    listing = Utils.download(grabber, progress, source, target, downType, None, True)
    process(grabber, progress, folder, listing, db, downType)
def getH264(self):
    """Return ``self.h264``, filling it from ``self.url`` when still empty.

    When no url is known yet, the document at ``self.url`` is fetched and
    parsed as XML; if its root tag is ``smil`` the ``src`` attribute of
    ``body/switch/video`` is registered under bandwidth key 0.
    """
    if not self.h264:
        document = Utils.getStringFromUrl(self.grabber, self.url)
        smil = ElementTree.fromstring(document)
        if smil.tag == "smil":
            video = smil.find("body").find("switch").find("video")
            Utils.addH264Url(self.h264, 0, video.attrib.get("src"))
    return self.h264
def download(db, grabber, downType):
    """Download the document at ``RAIUrls.onDemand`` into ``Config.demandFolder`` and process it.

    The download is requested with the ``raw-unicode-escape`` encoding.
    """
    source = RAIUrls.onDemand
    target = os.path.join(Config.demandFolder, Utils.httpFilename(source))
    progress = Utils.getProgress()
    listing = Utils.download(
        grabber, progress, source, target, downType, "raw-unicode-escape", True
    )
    process(grabber, listing, db)
def extractH264Ext(value):
    """Collect the per-bandwidth H264 urls found in ``value``.

    ``value`` is a mapping; every key of the form ``h264_<digits>`` whose
    url is non-empty is registered through ``Utils.addH264Url`` with the
    digits as integer bandwidth.  Returns the resulting dictionary, which
    is empty when no such key carries a url.
    """
    # Raw string: "\d" in a plain literal is an invalid escape sequence.
    pattern = re.compile(r"^h264_(\d+)")
    res = {}
    for key, url in value.items():
        m = pattern.match(key)
        if m and url:
            Utils.addH264Url(res, int(m.group(1)), url)
    return res
def __init__(self, grabber, url, downType, pid):
    """Build an on-demand item by downloading and parsing its page.

    A ``url`` without a scheme is first expanded with
    ``RAIUrls.getItemUrl``.  The page is fed to ``VideoHTMLParser`` and
    the parsed values drive the item's attributes.  When the parsed type
    is set and is not ``"Video"`` the item is left without url, filename
    and mms.
    """
    super(Demand, self).__init__()
    self.grabber = grabber

    if not urllib.parse.urlparse(url).scheme:
        url = RAIUrls.getItemUrl(url)
    self.url = url
    self.pid = pid

    page_path = os.path.join(Config.itemFolder, Utils.httpFilename(self.url))
    page = Utils.download(grabber, None, self.url, page_path, downType, "utf-8")
    scraper = VideoHTMLParser()
    scraper.feed(page.read())
    values = self.values = scraper.values

    self.channel = "item"
    self.title = values.title
    self.ts = values.videoUrlM3U8
    Utils.addH264Url(self.h264, 0, values.videoUrlH264)
    if values.date:
        self.datetime = datetime.datetime.strptime(values.date, "%d/%m/%Y")
    self.mms = None

    is_video = (not values.type) or values.type == "Video"
    if not is_video:
        # Photogallery case: there is nothing to download.
        self.url = None
        self.filename = None
        return

    # Use the video path when no explicit video url was found.
    if not values.videoUrl:
        values.videoUrl = values.videoPath
    # The .mp4 relinker variant sometimes does not work; use the .htm one.
    values.videoUrl = values.videoUrl.replace(
        "relinkerServlet.mp4", "relinkerServlet.htm"
    )

    # Derive a file name from the title.
    self.filename = Utils.makeFilename(self.title)
    self.mms = values.videoUrl
def download(db, grabber, downType):
    """Download and process the catalogue of every entry in ``channels``.

    Each catalogue is stored in ``Config.m6Folder`` under the remote file
    name suffixed with ``.<channel>``; falsy download results are skipped.
    """
    progress = Utils.getProgress()
    for channel in channels:
        catalogue_url = getCatalogueUrl(channel)
        basename = Utils.httpFilename(catalogue_url) + "." + channel
        target = os.path.join(Config.m6Folder, basename)
        catalogue = Utils.download(
            grabber, progress, catalogue_url, target, downType, "utf-8", True
        )
        if not catalogue:
            continue
        process(grabber, downType, catalogue, channel, db)
def processPage(grabber, progress, folder, f, db, downType):
    """Follow every video entry of the ``menu`` element in the XML read from ``f``.

    For each ``item`` child whose ``id`` attribute is ``video`` the block
    referenced by its ``src`` path is downloaded into ``folder`` and, when
    the download succeeds, handed to ``processBlock``.
    """
    menu = ElementTree.parse(f).getroot().find('menu')
    for entry in menu:
        is_video_item = entry.tag == "item" and entry.attrib.get("id") == "video"
        if not is_video_item:
            continue
        block_url = RAIUrls.getJuniorBlock(entry.find("src").attrib.get("path"))
        target = os.path.join(folder, Utils.httpFilename(block_url))
        block = Utils.download(
            grabber, progress, block_url, target, downType, None, True
        )
        if block:
            processBlock(grabber, progress, folder, block, db, downType)
def download(db, grabber, downType):
    """Download the news and programs JSON documents and process each in turn.

    Both files are stored in ``Config.tf1Folder`` (``news.json`` and
    ``programs.json``).
    """
    progress = Utils.getProgress()
    folder = Config.tf1Folder

    def fetch(url, basename):
        # Download ``url`` into the shared folder under ``basename``.
        target = os.path.join(folder, basename)
        return Utils.download(grabber, progress, url, target, downType, "utf-8", True)

    processNews(grabber, fetch(newsUrl, "news.json"), folder, progress, downType, db)
    processPrograms(
        grabber, fetch(programsUrl, "programs.json"), folder, progress, downType, db
    )
def __init__(self, grabber, url, downType, pid):
    """Create an on-demand item from the web page found at ``url``.

    Scheme-less urls are resolved through ``RAIUrls.getItemUrl``.  The
    page is downloaded into ``Config.itemFolder``, parsed with
    ``VideoHTMLParser`` and the parsed values are copied onto the item.
    Items whose parsed type is set but differs from ``"Video"`` end up
    with ``url``, ``filename`` and ``mms`` all set to ``None``.
    """
    super(Demand, self).__init__()
    self.grabber = grabber

    scheme = urllib.parse.urlparse(url).scheme
    self.url = url if scheme else RAIUrls.getItemUrl(url)
    self.pid = pid

    local_copy = os.path.join(Config.itemFolder, Utils.httpFilename(self.url))
    html_file = Utils.download(
        grabber, None, self.url, local_copy, downType, "utf-8"
    )
    page_parser = VideoHTMLParser()
    page_parser.feed(html_file.read())
    info = page_parser.values
    self.values = info

    self.channel = "item"
    self.title = info.title
    self.ts = info.videoUrlM3U8
    Utils.addH264Url(self.h264, 0, info.videoUrlH264)
    if info.date:
        self.datetime = datetime.datetime.strptime(info.date, "%d/%m/%Y")
    self.mms = None

    if info.type and info.type != "Video":
        # A photogallery: no video to expose.
        self.url = self.filename = None
        return

    if not info.videoUrl:
        # No explicit url on the page: use the video path instead.
        info.videoUrl = info.videoPath
    # The ".mp4" relinker sometimes does not work, so use the ".htm" one.
    info.videoUrl = info.videoUrl.replace("relinkerServlet.mp4", "relinkerServlet.htm")

    # File name derived from the title.
    self.filename = Utils.makeFilename(self.title)
    self.mms = info.videoUrl
def display(grabber, width, password):
    """Print configuration and connection details to stdout.

    The contents of ``userLocation`` and ``userIP`` are fetched through
    ``grabber`` and the Tor exit nodes are queried with ``password``
    before anything is printed.  ``width`` is the separator length.
    """
    rai = Utils.getStringFromUrl(grabber, userLocation)
    ip = Utils.getStringFromUrl(grabber, userIP)
    tor = Utils.getTorExitNodes(password)

    print("=" * width)
    rows = (
        ("Root folder:", Config.rootFolder),
        ("ExitNodes: ", tor),
        ("Location: ", Config.programFolder),
        ("RAI: ", rai),
        ("IP: ", ip),
    )
    for label, value in rows:
        print(label, value)
    print()
def downloadItems(grabber, url, which, conf, folder, progress, downType, db):
    """Download ``url`` into ``folder`` and dispatch it to the matching processor.

    ``which`` selects the handler (``FULL_VIDEO``, ``PROGRAM_LIST``,
    ``PROGRAM`` or ``PROGRAM_VIDEO``); any other value, or a falsy
    download result, means nothing is processed.
    """
    target = os.path.join(folder, Utils.httpFilename(url))
    listing = Utils.download(grabber, progress, url, target, downType, "utf-8", True)
    if not listing:
        return

    # Trailing arguments shared by every processor.
    shared = (conf, folder, progress, downType, db)
    if which == FULL_VIDEO:
        processFullVideo(grabber, listing, "episodi_interi", *shared)
    elif which == PROGRAM_LIST:
        processProgramList(grabber, listing, *shared)
    elif which == PROGRAM:
        processProgram(grabber, listing, *shared)
    elif which == PROGRAM_VIDEO:
        processFullVideo(grabber, listing, "brand", *shared)
def parseItem(grabber, prog, db):
    """Turn one catalogue entry into a ``Program`` and add it to ``db``.

    ``prog`` is a mapping with the keys ``id_diffusion``, ``date``,
    ``heure``, ``url_video``, ``accroche``, ``chaine``, ``titre`` and
    ``duree``; ``duree`` is interpreted as a number of minutes.
    """
    wanted = (
        "id_diffusion", "date", "heure", "url_video",
        "accroche", "chaine", "titre", "duree",
    )
    raw_id, date, hour, video_path, desc, channel, name, minutes = (
        prog[key] for key in wanted
    )
    url = baseUrl + video_path
    length = datetime.timedelta(minutes=int(minutes))

    pid = Utils.getNewPID(db, raw_id)
    program = Program(grabber, channel, date, hour, pid, length, name, desc, url)
    Utils.addToDB(db, program)
def parseItem(grabber, prog, db):
    """Create a ``Program`` from the catalogue entry ``prog`` and store it in ``db``.

    The entry's ``duree`` value is converted to a ``timedelta`` of that
    many minutes, and ``url_video`` is appended to ``baseUrl``.
    """
    def field(key):
        # Plain lookup; a missing key raises KeyError.
        return prog[key]

    raw_id = field("id_diffusion")
    date, hour = field("date"), field("heure")
    url = baseUrl + field("url_video")
    desc, channel, name = field("accroche"), field("chaine"), field("titre")
    length = datetime.timedelta(minutes=int(field("duree")))

    pid = Utils.getNewPID(db, raw_id)
    Utils.addToDB(
        db, Program(grabber, channel, date, hour, pid, length, name, desc, url)
    )
def process(grabber, f, db):
    """Add one ``Program`` to ``db`` for every entry of the JSON ``list`` array in ``f``.

    Each entry must provide ``weblink``, ``h264``, ``m3u8``, ``wmv``,
    ``name``, ``date`` and ``desc``; the channel is always ``"search"``.
    """
    wanted = ("weblink", "h264", "m3u8", "wmv", "name", "date", "desc")
    for entry in json.load(f)["list"]:
        url, h264, m3u8, wmv, title, date, description = (
            entry[key] for key in wanted
        )
        pid = Utils.getNewPID(db, None)
        program = Program(
            grabber, url, "search", date, pid, title, description, h264, m3u8, wmv
        )
        Utils.addToDB(db, program)
def downloadMMS(self, folder, options, grabber):
    """Download the item's MMS stream to ``<folder>/<filename>.wmv``.

    Uses ``self.grabber`` (not the ``grabber`` argument) to resolve the
    stream url.  Skips the download with a message when the file already
    exists and ``options.overwrite`` is false, and prints a notice when
    ``libmimms`` is not importable.
    """
    mms_url = Utils.getMMSUrl(self.grabber, self.mms)
    try:
        import libmimms.core

        destination = os.path.join(folder, self.filename + ".wmv")
        if options.overwrite or not os.path.exists(destination):
            # Option object expected by libmimms.core.download().
            job = Utils.Obj()
            job.quiet = False
            job.url = mms_url
            job.resume = False
            job.bandwidth = 1e6
            job.filename = destination
            job.clobber = True
            job.time = 0
            libmimms.core.download(job)
        else:
            print("{0} already there as {1}".format(self.pid, destination))
    except ImportError:
        print("\nMissing libmimms.\nCannot download: {0}.".format(mms_url))
def download(db, grabber, term, downType):
    """Run a search for ``term`` and process the JSON result.

    The search url asks ``RAIUrls.getSearchUrl`` for ``term`` with the
    value 100; the response is stored as ``<term>.json`` in
    ``Config.searchFolder``.
    """
    search_url = RAIUrls.getSearchUrl(term, 100)
    target = os.path.join(Config.searchFolder, term + ".json")
    results = Utils.download(grabber, None, search_url, target, downType, "utf-8")
    process(grabber, results, db)
def processBlock(grabber, progress, folder, f, db, downType):
    """Add an ``Item`` to ``db`` for every category of the XML in ``f`` that has a video.

    The root ``label`` text is used as the group name and each
    ``categoria``'s ``label`` text as the item name; both are
    HTML-unescaped.  Categories without a ``video`` child are skipped.
    ``progress`` and ``folder`` are accepted but not used here.
    """
    root = ElementTree.parse(f).getroot()
    # html.unescape replaces HTMLParser().unescape, removed in Python 3.9.
    group = html.unescape(root.find("label").text)

    for category in root.findall('categoria'):
        video = category.find("video")
        if video is None:
            continue
        name = html.unescape(category.find("label").text)
        path = video.text
        pid = Utils.getNewPID(db, None)
        item = Item(pid, grabber, path, downType, group, name)
        Utils.addToDB(db, item)
def parseItem(grabber, prog, name, db):
    """Add a ``Program`` to ``db`` for ``prog`` when it is a full video.

    ``prog`` must provide ``id``, ``longTitle``, ``publicationDate``
    (``YYYY-mm-dd HH:MM:SS``), ``duration`` (seconds), ``shortTitle``,
    ``watId`` and ``videoCategory``.  The program name is ``name``
    followed by `` - `` and the short title.
    """
    wanted = (
        "id", "longTitle", "publicationDate", "duration",
        "shortTitle", "watId", "videoCategory",
    )
    raw_id, desc, published, duration, short_title, wat, category = (
        prog[key] for key in wanted
    )
    pid = str(raw_id)
    name = name + " - " + short_title

    length = datetime.timedelta(seconds=duration)
    date = datetime.datetime.strptime(published, "%Y-%m-%d %H:%M:%S")

    # Skip the many "extract", "bonus" and "short" clips lasting a few minutes.
    if category != "fullvideo":
        return

    pid = Utils.getNewPID(db, pid)
    program = Program(grabber, date, length, pid, name, desc, wat, category)
    Utils.addToDB(db, program)
def getTS(self):
    """Return ``self.ts``, resolving it from ``self.url`` when not yet known.

    The document at ``self.url`` is downloaded into ``Config.m6Folder``
    and parsed as XML; every ``assetItem`` under ``asset`` is converted
    with ``getTSUrl`` and the last one wins.
    """
    if not self.ts:
        target = os.path.join(Config.m6Folder, Utils.httpFilename(self.url))
        progress = Utils.getProgress()
        fetched = Utils.download(
            self.grabber, progress, self.url, target, self.downType, "utf-8", True
        )
        if fetched:
            asset = ElementTree.parse(fetched).getroot().find("asset")
            for item in asset.findall("assetItem"):
                self.ts = getTSUrl(item.find("url").text)
    return self.ts
def download(db, grabber, downType):
    """Download the zipped catalogue at ``infoUrl`` and process its catch-up members.

    The archive is stored in ``Config.pluzzFolder``.  Every member whose
    name starts with ``catch_up_`` is opened, wrapped in an ASCII stream
    reader and passed to ``process``.
    """
    progress = Utils.getProgress()
    name = Utils.httpFilename(infoUrl)
    folder = Config.pluzzFolder
    localName = os.path.join(folder, name)

    archive = Utils.download(grabber, progress, infoUrl, localName, downType, None, True)

    decoder = codecs.getreader("ascii")
    # The context manager closes the archive even if processing raises.
    with zipfile.ZipFile(archive, "r") as z:
        for member in z.namelist():
            if member.startswith("catch_up_"):
                # A distinct name avoids rebinding the downloaded archive handle.
                with z.open(member) as entry:
                    process(grabber, decoder(entry), db)
def short(self, fmt):
    """Return a one-line summary built with the format string ``fmt``.

    ``fmt`` receives, in order, the pid, the date rendered by
    ``Utils.strDate`` (``None`` when no datetime is set), the channel and
    the title.
    """
    stamp = Utils.strDate(self.datetime) if self.datetime else None
    return fmt.format(self.pid, stamp, self.channel, self.title)
def process(grabber, progress, folder, f, db, downType):
    """Follow every ``elemento`` of the XML read from ``f`` that names a page.

    For each ``elemento`` child with a non-empty ``uniqueName`` the
    corresponding page is downloaded into ``folder`` and, when the
    download succeeds, handed to ``processPage``.
    """
    for entry in ElementTree.parse(f).getroot():
        if entry.tag != "elemento":
            continue
        name_node = entry.find("uniqueName")
        if name_node is None:
            continue
        unique_name = name_node.text
        if not unique_name:
            continue

        page_url = RAIUrls.getJuniorPage(unique_name)
        target = os.path.join(folder, Utils.httpFilename(page_url))
        page = Utils.download(grabber, progress, page_url, target, downType, None, True)
        if page:
            processPage(grabber, progress, folder, page, db, downType)
def __init__(self, pid, grabber, title, description, date, length, url, h264, ts, mms):
    """Populate an episode from already-extracted fields.

    ``date`` is parsed as ``dd-mm-YYYY``; ``h264`` is registered under
    bandwidth key 0 and the file name is derived from the title.
    """
    super(Episode, self).__init__()

    self.pid, self.grabber = pid, grabber
    self.title, self.description = title, description
    self.url = url
    self.datetime = datetime.datetime.strptime(date, "%d-%m-%Y")
    self.length = length

    Utils.addH264Url(self.h264, 0, h264)
    self.ts, self.mms = ts, mms

    self.filename = Utils.makeFilename(self.title)
def download(db, grabber, downType, mediasetType):
    """Download the configuration at ``configUrl`` and fetch the items it points to.

    For ``mediasetType == "tg5"`` the ``FullVideoRequestUrl`` entry is
    used (with its ``http://ww.`` prefix corrected to ``http://www.``);
    otherwise the ``ProgramListRequestUrl`` entry is used.
    """
    progress = Utils.getProgress()
    basename = Utils.httpFilename(configUrl)
    folder = Config.mediasetFolder
    target = os.path.join(folder, basename)

    raw = Utils.download(grabber, progress, configUrl, target, downType, None, True)
    conf = parseConfig(ElementTree.fromstring(raw.read().strip()))

    if mediasetType == "tg5":
        url = conf["FullVideoRequestUrl"].replace("http://ww.", "http://www.")
        which = FULL_VIDEO
    else:
        url = conf["ProgramListRequestUrl"]
        which = PROGRAM_LIST
    downloadItems(grabber, url, which, conf, folder, progress, downType, db)
def __init__(self, grabber, url, channel, date, pid, title, desc, h264, m3u8):
    """Populate a program from already-extracted fields.

    ``date`` is parsed as ``dd/mm/YYYY HH:MM`` after replacing ``-`` with
    ``/``.  The file name is the sanitised title followed by ``-`` and
    the ISO date.  ``m3u8`` is stored as ``ts`` only when non-empty.
    """
    super(Program, self).__init__()

    self.url = url
    self.pid = pid
    self.title = title
    self.description = desc
    self.channel = channel

    self.datetime = datetime.datetime.strptime(
        date.replace("-", "/"), "%d/%m/%Y %H:%M"
    )

    Utils.addH264Url(self.h264, 0, h264)
    if m3u8:
        self.ts = m3u8
    self.grabber = grabber

    stem = Utils.makeFilename(self.title)
    self.filename = stem + "-" + self.datetime.strftime("%Y-%m-%d")
    self.canFollow = True
def process(grabber, downType, f, channel, db):
    """Add one ``Program`` to ``db`` for every child of ``clpList`` in the XML read from ``f``.

    The title joins ``programName`` and ``clpName`` with `` - ``; the
    date is ``antennaDate`` or, when that is empty, ``publiDate``;
    ``duration`` is interpreted as seconds.
    """
    clips = ElementTree.parse(f).getroot().find("clpList")
    for clip in clips:
        def text_of(tag, node=clip):
            # Text of the named child element of the current clip.
            return node.find(tag).text

        clip_id = clip.get("id")
        title = text_of("programName") + " - " + text_of("clpName")
        desc = text_of("desc")
        date = text_of("antennaDate") or text_of("publiDate")
        length = datetime.timedelta(seconds=int(text_of("duration")))

        pid = Utils.getNewPID(db, clip_id)
        program = Program(
            grabber, downType, channel, date, pid, clip_id, length, title, desc
        )
        Utils.addToDB(db, program)
def process(grabber, downType, f, channel, db):
    """Create a ``Program`` for each clip listed under ``clpList`` in ``f`` and store it in ``db``.

    For every clip the ``id`` attribute and the ``programName``,
    ``clpName``, ``desc``, ``antennaDate`` (falling back to
    ``publiDate`` when empty) and ``duration`` (seconds) children are read.
    """
    document = ElementTree.parse(f)
    for clip in document.getroot().find("clpList"):
        clip_id = clip.get("id")
        program_name = clip.find("programName").text
        clip_name = clip.find("clpName").text
        title = program_name + " - " + clip_name
        desc = clip.find("desc").text

        date = clip.find("antennaDate").text
        if not date:
            date = clip.find("publiDate").text

        duration = int(clip.find("duration").text)
        length = datetime.timedelta(seconds=duration)

        pid = Utils.getNewPID(db, clip_id)
        Utils.addToDB(
            db,
            Program(grabber, downType, channel, date, pid, clip_id, length, title, desc),
        )
def download(db, grabber, downType):
    """Download and process the replay pages of the previous seven days.

    For each day from yesterday back to seven days ago, and for each
    value of ``channels``, the page ``<channel>_YYYY_MM_DD.html`` under
    ``RAIUrls.replay`` is stored in ``Config.replayFolder`` and, when the
    download succeeds, passed to ``process``.
    """
    progress = Utils.getProgress()
    today = datetime.date.today()
    folder = Config.replayFolder

    day_suffixes = [
        (today - datetime.timedelta(days=back)).strftime("_%Y_%m_%d")
        for back in range(1, 8)
    ]
    for suffix in day_suffixes:
        for channel in channels.values():
            filename = channel + suffix + ".html"
            page_url = RAIUrls.replay + "/" + filename
            target = os.path.join(folder, filename)
            page = Utils.download(grabber, progress, page_url, target, downType, "utf-8")
            if not page:
                continue
            process(grabber, page, db)
def getTabletPlaylist(self):
    """Return ``self.m3``, loading it from the TS url on first use.

    When no playlist is stored and ``getTS()`` yields a url, the playlist
    is loaded with ``Utils.load_m3u8_from_url``.  An ``HTTPError`` during
    loading is ignored, leaving ``self.m3`` unchanged.
    """
    if not self.m3:
        ts_url = self.getTS()
        if ts_url:
            try:
                self.m3 = Utils.load_m3u8_from_url(self.grabber, ts_url)
            except urllib.error.HTTPError:
                # Best effort: keep whatever self.m3 already holds.
                pass
    return self.m3
def processNews(grabber, f, folder, progress, downType, db):
    """Process every entry of the JSON array read from ``f``.

    For each entry the group identified by ``programId`` is downloaded
    with ``downloadGroup``; then one extra ``Program`` named after the
    group is added to ``db``, dated with the current time and without a
    length.
    """
    for entry in json.load(f):
        name = entry["programName"]
        group_id = entry["programId"]
        downloadGroup(grabber, name, group_id, folder, progress, downType, db)

        # The entry carries the data of the group's most recent item, so an
        # additional item is added under the group name; some of its
        # information will still be missing.
        title = entry["title"]
        wat = entry["linkAttributes"]["watId"]
        category = entry["linkAttributes"]["videoCategory"]

        pid = Utils.getNewPID(db, group_id)
        now = datetime.datetime.now()
        Utils.addToDB(
            db, Program(grabber, now, None, pid, name, title, wat, category)
        )
def processEpisode(grabber, e, db):
    """Add an ``Episode`` to ``db`` for the XML element ``e`` when it has a video.

    The title, date and url come from the ``name``, ``createDate`` and
    ``uniquename`` attributes.  Under ``units``, the ``textUnit`` of type
    ``Durata`` gives the length and the one of type ``Testo breve`` the
    description (HTML-unescaped).  Elements without a ``videoUnit`` are
    skipped.  The video's ``url`` is stored as mms, and its ``m3u8`` and
    ``h264`` attributes as ts and h264.
    """
    title = e.attrib.get("name")
    date = e.attrib.get("createDate")
    url = e.attrib.get("uniquename")
    units = e.find("units")

    length = None
    description = None
    for u in units.findall("textUnit"):
        typ = u.attrib.get("type")
        if typ == "Durata":
            length = u.find("text").text
        elif typ == "Testo breve":
            description = u.find("text").text
    if description:
        # html.unescape replaces HTMLParser().unescape, removed in Python 3.9.
        description = html.unescape(description)

    video = units.find("videoUnit")
    if video is None:
        # no video, skip this episode
        return

    mms = video.find("url").text
    h264 = None
    ts = None
    for a in video.find("attributes").findall("attribute"):
        key = a.find("key").text
        value = a.find("value").text
        if key == "m3u8":
            ts = value
        elif key == "h264":
            h264 = value

    pid = Utils.getNewPID(db, None)
    item = Episode(pid, grabber, title, description, date, length, url, h264, ts, mms)
    Utils.addToDB(db, item)
def parseItem(grabber, channel, date, time, value, db):
    """Add a ``Program`` to ``db`` for ``value`` when it exposes at least one stream.

    ``value`` is a mapping with the keys ``t`` (name), ``d``
    (description), ``l`` (length in seconds, possibly the empty string),
    ``h264``, ``urlTablet``, ``urlSmartPhone`` and ``i`` (id), plus any
    keys understood by ``extractH264Ext``.
    """
    name = value["t"]
    desc = value["d"]
    secs = value["l"]
    length = datetime.timedelta(seconds=int(secs)) if secs != "" else None

    h264 = extractH264Ext(value)
    if not h264:
        # No per-bandwidth urls were found: try the plain "h264" entry.
        Utils.addH264Url(h264, 0, value["h264"])

    # Sometimes the same url is given for h264 and TS; normally it is only
    # a valid h264, so it is dropped from the TS candidates.
    known_h264 = h264.values()
    tablet = value["urlTablet"]
    tablet = None if tablet in known_h264 else tablet
    smartPhone = value["urlSmartPhone"]
    smartPhone = None if smartPhone in known_h264 else smartPhone

    raw_id = value["i"]
    if not (h264 or tablet or smartPhone):
        return

    pid = Utils.getNewPID(db, raw_id)
    program = Program(
        grabber, channels[channel], date, time, pid, length,
        name, desc, h264, tablet, smartPhone,
    )
    Utils.addToDB(db, program)