def process(grabber, f, db):
    """Store one ``Group`` in the database for every entry of a JSON file.

    :param grabber: passed through unchanged to ``Group``.
    :param f: readable file object containing a JSON iterable of mappings;
        each mapping must provide "title", "linkDemand", "date" and
        "editore".
    :param db: database handle given to ``Utils.getNewPID`` and
        ``Utils.addToDB``.
    """
    for entry in json.load(f):
        # A fresh pid is requested for every entry (no candidate id given).
        new_pid = Utils.getNewPID(db, None)
        group = Group(
            grabber,
            new_pid,
            entry["title"],
            entry["linkDemand"],
            entry["date"],
            entry["editore"],
        )
        Utils.addToDB(db, group)
def processSet(grabber, title, time, f, db):
    """Store the programs described by a JSON document read from *f*.

    When the document has a truthy "integrale" entry, a single ``Program``
    is built from it. Otherwise every element of "list" whose "type" is not
    "empty" becomes a ``Program``; nothing happens when "list" is missing
    or empty.

    :param grabber: passed through unchanged to ``Program``.
    :param title: program title; also the prefix of each list item title.
    :param time: time string combined with the date of the "integrale" entry.
    :param f: readable file object containing the JSON document.
    :param db: database handle given to ``Utils.getNewPID`` and
        ``Utils.addToDB``.
    """
    document = json.load(f)
    channel = "TG"

    whole = document.get("integrale")
    if whole:
        url = whole["weblink"]
        h264 = whole["h264"]
        m3u8 = whole["m3u8"]

        # strange time for some TG3
        hour = "00:00" if time == "LIS" else time

        description = whole["name"]
        # the field "date" seems to be always TODAY
        # so we might actually get a program from yesterday
        day = isThereADate(whole["date"], description)
        when = day + " " + hour

        pid = Utils.getNewPID(db, None)
        Utils.addToDB(
            db,
            Program(grabber, url, channel, when, pid, title, description, h264, m3u8),
        )
        return

    # no "integrale": fall back to the list of single items
    items = document.get("list")
    if not items:
        return

    for entry in items:
        if entry["type"] == "empty":
            continue

        url = entry["weblink"]
        h264 = entry["h264"]
        m3u8 = entry["m3u8"]
        when = entry["date"] + " 00:00"
        item_title = title + "-" + entry["name"]
        description = entry["desc"]

        pid = Utils.getNewPID(db, None)
        Utils.addToDB(
            db,
            Program(grabber, url, channel, when, pid, item_title, description, h264, m3u8),
        )
def parseItem(grabber, prog, db):
    """Build a ``Program`` from one listing entry and add it to the database.

    :param grabber: passed through unchanged to ``Program``.
    :param prog: mapping with the keys "id_diffusion", "date", "heure",
        "url_video", "accroche", "chaine", "titre" and "duree".
    :param db: database handle given to ``Utils.getNewPID`` and
        ``Utils.addToDB``.
    """
    source_id = prog["id_diffusion"]
    day = prog["date"]
    start = prog["heure"]
    # "url_video" is appended to the module-level base URL
    video_url = baseUrl + prog["url_video"]
    summary = prog["accroche"]
    station = prog["chaine"]
    title = prog["titre"]
    # "duree" is interpreted as a number of minutes
    duration = datetime.timedelta(minutes=int(prog["duree"]))

    new_pid = Utils.getNewPID(db, source_id)
    program = Program(
        grabber, station, day, start, new_pid, duration, title, summary, video_url
    )
    Utils.addToDB(db, program)
def parseItem(grabber, prog, db):
    """Build a ``Program`` from one listing entry and add it to the database.

    :param grabber: passed through unchanged to ``Program``.
    :param prog: mapping with the keys "id_diffusion", "date", "heure",
        "url_video", "accroche", "chaine", "titre" and "duree".
    :param db: database handle given to ``Utils.getNewPID`` and
        ``Utils.addToDB``.
    """
    source_id = prog["id_diffusion"]
    day = prog["date"]
    start = prog["heure"]
    # "url_video" is appended to the module-level base URL
    video_url = baseUrl + prog["url_video"]
    summary = prog["accroche"]
    station = prog["chaine"]
    title = prog["titre"]
    # "duree" is interpreted as a number of minutes
    duration = datetime.timedelta(minutes=int(prog["duree"]))

    new_pid = Utils.getNewPID(db, source_id)
    program = Program(
        grabber, station, day, start, new_pid, duration, title, summary, video_url
    )
    Utils.addToDB(db, program)
def process(grabber, f, db):
    """Store a ``Program`` for every element of the "list" key of a JSON file.

    All programs are stored under the fixed channel name "search".

    :param grabber: passed through unchanged to ``Program``.
    :param f: readable file object containing a JSON object with a "list"
        key; each element must provide "weblink", "h264", "m3u8", "wmv",
        "name", "date" and "desc".
    :param db: database handle given to ``Utils.getNewPID`` and
        ``Utils.addToDB``.
    """
    channel = "search"
    results = json.load(f)["list"]

    for entry in results:
        url = entry["weblink"]
        h264 = entry["h264"]
        m3u8 = entry["m3u8"]
        wmv = entry["wmv"]
        title = entry["name"]
        date = entry["date"]
        description = entry["desc"]

        pid = Utils.getNewPID(db, None)
        Utils.addToDB(
            db,
            Program(grabber, url, channel, date, pid, title, description, h264, m3u8, wmv),
        )
def processBlock(grabber, progress, folder, f, db, downType):
    """Store an ``Item`` for every "categoria" element that has a video.

    The XML document read from *f* must have a root-level "label" element,
    whose unescaped text is used as the group name. Each "categoria" child
    of the root that contains a "video" element becomes one ``Item``.

    :param grabber: passed through unchanged to ``Item``.
    :param progress: unused here; kept for interface compatibility.
    :param folder: unused here; kept for interface compatibility.
    :param f: file name or file object accepted by ``ElementTree.parse``.
    :param db: database handle given to ``Utils.getNewPID`` and
        ``Utils.addToDB``.
    :param downType: passed through unchanged to ``Item``.
    """
    root = ElementTree.parse(f).getroot()

    # html.unescape replaces HTMLParser().unescape, which was deprecated in
    # Python 3.4 and removed in 3.9.
    group = html.unescape(root.find("label").text)

    for category in root.findall("categoria"):
        video = category.find("video")
        if video is None:
            # categories without a video are ignored
            continue

        name = html.unescape(category.find("label").text)
        path = video.text

        pid = Utils.getNewPID(db, None)
        item = Item(pid, grabber, path, downType, group, name)
        Utils.addToDB(db, item)
def parseItem(grabber, prog, name, db):
    """Store a ``Program`` for *prog* when its category is "fullvideo".

    :param grabber: passed through unchanged to ``Program``.
    :param prog: mapping with the keys "id", "longTitle", "publicationDate"
        (formatted as ``%Y-%m-%d %H:%M:%S``), "duration" (used as a number
        of seconds), "shortTitle", "watId" and "videoCategory".
    :param name: prefix of the stored program name.
    :param db: database handle given to ``Utils.getNewPID`` and
        ``Utils.addToDB``.
    """
    raw_id = str(prog["id"])
    summary = prog["longTitle"]
    published = prog["publicationDate"]
    seconds = prog["duration"]
    full_name = name + " - " + prog["shortTitle"]
    wat_id = prog["watId"]
    kind = prog["videoCategory"]

    duration = datetime.timedelta(seconds=seconds)
    when = datetime.datetime.strptime(published, "%Y-%m-%d %H:%M:%S")

    # ignore the countless "extract", "bonus", "short" which last just a few minutes
    if kind != "fullvideo":
        return

    new_pid = Utils.getNewPID(db, raw_id)
    program = Program(grabber, when, duration, new_pid, full_name, summary, wat_id, kind)
    Utils.addToDB(db, program)
def download(db, grabber, url, downType):
    """Download the data XML of a page and store an ``Elem`` per "content".

    The page name is the file name of *url* without its extension. The data
    is saved as ``<page>.xml`` inside ``Config.pageFolder`` and each
    "content" child of the XML root becomes one ``Elem``.

    :param db: database handle given to ``Utils.getNewPID`` and
        ``Utils.addToDB``.
    :param grabber: passed to ``Utils.download`` and to ``Elem``.
    :param url: page URL from which the page name is derived.
    :param downType: passed through unchanged to ``Utils.download``.
    """
    page = os.path.splitext(Utils.httpFilename(url))[0]
    dataUrl = RAIUrls.getPageDataUrl(page)
    localFilename = os.path.join(Config.pageFolder, page + ".xml")

    handle = Utils.download(grabber, None, dataUrl, localFilename, downType, "utf-8")

    # ElementTree does not like unicode, it prefers byte strings
    text = Utils.removeInvalidXMLCharacters(handle.read().strip())
    root = ElementTree.fromstring(text)

    for content in root.findall("content"):
        new_pid = Utils.getNewPID(db, None)
        Utils.addToDB(db, Elem(new_pid, grabber, content))
def processFullVideo(grabber, f, tag, conf, folder, progress, downType, db):
    """Store a ``Program`` for every "full" video listed under *tag*.

    The JSON document read from *f* is expected to hold the videos at
    ``document[tag]["video"]``. Only entries whose ``subbrand.name`` equals
    "full" are stored.

    :param grabber: passed through unchanged to ``Program``.
    :param f: readable file object containing the JSON document.
    :param tag: top-level key under which the video list is found.
    :param conf: passed through unchanged to ``Program``.
    :param folder: unused here; kept for interface compatibility.
    :param progress: unused here; kept for interface compatibility.
    :param downType: unused here; kept for interface compatibility.
    :param db: database handle given to ``Utils.getNewPID`` and
        ``Utils.addToDB``.
    """
    document = json.load(f)

    for video in document[tag]["video"]:
        title = video["brand"]["value"] + " " + video["title"]
        summary = video["desc"]
        station = video["channel"]
        # "date" is formatted as day/month/year
        when = datetime.datetime.strptime(video["date"], "%d/%m/%Y")
        duration = video["duration"]
        video_id = video["id"]
        kind = video["subbrand"]["name"]

        if kind != "full":
            continue

        new_pid = Utils.getNewPID(db, video_id)
        program = Program(
            grabber, conf, when, duration, new_pid, title, summary, video_id, station
        )
        Utils.addToDB(db, program)
def process(grabber, downType, f, channel, db):
    """Store a ``Program`` for every child of the "clpList" XML element.

    :param grabber: passed through unchanged to ``Program``.
    :param downType: passed through unchanged to ``Program``.
    :param f: file name or file object accepted by ``ElementTree.parse``.
    :param channel: passed through unchanged to ``Program``.
    :param db: database handle given to ``Utils.getNewPID`` and
        ``Utils.addToDB``.
    """
    root = ElementTree.parse(f).getroot()

    for clip in root.find("clpList"):
        clip_id = clip.get("id")
        title = clip.find("programName").text + " - " + clip.find("clpName").text
        summary = clip.find("desc").text

        # fall back to the publication date when the antenna date is empty
        when = clip.find("antennaDate").text or clip.find("publiDate").text

        # "duration" is interpreted as a number of seconds
        duration = datetime.timedelta(seconds=int(clip.find("duration").text))

        new_pid = Utils.getNewPID(db, clip_id)
        program = Program(
            grabber, downType, channel, when, new_pid, clip_id, duration, title, summary
        )
        Utils.addToDB(db, program)
def processNews(grabber, f, folder, progress, downType, db):
    """Download every group listed in a JSON file and store its latest item.

    For each entry ``downloadGroup`` is called first; then a ``Program``
    named after the group is stored, built from the entry's own fields.

    :param grabber: passed to ``downloadGroup`` and to ``Program``.
    :param f: readable file object containing a JSON iterable of mappings
        with "programName", "programId", "title" and "linkAttributes"
        (itself holding "watId" and "videoCategory").
    :param folder: passed through unchanged to ``downloadGroup``.
    :param progress: passed through unchanged to ``downloadGroup``.
    :param downType: passed through unchanged to ``downloadGroup``.
    :param db: database handle given to ``downloadGroup``,
        ``Utils.getNewPID`` and ``Utils.addToDB``.
    """
    for entry in json.load(f):
        group_name = entry["programName"]
        group_id = entry["programId"]

        downloadGroup(grabber, group_name, group_id, folder, progress, downType, db)

        # this group contains the info of the most recent Item
        # we add an other item with the group name
        # some info will still be missing
        title = entry["title"]
        link = entry["linkAttributes"]
        wat_id = link["watId"]
        kind = link["videoCategory"]

        new_pid = Utils.getNewPID(db, group_id)
        # the current time is used as date and no length is available
        program = Program(
            grabber, datetime.datetime.now(), None, new_pid, group_name, title, wat_id, kind
        )
        Utils.addToDB(db, program)
def process(grabber, downType, f, channel, db):
    """Store a ``Program`` for every child of the "clpList" XML element.

    :param grabber: passed through unchanged to ``Program``.
    :param downType: passed through unchanged to ``Program``.
    :param f: file name or file object accepted by ``ElementTree.parse``.
    :param channel: passed through unchanged to ``Program``.
    :param db: database handle given to ``Utils.getNewPID`` and
        ``Utils.addToDB``.
    """
    root = ElementTree.parse(f).getroot()

    for clip in root.find("clpList"):
        clip_id = clip.get("id")
        title = clip.find("programName").text + " - " + clip.find("clpName").text
        summary = clip.find("desc").text

        # fall back to the publication date when the antenna date is empty
        when = clip.find("antennaDate").text or clip.find("publiDate").text

        # "duration" is interpreted as a number of seconds
        duration = datetime.timedelta(seconds=int(clip.find("duration").text))

        new_pid = Utils.getNewPID(db, clip_id)
        program = Program(
            grabber, downType, channel, when, new_pid, clip_id, duration, title, summary
        )
        Utils.addToDB(db, program)
def processEpisode(grabber, e, db):
    """Store an ``Episode`` built from the XML element *e*, if it has a video.

    Title, creation date and unique name come from the attributes of *e*.
    Length and description come from the "textUnit" children of "units"
    whose type is "Durata" and "Testo breve" respectively. The media URLs
    come from the "videoUnit" child. Elements without a "videoUnit" are
    skipped.

    :param grabber: passed through unchanged to ``Episode``.
    :param e: XML element with "name", "createDate" and "uniquename"
        attributes and a "units" child.
    :param db: database handle given to ``Utils.getNewPID`` and
        ``Utils.addToDB``.
    """
    title = e.attrib.get("name")
    date = e.attrib.get("createDate")
    url = e.attrib.get("uniquename")

    units = e.find("units")

    length = None
    description = None
    for unit in units.findall("textUnit"):
        typ = unit.attrib.get("type")
        if typ == "Durata":
            length = unit.find("text").text
        elif typ == "Testo breve":
            description = unit.find("text").text

    if description:
        # html.unescape replaces HTMLParser().unescape, which was deprecated
        # in Python 3.4 and removed in 3.9.
        description = html.unescape(description)

    video = units.find("videoUnit")
    if video is None:
        # no video, skip this episode
        return

    mms = video.find("url").text

    h264 = None
    ts = None
    for attribute in video.find("attributes").findall("attribute"):
        key = attribute.find("key").text
        value = attribute.find("value").text
        if key == "m3u8":
            ts = value
        elif key == "h264":
            h264 = value

    pid = Utils.getNewPID(db, None)
    item = Episode(pid, grabber, title, description, date, length, url, h264, ts, mms)
    Utils.addToDB(db, item)
def parseItem(grabber, channel, date, time, value, db):
    """Store a ``Program`` for *value* when it has at least one usable URL.

    :param grabber: passed through unchanged to ``Program``.
    :param channel: key into the module-level ``channels`` mapping.
    :param date: passed through unchanged to ``Program``.
    :param time: passed through unchanged to ``Program``.
    :param value: mapping with the keys "t" (name), "d" (description),
        "l" (length in seconds, or an empty string), "h264", "urlTablet",
        "urlSmartPhone" and "i" (id).
    :param db: database handle given to ``Utils.getNewPID`` and
        ``Utils.addToDB``.
    """
    title = value["t"]
    summary = value["d"]
    secs = value["l"]
    duration = datetime.timedelta(seconds=int(secs)) if secs != "" else None

    h264 = extractH264Ext(value)
    # if the detailed h264 is not found, try with "h264"
    if not h264:
        Utils.addH264Url(h264, 0, value["h264"])

    # sometimes RAI puts the same url for h264 and TS
    # normally this is only a valid h264,
    # so we skip it in TS
    known_urls = h264.values()

    tablet = value["urlTablet"]
    if tablet in known_urls:
        tablet = None

    smartPhone = value["urlSmartPhone"]
    if smartPhone in known_urls:
        smartPhone = None

    source_id = value["i"]

    # nothing to store when no URL of any kind is left
    if not (h264 or tablet or smartPhone):
        return

    new_pid = Utils.getNewPID(db, source_id)
    program = Program(
        grabber,
        channels[channel],
        date,
        time,
        new_pid,
        duration,
        title,
        summary,
        h264,
        tablet,
        smartPhone,
    )
    Utils.addToDB(db, program)
def parseItem(grabber, channel, date, time, value, db):
    """Store a ``Program`` for *value* when it has at least one usable URL.

    :param grabber: passed through unchanged to ``Program``.
    :param channel: key into the module-level ``channels`` mapping.
    :param date: passed through unchanged to ``Program``.
    :param time: passed through unchanged to ``Program``.
    :param value: mapping with the keys "t" (name), "d" (description),
        "l" (length in seconds, or an empty string), "h264", "urlTablet",
        "urlSmartPhone" and "i" (id).
    :param db: database handle given to ``Utils.getNewPID`` and
        ``Utils.addToDB``.
    """
    title = value["t"]
    summary = value["d"]
    secs = value["l"]
    duration = datetime.timedelta(seconds=int(secs)) if secs != "" else None

    h264 = extractH264Ext(value)
    # if the detailed h264 is not found, try with "h264"
    if not h264:
        Utils.addH264Url(h264, 0, value["h264"])

    # sometimes RAI puts the same url for h264 and TS
    # normally this is only a valid h264,
    # so we skip it in TS
    known_urls = h264.values()

    tablet = value["urlTablet"]
    if tablet in known_urls:
        tablet = None

    smartPhone = value["urlSmartPhone"]
    if smartPhone in known_urls:
        smartPhone = None

    source_id = value["i"]

    # nothing to store when no URL of any kind is left
    if not (h264 or tablet or smartPhone):
        return

    new_pid = Utils.getNewPID(db, source_id)
    program = Program(
        grabber,
        channels[channel],
        date,
        time,
        new_pid,
        duration,
        title,
        summary,
        h264,
        tablet,
        smartPhone,
    )
    Utils.addToDB(db, program)
def follow(self, db, downType):
    """Create an ``Item.Demand`` for this object and add it to the database.

    The new pid is requested from ``Utils.getNewPID`` using ``self.pid`` as
    the candidate; ``self.grabber`` and ``self.url`` are handed to the
    ``Item.Demand`` constructor.

    :param db: database handle given to ``Utils.getNewPID`` and
        ``Utils.addToDB``.
    :param downType: passed through unchanged to ``Item.Demand``.
    """
    new_pid = Utils.getNewPID(db, self.pid)
    demand = Item.Demand(self.grabber, self.url, downType, new_pid)
    Utils.addToDB(db, demand)