Beispiel #1
0
    def __init__(self, pid, grabber, data):
        """Populate the element from one parsed XML node.

        ``pid`` and ``grabber`` are stored as given.  Every other field is
        read from child elements of ``data`` through ``findtext``.
        """
        super(Elem, self).__init__()

        self.pid = pid
        self.title = data.findtext("titolo")
        self.description = data.findtext("descrizione")
        self.channel = data.findtext("dominio")
        self.grabber = grabber

        # Dashes are turned into slashes so the publication date can be
        # parsed with a single day/month/year format.
        published = data.findtext("datapubblicazione").replace("-", "/")
        self.datetime = datetime.datetime.strptime(published, "%d/%m/%Y")

        # extra experimental data
        # NOTE(review): self.h264 is not assigned here; presumably the base
        # initializer creates it -- confirm.
        Utils.addH264Url(self.h264, 0, data.findtext("h264"))

        self.ts = data.findtext("m3u8")

        self.id = data.findtext("localid")
        self.length = data.findtext("durata")

        # Without an explicit "web" value the path is looked up from the
        # local id instead.
        web_path = data.findtext("web") or RAIUrls.getWebFromID(self.id)
        self.url = RAIUrls.base + web_path

        self.filename = Utils.makeFilename(self.title)

        self.canFollow = True
Beispiel #2
0
def download(db, grabber, term, downType):
    """Run a search for ``term`` and pass the result to ``process``.

    The search URL comes from ``RAIUrls.getSearchUrl(term, 100)``.  The
    local file name handed to ``Utils.download`` is ``<term>.json`` inside
    ``Config.searchFolder``.  ``downType`` is forwarded unchanged.
    """
    search_url = RAIUrls.getSearchUrl(term, 100)
    cache_path = os.path.join(Config.searchFolder, term + ".json")

    results = Utils.download(
        grabber, None, search_url, cache_path, downType, "utf-8"
    )

    process(grabber, results, db)
Beispiel #3
0
def download(db, grabber, term, downType):
    """Fetch the search data for ``term`` and feed it to ``process``.

    ``Utils.download`` receives the URL built by ``RAIUrls.getSearchUrl``
    together with a local file name of the form ``<term>.json`` located in
    ``Config.searchFolder``; whatever it returns is given to ``process``
    along with ``grabber`` and ``db``.
    """
    query_url = RAIUrls.getSearchUrl(term, 100)

    json_name = term + ".json"
    json_path = os.path.join(Config.searchFolder, json_name)

    handle = Utils.download(grabber, None, query_url, json_path, downType, "utf-8")
    process(grabber, handle, db)
Beispiel #4
0
    def __init__(self, grabber, url, downType, pid):
        """Create an item from the metadata found on its web page.

        The page is obtained through ``Utils.download`` and fed to a
        ``VideoHTMLParser``; the parsed values provide the title, the
        stream URLs and the date.

        Args:
            grabber: stored on the item and passed to ``Utils.download``.
            url: page address; when it has no URL scheme it is first
                expanded with ``RAIUrls.getItemUrl``.
            downType: forwarded unchanged to ``Utils.download``.
            pid: identifier stored on the item.

        If the page declares a type other than "Video", ``url`` and
        ``filename`` are set to ``None`` and ``mms`` stays ``None``.  When
        the page offers neither a video URL nor a video path, ``mms`` is
        left empty instead of raising.
        """
        super(Demand, self).__init__()

        self.grabber = grabber

        if not urllib.parse.urlparse(url).scheme:
            url = RAIUrls.getItemUrl(url)

        self.url = url
        self.pid = pid

        localFilename = os.path.join(Config.itemFolder,
                                     Utils.httpFilename(self.url))

        f = Utils.download(grabber, None, self.url, localFilename, downType,
                           "utf-8")

        parser = VideoHTMLParser()
        parser.feed(f.read())

        self.values = parser.values

        self.channel = "item"
        self.title = self.values.title
        self.ts = self.values.videoUrlM3U8

        # NOTE(review): self.h264 is not assigned here; presumably the base
        # initializer creates it -- confirm.
        Utils.addH264Url(self.h264, 0, self.values.videoUrlH264)

        if self.values.date:
            self.datetime = datetime.datetime.strptime(self.values.date,
                                                       "%d/%m/%Y")

        self.mms = None

        if self.values.type and self.values.type != "Video":
            # this is a case of a Photogallery
            self.url = None
            self.filename = None
            return

        # Prefer the explicit video URL, fall back to the video path.
        video_url = self.values.videoUrl or self.values.videoPath
        if video_url:
            # sometimes we get .mp4 which does not work
            # (guarded: with no URL at all there is nothing to rewrite, and
            # calling .replace on None would raise AttributeError)
            video_url = video_url.replace(
                "relinkerServlet.mp4", "relinkerServlet.htm")
        self.values.videoUrl = video_url

        # make a nice filename
        self.filename = Utils.makeFilename(self.title)

        self.mms = video_url
Beispiel #5
0
    def handle_starttag(self, tag, attrs):
        """Collect video metadata from ``meta`` and ``param`` start tags.

        For a ``meta`` tag, each known key is looked up with
        ``self.extract(attrs, key)`` and every non-empty result is stored on
        ``self.values``.  For a ``param`` tag whose first attribute is
        ``value`` and whose content starts with ``videoPath``, the text
        between the first ``=`` and the first ``,`` (or the end of the
        string when there is no comma) is stored as ``self.values.videoPath``.

        Args:
            tag: name of the tag being opened.
            attrs: list of ``(name, value)`` pairs for the tag.
        """
        if tag == "meta":
            # (key handed to self.extract, attribute set on self.values)
            simple_fields = (
                ("videourl", "videoUrl"),
                ("videourl_mp4", "videoUrlMP4"),
                ("videourl_h264", "videoUrlH264"),
                ("videourl_m3u8", "videoUrlM3U8"),
                ("title", "title"),
                ("programmaTV", "program"),
                ("description", "description"),
                ("tipo", "type"),
                ("itemDate", "date"),
            )
            for key, attribute in simple_fields:
                val = self.extract(attrs, key)
                if val:
                    setattr(self.values, attribute, val)

            # The programme page id is turned into a full URL.
            val = self.extract(attrs, "idPageProgramma")
            if val:
                self.values.page = RAIUrls.base + RAIUrls.getWebFromID(val)

        elif tag == "param":
            if attrs and attrs[0][0] == "value":
                path = attrs[0][1]
                # An attribute written without a value may arrive as None
                # (html.parser does this), so check before inspecting it.
                if path and path.startswith("videoPath"):
                    firstEqual = path.find("=")
                    firstComma = path.find(",")
                    if firstComma == -1:
                        # No trailing options: keep the whole remainder
                        # rather than slicing to -1 and losing a character.
                        firstComma = len(path)
                    self.values.videoPath = path[firstEqual + 1:firstComma]
Beispiel #6
0
    def handle_starttag(self, tag, attrs):
        """Record video metadata found in ``meta`` and ``param`` tags.

        ``meta`` tags: every known key is queried through
        ``self.extract(attrs, key)``; non-empty results are written to the
        matching attribute of ``self.values``.

        ``param`` tags: when the first attribute is ``value`` and its
        content starts with ``videoPath``, the part after the first ``=`` up
        to the first ``,`` is stored in ``self.values.videoPath``.  Without
        a comma the remainder of the string is used.

        Args:
            tag: name of the tag being opened.
            attrs: list of ``(name, value)`` pairs for the tag.
        """
        if tag == "meta":
            # Pairs of (key handed to self.extract, attribute of self.values).
            meta_to_attribute = (
                ("videourl", "videoUrl"),
                ("videourl_mp4", "videoUrlMP4"),
                ("videourl_h264", "videoUrlH264"),
                ("videourl_m3u8", "videoUrlM3U8"),
                ("title", "title"),
                ("programmaTV", "program"),
                ("description", "description"),
                ("tipo", "type"),
                ("itemDate", "date"),
            )
            for meta_key, attribute in meta_to_attribute:
                val = self.extract(attrs, meta_key)
                if val:
                    setattr(self.values, attribute, val)

            # Unlike the plain fields, the page id is expanded to a URL.
            val = self.extract(attrs, "idPageProgramma")
            if val:
                self.values.page = RAIUrls.base + RAIUrls.getWebFromID(val)

        elif tag == "param":
            if attrs and attrs[0][0] == "value":
                path = attrs[0][1]
                # A value-less attribute may be reported as None
                # (html.parser does this); skip it instead of crashing.
                if path and path.startswith("videoPath"):
                    firstEqual = path.find("=")
                    firstComma = path.find(",")
                    if firstComma == -1:
                        # find() returned -1: slicing with it would cut off
                        # the last character, so run to the end instead.
                        firstComma = len(path)
                    self.values.videoPath = path[firstEqual + 1:firstComma]
Beispiel #7
0
def processPage(grabber, progress, folder, f, db, downType):
    """Download and process every video block listed in a page's menu.

    ``f`` is parsed as XML.  For each direct child of the root's ``menu``
    element that is an ``item`` with ``id="video"``, the ``path`` attribute
    of its ``src`` child is turned into a URL with
    ``RAIUrls.getJuniorBlock``, fetched through ``Utils.download`` and, when
    that yields something, passed to ``processBlock``.

    Pages without a ``menu`` element, and video items without a ``src``
    child or without a ``path``, are skipped instead of raising.

    Args:
        grabber, progress, downType: forwarded to ``Utils.download`` and
            ``processBlock``.
        folder: directory used to build the local file name.
        f: file name or file object accepted by ``ElementTree.parse``.
        db: forwarded to ``processBlock``.
    """
    menu = ElementTree.parse(f).getroot().find('menu')
    if menu is None:
        # find() returns None when the element is absent; nothing to do.
        return

    for e in menu:
        if e.tag != "item" or e.attrib.get("id") != "video":
            continue

        src = e.find("src")
        if src is None:
            continue

        path = src.attrib.get("path")
        if not path:
            continue

        url = RAIUrls.getJuniorBlock(path)
        localFilename = os.path.join(folder, Utils.httpFilename(url))

        g = Utils.download(grabber, progress, url, localFilename, downType,
                           None, True)

        # The download may yield nothing; only process real results.
        if g:
            processBlock(grabber, progress, folder, g, db, downType)
Beispiel #8
0
    def __init__(self, grabber, url, downType, pid):
        """Build an item by downloading and parsing its web page.

        ``Utils.download`` supplies the page, a ``VideoHTMLParser`` extracts
        the metadata, and the parsed values fill in the title, the stream
        URLs and the date.

        Args:
            grabber: kept on the item and handed to ``Utils.download``.
            url: page address; a value without a URL scheme is expanded
                with ``RAIUrls.getItemUrl`` first.
            downType: passed through to ``Utils.download``.
            pid: identifier kept on the item.

        A page whose declared type is not "Video" ends up with ``url`` and
        ``filename`` set to ``None`` and ``mms`` left as ``None``.  A page
        with neither a video URL nor a video path leaves ``mms`` empty
        rather than raising.
        """
        super(Demand, self).__init__()

        self.grabber = grabber

        if not urllib.parse.urlparse(url).scheme:
            url = RAIUrls.getItemUrl(url)

        self.url = url
        self.pid = pid

        localFilename = os.path.join(Config.itemFolder, Utils.httpFilename(self.url))

        f = Utils.download(grabber, None, self.url, localFilename, downType, "utf-8")

        parser = VideoHTMLParser()
        parser.feed(f.read())

        self.values = parser.values

        self.channel = "item"
        self.title = self.values.title
        self.ts = self.values.videoUrlM3U8

        # NOTE(review): self.h264 is not assigned here; presumably the base
        # initializer creates it -- confirm.
        Utils.addH264Url(self.h264, 0, self.values.videoUrlH264)

        if self.values.date:
            self.datetime = datetime.datetime.strptime(self.values.date, "%d/%m/%Y")

        self.mms = None

        if self.values.type and self.values.type != "Video":
            # this is a case of a Photogallery
            self.url = None
            self.filename = None
            return

        # Use the explicit video URL when present, else the video path.
        video_url = self.values.videoUrl or self.values.videoPath
        if video_url:
            # sometimes we get .mp4 which does not work
            # (only rewritten when a URL exists: .replace on None would
            # raise AttributeError)
            video_url = video_url.replace("relinkerServlet.mp4", "relinkerServlet.htm")
        self.values.videoUrl = video_url

        # make a nice filename
        self.filename = Utils.makeFilename(self.title)

        self.mms = video_url
Beispiel #9
0
def process(grabber, progress, folder, f, db, downType):
    """Download and process the page of every named ``elemento`` in ``f``.

    ``f`` is parsed as XML.  Each direct ``elemento`` child of the root that
    has a non-empty ``uniqueName`` is mapped to a URL with
    ``RAIUrls.getJuniorPage``, fetched through ``Utils.download`` and, when
    that yields something, handed to ``processPage``.
    """
    root = ElementTree.parse(f).getroot()

    for element in root.findall("elemento"):
        unique_name = element.findtext("uniqueName")
        if not unique_name:
            # Missing node or empty text: nothing to look up.
            continue

        page_url = RAIUrls.getJuniorPage(unique_name)
        local_path = os.path.join(folder, Utils.httpFilename(page_url))

        page = Utils.download(
            grabber, progress, page_url, local_path, downType, None, True
        )
        if page:
            processPage(grabber, progress, folder, page, db, downType)
Beispiel #10
0
def download(db, grabber, url, downType):
    """Fetch the data file behind a page and add its entries to ``db``.

    The page name is the file name of ``url`` without its extension.  Its
    data URL comes from ``RAIUrls.getPageDataUrl`` and the local file name
    given to ``Utils.download`` is ``<page>.xml`` in ``Config.pageFolder``.
    Every ``content`` child of the parsed root becomes an ``Elem`` with a
    fresh pid and is stored with ``Utils.addToDB``.
    """
    page_name = os.path.splitext(Utils.httpFilename(url))[0]

    data_url = RAIUrls.getPageDataUrl(page_name)
    xml_path = os.path.join(Config.pageFolder, page_name + ".xml")

    handle = Utils.download(grabber, None, data_url, xml_path, downType, "utf-8")

    # Trim surrounding whitespace and let Utils.removeInvalidXMLCharacters
    # clean the text before it reaches the XML parser.
    xml_text = Utils.removeInvalidXMLCharacters(handle.read().strip())
    root = ElementTree.fromstring(xml_text)

    for node in root.findall("content"):
        new_pid = Utils.getNewPID(db, None)
        Utils.addToDB(db, Elem(new_pid, grabber, node))