Esempio n. 1
0
    def parseTitle(self, article, pos, baseUrl):
        """Extract title, URL and content name of the next anchor in ``article``.

        ``article`` is searched from ``pos`` with the module-level
        ``aPattern``. The following attributes are always (re)assigned:

        * ``self.title`` - anchor text up to the closing ``</a>`` (or up to a
          ``<span class="arrowhover`` marker that precedes it), run through
          ``cleanTags``, stripped, then passed to ``stripHtml``. When no
          anchor matches, ``stripHtml`` receives ``None``.
        * ``self.url`` - group 1 of the match with a leading ``baseUrl``
          removed, or ``None`` when nothing matched.
        * ``self.contentName`` - ``'/zdf'`` followed by the matched URL
          (before ``baseUrl`` removal) without its last ``.suffix``, or
          ``None`` when there is no URL or the URL contains no dot.

        Returns the position at which parsing should continue; ``pos`` is
        returned unchanged when no anchor was found.
        """
        aMatch = aPattern.search(article, pos)
        title = None
        url = None
        if aMatch is not None:
            url = aMatch.group(1).strip()
            textStart = aMatch.end(0)

            textEnd = article.find('</a>', textStart)
            if textEnd == -1:
                # Unterminated anchor: str.find() signals "not found" with -1,
                # which would otherwise be used as a slice bound and rewind
                # the returned position. Treat the rest of the article as
                # the anchor body instead.
                textEnd = len(article)

            # check for '<span class="arrowhover ...' inside the anchor
            arrowStart = article.find('<span class="arrowhover', textStart)
            if arrowStart != -1 and arrowStart < textEnd:
                textEnd = arrowStart

            title = cleanTags(article[textStart:textEnd]).strip()
            # Clamp so the position never points past the article.
            pos = min(textEnd + len('</a>'), len(article))

        self.title = stripHtml(title)
        self.url = url
        self.contentName = None
        if url is not None:
            if baseUrl is not None and url[0:len(baseUrl)] == baseUrl:
                self.url = url[len(baseUrl):]
            dot = url.rfind('.')
            if dot != -1:
                self.contentName = '/zdf' + url[0:dot]
        return pos
Esempio n. 2
0
    def parseTeaserInfo(self, article, pos, pattern=teaserInfoPattern):
        """Parse the teaser-info ``<dd>`` element following ``pos``.

        The element located by ``pattern`` is fetched with ``getTag``, cut
        off at the first ``teaserInfoIsTiviPattern`` match, cleaned with
        ``cleanTags`` and split on the middle dot (U+00B7). Each part is
        classified in this order:

        * matches ``teaserInfoDurationPattern`` -> duration (group 1)
        * matches ``teaserInfoEpisodePattern``  -> season / episode
          (groups 1 and 2)
        * anything else                         -> genre (last one wins)

        A purely numeric duration is multiplied by 60 and marks the teaser
        as playable.

        Side effects: ``self.duration``, ``self.season`` and ``self.episode``
        are overwritten; ``self.playable`` is only ever switched on and
        ``self.genre`` is only filled while it is still ``None``. Both
        attributes must already exist on the instance.

        Returns the end of the ``pattern`` match, or ``pos`` unchanged when
        the pattern did not match.
        """
        teaserInfoMatch = pattern.search(article, pos)
        playable = False
        duration = None
        season = None
        episode = None
        genre = None
        if teaserInfoMatch is not None:
            teaserInfo = getTag('dd', article, teaserInfoMatch)
            isTiviMatch = teaserInfoIsTiviPattern.search(teaserInfo)
            if isTiviMatch is not None:
                teaserInfo = teaserInfo[0:isTiviMatch.start(0)]
            teaserInfo = cleanTags(teaserInfo)

            # The separator has to be of the same string type as the text
            # being split: a UTF-8 encoded separator only works on byte
            # strings and raises on text (unicode) input.
            sep = u'\xb7'
            if isinstance(teaserInfo, bytes):
                sep = sep.encode('utf-8')

            for part in teaserInfo.split(sep):
                part = part.strip()
                partMatch = teaserInfoDurationPattern.search(part)
                if partMatch is not None:
                    duration = partMatch.group(1)
                    continue
                partMatch = teaserInfoEpisodePattern.search(part)
                if partMatch is not None:
                    season = partMatch.group(1)
                    episode = partMatch.group(2)
                    continue
                genre = part

            if duration is not None and duration.isdigit():
                # factor 60: presumably minutes -> seconds (not shown here)
                duration = int(duration) * 60
                playable = True

            pos = teaserInfoMatch.end(0)

        if not self.playable and playable:
            self.playable = playable
        self.duration = duration
        if self.genre is None:
            self.genre = genre
        self.season = season
        self.episode = episode
        return pos
Esempio n. 3
0
    def parseLabel(self, article, pos):
        """Parse the label ``<div>`` following ``pos`` in ``article``.

        The element located by ``labelPattern`` is fetched with ``getTag``.
        Group 1 of an ``iconPattern`` match inside it becomes ``self.type``.
        The text between the first ``>`` and the last ``</div>`` of the
        element, with ``abbr`` tags removed via ``stripTag``, cleaned with
        ``cleanTags`` and stripped, is passed through ``stripHtml`` and
        stored in ``self.label``.

        When ``labelPattern`` does not match, ``self.type`` is ``None`` and
        ``stripHtml`` receives ``None``.

        Returns the end of the ``labelPattern`` match in ``article``, or
        ``pos`` unchanged when there was no match.
        """
        labelMatch = labelPattern.search(article, pos)
        label = None
        iconType = None
        if labelMatch is not None:
            labelTags = getTag('div', article, labelMatch)
            iconMatch = iconPattern.search(labelTags)
            if iconMatch is not None:
                iconType = iconMatch.group(1)

            contentStart = labelTags.find('>') + len('>')
            contentEnd = labelTags.rfind('</div>')
            if contentEnd == -1:
                # No closing tag inside the fragment: keep everything instead
                # of letting -1 chop off the last character.
                contentEnd = len(labelTags)

            label = labelTags[contentStart:contentEnd]
            label = stripTag('abbr', label)
            label = cleanTags(label)
            label = label.strip()

            # contentEnd indexes labelTags, not article, so it cannot be used
            # as the continuation position. Report an article offset, the
            # same way parseTeaserInfo does after getTag().
            pos = labelMatch.end(0)

        self.label = stripHtml(label)
        self.type = iconType
        return pos
Esempio n. 4
0
    def parseTitle(self, article, pos, baseUrl):
        """Extract title, URL, playable flag and content name of the next anchor.

        ``article`` is searched from ``pos`` with the module-level
        ``aPattern``. If ``titleIconPattern`` matches after the anchor's
        opening tag, the teaser is playable when group 1 equals ``'play'``
        and the title text starts after the next ``</span>``.

        The following attributes are always (re)assigned:

        * ``self.title`` - anchor text up to the closing ``</a>`` (or up to a
          ``<span class="arrowhover`` marker that precedes it), run through
          ``cleanTags``, stripped, then passed to ``stripHtml``. When no
          anchor matches, ``stripHtml`` receives ``None``.
        * ``self.url`` - group 1 of the match with a leading ``baseUrl``
          removed, or ``None`` when nothing matched.
        * ``self.playable`` - see above; ``False`` without an icon match.
        * ``self.contentName`` - the part of the matched URL between its
          last ``/`` and its last ``.``; ``None`` when either is missing.

        Returns the position at which parsing should continue; ``pos`` is
        returned unchanged when no anchor was found.
        """
        aMatch = aPattern.search(article, pos)
        title = None
        url = None
        playable = False
        if aMatch is not None:
            url = aMatch.group(1).strip()
            pos = aMatch.end(0)
            textStart = pos

            iconMatch = titleIconPattern.search(article, pos)
            if iconMatch is not None:
                playable = iconMatch.group(1) == 'play'
                spanEnd = article.find('</span>', pos)
                if spanEnd != -1:
                    # Only skip the icon when its closing tag exists; with
                    # find() == -1 the start would otherwise jump to index 6.
                    textStart = spanEnd + len('</span>')

            textEnd = article.find('</a>', textStart)
            if textEnd == -1:
                # Unterminated anchor: use the rest of the article rather
                # than -1 as slice bound / continuation position.
                textEnd = len(article)

            # check for '<span class="arrowhover ...' inside the anchor
            arrowStart = article.find('<span class="arrowhover', textStart)
            if arrowStart != -1 and arrowStart < textEnd:
                textEnd = arrowStart

            title = cleanTags(article[textStart:textEnd]).strip()
            # Clamp so the position never points past the article.
            pos = min(textEnd + len('</a>'), len(article))

        self.title = stripHtml(title)
        self.url = url
        self.playable = playable
        self.contentName = None
        if url is not None:
            if baseUrl is not None and url[0:len(baseUrl)] == baseUrl:
                self.url = url[len(baseUrl):]
            dot = url.rfind('.')
            if dot != -1:
                slash = url.rfind('/')
                if slash != -1:
                    self.contentName = url[slash + 1:dot]
        return pos