Example #1
0
 def checkPage(self, url):
     """Reset the connection of `url` if it shows a cookie or captcha page.

     `url` is an object exposing `data`, `clear_connection()` and
     `setCacheWriteOnly()`.

     Returns the same `url` object after the optional reset. If a captcha
     image is still found in the page after the reset, two error messages
     are logged and the process is terminated through `sys.exit()`.
     """
     # Substring test instead of `find(...) > 0`: the latter ignored a match
     # located at the very beginning of the page (index 0).
     if '<title>How to Enable Cookies</title>' in url.data:
         # reconnect and set cookie through it
         url.clear_connection()
         url.setCacheWriteOnly()
         return url

     if not textextract(url.data, 'src="/captcha/?rnd=', '"'):
         # neither cookie page nor captcha: nothing to do
         return url

     url.clear_connection()
     url.setCacheWriteOnly()
     # The captcha is looked up a second time after the reset.
     # NOTE(review): presumably `url.data` is fetched again at this point - confirm.
     imgUrl = textextract(url.data, 'src="/captcha/?rnd=', '"')
     if imgUrl:
         log.error("as i said.. a captcha")
         log.error("please visit http://www.eliteanimes.com/ and enter the captcha and you won't be bothered again")
         # TODO crack this captcha and return a new url object
         imgUrl = 'http://www.eliteanimes.com/captcha/?rnd='+imgUrl
         url = UrlMgr(url=imgUrl, cache_writeonly=True)
         import sys
         sys.exit()
     return url
Example #2
0
    def get(self):
        """Extract the media described by the page at `self.link`.

        Loads the page with `self.cookies`, creates the media object through
        `self.getMedia()` and adds one sub entry per row of the `partlist`
        table. For every stream row of such an entry an alternative (hoster,
        language, subtitle) with its parts (url, flv_type, size) is created.
        Tags ('Zielgruppe', 'Genres') and the year are read from the page too.

        Returns the result of `self.afterExtract(media)`, or None when
        `getMedia()` gives nothing or the page has no `partlist` element.
        """
        link = self.link
        url = UrlMgr(url=link, cookies=self.cookies, encoding='utf-8')
        name = textextract(textextract(url.data, '<h2>','</h2>'), ' :: ', '</span>')
        media = self.getMedia(name, link)

        if not media:
            return None

        # there is no season information on that page :/
        # look if it is a tvshow by that string and just assume a season
        season = 1 if "Anime-Serie ::" in url.data else 0

        root = html.fromstring(url.data)
        try:
            listTable = root.get_element_by_id('partlist')
        except KeyError:
            # NOTE(review): assuming `html` is lxml.html, KeyError is what
            # get_element_by_id raises for an unknown id - confirm.
            log.error("no partlist table inside data")
            log.error(link)
            log.error(url.data)
            return None
        for row in listTable.iterfind(".//tr[@class='link']"):
            part = media.createSub()
            part.season = season
            for curCol, column in enumerate(row.iterfind("td"), 1):
                if curCol == 1:
                    part.num = column.text
                elif curCol == 2:
                    part.name = column.text
                elif curCol == 6: # stream links (column 5 holds the download links and is ignored)
                    dlTable = column.find(".//table[@class='dltable']")
                    if dlTable is None:
                        dlTable = column.find(".//table[@class='list']")
                        if dlTable is None:
                            log.error("no downloadtable in %s", link)
                            continue
                    # they use streamCurCol == 4 with the content "Part 1", "Part 2" etc to name this
                    # but sometimes streamCurCol == 4 would be the size.. so it is quite complicated
                    # hasMultipleParts tells whether the previous stream row was a "Part ..." row
                    hasMultipleParts = False
                    for streamRow in dlTable.iterfind(".//tr[@class='medialink']"):
                        if hasMultipleParts:
                            rowString = etree.tostring(streamRow)
                            # create an alternative if that row has no "Part XYZ" inside it
                            # or if that row is Part1/Part 1
                            # otherwise the row is appended to the alternative of the previous row
                            if rowString.find("Part") == -1 or rowString.find("Part 1") != -1 or rowString.find("Part1") != -1:
                                alternative = part.createSub()
                        else:
                            alternative = part.createSub()
                        hasMultipleParts = False
                        for streamCurCol, streamColumn in enumerate(streamRow.iterfind("td"), 1):
                            streamColumnString = etree.tostring(streamColumn)
                            if streamCurCol == 1:
                                tmp = re.search(r"hoster/(.*?)\.png", streamColumnString)
                                if tmp:
                                    alternative.hoster = tmp.group(1)
                                alternativePart = alternative.createSub()
                                redirectUrl = re.search(r'a href="(.*?)"', streamColumnString)
                                if not redirectUrl:
                                    # without a link there is nothing more to read from this column
                                    continue
                                alternativePart.url = redirectUrl.group(1)
                                flv_type = re.search('src="images/hoster/(.*?).png"', streamColumnString)
                                if flv_type:
                                    alternativePart.flv_type = flv_type.group(1)
                            elif streamCurCol == 2:
                                # there can exist multiple langs but i take just one
                                lang = re.search(r"lang/(..)\.png", streamColumnString)
                                if lang:
                                    lang = lang.group(1)
                                alternative.language = getLanguage(lang, 'de')
                            elif streamCurCol == 3:
                                # there can exist multiple langs but i take just one
                                lang = re.search(r"lang/(..)\.png", streamColumnString)
                                if lang:
                                    lang = lang.group(1)
                                alternative.subtitle = getLanguage(lang)
                            elif streamCurCol == 4:
                                sizeText = streamColumn.text
                                try:
                                    size = int(sizeText)
                                except (TypeError, ValueError):
                                    # TypeError: the cell has no text at all (None)
                                    # ValueError: the text is not a number, e.g. "Part 1"
                                    if sizeText is not None and sizeText[:4] == "Part":
                                        # with the next part 1 we will create a new alternative
                                        hasMultipleParts = True
                                    else:
                                        log.warning("This media file might have multiple parts but not sure: %s", sizeText)
                                else:
                                    alternativePart.size = size

        tags = []
        for i in ('Zielgruppe', 'Genres'):
            newTags = textextract(url.data, '<dt>'+i+'</dt>', '</dd>')
            if newTags:
                newTags = textextract(newTags, '<dd>', '')
                newTags = newTags.split(', ')
                tags.extend(newTags)
        year = textextract(url.data, '<dt>Jahr</dt>', '</dd>')
        if year:
            # same guard as for the tags: only dig deeper when the entry exists
            year = textextract(year, '<dd>', '')
        try:
            media.year = int(year[:4])
        except (TypeError, ValueError):
            # TypeError: no year entry was found (None); ValueError: not a number
            log.warning("Problem with year in %s", link)
        media.addTags(tags)
        return self.afterExtract(media)