Code Example #1
    def scrape_movie(self, title, year, imdb, debrid=False):
        try:
            query = self.search_link % (urllib.quote_plus(title))
            query = urlparse.urljoin(self.base_link, query)
            cleanedTitle = cleantitle.get(title)

            result = client.request(query)
            result = result.decode('iso-8859-1').encode('utf-8')

            items = client.parseDOM(result, 'item')

            url = None
            for item in items:
                linkTitle = client.parseDOM(item, 'title')[0]

                try:
                    # Feed titles look like "Name (2019) ..."; keep the name.
                    parsedTitle = re.compile('(.+?) \((\d{4})\) ').findall(linkTitle)[0][0]
                except:
                    parsedTitle = ''

                if cleanedTitle == cleantitle.get(parsedTitle):
                    url = client.parseDOM(item, 'link')[0]
                    break

            if url is None:
                return []
            return self.sources(client.replaceHTMLCodes(url))
        except Exception as e:
            logger.error('[%s] Exception : %s' % (self.__class__, e))
        return []
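Note: every example on this page keys its matching on cleantitle.get, whose implementation is not shown here. A hypothetical stand-in (clean_get below is an illustration, not the project's code) shows the kind of normalisation these equality checks assume:

import re

def clean_get(title):
    # Hypothetical stand-in for cleantitle.get: lower-case and drop
    # everything but letters and digits, so a decorated feed title and a
    # plain title compare equal.
    return re.sub(r'[^a-z0-9]', '', title.lower())

assert clean_get('The Movie!') == clean_get('the movie')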
Code Example #2
File: einthusan.py, Project: nadzvee/ProjectSwan
    def scrape_movie(self, title, year, imdb, debrid=False):
        try:
            langMap = {'hi': 'hindi', 'ta': 'tamil', 'te': 'telugu', 'ml': 'malayalam',
                       'kn': 'kannada', 'bn': 'bengali', 'mr': 'marathi', 'pa': 'punjabi'}

            # Resolve the movie's primary language from its IMDb page: collect
            # hrefs, keep those carrying a primary_language query parameter,
            # and map the two-letter code to the site's language name.
            lang = 'http://www.imdb.com/title/%s/' % imdb
            lang = client.request(lang)
            lang = re.findall('href\s*=\s*[\'|\"](.+?)[\'|\"]', lang)
            lang = [i for i in lang if 'primary_language' in i]
            lang = [urlparse.parse_qs(urlparse.urlparse(i).query) for i in lang]
            lang = [i['primary_language'] for i in lang if 'primary_language' in i]
            lang = langMap[lang[0][0]]

            q = self.search_link % (lang, urllib.quote_plus(title))
            q = urlparse.urljoin(self.base_link, q)

            t = cleantitle.get(title)

            r = self.request(q)

            # Each result <li> holds a link, an <h3> title and an info <div>;
            # keep the id of the entry whose cleaned title and year both match.
            r = client.parseDOM(r, 'li')
            r = [(client.parseDOM(i, 'a', ret='href'), client.parseDOM(i, 'h3'), client.parseDOM(i, 'div', attrs={'class': 'info'})) for i in r]
            r = [(i[0][0], i[1][0], i[2][0]) for i in r if i[0] and i[1] and i[2]]
            r = [(re.findall('(\d+)', i[0]), i[1], re.findall('(\d{4})', i[2])) for i in r]
            r = [(i[0][0], i[1], i[2][0]) for i in r if i[0] and i[2]]
            r = [i[0] for i in r if t == cleantitle.get(i[1]) and year == i[2]][0]

            url = str(r)
            return self.sources(client.replaceHTMLCodes(url))
        except Exception as e:
            logger.error('[%s] Exception : %s' % (self.__class__, e))

        return []
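Note: the IMDb step above hinges on pulling the primary_language query parameter out of anchor hrefs. A self-contained sketch of just that step (the sample href is illustrative):

import re

try:
    from urllib.parse import urlparse, parse_qs  # Python 3
except ImportError:
    from urlparse import urlparse, parse_qs  # Python 2, as in the snippets

html = '<a href="/search/title?primary_language=ta&ref_=tt_dt_ln">Tamil</a>'  # illustrative markup

hrefs = re.findall(r'href\s*=\s*[\'"](.+?)[\'"]', html)
hrefs = [h for h in hrefs if 'primary_language' in h]
codes = [parse_qs(urlparse(h).query)['primary_language'][0] for h in hrefs]
assert codes == ['ta']  # parse_qs returns value lists, hence the [0]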
Code Example #3
    def scrape_movie(self, title, year, imdb, debrid=False):
        try:
            query = '%s %s' % (title, year)
            query = self.search_link % (urllib.quote_plus(query))
            query = urlparse.urljoin(self.base_link, query)

            result = client.request(query)
            result = result.decode('iso-8859-1').encode('utf-8')
            result = client.parseDOM(result, 'item')

            cleanedTitle = cleantitle.get(title)

            url = None
            for item in result:
                linkTitle = client.parseDOM(item, 'title')[0]

                if cleanedTitle == cleantitle.get(linkTitle):
                    url = client.parseDOM(item, 'link')[0]
                    break

            if url is None:
                return []
            return self.sources(client.replaceHTMLCodes(url))
        except Exception as e:
            logger.error('[%s] Exception : %s' % (self.__class__, e))
        return []
Code Example #4
    def scrape_movie(self, title, year, imdb, debrid=False):
        try:
            t = cleantitle.get(title)

            query = '%s %s' % (title, year)
            query = base64.b64decode(self.search_link) % urllib.quote_plus(query)

            result = client.request(query)
            result = json.loads(result)['items']
            r = [(i['link'], i['title']) for i in result]
            # Split "Name 2019"-style result titles into the bare name.
            r = [(i[0], re.compile('(.+?) [\d{4}|(\d{4})]').findall(i[1])) for i in r]
            r = [(i[0], i[1][0]) for i in r if len(i[1]) > 0]
            r = [x for y, x in enumerate(r) if x not in r[:y]]  # order-preserving de-duplication
            r = [i for i in r if t == cleantitle.get(i[1])]
            if not r:  # was "r == None": a list is never None, so the guard never fired
                raise Exception()
            url = r[0][0]  # first surviving link (restores the intent of the commented-out "u = ..." line)

            return self.sources(url)
        except Exception as e:
            logger.error('[%s] Exception : %s' % (self.__class__, e))
        return []
Code Example #5
File: rajtamil.py, Project: nadzvee/ProjectSwan
    def scrape_movie(self, title, year, imdb, debrid=False):
        try:
            query = '%s %s' % (title, year)
            query = self.search_link % (urllib.quote_plus(query))
            query = urlparse.urljoin(self.base_link, query)

            result = client.request(query, error=True)

            items = client.parseDOM(result, 'item')

            cleanedTitle = cleantitle.get(title)

            url = None
            for item in items:
                linkTitle = client.parseDOM(item, 'title')[0]

                # Substring match: feed titles carry extra decoration.
                if cleanedTitle in cleantitle.get(linkTitle):
                    url = client.parseDOM(item,
                                          'a',
                                          attrs={'rel': 'nofollow'},
                                          ret='href')[0]
                    break

            if url is None:
                return []
            return self.sources(client.replaceHTMLCodes(url))
        except Exception as e:
            logger.error('[%s] Exception : %s' % (self.__class__, e))
        return []
Code Example #6
    def scrape_episode(self,
                       title,
                       show_year,
                       year,
                       season,
                       episode,
                       imdb,
                       tvdb,
                       debrid=False):
        try:
            query = '%s %s' % (title, episode)
            query = self.search_link % (urllib.quote_plus(query))

            result = client.request(self.base_link + query)
            result = result.decode('iso-8859-1').encode('utf-8')
            result = result.replace('\n', '').replace('\t', '')

            items = client.parseDOM(result, 'item')

            cleanedTitle = cleantitle.get('%s %s' % (title, episode))

            url = None
            for item in items:
                linkTitle = client.parseDOM(item, 'title')[0]
                # The site suffixes every feed title with "Watch Online Episode HD".
                linkTitle = cleantitle.get(linkTitle).replace('watchonlineepisodehd', '')
                if cleanedTitle == linkTitle:
                    url = client.parseDOM(item, 'link')[0]
                    break

            if url is None:
                return self.srcs
            return self.sources(client.replaceHTMLCodes(url))
        except:
            return self.srcs
Code Example #7
File: watchfree.py, Project: nadzvee/ProjectSwan
    def scrape_movie(self, title, year, imdb, debrid=False):
        try:
            query = self.moviesearch_link % urllib.quote_plus(
                cleantitle.query(title))
            query = urlparse.urljoin(self.base_link, query)

            result = str(proxy.request(query, 'item'))
            if 'page=2' in result or 'page%3D2' in result:
                result += str(proxy.request(query + '&page=2', 'item'))

            result = client.parseDOM(result, 'div', attrs={'class': 'item'})

            title = 'watchputlocker' + cleantitle.get(title)
            years = [
                '(%s)' % str(year),
                '(%s)' % str(int(year) + 1),
                '(%s)' % str(int(year) - 1)
            ]

            result = [(client.parseDOM(i, 'a', ret='href'),
                       client.parseDOM(i, 'a', ret='title')) for i in result]
            result = [(i[0][0], i[1][0]) for i in result
                      if len(i[0]) > 0 and len(i[1]) > 0]
            result = [i for i in result if any(x in i[1] for x in years)]

            r = [(proxy.parse(i[0]), i[1]) for i in result]

            match = [
                i[0] for i in r
                if title == cleantitle.get(i[1]) and '(%s)' % str(year) in i[1]
            ]

            match2 = [i[0] for i in r]
            match2 = [x for y, x in enumerate(match2) if x not in match2[:y]]
            if not match2: return []

            url = None
            for i in match2[:5]:
                try:
                    # Prefer an exact title/year match; otherwise confirm the
                    # candidate page carries the right IMDb id.
                    if len(match) > 0:
                        url = match[0]
                        break
                    r = proxy.request(urlparse.urljoin(self.base_link, i),
                                      'link_ite')
                    r = re.findall('(tt\d+)', r)
                    if imdb in r:
                        url = i
                        break
                except:
                    pass

            if url is None: return []

            url = re.findall('(?://.+?|)(/.+)', url)[0]
            url = client.replaceHTMLCodes(url)
            url = url.encode('utf-8')
            return self.sources(client.replaceHTMLCodes(url))
        except Exception as e:
            logger.error('[%s] Exception : %s' % (self.__class__, e))
        return []
Code Example #8
    def sources(self, url):
        try:
            logger.debug('SOURCES URL %s' % url, __name__)

            if url is None: return []

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

            title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']

            cleanedTitle = cleantitle.get(title)

            # hdlr is the marker a valid release name must carry:
            # 'SxxEyy' for episodes, the year for movies.
            hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']

            query = '%s S%02dE%02d' % (data['tvshowtitle'], int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else '%s %s' % (data['title'], data['year'])
            query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)  # drop characters the search endpoint chokes on

            url = self.search_link % urllib.quote_plus(query)
            url = urlparse.urljoin(self.base_link, url)

            r = client.request(url)

            posts = client.parseDOM(r, 'item')

            items = []

            for post in posts:
                try:
                    t = client.parseDOM(post, 'title')[0]
                    post = post.replace('\n','').replace('\t','')
                    post = re.compile('<span style="color: #ff0000">Single Link</b></span><br />(.+?)<span style="color: #ff0000">').findall(post)[0]
                    u = re.findall('<a href="(http(?:s|)://.+?)">', post)
                    items += [(t, i) for i in u]
                except:
                    pass

            for item in items:
                try:
                    name = client.replaceHTMLCodes(item[0])

                    linkTitle = re.sub('(\.|\(|\[|\s)(\d{4}|S\d*E\d*|S\d*|3D)(\.|\)|\]|\s|)(.+|)', '', name)

                    if not cleanedTitle == cleantitle.get(linkTitle): raise Exception()

                    year = re.findall('[\.|\(|\[|\s](\d{4}|S\d*E\d*|S\d*)[\.|\)|\]|\s]', name)[-1].upper()

                    if not year == hdlr: raise Exception()

                    self.source(item)
                except:
                    pass

            logger.debug('SOURCES [%s]' % self.srcs, __name__)
            return self.srcs
        except:
            return self.srcs
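Note: the filtering above (and in examples #13 and #14) rests on two regexes: one strips everything from the first year/SxxEyy/3D token to recover the bare title, the other collects those tokens so the last one can be compared against hdlr. A self-contained sketch with illustrative release names:

import re

def split_release(name):
    # Bare title: cut from the first year / SxxEyy / season / 3D token onward.
    title = re.sub(r'(\.|\(|\[|\s)(\d{4}|S\d*E\d*|S\d*|3D)(\.|\)|\]|\s|)(.+|)', '', name)
    # Marker: the last year / SxxEyy / season token in the name.
    tokens = re.findall(r'[\.|\(|\[|\s](\d{4}|S\d*E\d*|S\d*)[\.|\)|\]|\s]', name)
    return title, (tokens[-1].upper() if tokens else None)

assert split_release('Some.Movie.2017.1080p.WEB-DL') == ('Some.Movie', '2017')
assert split_release('The Office S09E06 720p HDTV') == ('The Office', 'S09E06')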
Code Example #9
File: desirulez.py, Project: nadzvee/ProjectSwan
    def scrape_movie(self, title, year, imdb, debrid=False):
        try:
            # Match against the cached site index (cache timeout 168,
            # presumably hours).
            movies = cache.get(self.desiRulezCache, 168)
            url = [
                i['url'] for i in movies
                if cleantitle.get(i['title'].decode('UTF-8')) == cleantitle.get(title)
            ][0]
            return self.sources(client.replaceHTMLCodes(url))
        except Exception as e:
            logger.error(e)
        return []
Code Example #10
    def scrape_movie(self, title, year, imdb, debrid=False):
        try:
            query = cleantitle.get(title)
            query = '/watch?v=%s_%s' % (query.replace(' ', '_'), year)
            query = urlparse.urljoin(self.base_link, query)
            headers = {'User-Agent': self.userAgent}

            result = client.request(query, headers=headers)

            # The embed frame URL doubles as the key for the stream-info endpoint.
            varid = re.compile('var frame_url = "(.+?)"', re.DOTALL).findall(result)[0].replace('/embed/', '/streamdrive/info/')
            res_chk = re.compile('class="title"><h1>(.+?)</h1>', re.DOTALL).findall(result)[0]
            varid = 'http:' + varid
            holder = client.request(varid, headers=headers).content
            links = re.compile('"src":"(.+?)"', re.DOTALL).findall(holder)

            # Resolution is advertised in the page title, not per link.
            if '1080' in res_chk:
                res = '1080p'
            elif '720' in res_chk:
                res = '720p'
            else:
                res = 'DVD'

            for link in links:
                link = link.replace('\\/redirect?url=', '')
                link = urllib.unquote(link).decode('utf8')
                self.srcs.append({'source': 'Googlelink', 'parts': '1', 'quality': res, 'scraper': self.name, 'url': link, 'direct': False})

            return self.srcs
        except Exception as e:
            logger.error('[%s] Exception : %s' % (self.__class__, e))
        return []
Code Example #11
File: hl.py, Project: nadzvee/ProjectSwan
    def get_muscic_url(scraper,
                       title,
                       artist,
                       cache_location,
                       maximum_age,
                       debrid=False):
        cache_enabled = control.setting("cache_enabled") == 'true'
        try:
            dbcon = database.connect(cache_location)
            dbcur = dbcon.cursor()

            try:
                dbcur.execute("SELECT * FROM version")
                match = dbcur.fetchone()
            except:
                desiscrapers.clear_cache()
                dbcur.execute("CREATE TABLE version (" "version TEXT)")
                dbcur.execute("INSERT INTO version Values ('0.5.4')")
                dbcon.commit()

            dbcur.execute("CREATE TABLE IF NOT EXISTS rel_music_src ("
                          "scraper TEXT, "
                          "title Text, "
                          "artist TEXT, "
                          "urls TEXT, "
                          "added TEXT, "
                          "UNIQUE(scraper, title, artist)"
                          ");")
        except:
            pass

        if cache_enabled:
            try:
                sources = []
                dbcur.execute(
                    "SELECT * FROM rel_music_src WHERE scraper = '%s' AND title = '%s' AND artist = '%s'"
                    % (scraper.name, cleantitle.get(title).upper(),
                       artist.upper()))
                match = dbcur.fetchone()
                # Timestamps are compared as flattened YYYYMMDDHHMM integers,
                # so maximum_age is measured in (approximate) minutes.
                t1 = int(re.sub('[^0-9]', '', str(match[4])))
                t2 = int(datetime.datetime.now().strftime("%Y%m%d%H%M"))
                update = abs(t2 - t1) > maximum_age
                if update == False:
                    sources = json.loads(match[3])
                    return sources
            except:
                pass

        try:
            sources = scraper.scrape_music(title, artist, debrid=debrid)
            if sources is None:
                sources = []
            else:
                if cache_enabled:
                    dbcur.execute(
                        "DELETE FROM rel_music_src WHERE scraper = '%s' AND title = '%s' AND artist = '%s'"
                        % (scraper.name, cleantitle.get(title).upper(),
                           artist.upper()))  # was artist.upper (no call), so the DELETE never matched a row
                    dbcur.execute(
                        "INSERT INTO rel_music_src Values (?, ?, ?, ?, ?)",
                        (scraper.name, cleantitle.get(title).upper(),
                         artist.upper(), json.dumps(sources),
                         datetime.datetime.now().strftime("%Y-%m-%d %H:%M")))
                    dbcon.commit()

            return sources
        except:
            pass
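Note: get_muscic_url's cache-expiry test flattens timestamps to YYYYMMDDHHMM integers before subtracting, so maximum_age is compared against an approximation of elapsed minutes. A self-contained restatement with a worked boundary case:

import datetime
import re

def is_stale(added, maximum_age):
    # Flatten '2024-01-02 03:04' -> 202401020304 and subtract: exact within
    # one hour, but the difference jumps at hour/day/month boundaries
    # (e.g. 04:04 minus 03:04 gives 100, not 60).
    t1 = int(re.sub('[^0-9]', '', str(added)))
    t2 = int(datetime.datetime.now().strftime('%Y%m%d%H%M'))
    return abs(t2 - t1) > maximum_age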
Code Example #12
File: hl.py, Project: nadzvee/ProjectSwan
    def get_url(scraper,
                title,
                show_year,
                year,
                season,
                episode,
                imdb,
                tvdb,
                type,
                cache_location,
                maximum_age,
                check_url=False,
                debrid=False):
        cache_enabled = control.setting("cache_enabled") == 'true'
        try:
            dbcon = database.connect(cache_location)
            dbcur = dbcon.cursor()
            try:
                dbcur.execute("SELECT * FROM version")
                match = dbcur.fetchone()
            except:
                desiscrapers.clear_cache()
                dbcur.execute("CREATE TABLE version (" "version TEXT)")
                dbcur.execute("INSERT INTO version Values ('0.5.4')")
                dbcon.commit()

            dbcur.execute("CREATE TABLE IF NOT EXISTS rel_src ("
                          "scraper TEXT, "
                          "title Text, show_year TEXT, year TEXT, "
                          "season TEXT, "
                          "episode TEXT, "
                          "imdb_id TEXT, "
                          "urls TEXT, "
                          "added TEXT, "
                          "UNIQUE(scraper, title, year, season, episode)"
                          ");")
        except:
            pass

        if cache_enabled:
            try:
                sources = []
                dbcur.execute(
                    "SELECT * FROM rel_src WHERE scraper = '%s' AND title = '%s' AND show_year= '%s' AND year = '%s' AND season = '%s' AND episode = '%s'"
                    % (scraper.name, cleantitle.get(title).upper(), show_year,
                       year, season, episode))
                match = dbcur.fetchone()
                t1 = int(re.sub('[^0-9]', '', str(match[8])))
                t2 = int(datetime.datetime.now().strftime("%Y%m%d%H%M"))
                update = abs(t2 - t1) > maximum_age
                if update == False:
                    sources = json.loads(match[7])
                    return sources
            except:
                pass

        try:
            sources = []
            if type == "movie":
                sources = scraper.scrape_movie(title,
                                               year,
                                               imdb,
                                               debrid=debrid)
            elif type == "episode":
                sources = scraper.scrape_episode(title,
                                                 show_year,
                                                 year,
                                                 season,
                                                 episode,
                                                 imdb,
                                                 tvdb,
                                                 debrid=debrid)
            if sources is None:
                sources = []
            else:
                if cache_enabled:
                    dbcur.execute(
                        "DELETE FROM rel_src WHERE scraper = '%s' AND title = '%s' AND show_year= '%s' AND year = '%s' AND season = '%s' AND episode = '%s'"
                        % (scraper.name, cleantitle.get(title).upper(),
                           show_year, year, season, episode))
                    dbcur.execute(
                        "INSERT INTO rel_src Values (?, ?, ?, ?, ?, ?, ?, ?, ?)",
                        (scraper.name, cleantitle.get(title).upper(),
                         show_year, year, season, episode, imdb,
                         json.dumps(sources),
                         datetime.datetime.now().strftime("%Y-%m-%d %H:%M")))
                    dbcon.commit()

            if check_url:
                noresolver = False
                try:
                    import urlresolver
                except:
                    try:
                        import urlresolver9 as urlresolver
                    except:
                        noresolver = True
                new_sources = []
                from common import check_playable
                for source in sources:
                    if source["direct"]:
                        check = check_playable(source["url"])
                        if check:
                            new_sources.append(source)
                    elif not noresolver:
                        try:
                            hmf = urlresolver.HostedMediaFile(
                                url=source['url'],
                                include_disabled=False,
                                include_universal=False)
                            if hmf.valid_url():
                                resolved_url = hmf.resolve()
                                check = check_playable(resolved_url)
                                if check:
                                    new_sources.append(source)
                        except:
                            pass
                    else:
                        new_sources.append(source)
                sources = new_sources
            return sources
        except:
            pass
Code Example #13
File: world4u.py, Project: nadzvee/ProjectSwan
    def scrape_movie(self, title, year, imdb, debrid=False):
        try:
            query = '%s %s' % (title, year)
            query = self.search_link % (urllib.quote_plus(query))
            query = urlparse.urljoin(self.base_link, query)

            result = client.request(query)

            result = result.decode('iso-8859-1').encode('utf-8')
            posts = client.parseDOM(result, "item")

            items = []

            for post in posts:
                try:
                    t = client.parseDOM(post, 'title')[0]
                    if 'trailer' in cleantitle.get(t):
                        raise Exception()

                    try:
                        s = re.findall(
                            '((?:\d+\.\d+|\d+\,\d+|\d+)(?:GB|GiB|MB|MiB|mb|gb))',
                            t)[0]
                    except:
                        s = '0'

                    i = client.parseDOM(post, 'link')[0]

                    items += [{'name': t, 'url': i, 'size': s}]
                except:
                    pass

            cleanedTitle = cleantitle.get(title)
            for item in items:
                try:
                    name = item.get('name')
                    # Bare title: everything up to the first year/SxxEyy/3D token.
                    t = re.sub(
                        '(\.|\(|\[|\s)(\d{4}|S\d*E\d*|S\d*|3D)(\.|\)|\]|\s|)(.+|)',
                        '', name)
                    if cleanedTitle == cleantitle.get(t):
                        y = re.findall(
                            '[\.|\(|\[|\s](\d{4}|S\d*E\d*|S\d*)[\.|\)|\]|\s]',
                            name)[-1].upper()

                        if not y == year: raise Exception()

                        fmt = re.sub(
                            '(.+)(\.|\(|\[|\s)(\d{4}|S\d*E\d*|S\d*)(\.|\)|\]|\s)',
                            '', name.upper())
                        fmt = re.split('\.|\(|\)|\[|\]|\s|\-', fmt)
                        fmt = [i.lower() for i in fmt]

                        if any(
                                i.endswith(('subs', 'sub', 'dubbed', 'dub'))
                                for i in fmt):
                            raise Exception()
                        if any(i in ['extras'] for i in fmt): raise Exception()

                        # Quality ladder: explicit resolution sets a baseline,
                        # then screener/cam markers override it.
                        if '1080p' in fmt: quality = '1080p'
                        elif '720p' in fmt: quality = 'HD'
                        else: quality = 'SD'
                        if any(i in ['dvdscr', 'r5', 'r6'] for i in fmt):
                            quality = 'SCR'
                        elif any(i in [
                                'camrip', 'tsrip', 'hdcam', 'hdts', 'dvdcam',
                                'dvdts', 'cam', 'telesync', 'ts'
                        ] for i in fmt):
                            quality = 'CAM'

                        info = []

                        if '3d' in fmt: info.append('3D')

                        try:
                            size = re.findall(
                                '((?:\d+\.\d+|\d+\,\d+|\d+)(?:GB|GiB|MB|MiB|mb|gb))',
                                item.get('size'))[-1]
                            # Normalise MB figures to GB for display.
                            div = 1 if size.endswith(('GB', 'GiB')) else 1024
                            size = float(re.sub('[^0-9|/.|/,]', '', size)) / div
                            size = '%.2f GB' % size
                            info.append(size)
                        except:
                            pass

                        if any(i in ['hevc', 'h265', 'x265'] for i in fmt):
                            info.append('HEVC')

                        info = ' | '.join(info)

                        movieurl = item.get('url')

                        result = client.request(movieurl)
                        result = result.decode('iso-8859-1').encode('utf-8')
                        result = result.replace('\n', '').replace('\t', '')

                        result = client.parseDOM(result,
                                                 'div',
                                                 attrs={'class': 'entry'})[0]
                        links = client.parseDOM(result,
                                                'a',
                                                attrs={'target': '_blank'},
                                                ret='href')

                        for link in links:
                            if 'http' in link:
                                host = client.host(link)

                                self.srcs.append({
                                    'source': host,
                                    'parts': '1',
                                    'quality': quality,
                                    'scraper': self.name,
                                    'url': link,
                                    'direct': False,
                                    'info': info
                                })
                except:
                    pass
            logger.debug('SOURCES [%s]' % self.srcs, __name__)
            return self.srcs
        except Exception as e:
            logger.error('[%s] Exception : %s' % (self.__class__, e))
        return []
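Note: the quality ladder above is a reusable token test: explicit resolutions set a baseline, then screener and cam markers override it. A minimal sketch over an illustrative release name:

import re

def infer_quality(fmt):
    # fmt: lower-cased tokens split out of a release name.
    quality = '1080p' if '1080p' in fmt else 'HD' if '720p' in fmt else 'SD'
    if any(t in ('dvdscr', 'r5', 'r6') for t in fmt):
        quality = 'SCR'
    elif any(t in ('camrip', 'tsrip', 'hdcam', 'hdts', 'dvdcam',
                   'dvdts', 'cam', 'telesync', 'ts') for t in fmt):
        quality = 'CAM'
    return quality

tokens = re.split(r'\.|\(|\)|\[|\]|\s|\-', 'Movie.2017.720p.HDTS.x264'.lower())
assert infer_quality(tokens) == 'CAM'  # the cam marker outranks the 720p tag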
Code Example #14
File: crazy4ad.py, Project: nadzvee/ProjectSwan
    def sources(self, url):
        try:
            logger.debug('SOURCES URL %s' % url, __name__)

            if url is None: return self.srcs

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '')
                         for i in data])

            title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
                'title']

            cleanedTitle = cleantitle.get(title)

            hdlr = 'S%02dE%02d' % (int(data['season']), int(
                data['episode'])) if 'tvshowtitle' in data else data['year']

            query = '%s S%02dE%02d' % (
                data['tvshowtitle'], int(data['season']),
                int(data['episode'])) if 'tvshowtitle' in data else '%s %s' % (
                    data['title'], data['year'])
            query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

            url = self.search_link % urllib.quote_plus(query)
            url = urlparse.urljoin(self.base_link, url)

            r = client.request(url)

            posts = client.parseDOM(r, 'item')

            link = client.parseDOM(posts[0], 'link')[0]

            items = []

            result = client.request(link)

            posts = client.parseDOM(result, 'div', attrs={'id': 'content'})

            for post in posts:
                try:
                    items += zip(
                        client.parseDOM(post, 'a', attrs={'target': '_blank'}),
                        client.parseDOM(post,
                                        'a',
                                        ret='href',
                                        attrs={'target': '_blank'}))
                except:
                    pass

            for item in items:
                try:
                    name = client.replaceHTMLCodes(item[0])

                    t = re.sub(
                        '(\.|\(|\[|\s)(\d{4}|S\d*E\d*|S\d*|3D)(\.|\)|\]|\s|)(.+|)',
                        '', name)

                    if not cleanedTitle == cleantitle.get(t): raise Exception()

                    y = re.findall(
                        '[\.|\(|\[|\s](\d{4}|S\d*E\d*|S\d*)[\.|\)|\]|\s]',
                        name)[-1].upper()

                    if not y == hdlr: raise Exception()
                    self.source(item)
                except:
                    pass
            logger.debug('SOURCES [%s]' % self.srcs, __name__)
            return self.srcs
        except:
            return self.srcs
Code Example #15
File: default.py, Project: nadzvee/ProjectSwan
def test():
    global movies, shows
    try:
        test_movies = []
        test_episodes = []
        profile_path = xbmc.translatePath(control.addonInfo('profile')).decode('utf-8')
        test_file = xbmcvfs.File(os.path.join(profile_path, "testings.xml"))
        xml = BeautifulStoneSoup(test_file.read())
        test_file.close()
        items = xml.findAll("item")
        for item in items:
            try:
                content = item.find("content")
                if content:
                    if "movie" in content.text:
                        meta = item.find("meta")
                        test_movies.append({
                            'title': meta.find("title").text,
                            'imdb': meta.find("imdb").text,
                            'year': meta.find("year").text,
                        })
                    elif "episode" in content.text:
                        meta = item.find("meta")
                        test_episodes.append({
                            'title': meta.find("tvshowtitle").text,
                            'show_year': int(meta.find("premiered").text[0:4]),
                            'year': meta.find("year").text,
                            'season': meta.find("season").text,
                            'episode': meta.find("season").text,
                            'imdb': meta.find("imdb").text,
                        })
            except:
                pass

        movies = test_movies
        shows = test_episodes
    except:
        pass

    dialog = xbmcgui.Dialog()
    pDialog = xbmcgui.DialogProgress()
    if dialog.yesno("Desiscrapers Testing Mode", 'Clear cache?'):
        desiscrapers.clear_cache()
    try:
        dbcon = database.connect(control.cacheFile)
        dbcur = dbcon.cursor()
    except:
        dialog.ok("Desiscrapers Testing Mode", 'Error connecting to db')
        sys.exit()

    num_movies = len(movies)
    if num_movies > 0:
        pDialog.create('Desiscrapers Testing mode active', 'please wait')
        index = 0
        for movie in movies:
            index += 1
            title = movie['title']
            year = movie['year']
            imdb = movie['imdb']
            if pDialog.iscanceled():
                pDialog.close()
                break
            pDialog.update((index * 100) / num_movies, "Scraping movie {} of {}".format(index, num_movies), title)
            links_scraper = desiscrapers.scrape_movie(title, year, imdb)
            links_scraper = links_scraper()
            for scraper_links in links_scraper:
                if pDialog.iscanceled():
                    break
                if scraper_links:
                    logger.debug(scraper_links, __name__)
                    random.shuffle(scraper_links)

        pDialog.close()
        dbcur.execute("SELECT COUNT(DISTINCT(scraper)) FROM rel_src where episode = ''")
        match = dbcur.fetchone()
        num_movie_scrapers = match[0]

        logger.debug('num_movie_scrapers %s' % num_movie_scrapers, __name__)

        dbcur.execute("SELECT scraper, count(distinct(urls)) FROM rel_src where episode = '' group by scraper")
        matches = dbcur.fetchall()
        failed = []
        for match in matches:
            if int(match[1]) <= 1:
                failed.append(match[0])

        if len(failed) > 0:
            failedstring = "Failed: {}".format(len(failed))
            for fail in failed:
                failedstring += "\n        - {}".format(str(fail))
        else:
            failedstring = "Failed: {}".format(len(failed))

        logger.debug('failedString %s' % failedstring, __name__)

        dbcur.execute("SELECT title, count(distinct(urls)) FROM rel_src where episode = '' group by title")
        matches = dbcur.fetchall()
        failed_movies = []
        for match in matches:
            if int(match[1]) <= 1:
                if int(match[1]) == 1:
                    dbcur.execute(
                        "SELECT scraper, urls FROM rel_src where episode == '' and title == '{}' group by scraper".format(
                            match[0]))
                    new_matches = dbcur.fetchall()
                    found = False
                    for new_match in new_matches:
                        if new_match[1] == "[]":
                            continue
                        else:
                            found = True
                    if not found:
                        failed_movies.append(match[0])
                else:
                    failed_movies.append(match[0])

        if len(failed_movies) > 0:
            failed_movie_string = "Failed movies: {}".format(len(failed_movies))
            for fail in failed_movies:
                for movie in movies:
                    if cleantitle.get(movie['title']).upper() == str(fail):
                        failed_movie_string += "\n        - {}".format(movie["title"])

        else:
            failed_movie_string = ""

    num_shows = len(shows)
    if num_shows > 0:
        pDialog.create('Desiscrapers Testing mode active', 'please wait')
        index = 0
        for show in shows:
            index += 1
            title = show['title']
            show_year = show['show_year']
            year = show['year']
            season = show['season']
            episode = show['episode']
            imdb = show['imdb']
            tvdb = show.get('tvdb', '')

            if pDialog.iscanceled():
                pDialog.close()
                break
            pDialog.update((index * 100) / num_shows, "Scraping show {} of {}".format(index, num_shows), title)
            links_scraper = desiscrapers.scrape_episode(title, show_year, year, season, episode, imdb, tvdb)
            links_scraper = links_scraper()
            for scraper_links in links_scraper:
                if pDialog.iscanceled():
                    break
                if scraper_links:
                    random.shuffle(scraper_links)

        pDialog.close()
        dbcur.execute("SELECT COUNT(DISTINCT(scraper)) FROM rel_src where episode != ''")
        match = dbcur.fetchone()
        num_show_scrapers = match[0]

        dbcur.execute("SELECT scraper, count(distinct(urls)) FROM rel_src where episode != '' group by scraper")
        matches = dbcur.fetchall()
        failed = []
        for match in matches:
            if int(match[1]) <= 1:
                if int(match[1]) == 1:
                    dbcur.execute(
                        "SELECT scraper, urls FROM rel_src where episode != '' and scraper == '{}' group by scraper".format(
                            match[0]))
                    match = dbcur.fetchone()
                    if match[1] == "[]":
                        failed.append(match[0])
                else:
                    failed.append(match[0])

        if len(failed) > 0:
            show_scraper_failedstring = "Failed: {}".format(len(failed))
            for fail in failed:
                show_scraper_failedstring += "\n        - {}".format(str(fail))
        else:
            show_scraper_failedstring = ""

        dbcur.execute("SELECT title, count(distinct(urls)) FROM rel_src where episode != '' group by title")
        matches = dbcur.fetchall()
        failed_shows = []
        for match in matches:
            if int(match[1]) <= 1:
                if int(match[1]) == 1:
                    dbcur.execute(
                        "SELECT scraper, urls FROM rel_src where episode != '' and title == '{}' group by scraper".format(
                            match[0]))
                    new_matches = dbcur.fetchall()
                    found = False
                    for new_match in new_matches:
                        if new_match[1] == "[]":
                            continue
                        else:
                            found = True
                    if not found:
                        failed_shows.append(match[0])
                else:
                    failed_shows.append(match[0])

        if len(failed_shows) > 0:
            failed_show_string = "Failed shows: {}".format(len(failed_shows))
            for fail in failed_shows:
                for show in shows:
                    if cleantitle.get(show['title']).upper() == str(fail):
                        failed_show_string += "\n        - {} S{}-E{}".format(show["title"], show["season"],
                                                                              show["episode"])

        else:
            failed_show_string = ""

    resultstring = 'Results:\n'
    if num_movies > 0:
        resultstring = resultstring + \
                       '    Movie Scrapers: {}\n' \
                       '    {}\n' \
                       '    {}\n'.format(num_movie_scrapers, failedstring, failed_movie_string)
    if num_shows > 0:
        resultstring = resultstring + \
                       '    Episode Scrapers: {}\n' \
                       '    {}\n' \
                       '    {}\n'.format(num_show_scrapers, show_scraper_failedstring, failed_show_string)

    dialog.textviewer("Desiscrapers Testing Mode", resultstring)
Code Example #16
    def super_info(self, i):
        try:
            if self.list[i]['metacache'] is True: raise Exception()
            try:
                imdb = self.list[i]['imdb']
            except:
                imdb = '0'
            try:
                tvdb = self.list[i]['tvdb']
            except:
                tvdb = '0'

            self.list[i].update({"imdb": imdb, "tvdb": tvdb})

            title = self.list[i]['title']
            cleanedTitle = cleantitle.get(title)

            if 'season' in title.lower():
                # Index case-insensitively, matching the check above.
                title = title[:title.lower().index('season') - 1]
            else:
                # strip end #'s
                title = title.replace(' 10', '')

            logger.debug('Super_Info Title : %s' % title, __name__)
            url = self.burp_search_link % urllib.quote_plus(
                title.encode('utf-8'), safe=':/')

            result = client.request(url)
            result = result.decode('iso-8859-1').encode('utf-8')

            result = result.replace('\n', '').replace('\t', '')

            result = client.parseDOM(result,
                                     name="table",
                                     attrs={"class": "result"})[0]

            result = client.parseDOM(result,
                                     name="td",
                                     attrs={"class": "resultTitle"})

            showUrl = None

            for item in result:
                linkTitle = client.parseDOM(item,
                                            name="a",
                                            attrs={"class": "title"})[0]
                linkTitle = client.parseDOM(linkTitle, name="strong")[0]

                if cleanedTitle == cleantitle.get(linkTitle):
                    showUrl = client.parseDOM(item,
                                              name="a",
                                              attrs={"class": "title"},
                                              ret="href")[0]
                    break

            if showUrl is None:
                raise Exception()

            result = client.request(showUrl)

            if 'No information available!' in result:
                raise Exception()

            result = result.decode('iso-8859-1').encode('utf-8')

            right = client.parseDOM(result, "div", attrs={"class": "Right"})[0]
            showDetails = client.parseDOM(result,
                                          "td",
                                          attrs={"class": "showDetails"})[0]

            try:
                genre = client.parseDOM(showDetails, "tr")
                for item in genre:
                    if "genre" in item.lower():
                        genre = client.parseDOM(item, "td")[0]
                        genre = genre.replace(',', ' / ').strip()
                    elif "show type" in item.lower():
                        genre = client.parseDOM(item, "td")[0]
                        genre = genre.replace(',', ' / ').strip()
            except Exception as e:
                logger.error(e)
                genre = ''

            if genre == '': genre = '0'
            genre = client.replaceHTMLCodes(genre)
            genre = genre.encode('utf-8')
            if not genre == '0': self.list[i].update({'genre': genre})

            try:
                poster = client.parseDOM(result,
                                         "td",
                                         attrs={"class": "showPics"})[0]
                poster = client.parseDOM(poster, "img", ret="src")[0]
            except:
                poster = ''

            if poster == '' or poster is None: poster = '0'
            poster = client.replaceHTMLCodes(poster)
            poster = poster.encode('utf-8')
            if not poster == '0': self.list[i].update({'poster': poster})

            try:
                plot = client.parseDOM(right,
                                       "div",
                                       attrs={"class": "synopsis"})[0].strip()
                try:
                    plot += client.parseDOM(right,
                                            "span",
                                            attrs={"id":
                                                   "morecontent"})[0].strip()
                except:
                    pass
            except:
                plot = ''
            if plot == '': plot = '0'
            plot = client.replaceHTMLCodes(plot)
            plot = plot.encode('utf-8')
            if not plot == '0': self.list[i].update({'plot': plot})

            try:
                metaHTML = client.parseDOM(right,
                                           "table",
                                           attrs={"class": "meta"})[0]
            except:
                metaHTML = None

            # Defaults so the metadata append below cannot reference unbound
            # names when the meta table is missing.
            premiered = cast = None
            year = '0'
            if metaHTML:
                items = client.parseDOM(metaHTML, "tr")
                for item in items:
                    if "release date" in item.lower():
                        premiered = client.parseDOM(item,
                                                    "span",
                                                    attrs={"itemprop":
                                                           "name"})[0]
                        premiered = premiered.encode('utf-8')
                    elif "Actor" in item:
                        cast = client.parseDOM(item,
                                               "span",
                                               attrs={"itemprop": "name"})[0]
                        cast = cast.split(',')

                if premiered != None:
                    try:
                        year = re.compile('(\d{4})').findall(premiered)[0]
                    except:
                        year = ''
                    if year == '': year = '0'
                    year = year.encode('utf-8')
                    self.list[i].update({'year': year})
                    self.list[i].update({'premiered': premiered})
                if cast != None and len(cast) > 0:
                    self.list[i].update({'cast': cast})

            # This site exposes no IMDb id, so the cleaned title stands in as one.
            imdb = cleantitle.get(title)
            tvdb = banner = fanart = studio = duration = rating = votes = mpaa = tmdb = '0'

            self.meta.append({
                'year': year,
                'tmdb': tmdb,
                'imdb': imdb,
                'tvdb': tvdb,
                'lang': self.info_lang,
                'item': {
                    'code': imdb,
                    'imdb': imdb,
                    'tvdb': tvdb,
                    'poster': poster,
                    'banner': banner,
                    'fanart': fanart,
                    'premiered': premiered,
                    'studio': studio,
                    'genre': genre,
                    'duration': duration,
                    'rating': rating,
                    'votes': votes,
                    'mpaa': mpaa,
                    'cast': cast,
                    'plot': plot
                }
            })
        except:
            pass