Example #1
class LibraryManager():
    def __init__(self, dest_path, platform):
        self.dest_path = dest_path
        self.platform = platform

    def check_update(self):
        need_update = False
        if __settings__.getSetting('plugin_name') != __plugin__:
            __settings__.setSetting('plugin_name', __plugin__)
            for libname in get_libname(self.platform):
                self.libpath = os.path.join(self.dest_path, libname)
                self.sizepath = os.path.join(self.dest_path,
                                             libname + '.size.txt')
                size = str(os.path.getsize(self.libpath))
                size_old = open(self.sizepath, "r").read()
                if size_old != size:
                    need_update = True
        return need_update

    def update(self):
        if self.check_update():
            for libname in get_libname(self.platform):
                self.libpath = os.path.join(self.dest_path, libname)
                xbmcvfs.delete(self.libpath)
            self.download()

    def download(self):
        xbmcvfs.mkdirs(self.dest_path)
        for libname in get_libname(self.platform):
            dest = os.path.join(self.dest_path, libname)
            log("try to fetch %s" % libname)
            url = "%s/%s/%s.zip" % (__libbaseurl__, self.platform, libname)
            try:
                self.http = HTTP()
                self.http.fetch(url, download=dest + ".zip", progress=True)
                log("%s -> %s" % (url, dest))
                xbmc.executebuiltin(
                    'XBMC.Extract("%s.zip","%s")' % (dest, self.dest_path),
                    True)
                xbmcvfs.delete(dest + ".zip")
            except:
                text = 'Failed download %s!' % libname
                xbmc.executebuiltin("XBMC.Notification(%s,%s,%s)" %
                                    (__plugin__, text, 750))
        return True
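
A minimal driver for this class would construct it and call update(); everything else is internal. The sketch below assumes the module-level names the example depends on (get_libname, __settings__, __plugin__, __libbaseurl__, HTTP, log) are defined elsewhere in the addon, and that dest_path and platform are placeholder values:

# Hypothetical usage; dest_path and platform are stand-ins.
manager = LibraryManager(dest_path, platform)
manager.update()  # check_update() decides; stale libraries are deleted, then re-fetched

Note that check_update() records the current plugin name via setSetting() on its first call, so it reports a change at most once per upgrade; calling it manually before update() would consume that one chance and turn update() into a no-op.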
Example #2
class DownloaderClass():
    def __init__(self, dest_path):
        self.dest_path = dest_path
        self.platform = get_platform()
        tempdir(self.dest_path)

    def tools_download(self):
        for libname in get_libname(self.platform):
            dest = os.path.join(self.dest_path, libname)
            log("try to fetch %s" % libname)
            url = "%s/%s/%s.zip" % (__libbaseurl__, self.platform['system'], libname)
            if libname != 'liblibtorrent.so':
                try:
                    self.http = HTTP()
                    self.http.fetch(url, download=dest + ".zip", progress=True)
                    log("%s -> %s" % (url, dest))
                    xbmc.executebuiltin('XBMC.Extract("%s.zip","%s")' % (dest, self.dest_path), True)
                    xbmcvfs.delete(dest + ".zip")
                except:
                    text = 'Failed download %s!' % libname
                    xbmc.executebuiltin("XBMC.Notification(%s,%s,%s,%s)" % (__plugin__,text,750,__icon__))
            else:
                xbmcvfs.copy(os.path.join(self.dest_path, 'libtorrent.so'), dest)
        return True
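
All of these downloader classes lean on two helpers that the examples never show: get_platform() and get_libname(). From the call sites one can infer their shape: Example #1 formats platform straight into the URL as a plain value, while DownloaderClass and the later examples index it as a dict with 'system' (and later 'version') keys, and get_libname() returns the list of library filenames to fetch. A rough sketch of the dict flavor under those assumptions (the concrete keys, version, and filenames here are invented, not the plugin's real tables):

import sys

def get_platform():
    # Assumed shape: the examples read platform['system'] and platform['version'].
    system = 'linux_x86_64' if sys.platform.startswith('linux') else 'windows'
    return {'system': system, 'version': '1.0.9'}  # placeholder values

def get_libname(platform):
    # Assumed mapping: several examples special-case 'liblibtorrent.so', so the
    # Linux builds apparently ship a loader stub next to the real library.
    if platform['system'].startswith('windows'):
        return ['libtorrent.pyd']
    return ['libtorrent.so', 'liblibtorrent.so']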
Example #4
class LibraryManager():
    def __init__(self, dest_path, platform):
        self.dest_path = dest_path
        self.platform = platform
        self.root = os.path.dirname(os.path.dirname(__file__))

    def check_exist(self):
        for libname in get_libname(self.platform):
            if not xbmcvfs.exists(os.path.join(self.dest_path, libname)):
                return False
        return True

    def check_update(self):
        need_update = False
        for libname in get_libname(self.platform):
            if libname != 'liblibtorrent.so':
                self.libpath = os.path.join(self.dest_path, libname)
                self.sizepath = os.path.join(self.root,
                                             self.platform['system'],
                                             self.platform['version'],
                                             libname + '.size.txt')
                size = str(os.path.getsize(self.libpath))
                size_old = open(self.sizepath, "r").read()
                if size_old != size:
                    need_update = True
        return need_update

    def update(self):
        if self.check_update():
            for libname in get_libname(self.platform):
                self.libpath = os.path.join(self.dest_path, libname)
                xbmcvfs.delete(self.libpath)
            self.download()

    def download(self):
        xbmcvfs.mkdirs(self.dest_path)
        for libname in get_libname(self.platform):
            dest = os.path.join(self.dest_path, libname)
            log("try to fetch %s" % libname)
            url = "%s/%s/%s/%s.zip" % (__libbaseurl__, self.platform['system'],
                                       self.platform['version'], libname)
            if libname != 'liblibtorrent.so':
                try:
                    self.http = HTTP()
                    self.http.fetch(url, download=dest + ".zip", progress=True)
                    log("%s -> %s" % (url, dest))
                    xbmc.executebuiltin(
                        'XBMC.Extract("%s.zip","%s")' % (dest, self.dest_path),
                        True)
                    xbmcvfs.delete(dest + ".zip")
                except:
                    text = 'Failed download %s!' % libname
                    xbmc.executebuiltin("XBMC.Notification(%s,%s,%s,%s)" %
                                        (__plugin__, text, 750, __icon__))
            else:
                xbmcvfs.copy(os.path.join(self.dest_path, 'libtorrent.so'),
                             dest)
        return True

    def android_workaround(self, new_dest_path):
        for libname in get_libname(self.platform):
            libpath = os.path.join(self.dest_path, libname)
            size = str(os.path.getsize(libpath))
            new_libpath = os.path.join(new_dest_path, libname)

            if not xbmcvfs.exists(new_libpath):
                xbmcvfs.copy(libpath, new_libpath)
                log('Copied %s -> %s' % (libpath, new_libpath))
            else:
                new_size = str(os.path.getsize(new_libpath))
                if size != new_size:
                    xbmcvfs.delete(new_libpath)
                    xbmcvfs.copy(libpath, new_libpath)
                    log('Deleted and copied (%s) %s -> (%s) %s' %
                        (size, libpath, new_size, new_libpath))
        return new_dest_path
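
Example #4's android_workaround() exists because on Android the download directory may not be a location the loader can actually dlopen() libraries from, so they are mirrored into a second directory and the new path is returned for the import step. Note, too, that every check_update() variant compares file sizes rendered as strings: cheap, but it will miss a replacement binary that happens to have the identical size. A hedged sketch of the Android call (the target directory is a stand-in, not the plugin's real choice):

# Hypothetical: mirror the libraries to an executable location on Android
# and keep using the returned path from then on.
manager = LibraryManager(dest_path, platform)
if platform['system'] == 'android':
    dest_path = manager.android_workaround(xbmc.translatePath('special://xbmc/'))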
Example #5
class LibraryManager():
    def __init__(self, dest_path, platform):
        self.dest_path = dest_path
        self.platform = platform
        self.root = os.path.dirname(os.path.dirname(__file__))

    def check_exist(self):
        for libname in get_libname(self.platform):
            if not xbmcvfs.exists(os.path.join(self.dest_path, libname)):
                return False
        return True

    def check_update(self):
        need_update = False
        for libname in get_libname(self.platform):
            if libname != 'liblibtorrent.so':
                self.libpath = os.path.join(self.dest_path, libname)
                self.sizepath = os.path.join(self.root,
                                             self.platform['system'],
                                             self.platform['version'],
                                             libname + '.size.txt')
                size = str(os.path.getsize(self.libpath))
                size_old = open(self.sizepath, "r").read()
                if size_old != size:
                    need_update = True
        return need_update

    def update(self):
        if self.check_update():
            for libname in get_libname(self.platform):
                self.libpath = os.path.join(self.dest_path, libname)
                xbmcvfs.delete(self.libpath)
            self.download()

    def download(self):
        __settings__ = xbmcaddon.Addon(id='plugin.video.alfa')  ### Alfa
        xbmcvfs.mkdirs(self.dest_path)
        for libname in get_libname(self.platform):
            dest = os.path.join(self.dest_path, libname)
            log("try to fetch %s" % libname)
            url = "%s/%s/%s/%s.zip" % (__libbaseurl__, self.platform['system'],
                                       self.platform['version'], libname)
            if libname != 'liblibtorrent.so':
                try:
                    self.http = HTTP()
                    self.http.fetch(url,
                                    download=dest + ".zip",
                                    progress=False)  ### Alfa
                    log("%s -> %s" % (url, dest))
                    xbmc.executebuiltin(
                        'XBMC.Extract("%s.zip","%s")' % (dest, self.dest_path),
                        True)
                    xbmcvfs.delete(dest + ".zip")
                except:
                    text = 'Failed download %s!' % libname
                    xbmc.executebuiltin("XBMC.Notification(%s,%s,%s,%s)" %
                                        (__plugin__, text, 750, __icon__))
            else:
                xbmcvfs.copy(os.path.join(self.dest_path, 'libtorrent.so'),
                             dest,
                             silent=True)  ### Alfa
            dest_alfa = os.path.join(xbmc.translatePath(__settings__.getAddonInfo('Path')),
                                     'lib', libname)  ### Alfa
            xbmcvfs.copy(dest, dest_alfa, silent=True)  ### Alfa
            dest_alfa = os.path.join(xbmc.translatePath(__settings__.getAddonInfo('Profile')),
                                     'custom_code', 'lib', libname)  ### Alfa
            xbmcvfs.copy(dest, dest_alfa, silent=True)  ### Alfa
        return True

    def android_workaround(self, new_dest_path):  ### Alfa (entire method)
        import subprocess

        for libname in get_libname(self.platform):
            libpath = os.path.join(self.dest_path, libname)
            size = str(os.path.getsize(libpath))
            new_libpath = os.path.join(new_dest_path, libname)

            if xbmcvfs.exists(new_libpath):
                new_size = str(os.path.getsize(new_libpath))
                if size != new_size:
                    xbmcvfs.delete(new_libpath)
                    if xbmcvfs.exists(new_libpath):
                        try:
                            command = ['su', '-c', 'rm', '%s' % new_libpath]
                            p = subprocess.Popen(command,
                                                 stdout=subprocess.PIPE,
                                                 stderr=subprocess.PIPE)
                            output_cmd, error_cmd = p.communicate()
                            log('ROOT command: %s' % str(command))
                        except:
                            log('No ROOT permissions: %s' % str(command))

                    if not xbmcvfs.exists(new_libpath):
                        log('Deleted: (%s) %s -> (%s) %s' %
                            (size, libpath, new_size, new_libpath))

            if not xbmcvfs.exists(new_libpath):
                xbmcvfs.copy(libpath, new_libpath, silent=True)  ### ALFA
                log('Copying... %s -> %s' % (libpath, new_libpath))

                if not xbmcvfs.exists(new_libpath):
                    try:
                        command = [
                            'su', '-c', 'cp',
                            '%s' % libpath,
                            '%s' % new_libpath
                        ]
                        p = subprocess.Popen(command,
                                             stdout=subprocess.PIPE,
                                             stderr=subprocess.PIPE)
                        output_cmd, error_cmd = p.communicate()
                        log('ROOT command: %s' % str(command))

                        command = [
                            'su', '-c', 'chmod', '775',
                            '%s' % new_libpath
                        ]
                        p = subprocess.Popen(command,
                                             stdout=subprocess.PIPE,
                                             stderr=subprocess.PIPE)
                        output_cmd, error_cmd = p.communicate()
                        log('ROOT command: %s' % str(command))
                    except:
                        log('No ROOT permissions: %s' % str(command))

                    if not xbmcvfs.exists(new_libpath):
                        log('ROOT Copy Failed!')

                else:
                    command = ['chmod', '775', '%s' % new_libpath]
                    p = subprocess.Popen(command,
                                         stdout=subprocess.PIPE,
                                         stderr=subprocess.PIPE)
                    output_cmd, error_cmd = p.communicate()
                    log('Command: %s' % str(command))
            else:
                log('Module exists. Not copied... %s' % new_libpath)  ### ALFA

        return new_dest_path
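
The Alfa variant repeats one pattern three times: try the ordinary filesystem call, check whether the target file actually appeared, and if not, retry the same operation through su -c. Factored out, the retry reduces to something like the sketch below (run_as_root is a name invented here; log is the module's existing helper):

import subprocess

def run_as_root(*args):
    # Best-effort escalation, mirroring the example: run the command via su,
    # log the attempt, and let the caller re-check whether the file now exists.
    command = ['su', '-c'] + list(args)
    try:
        p = subprocess.Popen(command, stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
        p.communicate()
        log('ROOT command: %s' % str(command))
    except Exception:
        log('No ROOT permissions: %s' % str(command))

# e.g., after a failed xbmcvfs.copy():
#   if not xbmcvfs.exists(new_libpath):
#       run_as_root('cp', libpath, new_libpath)
#       run_as_root('chmod', '775', new_libpath)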
Example #6
class LibraryManager():
    def __init__(self, dest_path, platform):
        self.dest_path = dest_path
        self.platform = platform
        self.root = os.path.dirname(__file__)

    def check_exist(self):
        for libname in get_libname(self.platform):
            if not xbmcvfs.exists(os.path.join(self.dest_path, libname)):
                return False
        return True

    def check_update(self):
        need_update = False
        for libname in get_libname(self.platform):
            if libname != 'liblibtorrent.so':
                self.libpath = os.path.join(self.dest_path, libname)
                self.sizepath = os.path.join(self.root, self.platform['system'],
                                             self.platform['version'],
                                             libname + '.size.txt')
                size = str(os.path.getsize(self.libpath))
                size_old = open(self.sizepath, "r").read()
                if size_old != size:
                    need_update = True
        return need_update

    def update(self):
        if self.check_update():
            for libname in get_libname(self.platform):
                self.libpath = os.path.join(self.dest_path, libname)
                xbmcvfs.delete(self.libpath)
            self.download()

    def download(self):
        xbmcvfs.mkdirs(self.dest_path)
        for libname in get_libname(self.platform):
            dest = os.path.join(self.dest_path, libname)
            log("try to fetch %s" % libname)
            url = "%s/%s/%s/%s.zip" % (__libbaseurl__, self.platform['system'], self.platform['version'], libname)
            if libname!='liblibtorrent.so':
                try:
                    self.http = HTTP()
                    self.http.fetch(url, download=dest + ".zip", progress=True)
                    log("%s -> %s" % (url, dest))
                    xbmc.executebuiltin('XBMC.Extract("%s.zip","%s")' % (dest, self.dest_path), True)
                    xbmcvfs.delete(dest + ".zip")
                except:
                    text = 'Failed download %s!' % libname
                    xbmc.executebuiltin("XBMC.Notification(%s,%s,%s,%s)" % (__plugin__,text,750,__icon__))
            else:
                xbmcvfs.copy(os.path.join(self.dest_path, 'libtorrent.so'), dest)
        return True

    def android_workaround(self, new_dest_path):
        for libname in get_libname(self.platform):
            libpath = os.path.join(self.dest_path, libname)
            size = str(os.path.getsize(libpath))
            new_libpath = os.path.join(new_dest_path, libname)

            if not xbmcvfs.exists(new_libpath):
                xbmcvfs.copy(libpath, new_libpath)
                log('Copied %s -> %s' % (libpath, new_libpath))
            else:
                new_size = str(os.path.getsize(new_libpath))
                if size != new_size:
                    xbmcvfs.delete(new_libpath)
                    xbmcvfs.copy(libpath, new_libpath)
                    log('Deleted and copied (%s) %s -> (%s) %s' % (size, libpath, new_size, new_libpath))
        return new_dest_path
Example #7
class TvDb:
    """
    
    API:
        scraper  - скрапер
        search   - поиск сериалов
        movie    - профайл фильма
        
    """
    
    def __init__(self):
        self.api_key = '33DBB309BB2B0ADB'
        
        self.cache = Cache('tvdb.db', 1.0)
        
        self.http = HTTP()
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:10.0.2) Gecko/20100101 Firefox/10.0.2',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3',
            'Cache-Control': 'no-cache',
            'Referer': 'http://www.thetvdb.com/'
        }
        
        
    # API
    
    def scraper(self, search, year=None, season=None):
        try:
            if not isinstance(search, list):
                search = [search]
            tag = 'scraper:' + urllib.quote_plus(":".join(search).encode('utf8'))
        except:
            return None
        else:
            
            if year:
                tag += ':' + str(year)

            
            id = self.cache.get(tag, self._scraper, search, year)
            if not id:
                return None

            if season: return self.get_banners(id)
            
            return self.movie(id)

    def get_banners(self, id):
        import xml.etree.ElementTree as ET
        dirname = tempfile.mkdtemp()
        response = self.http.fetch('http://www.thetvdb.com/api/' + self.api_key + '/series/' + str(id) + '/all/ru.zip', headers=self.headers, download=os.path.join(dirname, 'movie.zip'))
        if response.error:
            self._movie_clear(dirname)
            return False, None

        try:
            filezip = zipfile.ZipFile(os.path.join(dirname, 'movie.zip'), 'r')
            filezip.extractall(dirname)
            filezip.close()
            movie = file(os.path.join(dirname, 'banners.xml'), 'rb').read().decode('utf8')
        except:
            self._movie_clear(dirname)
            return False, None

        self._movie_clear(dirname)

        dom = ET.fromstring(movie)
        if not len(dom):
            return

        def dom2dict(node):
            ret = {}
            for child in node:
                if len(child):
                    ret.setdefault(child.tag.lower(), []).append(dom2dict(child))
                else:
                    ret[child.tag.lower()] = child.text
            return ret

        def update_image_urls(meta):
            if isinstance(meta, dict):
                for k, v in meta.items():
                    if isinstance(v, list):
                        map(update_image_urls, v)
                    elif isinstance(v, dict):
                        update_image_urls(v)
                    elif k in ["banner", "fanart", "poster", "filename", "bannerpath", "vignettepath", "thumbnailpath"] and isinstance(v, basestring):
                        meta[k] = image_url(v)
            return meta

        def image_url(fragment):
            return "%s/banners/%s" % ("http://www.thetvdb.com", fragment)

        return update_image_urls(dom2dict(dom))["banner"]

    def search(self, name):
        return self._search(name)
    
    
    def movie(self, id):
        id = str(id)
        return self.cache.get('movie:' + id, self._movie, id)
    
    
    def _movie(self, id):
        dirname = tempfile.mkdtemp()
        response = self.http.fetch('http://www.thetvdb.com/api/' + self.api_key + '/series/' + id + '/all/ru.zip', headers=self.headers, download=os.path.join(dirname, 'movie.zip'))
        if response.error:
            self._movie_clear(dirname)
            return False, None
        
        try:
            filezip = zipfile.ZipFile(os.path.join(dirname, 'movie.zip'), 'r')
            filezip.extractall(dirname)
            filezip.close()
            movie = file(os.path.join(dirname, 'ru.xml'), 'rb').read().decode('utf8')
        except:
            self._movie_clear(dirname)
            return False, None
        
        self._movie_clear(dirname)
        
        body = re.compile(r'<Series>(.+?)</Series>', re.U|re.S).search(movie)
        if not body:
            return False, None
        
        body = body.group(1)
        
        res = {
            'icon' : None,
            'thumbnail': None,
            'properties': {
                'fanart_image': None,
            },
            'info': {
                'count' : int(id)
            }
        }
        
        # directors and writers
        for tag in ('Director', 'Writer'):
            people = {}
            people_list = []
            [people_list.extend(x.split('|')) for x in re.compile(r'<' + tag + r'>([^<]+)</' + tag + r'>', re.U|re.S).findall(movie)]
            [people.update({x: 1}) for x in [x.strip() for x in people_list] if x]
            if people:
                res['info'][tag.lower()] = u', '.join([x for x in people.keys() if x])
        
        for tag, retag, typeof, targettype in (
                    ('plot', 'Overview', None, None),
                    ('mpaa', 'ContentRating', None, None),
                    ('premiered', 'FirstAired', None, None),
                    ('studio', 'Network', None, None),
                    ('title', 'SeriesName', None, None),
                    ('runtime', 'Runtime', None, None),
                    ('votes', 'RatingCount', None, None),
                    ('rating', 'Rating', float, None),
                    ('genre', 'Genre', list, unicode),
                    ('cast', 'Actors', list, None)
                    ):
            r = re.compile(r'<' + retag + r'>([^<]+)</' + retag + r'>', re.U|re.S).search(body)
            if r:
                r = r.group(1).strip()
                if typeof == float:
                    res['info'][tag] = float(r)
                elif typeof == list:
                    if targettype == unicode:
                        res['info'][tag] = u', '.join([x for x in [x.strip() for x in r.split(u'|')] if x])
                    else:
                        res['info'][tag] = [x for x in [x.strip() for x in r.split(u'|')] if x]
                else:
                    res['info'][tag] = r
        
        # year
        if 'premiered' in res['info']:
            res['info']['year'] = int(res['info']['premiered'].split('-')[0])
        
        # poster
        r = re.compile(r'<poster>([^<]+)</poster>', re.U|re.S).search(body)
        if r:
            res['icon'] = 'http://thetvdb.com/banners/' + r.group(1).strip()
            res['thumbnail'] = 'http://thetvdb.com/banners/' + r.group(1).strip()
        
        # fanart
        r = re.compile(r'<fanart>([^<]+)</fanart>', re.U|re.S).search(body)
        if r:
            res['properties']['fanart_image'] = 'http://thetvdb.com/banners/' + r.group(1).strip()
        
        timeout = True
        # if the movie is recent, cache it only briefly (the site may still get updates)
        if 'year' not in res['info'] or int(res['info']['year']) >= time.gmtime(time.time()).tm_year:
            timeout = 7*24*60*60  # one week
        
        return timeout, res
            
    
    def _movie_clear(self, dirname):
        # remove the extracted files, then the temporary directory itself
        for filename in os.listdir(dirname):
            os.unlink(os.path.join(dirname, filename))
        os.rmdir(dirname)
        
    
    def _search(self, search):
        for name in search:
            response = self.http.fetch('http://www.thetvdb.com/api/GetSeries.php?language=ru&seriesname=' + urllib.quote_plus(name.encode('utf-8','ignore')), headers=self.headers)
            if response.error:
                return None
        
            res = []
            rows = re.compile('<Series>(.+?)</Series>', re.U|re.S).findall(response.body.decode('utf8'))
            if rows:
                recmd = re.compile('<seriesid>([0-9]+)</seriesid>', re.U|re.S)
            
                for row in [x for x in rows if x.find(u'<language>ru</language>') != -1]:
                    r = recmd.search(row)
                    if r:
                        res.append(int(r.group(1)))
                # in some cases a show can only be found by its original title,
                # even though a Russian description exists
                if not res:
                    for row in [x for x in rows if x.find(u'<language>en</language>') != -1]:
                        r = recmd.search(row)
                        if r:
                            res.append(int(r.group(1)))

            if res:
                break
                
        return {'pages': (1, 0, 1, 0), 'data': res}
    
    
    def _scraper(self, name, year):
        timeout = True
        
        # if the movie is recent, cache it only briefly (the site may still get updates)
        if year and year >= time.gmtime(time.time()).tm_year:
            timeout = 7*24*60*60  # one week
        
        ids = self._search(name)
        
        if ids is None:
            return False, None
        
        elif not ids['data']:
            # cache the empty result for 3 days
            return 259200, None
        
        else:
            return timeout, ids['data'][0]
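
For a client, scraper() is the whole API surface: it normalizes the search terms into a cache key, lets Cache.get() decide whether the network is hit at all, and returns the assembled movie profile. A usage sketch with an arbitrary series title:

tvdb = TvDb()
profile = tvdb.scraper([u'Lost'], year=2004)  # hypothetical query
if profile:
    print profile['info'].get('title'), profile['info'].get('premiered')

Because _movie() returns (False, None) on any failure, a failed fetch is never written to the cache (see the Cache.get() contract in Example #8), and scraper() simply comes back with None.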
Example #8
class Cache:
    def __init__(self, name, version, expire=0, size=0, step=100):
        self.name = name
        self.version = version
        self._connect()
        if expire:
            self.expire(expire)
        if size:
            self.size(size, step)

    def get(self, token, callback, *param):
        cur = self.db.cursor()
        cur.execute('select expire,data from cache where id=? limit 1', (token,))
        row = cur.fetchone()
        cur.close()

        if row:
            if row[0] and row[0] < int(time.time()):
                pass
            else:
                try:
                    obj = pickle.loads(row[1])
                except:
                    pass
                else:
                    return obj

        response = callback(*param)

        if response[0]:
            obj = sqlite.Binary(pickle.dumps(response[1]))
            curtime = int(time.time())
            cur = self.db.cursor()
            if isinstance(response[0], bool):
                cur.execute('replace into cache(id,addtime,expire,data) values(?,?,?,?)', (token, curtime, None, obj))
            else:
                cur.execute('replace into cache(id,addtime,expire,data) values(?,?,?,?)',
                            (token, curtime, curtime + response[0], obj))
            self.db.commit()
            cur.close()

        return response[1]

    def expire(self, expire):
        # with rtrCache_lock:
        cur = self.db.cursor()
        cur.execute('delete from cache where addtime<?', (int(time.time()) - expire,))
        self.db.commit()
        cur.close()

    def size(self, size, step=100):
        # with rtrCache_lock:
        while True:
            if os.path.getsize(self.filename) < size:
                break
            cur = self.db.cursor()
            cur.execute('select id from cache order by addtime asc limit ?', (step,))
            rows = cur.fetchall()
            if not rows:
                cur.close()
                break
            cur.execute('delete from cache where id in (' + ','.join(len(rows) * '?') + ')', [x[0] for x in rows])
            self.db.commit()
            cur.close()

    def flush(self):
        # with rtrCache_lock:
        cur = self.db.cursor()
        cur.execute('delete from cache')
        self.db.commit()
        cur.close()

    def _connect(self):
        with rtrCache_lock:
            dirname = xbmc.translatePath('special://temp')
            for subdir in ('xbmcup', 'plugin.video.torrenter'):
                dirname = os.path.join(dirname, subdir)
                if not xbmcvfs.exists(dirname):
                    xbmcvfs.mkdir(dirname)

            self.filename = os.path.join(dirname, self.name)

            first = False
            if not xbmcvfs.exists(self.filename):
                first = True

            self.db = sqlite.connect(self.filename, check_same_thread=False)
            if not first:
                cur = self.db.cursor()
                try:
                    cur.execute('select version from db_ver')
                    row = cur.fetchone()
                    if not row or float(row[0]) != self.version:
                        cur.execute('drop table cache')
                        cur.execute('drop table if exists db_ver')
                        first = True
                except:
                    cur.execute('drop table cache')
                    first = True
                self.db.commit()
                cur.close()

            # the DB is brand new and no prebuilt copy was downloaded: create an empty schema
            if first and not self.first_time():
                cur = self.db.cursor()
                cur.execute('pragma auto_vacuum=1')
                cur.execute('create table cache(id varchar(255) unique, addtime integer, expire integer, data blob)')
                cur.execute('create index time on cache(addtime asc)')
                cur.execute('create table db_ver(version real)')
                cur.execute('insert into db_ver(version) values(?)', (self.version,))
                self.db.commit()
                cur.close()

    def first_time(self):
        scrapers = {'tvdb': 'TheTVDB.com', 'tmdb': 'TheMovieDB.org', 'kinopoisk': 'KinoPoisk.ru'}
        ok = xbmcgui.Dialog().yesno(Localization.localize('Content Lists'),
                                    Localization.localize('Do you want to preload full metadata?') + ' (%s)' % (
                                        scrapers[os.path.basename(self.filename).split('.')[0]]),
                                    Localization.localize('It is highly recommended!'))
        if ok:
            return self.download()
        else:
            return False

    def download(self):
        dirname = os.path.dirname(self.filename)
        zipname = os.path.basename(self.filename).replace('.db', '') + '.zip'
        url = 'http://www.tat-store.ru/torrenter/' + zipname
        self.http = HTTP()
        response = self.http.fetch(url, download=os.path.join(dirname, zipname), progress=True)
        if response.error:
            return False

        try:
            filezip = zipfile.ZipFile(os.path.join(dirname, zipname), 'r')
            filezip.extractall(dirname)
            filezip.close()
        except:
            return False

        return True
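
The subtle part of this class is the callback contract in get(): on a cache miss the callback must return a (timeout, data) pair, where True stores the entry permanently (the expire column stays NULL), a number stores it for that many seconds, and a falsy first element stores nothing; only data is handed back to the caller. A minimal conforming callback, assuming the same surrounding Kodi environment (the lookup body is a stand-in):

def _fetch_movie(id):
    data = {'title': 'Example', 'id': id}  # stand-in for a real network fetch
    if data is None:
        return False, None  # falsy timeout: the miss is not cached
    return 24 * 60 * 60, data  # cache this entry for one day

cache = Cache('tvdb.db', 1.0)  # the name matters: first_time() keys its dialog off it
movie = cache.get('movie:123', _fetch_movie, '123')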
Example #9
class KinoPoisk:
    """
    
    API:
        scraper  - скрапер
        movie    - профайл фильма
        search   - поиск фильма
        best     - поиск лучших фильмов
        person   - поиск персон
        work     - информация о работах персоны
        
    """
    def __init__(self, language='ru'):
        dbname = 'kinopoisk.%s.db' % language
        self.cache = Cache(dbname, 1.0)
        self.html = Clear()

        self.timeout = 60.0

        self.http = HTTP()
        self.headers = {
            'User-Agent':
            'Mozilla/5.0 (Windows NT 6.1; rv:10.0.2) Gecko/20100101 Firefox/10.0.2',
            'Accept':
            'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3',
            'Cache-Control': 'no-cache',
            'Referer': 'http://www.kinopoisk.ru/level/7/'
        }

    # API

    def scraper(self, search, year=None):

        try:
            if not isinstance(search, list):
                search = [search]
            tag = 'scraper:' + urllib.quote_plus(
                ":".join(search).encode('utf8'))
        except:
            return None
        else:

            if year:
                tag += ':' + str(year)

            id = self.cache.get(tag, self._scraper, search, year)
            if not id:
                return None

            return self.movie(id)

    def movie(self, id):
        id = str(id)
        return self.cache.get('movie:' + id, self._movie, id)

    def search(self, search, year):
        return self._search_movie(search, year)

    def countries(self):
        return COUNTRIES

    def country(self, id, default=None):
        country = [x[1] for x in COUNTRIES if x[0] == id]
        return country[0] if country else default

    def _search_movie(self, search, year=None):
        parser = kinopoisk.pageparser.PageParser(kinopoisk.LOGGER,
                                                 isDebug=True)
        originalname = search[0]
        if len(search) > 1:
            name = search[1]
        else:
            name = None
        results = parser.fetchAndParseSearchResults(originalname, year, name)
        if results and results[0][3] > 70:
            return results[0][0]

    def _scraper(self, search, year):
        timeout = True

        # if the movie is recent, cache it only briefly (the site may still get updates)
        if year and year > time.gmtime(time.time()).tm_year:
            timeout = 7 * 24 * 60 * 60 * 4  # 4 weeks

        movie_id = self._search_movie(search, year)

        if movie_id is None:
            # cache the empty result for 4 weeks
            return 7 * 24 * 60 * 60 * 4, None

        else:
            return timeout, movie_id

    def _movie(self, id):
        response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/',
                                   headers=self.headers,
                                   timeout=self.timeout)
        if response.error:
            return False, None

        html = response.body.decode('windows-1251')

        res = {
            'icon': None,
            'thumbnail': None,
            'properties': {
                'fanart_image': None,
            },
            'info': {
                'count': int(id)
            }
        }

        # title, original title, tagline, age rating, year, top250
        # runtime - movie duration (kept as its own field, otherwise the file size is not shown)
        for tag, reg, cb in (
                ('title', '<title>(.+?)</title>', self.html.string),
                ('originaltitle', 'itemprop="alternativeHeadline">([^<]*)</span>', self.html.string),
                ('tagline', '<td style="color\: #555">&laquo;(.+?)&raquo;</td></tr>', self.html.string),
                ('mpaa', 'images/mpaa/([^\.]+).gif', self.html.string),
                ('runtime', '<td class="time" id="runtime">[^<]+<span style="color\: #999">/</span>([^<]+)</td>', self.html.string),
                ('year', '<a href="/lists/m_act%5Byear%5D/([0-9]+)/"', int),
                ('top250', 'Топ250\: <a\shref="/level/20/#([0-9]+)', int)):
            r = re.compile(reg, re.U).search(html)
            if r:
                value = r.group(1).strip()
                if value:
                    res['info'][tag] = cb(value)

        # directors, writers, genres
        for tag, reg in (
                ('director', u'<td itemprop="director">(.+?)</td>'),
                ('writer', u'<td class="type">сценарий</td><td[^>]*>(.+?)</td>'),
                ('genre', u'<span itemprop="genre">(.+?)</span>')):
            r = re.compile(reg, re.U | re.S).search(html)
            if r:
                r2 = []
                for r in re.compile('<a href="[^"]+">([^<]+)</a>',
                                    re.U).findall(r.group(1)):
                    r = self.html.string(r)
                    if r and r != '...':
                        r2.append(r)
                if r2:
                    res['info'][tag] = u', '.join(r2)

        # actors
        r = re.compile(u'<h4>В главных ролях:</h4>(.+?)</ul>',
                       re.U | re.S).search(html)
        if r:
            actors = []
            for r in re.compile(
                    '<li itemprop="actors"><a [^>]+>([^<]+)</a></li>',
                    re.U).findall(r.group(1)):
                r = self.html.string(r)
                if r and r != '...':
                    actors.append(r)
            if actors:
                res['info']['cast'] = actors[:]
                # res['info']['castandrole'] = actors[:]

        # movie description
        r = re.compile(
            '<span class="_reachbanner_"><div class="brand_words" itemprop="description">(.+?)</div></span>',
            re.U).search(html)
        if r:
            plot = self.html.text(r.group(1).replace('<=end=>', '\n'))
            if plot:
                res['info']['plot'] = plot

        # IMDB
        r = re.compile('IMDb: ([0-9.]+) \(([0-9\s]+)\)</div>',
                       re.U).search(html)
        if r:
            res['info']['rating'] = float(r.group(1).strip())
            res['info']['votes'] = r.group(2).strip()

        # premiere date
        r = re.compile(u'премьера \(мир\)</td>(.+?)</tr>',
                       re.U | re.S).search(html)
        if r:
            r = re.compile(u'data\-ical\-date="([^"]+)"',
                           re.U | re.S).search(r.group(1))
            if r:
                data = r.group(1).split(' ')
                if len(data) == 3:
                    i = 0
                    for mon in (u'января', u'февраля', u'марта', u'апреля',
                                u'мая', u'июня', u'июля', u'августа',
                                u'сентября', u'октября', u'ноября',
                                u'декабря'):
                        i += 1
                        if mon == data[1]:
                            mon = str(i)
                            if len(mon) == 1:
                                mon = '0' + mon
                            day = data[0]
                            if len(day) == 1:
                                day = '0' + day
                            res['info']['premiered'] = '-'.join(
                                [data[2], mon, day])
                            break

        # poster
        r = re.compile(u'onclick="openImgPopup\(([^\)]+)\)',
                       re.U | re.S).search(html)
        if r:
            poster = r.group(1).replace("'", '').strip()
            if poster:
                res['thumbnail'] = res['icon'] = 'http://kinopoisk.ru' + poster

        menu = re.compile(
            '<ul id="newMenuSub" class="clearfix(.+?)<!\-\- /menu \-\->',
            re.U | re.S).search(html)
        if menu:
            menu = menu.group(1)

            # fanart
            if menu.find('/film/' + id + '/wall/') != -1:
                response = self.http.fetch('http://www.kinopoisk.ru/film/' +
                                           id + '/wall/',
                                           headers=self.headers,
                                           timeout=self.timeout)
                if not response.error:
                    html = response.body.decode('windows-1251')
                    fanart = re.compile(
                        '<a href="/picture/([0-9]+)/w_size/([0-9]+)/">',
                        re.U).findall(html)
                    if fanart:
                        fanart.sort(cmp=lambda (id1, size1),
                                    (id2, size2): cmp(int(size1), int(size2)))

                        # try to pick the best-fitting image
                        fanart_best = [x for x in fanart if int(x[1]) <= 1280]
                        if fanart_best:
                            fanart = fanart_best

                        response = self.http.fetch(
                            'http://www.kinopoisk.ru/picture/' +
                            fanart[-1][0] + '/w_size/' + fanart[-1][1] + '/',
                            headers=self.headers,
                            timeout=self.timeout)
                        if not response.error:
                            html = response.body.decode('windows-1251')
                            r = re.compile('id="image" src="([^"]+)"',
                                           re.U | re.S).search(html)
                            if r:
                                res['properties']['fanart_image'] = r.group(
                                    1).strip()

            # if there is no fanart (wallpaper), try the movie stills instead
            if not res['properties']['fanart_image'] and menu.find(
                    '/film/' + id + '/stills/') != -1:
                response = self.http.fetch('http://www.kinopoisk.ru/film/' +
                                           id + '/stills/',
                                           headers=self.headers,
                                           timeout=self.timeout)
                if not response.error:
                    html = response.body.decode('windows-1251')
                    fanart = re.compile(
                        '<a href="/picture/([0-9]+)/"><img  src="[^<]+</a>[^<]+<b><i>([0-9]+)&times;([0-9]+)</i>',
                        re.U).findall(html)
                    if fanart:
                        fanart.sort(cmp=lambda (id1, size1, t1), (
                            id2, size2, t2): cmp(int(size1), int(size2)))

                        # try to pick the best-fitting image
                        fanart_best = [
                            x for x in fanart
                            if int(x[1]) <= 1280 and int(x[1]) > int(x[2])
                        ]
                        if fanart_best:
                            fanart = fanart_best

                        response = self.http.fetch(
                            'http://www.kinopoisk.ru/picture/' +
                            fanart[-1][0] + '/',
                            headers=self.headers,
                            timeout=self.timeout)
                        if not response.error:
                            html = response.body.decode('windows-1251')
                            r = re.compile('id="image" src="([^"]+)"',
                                           re.U | re.S).search(html)
                            if r:
                                res['properties']['fanart_image'] = r.group(
                                    1).strip()

            # studios
            if menu.find('/film/' + id + '/studio/') != -1:
                response = self.http.fetch('http://www.kinopoisk.ru/film/' +
                                           id + '/studio/',
                                           headers=self.headers,
                                           timeout=self.timeout)
                if not response.error:
                    html = response.body.decode('windows-1251')
                    r = re.compile(u'<b>Производство:</b>(.+?)</table>',
                                   re.U | re.S).search(html)
                    if r:
                        studio = []
                        for r in re.compile(
                                '<a href="/lists/m_act%5Bstudio%5D/[0-9]+/" class="all">(.+?)</a>',
                                re.U).findall(r.group(1)):
                            r = self.html.string(r)
                            if r:
                                studio.append(r)
                        if studio:
                            res['info']['studio'] = u', '.join(studio)

        timeout = True
        # if the movie is recent, cache it only briefly (the site may still get updates)
        if 'year' not in res['info'] or not res['properties']['fanart_image'] \
                or int(res['info']['year']) > time.gmtime(time.time()).tm_year:
            timeout = 7 * 24 * 60 * 60 * 4  # 4 weeks

        return timeout, res
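
KinoPoisk follows the same scraper() -> Cache -> _movie() pipeline as the TvDb examples, but instead of unpacking a ZIP of XML it scrapes windows-1251 HTML, issuing up to three follow-up requests (wall, stills, studio) depending on what the film's menu advertises. Driving it is a single call, sketched here with an arbitrary film:

kp = KinoPoisk()  # language='ru' by default
movie = kp.scraper([u'Metropolis'], year=1927)  # hypothetical query
if movie:
    print movie['info'].get('title'), movie['info'].get('rating')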
Example #10
class TvDb:
    """
    
    API:
        scraper  - скрапер
        search   - поиск сериалов
        movie    - профайл фильма
        
    """

    def __init__(self, language='en'):
        self.api_key = '33DBB309BB2B0ADB'
        dbname = 'tvdb.%s.db' % language
        self.cache = Cache(dbname, 1.0)

        self.language = language

        self.http = HTTP()
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:10.0.2) Gecko/20100101 Firefox/10.0.2',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3',
            'Cache-Control': 'no-cache',
            'Referer': 'http://www.thetvdb.com/'
        }


    # API

    def scraper(self, search, year=None):
        try:
            if not isinstance(search, list):
                search = [search]
            tag = 'scraper:' + urllib.quote_plus(":".join(search).encode('utf8'))
        except:
            return None
        else:

            if year:
                tag += ':' + str(year)

            id = self.cache.get(tag, self._scraper, search, year)
            if not id:
                return None

            return self.movie(id)

    def search(self, search, year=None):
        return self._search(search, year)


    def movie(self, id):
        id = str(id)
        return self.cache.get('movie:' + id, self._movie, id)


    def _movie(self, id):
        try:
            dirname = tempfile.mkdtemp()
        except:
            dirname = xbmc.translatePath('special://temp')
            for subdir in ('xbmcup', 'plugin.video.torrenter'):
                dirname = os.path.join(dirname, subdir)
                if not os.path.exists(dirname):
                    os.mkdir(dirname)

        url = 'http://www.thetvdb.com/api/' + self.api_key + '/series/' + id + '/all/' + self.language + '.zip'
        # print url
        response = self.http.fetch(url, headers=self.headers, download=os.path.join(dirname, 'movie.zip'), timeout=20)
        if response.error:
            print "ERRRRRROR! " + str(response.error)
            self._movie_clear(dirname)
            return False, None

        try:
            filezip = zipfile.ZipFile(os.path.join(dirname, 'movie.zip'), 'r')
            filezip.extractall(dirname)
            filezip.close()
            movie = file(os.path.join(dirname, self.language + '.xml'), 'rb').read().decode('utf8')
        except:
            self._movie_clear(dirname)
            return False, None

        self._movie_clear(dirname)

        body = re.compile(r'<Series>(.+?)</Series>', re.U | re.S).search(movie)
        if not body:
            return False, None

        body = body.group(1)

        res = {
            'icon': None,
            'thumbnail': None,
            'properties': {
                'fanart_image': None,
            },
            'info': {
                'count': int(id)
            }
        }

        # directors and writers
        for tag in ('Director', 'Writer'):
            people = {}
            people_list = []
            [people_list.extend(x.split('|')) for x in
             re.compile(r'<' + tag + r'>([^<]+)</' + tag + r'>', re.U | re.S).findall(movie)]
            [people.update({x: 1}) for x in [x.strip() for x in people_list] if x]
            if people:
                res['info'][tag.lower()] = u', '.join([x for x in people.keys() if x])

        for tag, retag, typeof, targettype in (
                ('plot', 'Overview', None, None),
                ('mpaa', 'ContentRating', None, None),
                ('premiered', 'FirstAired', None, None),
                ('studio', 'Network', None, None),
                ('title', 'SeriesName', None, None),
                ('runtime', 'Runtime', None, None),
                ('votes', 'RatingCount', None, None),
                ('rating', 'Rating', float, None),
                ('genre', 'Genre', list, unicode),
                ('cast', 'Actors', list, None)
        ):
            r = re.compile(r'<' + retag + r'>([^<]+)</' + retag + r'>', re.U | re.S).search(body)
            if r:
                r = r.group(1).strip()
                if typeof == float:
                    res['info'][tag] = float(r)
                elif typeof == list:
                    if targettype == unicode:
                        res['info'][tag] = u', '.join([x for x in [x.strip() for x in r.split(u'|')] if x])
                    else:
                        res['info'][tag] = [x for x in [x.strip() for x in r.split(u'|')] if x]
                else:
                    res['info'][tag] = r

        # year
        if 'premiered' in res['info']:
            res['info']['year'] = int(res['info']['premiered'].split('-')[0])

        # poster
        r = re.compile(r'<poster>([^<]+)</poster>', re.U | re.S).search(body)
        if r:
            res['icon'] = 'http://thetvdb.com/banners/' + r.group(1).strip()
            res['thumbnail'] = 'http://thetvdb.com/banners/' + r.group(1).strip()

        # fanart
        r = re.compile(r'<fanart>([^<]+)</fanart>', re.U | re.S).search(body)
        if r:
            res['properties']['fanart_image'] = 'http://thetvdb.com/banners/' + r.group(1).strip()

        timeout = True
        # if the movie is recent, cache it only briefly (the site may still get updates)
        if 'year' not in res['info'] or not res['properties']['fanart_image'] \
                or int(res['info']['year']) > time.gmtime(time.time()).tm_year:
            timeout = 7 * 24 * 60 * 60 * 4  # 4 weeks

        return timeout, res


    def _movie_clear(self, dirname):
        # remove the extracted files, then the temporary directory itself
        for filename in os.listdir(dirname):
            os.unlink(os.path.join(dirname, filename))
        os.rmdir(dirname)


    def _search(self, search, year=None):
        id = None
        for name in search:
            # print urllib.quote_plus(name.encode('utf-8'))
            url = 'http://www.thetvdb.com/api/GetSeries.php?language=' + self.language + '&seriesname=' + urllib.quote_plus(
                name.encode('utf-8'))
            #print url
            response = self.http.fetch(url, headers=self.headers, timeout=20)
            #print response.body
            if response.error:
                #print "ERRRRRROR! "+str(response.error)
                return None

            res = []
            rows = re.compile('<Series>(.+?)</Series>', re.U | re.S).findall(response.body.decode('utf8'))
            if rows:
                recmd = re.compile('<seriesid>([0-9]+)</seriesid>', re.U | re.S)

                for row in [x for x in rows if x.find(u'<language>%s</language>' % self.language.decode('utf8')) != -1]:
                    r = recmd.search(row)
                    if r:
                        res.append(int(r.group(1)))
                # in some cases a show can only be found by its original title,
                # even though a Russian description exists
                if not res and self.language != 'en':
                    for row in [x for x in rows if x.find(u'<language>en</language>') != -1]:
                        r = recmd.search(row)
                        if r:
                            res.append(int(r.group(1)))

                if len(res) > 1:
                    Data = []
                    for id in res:
                        for row in rows:
                            recmd = re.compile('<seriesid>([0-9]+)</seriesid>', re.U | re.S)
                            r = recmd.search(row)
                            if int(r.group(1)) == id:
                                title = re.compile('<SeriesName>(.+?)</SeriesName>', re.U | re.S).search(row)
                                Syear = re.compile('<FirstAired>(.+?)</FirstAired>', re.U | re.S).search(row)
                                if not Syear:
                                    Syear = 0
                                else:
                                    Syear = Syear.group(1)
                                Data.append((title.group(1), Syear, id))

                    index = get_best(Data, search, year)
                    if index and index['rate'] > 70:
                        id = str(index['id'])
                elif len(res) == 1:
                    id = str(res[0])

            if id:
                break

        return id


    def _scraper(self, search, year):
        timeout = True

        # if the movie is recent, cache it only briefly (the site may still get updates)
        if year and year > time.gmtime(time.time()).tm_year:
            timeout = 7 * 24 * 60 * 60 * 4  # 4 weeks

        id = self._search(search, year)

        if id is None:
            return 7 * 24 * 60 * 60 * 4, None

        else:
            # print str((timeout, ids['data'][0]))
            return timeout, id
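
This version of _search() leans on one more unshown helper, get_best, which from its call site scores (title, first_aired, id) candidates against the query and returns a dict with at least 'rate' (on a 0-100 scale, compared against 70) and 'id'. A rough sketch of a helper with that contract, using a plain string-similarity ratio (the scoring itself is an assumption):

import difflib

def get_best(data, search, year=None):
    # data: list of (title, first_aired, id) tuples collected from the XML.
    best = {'rate': 0, 'id': None}
    for title, aired, id in data:
        rate = int(100 * difflib.SequenceMatcher(None, title.lower(),
                                                 search[0].lower()).ratio())
        if year and str(aired)[:4] == str(year):
            rate += 10  # favour a matching first-aired year
        if rate > best['rate']:
            best = {'rate': rate, 'id': id}
    return best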
Example #11
class KinoPoisk:
    """
    
    API:
        scraper  - скрапер
        movie    - профайл фильма
        search   - поиск фильма
        best     - поиск лучших фильмов
        person   - поиск персон
        work     - информация о работах персоны
        
    """
    def __init__(self):
        self.cache = Cache('kinopoisk.db', 1.0)
        self.html = Clear()

        self.http = HTTP()
        self.headers = {
            'User-Agent':
            'Mozilla/5.0 (Windows NT 6.1; rv:10.0.2) Gecko/20100101 Firefox/10.0.2',
            'Accept':
            'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3',
            'Cache-Control': 'no-cache',
            'Referer': 'http://www.kinopoisk.ru/level/7/'
        }

    # API

    def scraper(self, search, year=None, trailer_quality=None):

        try:
            if isinstance(search, list):
                search = search[0] or ""
            tag = 'scraper:' + urllib.quote_plus(search.encode('windows-1251'))
        except:
            return None
        else:

            if year:
                tag += ':' + str(year)

            id = self.cache.get(tag, self._scraper, search, year)
            if not id:
                return None

            return self.movie(id, trailer_quality)

    def movie(self, id, trailer_quality=None):
        id = str(id)

        if trailer_quality is None:
            trailer_quality = 6

        movie = self.cache.get('movie:' + id, self._movie, id)
        if not movie:
            return None

        if 'trailers' in movie and movie['trailers']:
            # build the list of trailers at the quality we need
            video = []
            for m in movie['trailers']:
                url = [x for x in m['video'] if x[0] <= trailer_quality]
                if url:
                    m['video'] = url[-1]
                    video.append(m)

            movie['trailers'] = video

            if movie['trailers']:
                # pick the main trailer
                r = [x for x in movie['trailers'] if x['trailer']]
                if r:
                    movie['info']['trailer'] = r[0]['video'][1]
                else:
                    # if no trailer is flagged as the main one, return whatever is available...
                    movie['info']['trailer'] = movie['trailers'][0]['video'][1]

        return movie

    def search(self, name, trailer_quality=None):
        return self._search_movie(name)

    def best(self, **kwarg):
        page = kwarg.get('page', 1)
        limit = kwarg.get('limit', 50)

        url = 'http://www.kinopoisk.ru/top/navigator/m_act%5Bis_film%5D/on/m_act%5Bnum_vote%5D/' + str(
            kwarg.get('votes', 100)) + '/'

        if kwarg.get('dvd'):
            url += 'm_act%5Bis_dvd%5D/on/'

        if kwarg.get('decade'):
            url += 'm_act%5Bdecade%5D/' + str(kwarg['decade']) + '/'

        if kwarg.get('genre'):
            url += 'm_act%5Bgenre%5D/' + str(GENRE[kwarg['genre']]) + '/'

        if kwarg.get('country'):
            url += 'm_act%5Bcountry%5D/' + str(kwarg['country']) + '/'

        if kwarg.get('rate'):
            url += 'm_act%5Brating%5D/' + str(kwarg['rate']) + ':/'

        if kwarg.get('mpaa'):
            url += 'm_act%5Bmpaa%5D/' + str(kwarg['mpaa']) + '/'

        url += 'perpage/' + str(limit) + '/order/ex_rating/'

        if page > 1:
            url += 'page/' + str(page) + '/'

        response = self.http.fetch(url, headers=self.headers)
        if response.error:
            return None

        res = {'pages': (1, 0, 1, 0), 'data': []}

        r = re.compile('<div class="pagesFromTo(.+?)<div class="pagesFromTo',
                       re.U | re.S).search(
                           response.body.decode('windows-1251'))
        if r:

            body = r.group(1)

            # compile pagelist
            p = re.compile('>([0-9]+)&mdash;[0-9]+[^0-9]+?([0-9]+)',
                           re.U).search(body)
            if p:
                page = (int(p.group(1)) - 1) / limit + 1
                total = int(p.group(2))
                pages = total / limit
                if limit * pages != total:
                    pages += 1
                res['pages'] = (pages, 0 if page == 1 else page - 1, page,
                                0 if page == pages else page + 1)
            # end compile

            for id in re.compile('<div id="tr_([0-9]+)"',
                                 re.U | re.S).findall(body):
                res['data'].append(int(id))

        return res

    def person(self, name):
        response = self.http.fetch(
            'http://www.kinopoisk.ru/s/type/people/list/1/find/' +
            urllib.quote_plus(name.encode('windows-1251')) +
            '/order/relevant/',
            headers=self.headers)
        if response.error:
            return None

        res = []
        body = re.compile(
            '<div class="navigator">(.+?)<div class="navigator">',
            re.U | re.S).search(response.body.decode('windows-1251'))
        if body:

            for block in re.compile('<p class="pic">(.+?)<div class="clear">',
                                    re.U | re.S).findall(body.group(1)):

                id, name, original, year, poster = None, None, None, None, None

                r = re.compile(
                    '<p class="name"><a href="http://www\.kinopoisk\.ru/level/4/people/([0-9]+)[^>]+>([^<]+)</a>',
                    re.U | re.S).search(block)
                if r:
                    id = r.group(1)
                    name = r.group(2).strip()

                    if id and name:

                        r = re.compile('<span class="gray">([^<]+)</span>',
                                       re.U | re.S).search(block)
                        if r:
                            original = r.group(1).strip()
                            if not original:
                                original = None

                        r = re.compile('<span class="year">([0-9]{4})</span>',
                                       re.U | re.S).search(block)
                        if r:
                            year = int(r.group(1))

                        if block.find('no-poster.gif') == -1:
                            poster = 'http://st.kinopoisk.ru/images/actor/' + id + '.jpg'

                        res.append({
                            'id': int(id),
                            'name': name,
                            'originalname': original,
                            'year': year,
                            'poster': poster
                        })

        return {'pages': (1, 0, 1, 0), 'data': res}

    def work(self, id):
        response = self.http.fetch('http://www.kinopoisk.ru/name/' + str(id) +
                                   '/',
                                   headers=self.headers)
        if response.error:
            return None

        res = {}

        r = re.compile('id="sort_block">(.+?)<style>', re.U | re.S).search(
            response.body.decode('windows-1251'))
        if r:
            for block in r.group(1).split(
                    u'<table cellspacing="0" cellpadding="0" border="0" width="100%">'
            ):
                work = None

                for w in ('actor', 'director', 'writer', 'producer',
                          'producer_ussr', 'composer', 'operator', 'editor',
                          'design', 'voice', 'voice_director'):
                    if block.find(u'id="' + w + u'"') != -1:
                        work = 'producer' if w == 'producer_ussr' else w
                        break

                if work:

                    movies = []

                    for id, name in re.compile(
                            '<span class="name"><a href="/film/([0-9]+)/" >([^<]+?)</a>',
                            re.U).findall(block):
                        for tag in (u'(мини-сериал)', u'(сериал)'):
                            if name.find(tag) != -1:
                                break
                        else:
                            movies.append(int(id))

                    if movies:
                        res.setdefault(work, []).extend(movies)

        return res

    def review(self, id, query):
        query_s = 'all' if query == 'stat' else query
        data = self.cache.get('review:' + str(id) + ':' + query_s,
                              self._review, id, query_s)
        if not data:
            return data
        return data[query]

    def countries(self):
        return COUNTRIES

    def country(self, id, default=None):
        country = [x[1] for x in COUNTRIES if x[0] == id]
        return country[0] if country else default

    # PRIVATE

    def _search_movie(self, name, year=None):
        url = 'http://www.kinopoisk.ru/s/type/film/list/1/find/' + urllib.quote_plus(
            name.encode('windows-1251'))  # + '/order/relevant'
        if year:
            url += '/m_act%5Byear%5D/' + str(year)
        url += '/m_act%5Btype%5D/film/'

        response = self.http.fetch(url, headers=self.headers)
        if response.error:
            return None

        res = []
        r = re.compile('<div class="navigator">(.+?)<div class="navigator">',
                       re.U | re.S).search(
                           response.body.decode('windows-1251'))
        if r:
            for id in re.compile(
                    '<p class="name"><a href="/level/1/film/([0-9]+)',
                    re.U | re.S).findall(r.group(1)):
                res.append(int(id))

        return {'pages': (1, 0, 1, 0), 'data': res}

    def _scraper(self, name, year):
        timeout = True

        # if the movie is recent, cache it only briefly (the site may get updates)
        if year and year >= time.gmtime(time.time()).tm_year:
            timeout = 7 * 24 * 60 * 60  # one week

        ids = self._search_movie(name, year)

        if ids is None:
            return False, None

        elif not ids['data']:
            # cache the empty result for 3 days
            return 259200, None

        else:
            return timeout, ids['data'][0]

    def _review(self, id, query):
        url = 'http://www.kinopoisk.ru/film/' + str(id) + '/ord/rating/'
        if query in ('good', 'bad', 'neutral'):
            url += 'status/' + query + '/'
        url += 'perpage/200/'

        response = self.http.fetch(url, headers=self.headers)
        if response.error:
            return False, None

        html = response.body.decode('windows-1251')

        res = {
            'stat': {
                'all': 0,
                'good': 0,
                'bad': 0,
                'neutral': 0
            },
            query: []
        }

        r = re.compile('<ul class="resp_type">(.+?)</ul>',
                       re.U | re.S).search(html)
        if r:
            ul = r.group(1)

            for q, t in (('pos', 'good'), ('neg', 'bad'), ('neut', 'neutral')):
                r = re.compile(
                    '<li class="' + q +
                    '"><a href="[^>]+>[^<]+</a><b>([0-9]+)</b></li>',
                    re.U).search(ul)
                if r:
                    res['stat'][t] = int(r.group(1))

            res['stat']['all'] = res['stat']['good'] + res['stat'][
                'bad'] + res['stat']['neutral']

        r = re.compile('<div class="navigator">(.+?)<div class="navigator">',
                       re.U | re.S).search(html)
        if r:

            for block in r.group(1).split('itemprop="reviews"'):

                review = {
                    'nick': None,
                    'count': None,
                    'title': None,
                    'review': None,
                    'time': None
                }

                r = re.compile('itemprop="reviewBody">(.+?)</div>',
                               re.U | re.S).search(block)
                if r:

                    text = r.group(1)
                    for tag1, tag2 in ((u'<=end=>', u'\n'), (u'<b>', u'[B]'),
                                       (u'</b>', u'[/B]'), (u'<i>', u'[I]'),
                                       (u'</i>', u'[/I]'), (u'<u>', u'[U]'),
                                       (u'</u>', u'[/U]')):
                        text = text.replace(tag1, tag2)

                    r = self.html.text(text)
                    if r:
                        review['review'] = r

                user = None
                r = re.compile(
                    '<p class="profile_name"><s></s><a href="[^>]+>([^<]+)</a></p>'
                ).search(block)
                if r:
                    user = self.html.string(r.group(1))
                else:
                    r = re.compile('<p class="profile_name"><s></s>([^<]+)</p>'
                                   ).search(block)
                    if r:
                        user = self.html.string(r.group(1))
                if user:
                    review['nick'] = user

                r = re.compile('<p class="sub_title"[^>]+>([^<]+)</p>').search(
                    block)
                if r:
                    title = self.html.string(r.group(1))
                    if title:
                        review['title'] = title

                r = re.compile('<span class="date">([^<]+)</span>',
                               re.U | re.S).search(block)
                if r:
                    review['time'] = r.group(1).replace(u' |', u',')

                r = re.compile(u'<a href="[^>]+>рецензии \(([0-9]+)\)</a>',
                               re.U | re.S).search(block)
                if r:
                    review['count'] = int(r.group(1))

                if review['nick'] and review['review']:
                    res[query].append(review)

        return 3600, res  # one hour

    def _movie(self, id):
        response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/',
                                   headers=self.headers)
        if response.error:
            return False, None

        html = response.body.decode('windows-1251')

        res = {
            'icon': None,
            'thumbnail': None,
            'info': {
                'count': int(id)
            },
            'properties': {
                'fanart_image': None,
            },
        }

        # title, original title, tagline, MPAA rating, year, top250
        # runtime - movie duration (kept in a separate field, otherwise the file size is not visible)
        for tag, reg, t in (
            ('title', '<title>(.+?)</title>', 'str'),
            ('originaltitle', 'itemprop="alternativeHeadline">([^<]*)</span>', 'str'),
            ('tagline', '<td style="color\: #555">&laquo;(.+?)&raquo;</td></tr>', 'str'),
            ('mpaa', 'itemprop="contentRating"\s+content="MPAA\s+([^"]+)"', 'str'),
            ('runtime', '<td class="time" id="runtime">[^<]+<span style="color\: #999">/</span>([^<]+)</td>', 'str'),
            ('year', '<a href="/lists/m_act%5Byear%5D/([0-9]+)/"', 'int'),
            ('top250', 'Топ250\: <a\shref="/level/20/#([0-9]+)', 'int')):
            r = re.compile(reg, re.U).search(html)
            if r:
                value = r.group(1).strip()
                if value:
                    res['info'][tag] = value
                    if t == 'int':
                        res['info'][tag] = int(res['info'][tag])
                    else:
                        res['info'][tag] = self.html.string(res['info'][tag])

        # directors, writers, genres
        for tag, reg in (
            ('director', u'<td itemprop="director">(.+?)</td>'),
            ('writer', u'<td class="type">сценарий</td><td[^>]*>(.+?)</td>'),
            ('genre', u'<td itemprop="genre">(.+?)</td>')):
            r = re.compile(reg, re.U | re.S).search(html)
            if r:
                r2 = []
                for r in re.compile('<a href="[^"]+">([^<]+)</a>',
                                    re.U).findall(r.group(1)):
                    r = self.html.string(r)
                    if r and r != '...':
                        r2.append(r)
                if r2:
                    res['info'][tag] = u', '.join(r2)

        # actors
        r = re.compile(u'<h4>В главных ролях:</h4><ul>(.+?)</ul>',
                       re.U | re.S).search(html)
        if r:
            actors = []
            for r in re.compile(
                    '<li itemprop="actors"><a [^>]+>([^<]+)</a></li>',
                    re.U).findall(r.group(1)):
                r = self.html.string(r)
                if r and r != '...':
                    actors.append(r)
            if actors:
                res['info']['cast'] = actors[:]
                #res['info']['castandrole'] = actors[:]

        # movie description
        r = re.compile(
            '<span class="_reachbanner_"><div class="brand_words" itemprop="description">(.+?)</div></span>',
            re.U).search(html)
        if r:
            plot = self.html.text(r.group(1).replace('<=end=>', '\n'))
            if plot:
                res['info']['plot'] = plot

        # IMDB
        r = re.compile('IMDb: ([0-9.]+) \(([0-9\s]+)\)</div>',
                       re.U).search(html)
        if r:
            res['info']['rating'] = float(r.group(1).strip())
            res['info']['votes'] = r.group(2).strip()

        # # premiere (world)
        # r = re.compile(u'премьера \(мир\)</td>(.+?)</tr>', re.U|re.S).search(html)
        # if r:
        #     r = re.compile(u'data\-ical\-date="([^"]+)"', re.U|re.S).search(r.group(1))
        #     if r:
        #         data = r.group(1).split(' ')
        #         if len(data) == 3:
        #             i = 0
        #             for mon in (u'января', u'февраля', u'марта', u'апреля', u'мая', u'июня', u'июля', u'августа', u'сентября', u'октября', u'ноября', u'декабря'):
        #                 i += 1
        #                 if mon == data[1]:
        #                     mon = str(i)
        #                     if len(mon) == 1:
        #                         mon = '0' + mon
        #                     day = data[0]
        #                     if len(day) == 1:
        #                         day = '0' + day
        #                     res['info']['premiered'] = '-'.join([data[2], mon, day])
        #                     break

        # poster
        r = re.compile(u'onclick="openImgPopup\(([^\)]+)\)',
                       re.U | re.S).search(html)
        if r:
            poster = r.group(1).replace("'", '').strip()
            if poster:
                if poster.startswith("/"):
                    poster = "http://www.kinopoisk.ru%s" % poster
                res['icon'] = poster
                res['thumbnail'] = poster

        menu = re.compile('<ul id="newMenuSub" class="clearfix(.+?)</ul>',
                          re.U | re.S).search(html)
        if menu:
            menu = menu.group(1)

            # fanart
            if menu.find('/film/' + id + '/wall/') != -1:
                response = self.http.fetch('http://www.kinopoisk.ru/film/' +
                                           id + '/wall/',
                                           headers=self.headers)
                if not response.error:
                    html = response.body.decode('windows-1251')
                    fanart = re.compile(
                        '<a href="/picture/([0-9]+)/w_size/([0-9]+)/">',
                        re.U).findall(html)
                    if fanart:
                        fanart.sort(cmp=lambda (id1, size1),
                                    (id2, size2): cmp(int(size1), int(size2)))

                        # try to take the best-fitting size
                        fanart_best = [x for x in fanart if int(x[1]) <= 1280]
                        if fanart_best:
                            fanart = fanart_best

                        response = self.http.fetch(
                            'http://www.kinopoisk.ru/picture/' +
                            fanart[-1][0] + '/w_size/' + fanart[-1][1] + '/',
                            headers=self.headers)
                        if not response.error:
                            html = response.body.decode('windows-1251')
                            r = re.compile('id="image" src="([^"]+)"',
                                           re.U | re.S).search(html)
                            if r:
                                res['properties']['fanart_image'] = r.group(
                                    1).strip()

            # if there is no fanart (wallpaper), try to get stills instead
            if not res['properties']['fanart_image'] and menu.find(
                    '/film/' + id + '/stills/') != -1:
                response = self.http.fetch('http://www.kinopoisk.ru/film/' +
                                           id + '/stills/',
                                           headers=self.headers)
                if not response.error:
                    html = response.body.decode('windows-1251')
                    fanart = re.compile(
                        '<a href="/picture/([0-9]+)/"><img  src="[^<]+</a>[^<]+<b><i>([0-9]+)&times;([0-9]+)</i>',
                        re.U).findall(html)
                    if fanart:
                        fanart.sort(cmp=lambda (id1, size1, t1), (
                            id2, size2, t2): cmp(int(size1), int(size2)))

                        # try to take the best-fitting size
                        fanart_best = [
                            x for x in fanart
                            if int(x[1]) <= 1280 and int(x[1]) > int(x[2])
                        ]
                        if fanart_best:
                            fanart = fanart_best

                        response = self.http.fetch(
                            'http://www.kinopoisk.ru/picture/' +
                            fanart[-1][0] + '/',
                            headers=self.headers)
                        if not response.error:
                            html = response.body.decode('windows-1251')
                            r = re.compile('id="image" src="([^"]+)"',
                                           re.U | re.S).search(html)
                            if r:
                                res['properties']['fanart_image'] = r.group(
                                    1).strip()

            # # studios
            # if menu.find('/film/' + id + '/studio/') != -1:
            #     response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/studio/', headers=self.headers)
            #     if not response.error:
            #         html = response.body.decode('windows-1251')
            #         r = re.compile(u'<b>Производство:</b>(.+?)</table>', re.U|re.S).search(html)
            #         if r:
            #             studio = []
            #             for r in re.compile('<a href="/lists/m_act%5Bstudio%5D/[0-9]+/" class="all">(.+?)</a>', re.U).findall(r.group(1)):
            #                 r = self.html.string(r)
            #                 if r:
            #                     studio.append(r)
            #             if studio:
            #                 res['info']['studio'] = u', '.join(studio)

            # trailers

            # trailers1 = [] # Russian trailers
            # trailers2 = [] # other Russian videos
            # trailers3 = [] # trailers
            # trailers4 = [] # other videos

            # if menu.find('/film/' + id + '/video/') != -1:
            #     response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/video/', headers=self.headers)
            #     if not response.error:
            #         html = response.body.decode('windows-1251')

            #         for row in re.compile(u'<!-- ролик -->(.+?)<!-- /ролик -->', re.U|re.S).findall(html):

            #             # skip irrelevant blocks
            #             if row.find(u'>СМОТРЕТЬ</a>') != -1:

            #                 # Russian clip?
            #                 if row.find('class="flag flag2"') == -1:
            #                     is_ru = False
            #                 else:
            #                     is_ru = True

            #                 # get the trailer name
            #                 r = re.compile('<a href="/film/' + id + '/video/[0-9]+/[^>]+ class="all">(.+?)</a>', re.U).search(row)
            #                 if r:
            #                     name = self.html.string(r.group(1))
            #                     if name:

            #                         trailer = {
            #                             'name': name,
            #                             'time': None,
            #                             'trailer': False,
            #                             'ru': is_ru,
            #                             'video': []
            #                         }

            #                         # trailer or teaser?
            #                         for token in (u'Трейлер', u'трейлер', u'Тизер', u'тизер'):
            #                             if name.find(token) != -1:
            #                                 trailer['trailer'] = True
            #                                 break

            #                         # get the trailer duration
            #                         r = re.compile(u'clock.gif"[^>]+></td>\s*<td style="color\: #777">[^0-9]*([0-9\:]+)</td>', re.U|re.S).search(row)
            #                         if r:
            #                             trailer['time'] = r.group(1).strip()

            #                         # split the clips by quality
            #                         for r in re.compile('trailer/([1-3])a.gif"(.+?)link=([^"]+)" class="continue">.+?<td style="color\:#777">([^<]+)</td>\s*</tr>', re.U|re.S).findall(row):
            #                             quality = int(r[0])
            #                             if r[1].find('icon-hd') != -1:
            #                                 quality += 3

            #                             trailer['video'].append((quality, r[2].strip(), r[3]))

            #                         if trailer['video']:
            #                             if trailer['ru']:
            #                                 if trailer['trailer']:
            #                                     trailers1.append(trailer)
            #                                 else:
            #                                     trailers2.append(trailer)
            #                             else:
            #                                 if trailer['trailer']:
            #                                     trailers3.append(trailer)
            #                                 else:
            #                                     trailers4.append(trailer)

            # # concatenate the trailer lists
            # res['trailers'].extend(trailers1)
            # res['trailers'].extend(trailers2)
            # res['trailers'].extend(trailers3)
            # res['trailers'].extend(trailers4)

        timeout = True
        # if the movie is recent, cache it only briefly (the site may get updates)
        if 'year' not in res['info'] or int(
                res['info']['year']) >= time.gmtime(time.time()).tm_year:
            timeout = 7 * 24 * 60 * 60  # one week

        return timeout, res
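
For reference, the pagination 4-tuple (pages, prev, page, next) that best() builds from the scraped ">N&mdash;M ... total" fragment can be factored into a small pure function; the name compile_pages is mine, not from the source, and 0 stands for "no previous/next page":

def compile_pages(first_item, total, limit):
    # first_item: 1-based index of the first row shown on the current page
    page = (first_item - 1) // limit + 1
    pages = total // limit
    if limit * pages != total:
        pages += 1  # partial last page
    return (pages,
            0 if page == 1 else page - 1,
            page,
            0 if page == pages else page + 1)

# compile_pages(51, 120, 50) -> (3, 1, 2, 3)
# compile_pages(1, 40, 50)   -> (1, 0, 1, 0)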
Example #12
0
class KinoPoisk:
    """

    API:
        scraper  - scraper
        movie    - movie profile
        search   - movie search
        best     - search for the best movies
        person   - person search
        work     - information about a person's works

    """

    def __init__(self):
        self.cache = Cache('kinopoisk.db', 1.0)
        self.html = Clear()

        self.http = HTTP()
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:10.0.2) Gecko/20100101 Firefox/10.0.2',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3',
            'Cache-Control': 'no-cache',
            'Referer': 'http://www.kinopoisk.ru/level/7/'
        }

    # API

    def scraper(self, search, year=None, trailer_quality=None):

        try:
            if isinstance(search, list):
                search = search[0] or ""
            tag = 'scraper:' + urllib.quote_plus(search.encode('windows-1251'))
        except Exception:
            return None
        else:

            if year:
                tag += ':' + str(year)

            id = self.cache.get(tag, self._scraper, search, year)
            if not id:
                return None

            return self.movie(id, trailer_quality)

    def movie(self, id, trailer_quality=None):
        id = str(id)

        if trailer_quality is None:
            trailer_quality = 6

        movie = self.cache.get('movie:' + id, self._movie, id)
        if not movie:
            return None

        if 'trailers' in movie and movie['trailers']:
            # build the list of videos at the quality we want
            video = []
            for m in movie['trailers']:
                url = [x for x in m['video'] if x[0] <= trailer_quality]
                if url:
                    m['video'] = url[-1]
                    video.append(m)

            movie['trailers'] = video

            if movie['trailers']:
                # prepare the main trailer
                r = [x for x in movie['trailers'] if x['trailer']]
                if r:
                    movie['info']['trailer'] = r[0]['video'][1]
                else:
                    # if no trailer was found, return whatever is available...
                    movie['info']['trailer'] = movie['trailers'][0]['video'][1]

        return movie

    def search(self, name, trailer_quality=None):
        return self._search_movie(name)

    def best(self, **kwarg):
        page = kwarg.get('page', 1)
        limit = kwarg.get('limit', 50)

        url = 'http://www.kinopoisk.ru/top/navigator/m_act%5Bis_film%5D/on/m_act%5Bnum_vote%5D/' + str(kwarg.get('votes', 100)) + '/'

        if kwarg.get('dvd'):
            url += 'm_act%5Bis_dvd%5D/on/'

        if kwarg.get('decade'):
            url += 'm_act%5Bdecade%5D/' + str(kwarg['decade']) + '/'

        if kwarg.get('genre'):
            url += 'm_act%5Bgenre%5D/' + str(GENRE[kwarg['genre']]) + '/'

        if kwarg.get('country'):
            url += 'm_act%5Bcountry%5D/' + str(kwarg['country']) + '/'

        if kwarg.get('rate'):
            url += 'm_act%5Brating%5D/' + str(kwarg['rate']) + ':/'

        if kwarg.get('mpaa'):
            url += 'm_act%5Bmpaa%5D/' + str(kwarg['mpaa']) + '/'

        url += 'perpage/' + str(limit) + '/order/ex_rating/'

        if page > 1:
            url += 'page/' + str(page) + '/'

        response = self.http.fetch(url, headers=self.headers)
        if response.error:
            return None

        res = {'pages': (1, 0, 1, 0), 'data': []}

        r = re.compile('<div class="pagesFromTo(.+?)<div class="pagesFromTo', re.U | re.S).search(response.body.decode('windows-1251'))
        if r:

            body = r.group(1)

            # compile pagelist
            p = re.compile('>([0-9]+)&mdash;[0-9]+[^0-9]+?([0-9]+)', re.U).search(body)
            if p:
                page = (int(p.group(1)) - 1) / limit + 1
                total = int(p.group(2))
                pages = total / limit
                if limit * pages != total:
                    pages += 1
                res['pages'] = (pages, 0 if page == 1 else page - 1, page, 0 if page == pages else page + 1)
            # end compile

            for id in re.compile('<div id="tr_([0-9]+)"', re.U | re.S).findall(body):
                res['data'].append(int(id))

        return res

    def person(self, name):
        response = self.http.fetch('http://www.kinopoisk.ru/s/type/people/list/1/find/' + urllib.quote_plus(name.encode('windows-1251')) + '/order/relevant/', headers=self.headers)
        if response.error:
            return None

        res = []
        body = re.compile('<div class="navigator">(.+?)<div class="navigator">', re.U | re.S).search(response.body.decode('windows-1251'))
        if body:

            for block in re.compile('<p class="pic">(.+?)<div class="clear">', re.U | re.S).findall(body.group(1)):

                id, name, original, year, poster = None, None, None, None, None

                r = re.compile('<p class="name"><a href="http://www\.kinopoisk\.ru/level/4/people/([0-9]+)[^>]+>([^<]+)</a>', re.U | re.S).search(block)
                if r:
                    id = r.group(1)
                    name = r.group(2).strip()

                    if id and name:

                        r = re.compile('<span class="gray">([^<]+)</span>', re.U | re.S).search(block)
                        if r:
                            original = r.group(1).strip()
                            if not original:
                                original = None

                        r = re.compile('<span class="year">([0-9]{4})</span>', re.U | re.S).search(block)
                        if r:
                            year = int(r.group(1))

                        if block.find('no-poster.gif') == -1:
                            poster = 'http://st.kinopoisk.ru/images/actor/' + id + '.jpg'

                        res.append({'id': int(id), 'name': name, 'originalname': original, 'year': year, 'poster': poster})

        return {'pages': (1, 0, 1, 0), 'data': res}

    def work(self, id):
        response = self.http.fetch('http://www.kinopoisk.ru/name/' + str(id) + '/', headers=self.headers)
        if response.error:
            return None

        res = {}

        r = re.compile('id="sort_block">(.+?)<style>', re.U | re.S).search(response.body.decode('windows-1251'))
        if r:
            for block in r.group(1).split(u'<table cellspacing="0" cellpadding="0" border="0" width="100%">'):
                work = None

                for w in ('actor', 'director', 'writer', 'producer', 'producer_ussr', 'composer', 'operator', 'editor', 'design', 'voice', 'voice_director'):
                    if block.find(u'id="' + w + u'"') != -1:
                        work = 'producer' if w == 'producer_ussr' else w
                        break

                if work:

                    movies = []

                    for id, name in re.compile('<span class="name"><a href="/film/([0-9]+)/" >([^<]+?)</a>', re.U).findall(block):
                        for tag in (u'(мини-сериал)', u'(сериал)'):
                            if name.find(tag) != -1:
                                break
                        else:
                            movies.append(int(id))

                    if movies:
                        res.setdefault(work, []).extend(movies)

        return res

    def review(self, id, query):
        query_s = 'all' if query == 'stat' else query
        data = self.cache.get('review:' + str(id) + ':' + query_s, self._review, id, query_s)
        if not data:
            return data
        return data[query]

    def countries(self):
        return COUNTRIES

    def country(self, id, default=None):
        country = [x[1] for x in COUNTRIES if x[0] == id]
        return country[0] if country else default

    # PRIVATE

    def _search_movie(self, name, year=None):
        url = 'http://www.kinopoisk.ru/s/type/film/list/1/find/' + urllib.quote_plus(name.encode('windows-1251'))  # + '/order/relevant'
        if year:
            url += '/m_act%5Byear%5D/' + str(year)
        url += '/m_act%5Btype%5D/film/'

        response = self.http.fetch(url, headers=self.headers)
        if response.error:
            return None

        res = []
        r = re.compile('<div class="navigator">(.+?)<div class="navigator">', re.U | re.S).search(response.body.decode('windows-1251'))
        if r:
            for id in re.compile('<p class="name"><a href="/level/1/film/([0-9]+)', re.U | re.S).findall(r.group(1)):
                res.append(int(id))

        return {'pages': (1, 0, 1, 0), 'data': res}

    def _scraper(self, name, year):
        timeout = True

        # if the movie is recent, cache it only briefly (the site may get updates)
        if year and year >= time.gmtime(time.time()).tm_year:
            timeout = 7 * 24 * 60 * 60  # one week

        ids = self._search_movie(name, year)

        if ids is None:
            return False, None

        elif not ids['data']:
            # cache the empty result for 3 days
            return 259200, None

        else:
            return timeout, ids['data'][0]

    def _review(self, id, query):
        url = 'http://www.kinopoisk.ru/film/' + str(id) + '/ord/rating/'
        if query in ('good', 'bad', 'neutral'):
            url += 'status/' + query + '/'
        url += 'perpage/200/'

        response = self.http.fetch(url, headers=self.headers)
        if response.error:
            return False, None

        html = response.body.decode('windows-1251')

        res = {
            'stat': {'all': 0, 'good': 0, 'bad': 0, 'neutral': 0},
            query: []
        }

        r = re.compile('<ul class="resp_type">(.+?)</ul>', re.U | re.S).search(html)
        if r:
            ul = r.group(1)

            for q, t in (('pos', 'good'), ('neg', 'bad'), ('neut', 'neutral')):
                r = re.compile('<li class="' + q + '"><a href="[^>]+>[^<]+</a><b>([0-9]+)</b></li>', re.U).search(ul)
                if r:
                    res['stat'][t] = int(r.group(1))

            res['stat']['all'] = res['stat']['good'] + res['stat']['bad'] + res['stat']['neutral']

        r = re.compile('<div class="navigator">(.+?)<div class="navigator">', re.U | re.S).search(html)
        if r:

            for block in r.group(1).split('itemprop="reviews"'):

                review = {
                    'nick': None,
                    'count': None,
                    'title': None,
                    'review': None,
                    'time': None
                }

                r = re.compile('itemprop="reviewBody">(.+?)</div>', re.U | re.S).search(block)
                if r:

                    text = r.group(1)
                    for tag1, tag2 in ((u'<=end=>', u'\n'), (u'<b>', u'[B]'), (u'</b>', u'[/B]'), (u'<i>', u'[I]'), (u'</i>', u'[/I]'), (u'<u>', u'[U]'), (u'</u>', u'[/U]')):
                        text = text.replace(tag1, tag2)

                    r = self.html.text(text)
                    if r:
                        review['review'] = r

                user = None
                r = re.compile('<p class="profile_name"><s></s><a href="[^>]+>([^<]+)</a></p>').search(block)
                if r:
                    user = self.html.string(r.group(1))
                else:
                    r = re.compile('<p class="profile_name"><s></s>([^<]+)</p>').search(block)
                    if r:
                        user = self.html.string(r.group(1))
                if user:
                    review['nick'] = user

                r = re.compile('<p class="sub_title"[^>]+>([^<]+)</p>').search(block)
                if r:
                    title = self.html.string(r.group(1))
                    if title:
                        review['title'] = title

                r = re.compile('<span class="date">([^<]+)</span>', re.U | re.S).search(block)
                if r:
                    review['time'] = r.group(1).replace(u' |', u',')

                r = re.compile(u'<a href="[^>]+>рецензии \(([0-9]+)\)</a>', re.U | re.S).search(block)
                if r:
                    review['count'] = int(r.group(1))

                if review['nick'] and review['review']:
                    res[query].append(review)

        return 3600, res  # one hour

    def _movie(self, id):
        response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/', headers=self.headers)
        if response.error:
            return False, None

        html = response.body.decode('windows-1251')

        res = {
            'icon': None,
            'thumbnail': None,
            'info': {
                'count': int(id)
            },
            'properties': {
                'fanart_image': None,
            },
        }

        # title, original title, tagline, MPAA rating, year, top250
        # runtime - movie duration (kept in a separate field, otherwise the file size is not visible)
        for tag, reg, t in (
            ('title', '<title>(.+?)</title>', 'str'),
            ('originaltitle', 'itemprop="alternativeHeadline">([^<]*)</span>', 'str'),
            ('tagline', '<td style="color\: #555">&laquo;(.+?)&raquo;</td></tr>', 'str'),
            ('mpaa', 'itemprop="contentRating"\s+content="MPAA\s+([^"]+)"', 'str'),
            ('runtime', '<td class="time" id="runtime">[^<]+<span style="color\: #999">/</span>([^<]+)</td>', 'str'),
            ('year', '<a href="/lists/m_act%5Byear%5D/([0-9]+)/"', 'int'),
            ('top250', 'Топ250\: <a\shref="/level/20/#([0-9]+)', 'int')

        ):
            r = re.compile(reg, re.U).search(html)
            if r:
                value = r.group(1).strip()
                if value:
                    res['info'][tag] = value
                    if t == 'int':
                        res['info'][tag] = int(res['info'][tag])
                    else:
                        res['info'][tag] = self.html.string(res['info'][tag])

        # directors, writers, genres
        for tag, reg in (
            ('director', u'<td itemprop="director">(.+?)</td>'),
            ('writer', u'<td class="type">сценарий</td><td[^>]*>(.+?)</td>'),
            ('genre', u'<td itemprop="genre">(.+?)</td>')
        ):
            r = re.compile(reg, re.U | re.S).search(html)
            if r:
                r2 = []
                for r in re.compile('<a href="[^"]+">([^<]+)</a>', re.U).findall(r.group(1)):
                    r = self.html.string(r)
                    if r and r != '...':
                        r2.append(r)
                if r2:
                    res['info'][tag] = u', '.join(r2)

        # actors
        r = re.compile(u'<h4>В главных ролях:</h4><ul>(.+?)</ul>', re.U | re.S).search(html)
        if r:
            actors = []
            for r in re.compile('<li itemprop="actors"><a [^>]+>([^<]+)</a></li>', re.U).findall(r.group(1)):
                r = self.html.string(r)
                if r and r != '...':
                    actors.append(r)
            if actors:
                res['info']['cast'] = actors[:]
                # res['info']['castandrole'] = actors[:]

        # movie description
        r = re.compile('<span class="_reachbanner_"><div class="brand_words" itemprop="description">(.+?)</div></span>', re.U).search(html)
        if r:
            plot = self.html.text(r.group(1).replace('<=end=>', '\n'))
            if plot:
                res['info']['plot'] = plot

        # IMDB
        r = re.compile('IMDb: ([0-9.]+) \(([0-9\s]+)\)</div>', re.U).search(html)
        if r:
            res['info']['rating'] = float(r.group(1).strip())
            res['info']['votes'] = r.group(2).strip()

        # # premiere (world)
        # r = re.compile(u'премьера \(мир\)</td>(.+?)</tr>', re.U|re.S).search(html)
        # if r:
        #     r = re.compile(u'data\-ical\-date="([^"]+)"', re.U|re.S).search(r.group(1))
        #     if r:
        #         data = r.group(1).split(' ')
        #         if len(data) == 3:
        #             i = 0
        #             for mon in (u'января', u'февраля', u'марта', u'апреля', u'мая', u'июня', u'июля', u'августа', u'сентября', u'октября', u'ноября', u'декабря'):
        #                 i += 1
        #                 if mon == data[1]:
        #                     mon = str(i)
        #                     if len(mon) == 1:
        #                         mon = '0' + mon
        #                     day = data[0]
        #                     if len(day) == 1:
        #                         day = '0' + day
        #                     res['info']['premiered'] = '-'.join([data[2], mon, day])
        #                     break

        # poster
        r = re.compile(u'onclick="openImgPopup\(([^\)]+)\)', re.U | re.S).search(html)
        if r:
            poster = r.group(1).replace("'", '').strip()
            if poster:
                if poster.startswith("/"):
                    poster = "http://www.kinopoisk.ru%s" % poster
                res['icon'] = poster
                res['thumbnail'] = poster

        menu = re.compile('<ul id="newMenuSub" class="clearfix(.+?)</ul>', re.U | re.S).search(html)
        if menu:
            menu = menu.group(1)

            # fanart
            if menu.find('/film/' + id + '/wall/') != -1:
                response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/wall/', headers=self.headers)
                if not response.error:
                    html = response.body.decode('windows-1251')
                    fanart = re.compile('<a href="/picture/([0-9]+)/w_size/([0-9]+)/">', re.U).findall(html)
                    if fanart:
                        fanart.sort(cmp=lambda (id1, size1), (id2, size2): cmp(int(size1), int(size2)))

                        # try to take the best-fitting size
                        fanart_best = [x for x in fanart if int(x[1]) <= 1280]
                        if fanart_best:
                            fanart = fanart_best

                        response = self.http.fetch('http://www.kinopoisk.ru/picture/' + fanart[-1][0] + '/w_size/' + fanart[-1][1] + '/', headers=self.headers)
                        if not response.error:
                            html = response.body.decode('windows-1251')
                            r = re.compile('id="image" src="([^"]+)"', re.U | re.S).search(html)
                            if r:
                                res['properties']['fanart_image'] = r.group(1).strip()

            # if there is no fanart (wallpaper), try to get stills instead
            if not res['properties']['fanart_image'] and menu.find('/film/' + id + '/stills/') != -1:
                response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/stills/', headers=self.headers)
                if not response.error:
                    html = response.body.decode('windows-1251')
                    fanart = re.compile('<a href="/picture/([0-9]+)/"><img  src="[^<]+</a>[^<]+<b><i>([0-9]+)&times;([0-9]+)</i>', re.U).findall(html)
                    if fanart:
                        fanart.sort(cmp=lambda (id1, size1, t1), (id2, size2, t2): cmp(int(size1), int(size2)))

                        # try to take the best-fitting size
                        fanart_best = [x for x in fanart if int(x[1]) <= 1280 and int(x[1]) > int(x[2])]
                        if fanart_best:
                            fanart = fanart_best

                        response = self.http.fetch('http://www.kinopoisk.ru/picture/' + fanart[-1][0] + '/', headers=self.headers)
                        if not response.error:
                            html = response.body.decode('windows-1251')
                            r = re.compile('id="image" src="([^"]+)"', re.U | re.S).search(html)
                            if r:
                                res['properties']['fanart_image'] = r.group(1).strip()

            # # studios
            # if menu.find('/film/' + id + '/studio/') != -1:
            #     response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/studio/', headers=self.headers)
            #     if not response.error:
            #         html = response.body.decode('windows-1251')
            #         r = re.compile(u'<b>Производство:</b>(.+?)</table>', re.U|re.S).search(html)
            #         if r:
            #             studio = []
            #             for r in re.compile('<a href="/lists/m_act%5Bstudio%5D/[0-9]+/" class="all">(.+?)</a>', re.U).findall(r.group(1)):
            #                 r = self.html.string(r)
            #                 if r:
            #                     studio.append(r)
            #             if studio:
            #                 res['info']['studio'] = u', '.join(studio)

            # trailers

            # trailers1 = [] # Russian trailers
            # trailers2 = [] # other Russian videos
            # trailers3 = [] # trailers
            # trailers4 = [] # other videos

            # if menu.find('/film/' + id + '/video/') != -1:
            #     response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/video/', headers=self.headers)
            #     if not response.error:
            #         html = response.body.decode('windows-1251')

            #         for row in re.compile(u'<!-- ролик -->(.+?)<!-- /ролик -->', re.U|re.S).findall(html):

            #             # skip irrelevant blocks
            #             if row.find(u'>СМОТРЕТЬ</a>') != -1:

            #                 # Russian clip?
            #                 if row.find('class="flag flag2"') == -1:
            #                     is_ru = False
            #                 else:
            #                     is_ru = True

            #                 # get the trailer name
            #                 r = re.compile('<a href="/film/' + id + '/video/[0-9]+/[^>]+ class="all">(.+?)</a>', re.U).search(row)
            #                 if r:
            #                     name = self.html.string(r.group(1))
            #                     if name:

            #                         trailer = {
            #                             'name': name,
            #                             'time': None,
            #                             'trailer': False,
            #                             'ru': is_ru,
            #                             'video': []
            #                         }

            #                         # trailer or teaser?
            #                         for token in (u'Трейлер', u'трейлер', u'Тизер', u'тизер'):
            #                             if name.find(token) != -1:
            #                                 trailer['trailer'] = True
            #                                 break

            #                         # get the trailer duration
            #                         r = re.compile(u'clock.gif"[^>]+></td>\s*<td style="color\: #777">[^0-9]*([0-9\:]+)</td>', re.U|re.S).search(row)
            #                         if r:
            #                             trailer['time'] = r.group(1).strip()

            #                         # split the clips by quality
            #                         for r in re.compile('trailer/([1-3])a.gif"(.+?)link=([^"]+)" class="continue">.+?<td style="color\:#777">([^<]+)</td>\s*</tr>', re.U|re.S).findall(row):
            #                             quality = int(r[0])
            #                             if r[1].find('icon-hd') != -1:
            #                                 quality += 3

            #                             trailer['video'].append((quality, r[2].strip(), r[3]))

            #                         if trailer['video']:
            #                             if trailer['ru']:
            #                                 if trailer['trailer']:
            #                                     trailers1.append(trailer)
            #                                 else:
            #                                     trailers2.append(trailer)
            #                             else:
            #                                 if trailer['trailer']:
            #                                     trailers3.append(trailer)
            #                                 else:
            #                                     trailers4.append(trailer)

            # # concatenate the trailer lists
            # res['trailers'].extend(trailers1)
            # res['trailers'].extend(trailers2)
            # res['trailers'].extend(trailers3)
            # res['trailers'].extend(trailers4)

        timeout = True
        # if the movie is recent, cache it only briefly (the site may get updates)
        if 'year' not in res['info'] or int(res['info']['year']) >= time.gmtime(time.time()).tm_year:
            timeout = 7 * 24 * 60 * 60  # one week

        return timeout, res
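
The trailer handling in movie() above assumes each trailer's video field is a list of (quality, url, label) tuples in ascending quality order and keeps the best variant that does not exceed the requested quality. A standalone sketch of that selection; the function name and sample data are mine:

def pick_video(variants, max_quality):
    # variants must be sorted by ascending quality, as movie() expects
    fitting = [v for v in variants if v[0] <= max_quality]
    return fitting[-1] if fitting else None

videos = [(1, 'http://example.org/low.flv', 'low'),
          (3, 'http://example.org/sd.mp4', 'sd'),
          (6, 'http://example.org/hd.mp4', 'hd')]
print(pick_video(videos, 4))  # -> (3, 'http://example.org/sd.mp4', 'sd')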
Example #13
0
class Cache:
    def __init__(self, name, version, expire=0, size=0, step=100):
        self.name = name
        self.version = version
        self._connect()
        if expire:
            self.expire(expire)
        if size:
            self.size(size, step)

    def get(self, token, callback, *param):
        cur = self.db.cursor()
        cur.execute('select expire,data from cache where id=? limit 1',
                    (token, ))
        row = cur.fetchone()
        cur.close()

        if row:
            if row[0] and row[0] < int(time.time()):
                pass
            else:
                try:
                    obj = pickle.loads(row[1])
                except Exception:
                    pass
                else:
                    return obj

        response = callback(*param)

        if response[0]:
            obj = sqlite.Binary(pickle.dumps(response[1]))
            curtime = int(time.time())
            cur = self.db.cursor()
            if isinstance(response[0], bool):
                cur.execute(
                    'replace into cache(id,addtime,expire,data) values(?,?,?,?)',
                    (token, curtime, None, obj))
            else:
                cur.execute(
                    'replace into cache(id,addtime,expire,data) values(?,?,?,?)',
                    (token, curtime, curtime + response[0], obj))
            self.db.commit()
            cur.close()

        return response[1]

    def expire(self, expire):
        # with rtrCache_lock:
        cur = self.db.cursor()
        cur.execute('delete from cache where addtime<?',
                    (int(time.time()) - expire, ))
        self.db.commit()
        cur.close()

    def size(self, size, step=100):
        # with rtrCache_lock:
        while True:
            if os.path.getsize(self.filename) < size:
                break
            cur = self.db.cursor()
            cur.execute('select id from cache order by addtime asc limit ?',
                        (step, ))
            rows = cur.fetchall()
            if not rows:
                cur.close()
                break
            cur.execute(
                'delete from cache where id in (' + ','.join(len(rows) * '?') +
                ')', [x[0] for x in rows])
            self.db.commit()
            cur.close()

    def flush(self):
        # with rtrCache_lock:
        cur = self.db.cursor()
        cur.execute('delete from cache')
        self.db.commit()
        cur.close()

    def _connect(self):
        with rtrCache_lock:
            dirname = xbmc.translatePath('special://temp')
            for subdir in ('xbmcup', 'plugin.video.torrenter'):
                dirname = os.path.join(dirname, subdir)
                if not xbmcvfs.exists(dirname):
                    xbmcvfs.mkdir(dirname)

            self.filename = os.path.join(dirname, self.name)

            first = False
            if not xbmcvfs.exists(self.filename):
                first = True

            self.db = sqlite.connect(self.filename, check_same_thread=False)
            if not first:
                cur = self.db.cursor()
                try:
                    cur.execute('select version from db_ver')
                    row = cur.fetchone()
                    if not row or float(row[0]) != self.version:
                        cur.execute('drop table cache')
                        cur.execute('drop table if exists db_ver')
                        first = True
                except Exception:
                    cur.execute('drop table cache')
                    first = True
                self.db.commit()
                cur.close()

            if first and not self.first_time():
                cur = self.db.cursor()
                cur.execute('pragma auto_vacuum=1')
                cur.execute(
                    'create table cache(id varchar(255) unique, addtime integer, expire integer, data blob)'
                )
                cur.execute('create index time on cache(addtime asc)')
                cur.execute('create table db_ver(version real)')
                cur.execute('insert into db_ver(version) values(?)',
                            (self.version, ))
                self.db.commit()
                cur.close()

    def first_time(self):
        scrapers = {
            'tvdb': 'TheTVDB.com',
            'tmdb': 'TheMovieDB.org',
            'kinopoisk': 'KinoPoisk.ru'
        }
        ok = xbmcgui.Dialog().yesno(
            Localization.localize('Content Lists'),
            Localization.localize('Do you want to preload full metadata?') +
            ' (%s)' %
            (scrapers[os.path.basename(self.filename).split('.')[0]]),
            Localization.localize('It is highly recommended!'))
        if ok:
            return self.download()
        else:
            return False

    def download(self):
        dirname = os.path.dirname(self.filename)
        zipname = os.path.basename(self.filename).replace('.db', '') + '.zip'
        url = 'http://www.tat-store.ru/torrenter/' + zipname
        self.http = HTTP()
        response = self.http.fetch(url,
                                   download=os.path.join(dirname, zipname),
                                   progress=True)
        if response.error:
            return False

        try:
            filezip = zipfile.ZipFile(os.path.join(dirname, zipname), 'r')
            filezip.extractall(dirname)
            filezip.close()
        except Exception:
            return False

        return True
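
A hedged usage sketch for the Cache class above: the callback must return a (timeout, value) pair, and only truthy timeouts are persisted. The fetch_movie stub is hypothetical, and instantiating Cache only works inside Kodi/XBMC, since _connect and first_time rely on xbmc, xbmcvfs and xbmcgui:

def fetch_movie(id):
    # a real callback would fetch and parse the movie page here
    return 3600, {'id': id, 'title': 'Example'}  # cache for one hour

cache = Cache('kinopoisk.db', 1.0)
movie = cache.get('movie:123', fetch_movie, '123')  # miss: calls fetch_movie, stores the result
movie = cache.get('movie:123', fetch_movie, '123')  # hit: served from sqlite until it expires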
Example #14
0
class KinoPoisk:
    """
    
    API:
        scraper  - скрапер
        movie    - профайл фильма
        search   - поиск фильма
        best     - поиск лучших фильмов
        person   - поиск персон
        work     - информация о работах персоны
        
    """

    def __init__(self, language='ru'):
        dbname = 'kinopoisk.%s.db' % language
        self.cache = Cache(dbname, 1.0)
        self.html = Clear()

        self.timeout = 60.0

        self.http = HTTP()
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:10.0.2) Gecko/20100101 Firefox/10.0.2',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3',
            'Cache-Control': 'no-cache',
            'Referer': 'http://www.kinopoisk.ru/level/7/'
        }

    # API

    def scraper(self, search, year=None):

        try:
            if not isinstance(search, list):
                search = [search]
            tag = 'scraper:' + urllib.quote_plus(":".join(search).encode('utf8'))
        except Exception:
            return None
        else:

            if year:
                tag += ':' + str(year)

            id = self.cache.get(tag, self._scraper, search, year)
            if not id:
                return None

            return self.movie(id)

    def movie(self, id):
        id = str(id)
        return self.cache.get('movie:' + id, self._movie, id)

    def search(self, search, year):
        return self._search_movie(search, year)

    def countries(self):
        return COUNTRIES

    def country(self, id, default=None):
        country = [x[1] for x in COUNTRIES if x[0] == id]
        return country[0] if country else default

    def _search_movie(self, search, year=None):
        parser = kinopoisk.pageparser.PageParser(kinopoisk.LOGGER, isDebug=True)
        originalname = search[0]
        if len(search) > 1:
            name = search[1]
        else:
            name = None
        results = parser.fetchAndParseSearchResults(originalname, year, name)
        if results and results[0][3] > 70:
            return results[0][0]

    def _scraper(self, search, year):
        timeout = True

        # if the movie is recent, do NOT cache it for long (the site may still get updates)
        if year and year >= time.gmtime(time.time()).tm_year:
            timeout = 7 * 24 * 60 * 60 * 4  # 4 weeks

        movie_id = self._search_movie(search, year)

        if movie_id is None:
            # cache the empty result for 4 weeks
            return 7 * 24 * 60 * 60 * 4, None

        else:
            return timeout, movie_id

    def _movie(self, id):
        response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/', headers=self.headers,
                                   timeout=self.timeout)
        if response.error:
            return False, None

        html = response.body.decode('windows-1251')

        res = {
            'icon': None,
            'thumbnail': None,
            'properties': {
                'fanart_image': None,
            },
            'info': {
                'count': int(id)
            }
        }

        # title, original title, tagline, MPAA rating, year, top250
        # runtime - movie duration (kept in a separate variable, otherwise the file size is not shown)
        for tag, reg, cb in (
                ('title', '<title>(.+?)</title>', self.html.string),
                ('originaltitle', 'itemprop="alternativeHeadline">([^<]*)</span>', self.html.string),
                ('tagline', '<td style="color\: #555">&laquo;(.+?)&raquo;</td></tr>', self.html.string),
                ('mpaa', 'images/mpaa/([^\.]+).gif', self.html.string),
                ('runtime', '<td class="time" id="runtime">[^<]+<span style="color\: #999">/</span>([^<]+)</td>',
                 self.html.string),
                ('year', '<a href="/lists/m_act%5Byear%5D/([0-9]+)/"', int),
                ('top250', 'Топ250\: <a\shref="/level/20/#([0-9]+)', int)
        ):
            r = re.compile(reg, re.U).search(html)
            if r:
                value = r.group(1).strip()
                if value:
                    res['info'][tag] = cb(value)


        # directors, writers, genres
        for tag, reg in (
                ('director', u'<td itemprop="director">(.+?)</td>'),
                ('writer', u'<td class="type">сценарий</td><td[^>]*>(.+?)</td>'),
                ('genre', u'<span itemprop="genre">(.+?)</span>')
        ):
            r = re.compile(reg, re.U | re.S).search(html)
            if r:
                r2 = []
                for r in re.compile('<a href="[^"]+">([^<]+)</a>', re.U).findall(r.group(1)):
                    r = self.html.string(r)
                    if r and r != '...':
                        r2.append(r)
                if r2:
                    res['info'][tag] = u', '.join(r2)

        # actors
        r = re.compile(u'<h4>В главных ролях:</h4>(.+?)</ul>', re.U | re.S).search(html)
        if r:
            actors = []
            for r in re.compile('<li itemprop="actors"><a [^>]+>([^<]+)</a></li>', re.U).findall(r.group(1)):
                r = self.html.string(r)
                if r and r != '...':
                    actors.append(r)
            if actors:
                res['info']['cast'] = actors[:]
                # res['info']['castandrole'] = actors[:]

        # movie description
        r = re.compile('<span class="_reachbanner_"><div class="brand_words" itemprop="description">(.+?)</div></span>',
                       re.U).search(html)
        if r:
            plot = self.html.text(r.group(1).replace('<=end=>', '\n'))
            if plot:
                res['info']['plot'] = plot

        # IMDB
        r = re.compile('IMDb: ([0-9.]+) \(([0-9\s]+)\)</div>', re.U).search(html)
        if r:
            res['info']['rating'] = float(r.group(1).strip())
            res['info']['votes'] = r.group(2).strip()


        # world premiere
        r = re.compile(u'премьера \(мир\)</td>(.+?)</tr>', re.U | re.S).search(html)
        if r:
            r = re.compile(u'data\-ical\-date="([^"]+)"', re.U | re.S).search(r.group(1))
            if r:
                data = r.group(1).split(' ')
                if len(data) == 3:
                    i = 0
                    for mon in (
                            u'января', u'февраля', u'марта', u'апреля', u'мая', u'июня', u'июля', u'августа',
                            u'сентября',
                            u'октября', u'ноября', u'декабря'):
                        i += 1
                        if mon == data[1]:
                            mon = str(i)
                            if len(mon) == 1:
                                mon = '0' + mon
                            day = data[0]
                            if len(day) == 1:
                                day = '0' + day
                            res['info']['premiered'] = '-'.join([data[2], mon, day])
                            break


        # poster
        r = re.compile(u'onclick="openImgPopup\(([^\)]+)\)', re.U | re.S).search(html)
        if r:
            poster = r.group(1).replace("'", '').strip()
            if poster:
                res['thumbnail'] = res['icon'] = 'http://kinopoisk.ru' + poster

        menu = re.compile('<ul id="newMenuSub" class="clearfix(.+?)<!\-\- /menu \-\->', re.U | re.S).search(html)
        if menu:
            menu = menu.group(1)

            # fanart
            if menu.find('/film/' + id + '/wall/') != -1:
                response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/wall/', headers=self.headers,
                                           timeout=self.timeout)
                if not response.error:
                    html = response.body.decode('windows-1251')
                    fanart = re.compile('<a href="/picture/([0-9]+)/w_size/([0-9]+)/">', re.U).findall(html)
                    if fanart:
                        fanart.sort(key=lambda item: int(item[1]))  # ascending by width

                        # prefer the most suitable image (no wider than 1280)
                        fanart_best = [x for x in fanart if int(x[1]) <= 1280]
                        if fanart_best:
                            fanart = fanart_best

                        response = self.http.fetch(
                            'http://www.kinopoisk.ru/picture/' + fanart[-1][0] + '/w_size/' + fanart[-1][1] + '/',
                            headers=self.headers, timeout=self.timeout)
                        if not response.error:
                            html = response.body.decode('windows-1251')
                            r = re.compile('id="image" src="([^"]+)"', re.U | re.S).search(html)
                            if r:
                                res['properties']['fanart_image'] = r.group(1).strip()


            # if there is no fanart (wallpapers), fall back to movie stills
            if not res['properties']['fanart_image'] and menu.find('/film/' + id + '/stills/') != -1:
                response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/stills/', headers=self.headers,
                                           timeout=self.timeout)
                if not response.error:
                    html = response.body.decode('windows-1251')
                    fanart = re.compile(
                        '<a href="/picture/([0-9]+)/"><img  src="[^<]+</a>[^<]+<b><i>([0-9]+)&times;([0-9]+)</i>',
                        re.U).findall(html)
                    if fanart:
                        fanart.sort(key=lambda item: int(item[1]))  # ascending by width

                        # prefer the most suitable still (landscape, no wider than 1280)
                        fanart_best = [x for x in fanart if int(x[1]) <= 1280 and int(x[1]) > int(x[2])]
                        if fanart_best:
                            fanart = fanart_best

                        response = self.http.fetch('http://www.kinopoisk.ru/picture/' + fanart[-1][0] + '/',
                                                   headers=self.headers, timeout=self.timeout)
                        if not response.error:
                            html = response.body.decode('windows-1251')
                            r = re.compile('id="image" src="([^"]+)"', re.U | re.S).search(html)
                            if r:
                                res['properties']['fanart_image'] = r.group(1).strip()


            # studios
            if menu.find('/film/' + id + '/studio/') != -1:
                response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/studio/', headers=self.headers,
                                           timeout=self.timeout)
                if not response.error:
                    html = response.body.decode('windows-1251')
                    r = re.compile(u'<b>Производство:</b>(.+?)</table>', re.U | re.S).search(html)
                    if r:
                        studio = []
                        for r in re.compile('<a href="/lists/m_act%5Bstudio%5D/[0-9]+/" class="all">(.+?)</a>',
                                            re.U).findall(r.group(1)):
                            r = self.html.string(r)
                            if r:
                                studio.append(r)
                        if studio:
                            res['info']['studio'] = u', '.join(studio)

        timeout = True
        # if the movie is recent or its data is incomplete, do NOT cache it for long (the site may still get updates)
        if 'year' not in res['info'] or not res['properties']['fanart_image'] \
                or int(res['info']['year']) >= time.gmtime(time.time()).tm_year:
            timeout = 7 * 24 * 60 * 60 * 4  # 4 weeks

        return timeout, res
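
End to end, scraper() builds a cache key from the search terms, resolves a KinoPoisk id through _scraper()/_search_movie(), and movie() assembles the profile dictionary. A minimal, hypothetical usage sketch, assuming the plugin environment provides the Cache, HTTP and Clear helpers:

# Hypothetical usage; the title and year are illustrative only.
kp = KinoPoisk(language='ru')
movie = kp.scraper([u'Solaris'], year=1972)
if movie:
    # the profile built by _movie(): 'info' plus artwork URLs
    print movie['info'].get('title'), movie['info'].get('year')
    print movie['thumbnail'], movie['properties']['fanart_image']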
Example #15
class TvDb:
    """
    
    API:
        scraper  - скрапер
        search   - поиск сериалов
        movie    - профайл фильма
        
    """
    
    def __init__(self):
        self.api_key = '1D62F2F90030C444'
        
        self.cache = Cache('tvdb.db', 1.0)
        
        self.http = HTTP()
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:10.0.2) Gecko/20100101 Firefox/10.0.2',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3',
            'Cache-Control': 'no-cache',
            'Referer': 'http://www.thetvdb.com/'
        }
        
        
    # API
    
    def scraper(self, search, year=None):
        try:
            if not isinstance(search, list):
                search = [search]
            tag = 'scraper:' + urllib.quote_plus(":".join(search).encode('utf8'))
        except:
            # search terms could not be encoded into a cache key
            return None
        else:
            
            if year:
                tag += ':' + str(year)
            
            id = self.cache.get(tag, self._scraper, search, year)
            if not id:
                return None
            
            return self.movie(id)
        
    def search(self, name):
        return self._search(name)
    
    
    def movie(self, id):
        id = str(id)
        return self.cache.get('movie:' + id, self._movie, id)
    
    
    def _movie(self, id):
        dirname = tempfile.mkdtemp()
        response = self.http.fetch('http://www.thetvdb.com/api/' + self.api_key + '/series/' + id + '/all/ru.zip', headers=self.headers, download=os.path.join(dirname, 'movie.zip'))
        if response.error:
            self._movie_clear(dirname)
            return False, None
        
        try:
            filezip = zipfile.ZipFile(os.path.join(dirname, 'movie.zip'), 'r')
            filezip.extractall(dirname)
            filezip.close()
            movie = open(os.path.join(dirname, 'ru.xml'), 'rb').read().decode('utf8')
        except:
            # corrupt archive or missing ru.xml
            self._movie_clear(dirname)
            return False, None
        
        self._movie_clear(dirname)
        
        body = re.compile(r'<Series>(.+?)</Series>', re.U|re.S).search(movie)
        if not body:
            return False, None
        
        body = body.group(1)
        
        res = {
            'icon' : None,
            'thumbnail': None,
            'properties': {
                'fanart_image': None,
            },
            'info': {
                'count' : int(id)
            }
        }
        
        # directors and writers
        for tag in ('Director', 'Writer'):
            people = {}
            people_list = []
            for x in re.compile(r'<' + tag + r'>([^<]+)</' + tag + r'>', re.U|re.S).findall(movie):
                people_list.extend(x.split('|'))
            for name in [x.strip() for x in people_list]:
                if name:
                    people[name] = 1
            if people:
                res['info'][tag.lower()] = u', '.join([x for x in people.keys() if x])
        
        for tag, retag, typeof, targettype in (
                    ('plot', 'Overview', None, None),
                    ('mpaa', 'ContentRating', None, None),
                    ('premiered', 'FirstAired', None, None),
                    ('studio', 'Network', None, None),
                    ('title', 'SeriesName', None, None),
                    ('runtime', 'Runtime', None, None),
                    ('votes', 'RatingCount', None, None),
                    ('rating', 'Rating', float, None),
                    ('genre', 'Genre', list, unicode),
                    ('cast', 'Actors', list, None)
                    ):
            r = re.compile(r'<' + retag + r'>([^<]+)</' + retag + r'>', re.U|re.S).search(body)
            if r:
                r = r.group(1).strip()
                if typeof == float:
                    res['info'][tag] = float(r)
                elif typeof == list:
                    if targettype == unicode:
                        res['info'][tag] = u', '.join([x for x in [x.strip() for x in r.split(u'|')] if x])
                    else:
                        res['info'][tag] = [x for x in [x.strip() for x in r.split(u'|')] if x]
                else:
                    res['info'][tag] = r
        
        # year
        if 'premiered' in res['info']:
            res['info']['year'] = int(res['info']['premiered'].split('-')[0])
        
        # poster
        r = re.compile(r'<poster>([^<]+)</poster>', re.U|re.S).search(body)
        if r:
            res['icon'] = 'http://thetvdb.com/banners/' + r.group(1).strip()
            res['thumbnail'] = 'http://thetvdb.com/banners/' + r.group(1).strip()
        
        # fanart
        r = re.compile(r'<fanart>([^<]+)</fanart>', re.U|re.S).search(body)
        if r:
            res['properties']['fanart_image'] = 'http://thetvdb.com/banners/' + r.group(1).strip()
        
        timeout = True
        # if the series is recent, do NOT cache it for long (the site may still get updates)
        if 'year' not in res['info'] or int(res['info']['year']) >= time.gmtime(time.time()).tm_year:
            timeout = 7*24*60*60  # 1 week
        
        return timeout, res
            
    
    def _movie_clear(self, dirname):
        # remove the extracted files and the temporary directory
        for filename in os.listdir(dirname):
            os.unlink(os.path.join(dirname, filename))
        os.rmdir(dirname)
        
    
    def _search(self, search):
        for name in search:
            response = self.http.fetch('http://www.thetvdb.com/api/GetSeries.php?language=ru&seriesname=' + urllib.quote_plus(name.encode('utf8')), headers=self.headers)
            if response.error:
                return None
        
            res = []
            rows = re.compile('<Series>(.+?)</Series>', re.U|re.S).findall(response.body.decode('utf8'))
            if rows:
                recmd = re.compile('<seriesid>([0-9]+)</seriesid>', re.U|re.S)
            
                for row in [x for x in rows if x.find(u'<language>ru</language>') != -1]:
                    r = recmd.search(row)
                    if r:
                        res.append(int(r.group(1)))
                # in some cases a series can only be found by its original title,
                # even though a Russian description exists
                if not res:
                    for row in [x for x in rows if x.find(u'<language>en</language>') != -1]:
                        r = recmd.search(row)
                        if r:
                            res.append(int(r.group(1)))

            if res:
                break
                
        return {'pages': (1, 0, 1, 0), 'data': res}
    
    
    def _scraper(self, name, year):
        timeout = True
        
        # if the series is recent, do NOT cache it for long (the site may still get updates)
        if year and year >= time.gmtime(time.time()).tm_year:
            timeout = 7*24*60*60  # 1 week
        
        ids = self._search(name)
        
        if ids is None:
            return False, None
        
        elif not ids['data']:
            # cache the empty result for 3 days
            return 259200, None
        
        else:
            return timeout, ids['data'][0]
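
As in the KinoPoisk example, _scraper() hands the cache a (timeout, id) pair and movie() returns the assembled profile. A minimal, hypothetical usage sketch under the same assumptions about the plugin environment:

# Hypothetical usage; the series name and year are illustrative only.
tvdb = TvDb()
series = tvdb.scraper([u'Doctor Who'], year=2005)
if series:
    print series['info'].get('title'), series['info'].get('premiered')
    print series['properties']['fanart_image']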