def fileManager(self, href, folder):
    """Fetch, parse and cache the file/folder listing for *href* at *folder*.

    Downloads the site's ajax folder listing, parses each ``<li>`` row into a
    dict (folder rows carry 'parent'/'next'/'quality_list', file rows carry
    'href'/'href_dl'/'size'), and caches every discovered sub-listing for
    30 minutes.  For the root folder ('0') the site's "blocked" flag is also
    stored on ``self.isBlocked``.

    :param href:   site path of the material page
    :param folder: folder id as a string ('0' = root)
    :return: list of entry dicts for the requested folder
    """
    # Compiled once here instead of per-row inside the recursive parser.
    parent_re = re.compile(r"parent_id:\s?'?([\d]+)")
    quality_re = re.compile(r"quality_list:\s?'([^']+)")
    lang_re = re.compile(r'\sm\-(\w+)\s')

    def recur(data, result):
        """Parse one <ul> level; sub-folder listings are appended to *result*."""
        rez = []
        for li in data.findAll('li', recursive=False):
            # Detach the nested <ul> so searches below only see this level.
            if li.find('ul'):
                li.find('ul').extract()
            if not li.find('a'):
                continue
            rel = parent_re.findall(li.a['rel'])
            info = {}
            if rel:  # folder row
                info['parent'] = rel = rel[0]
                try:
                    info['title'] = li.a.b.string
                except AttributeError:
                    # No <b> wrapper: title may be split around a <font> tag.
                    title_ = li.a.contents
                    if len(title_) > 1:
                        info['title'] = title_[0] + BeautifulSoup(str(title_[1])).find('font').string
                    else:
                        info['title'] = title_[0]
                quality_list = quality_re.findall(li.a['rel'])
                info['quality_list'] = quality_list[0] if quality_list else None
                lang = lang_re.findall(li.a['class'])
                if lang:
                    info['lang'] = lang[0].upper() + ' '
                info['next'] = 'folder' if li.find('a', 'folder-filelist') is None else 'filelist'
                info['folder'] = 'folder'
            else:  # file row
                try:
                    # 'video-qulaity' is the site's own (misspelled) CSS class.
                    info['qual'] = li.find('span', 'video-qulaity').string
                except AttributeError:
                    pass
                info['parent'] = ''
                info['folder'] = 'file'
                title = li.find('span', 'b-file-new__material-filename-text')
                if title is None:
                    title = li.find('span', 'b-file-new__link-material-filename-text')
                info['title'] = title.string
                a = li.find('a', 'b-file-new__link-material')
                info['href'] = a['href'] if a else ''
                a = li.find('a', 'b-file-new__link-material-download')
                info['only_download'] = 'only-download' in a['class']
                info['href_dl'] = a['href']
                info['size'] = a.span.string
            rez.append(info)
            ul = li.find('ul', recursive=False)
            if ul:
                # Collect the nested listing so it can be cached on its own.
                result.append({info['parent']: recur(ul, result)})
        return rez

    url = self.site_url + href + '?ajax&folder='
    cache = CacheToDb('fileManager.db', 0.1)
    result = cache.get(url + folder, '')
    if not result:
        Data = Get_url(url + folder, Cookie=True)
        Soup = BeautifulSoup(Data)
        isBlocked = Soup.find('div', id='file-block-text') is not None
        ul = Soup.find('ul', recursive=False)
        result = []
        if folder == '0':
            js = {folder: [recur(ul, result), isBlocked]}
        else:
            js = {folder: recur(ul, result)}
        result.append(js)
        # Cache every parsed listing for 30 minutes.  The requested folder
        # is appended last, so `result` ends up holding its own listing.
        for r in result:
            key = next(iter(r))
            cache.get(url + key, lambda x: [30 * 60, x], r[key])
            result = r[key]
    # Root listings are stored as [entries, isBlocked] — unpack for callers.
    if folder == '0':
        self.isBlocked = result[1]
        return result[0]
    return result
class kinopoisk:
    """Scraper for film metadata and ratings from kinopoisk.ru.

    Results are cached in a local DB (``kinopoisk.db``); cxz content pages
    and kinopoisk film records are stored under 'cxz:'/'kp:' key prefixes.
    """

    def __init__(self):
        # Shared cache for both cxz page records and kinopoisk film records.
        self.cache = CacheToDb('kinopoisk.db', 0.1)

    def GetLocalInfo(self, cxzid):
        """Cache-only lookup for *cxzid*; never triggers a network fetch.

        :return: {'cxz': ..., 'kinopoisk': ...} or None when not cached.
        """
        res = self.cache.get('cxz:' + cxzid, None)
        if not res:
            return None
        info = None
        if res['kpid']:
            info = self.cache.get('kp:' + res['kpid'], None, res['kpid'])
        return {'cxz': res, 'kinopoisk': info}

    def GetInfo(self, cxzid):
        """Lookup for *cxzid* that scrapes and caches missing entries.

        :return: {'cxz': ..., 'kinopoisk': ...} or None on failure.
        """
        res = self.cache.get('cxz:' + cxzid, self._cxz, cxzid)
        if not res:
            return None
        info = None
        if res['kpid']:
            info = self.cache.get('kp:' + res['kpid'], self._getinfo, res['kpid'])
        return {'cxz': res, 'kinopoisk': info}

    def _cxz(self, href):
        """Scrape the cxz content page at *href* and attach its kinopoisk id.

        :return: (True, info_dict) — the True flag tells the cache to store it.
        """
        cxz_data = cxz()
        cxz_data.contententPage(href)
        kpid = self._search(str(cxz_data.contententPageInfo['title_origin']),
                            str(cxz_data.contententPageInfo['year']))
        # NOTE(review): the page is deliberately fetched a second time here —
        # presumably _search disturbs the parser state; confirm before removing.
        cxz_data.contententPage(href)
        cxzInfo = cxz_data.contententPageInfo
        cxzInfo['kpid'] = kpid
        # TODO: storage lifetime for cxz records
        return True, cxzInfo

    def _search(self, title, year='', director=''):
        """Query the kinopoisk JSONP suggest handler for *title*.

        Prefers a suggestion whose year equals *year*; otherwise falls back to
        the first suggestion.  Returns the film id, or None when the handler
        returned nothing.  (*director* is accepted for interface compatibility
        but currently unused.)
        """
        p = '1.9.1'
        # Mimic jQuery's JSONP callback naming so the handler answers normally.
        g = {
            'callback': 'jQuery' + (p + '{:.17}'.format(random.random())).replace('.', '') + '_' + str(time.time() * 101),
            'q': title,
            'query_id': random.random(),
            'type': 'jsonp',
            'topsuggest': 'true',
        }
        url = 'http://www.kinopoisk.ru/handler_search.php?%s' % urllib.urlencode(g)
        headers = {
            'Accept': '*/*',
            'Accept-Encoding': 'gzip, deflate, sdch',
            'Accept-Language': 'ru-RU,ru;q=0.8,en-US;q=0.6,en;q=0.4',
            'Connection': 'keep-alive',
            'Referer': 'http://www.kinopoisk.ru/',
            'User-Agent': User_Agent,
        }
        Data = Get_url(url, headers=headers)
        # Strip the JSONP wrapper "<callback>(...)" down to the JSON payload.
        js = json.loads(Data.replace(g['callback'], '')[1:-1])
        js.pop('query_id', None)  # echo of our request id, not a result
        if not js:
            return None
        year_re = re.compile(r'(\d{4})')
        for j in sorted(js):
            jsyear = year_re.findall(js[j]['year'])
            if jsyear and jsyear[0] == year:
                return js[j]['id']
        return js['0']['id']

    def GetRating(self, id):
        """Fetch ratings for film *id*.

        :return: (kp_rating, kp_votes, imdb_rating, imdb_votes) as strings;
                 '-' marks any value that could not be obtained.
        """
        self.GetInfo(id)  # make sure the film info is cached first
        url = 'http://rating.kinopoisk.ru/' + str(id) + '.xml'
        Data = Get_url(url)
        if not Data:
            return '-', '-', '-', '-'
        xml = BeautifulStoneSoup(Data)
        try:
            kp = xml.find('kp_rating')
            r_kp = kp.string.encode('UTF-8')
            v_kp = kp['num_vote'].encode('UTF-8')
        except (AttributeError, KeyError, TypeError):
            # Element missing (find returned None) or attribute absent.
            r_kp = '-'
            v_kp = '-'
        try:
            imdb = xml.find('imdb_rating')
            r_imdb = imdb.string.encode('UTF-8')
            v_imdb = imdb['num_vote'].encode('UTF-8')
        except (AttributeError, KeyError, TypeError):
            r_imdb = '-'
            v_imdb = '-'
        return r_kp, v_kp, r_imdb, v_imdb

    def _getinfo(self, id):
        """Scrape the kinopoisk film page for *id*.

        :return: (True, info_dict) on success — the True flag tells the cache
                 to store the dict — or None when the page cannot be parsed.
        """
        headers = {
            'User-Agent': User_Agent,
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3',
            'Cache-Control': 'no-cache',
            'Referer': 'http://www.kinopoisk.ru/level/7/',
        }
        url = 'http://www.kinopoisk.ru/film/' + id + '/'
        Data = Get_url(url, headers=headers)
        # Check BEFORE decoding: the original decoded first, which raised
        # AttributeError whenever Get_url returned None on a failed fetch.
        if not Data:
            return None
        Data = Data.decode('windows-1251')
        Data = re.sub(r'<br[^>]*', '\n', Data)  # keep <br> line breaks as text
        Soup = BeautifulSoup(Data, convertEntities=BeautifulSoup.HTML_ENTITIES)
        FilmInfo = Soup.find('div', id='viewFilmInfoWrapper')
        if not FilmInfo:
            return None
        Info = {}
        Info['title'] = str(FilmInfo.find('div', id='headerFilm').h1.string)
        Info['originaltitle'] = str(FilmInfo.find('span', itemprop='alternativeHeadline').string)
        plot = Soup.find('div', itemprop='description')
        Info['plot'] = str(plot.string) if plot else ''
        info_film = FilmInfo.find('table', 'info').findAll('tr')
        Info['cast'] = []
        try:
            info_actors = FilmInfo.find('div', id='actorList').ul.findAll('li')
        except AttributeError:
            # Page has no actor list — leave cast empty.
            pass
        else:
            Info['cast'] = [str(x.find('a').string) for x in info_actors if '...' not in str(x)]
        # Map row label (first <td>) to the value cell (second <td>).
        lst = {}
        for i in info_film:
            td = i.findAll('td')
            lst[td[0].string] = td[1]
        tags = {'year': u'год', 'country': u'страна', 'tagline': u'слоган',
                'director': u'режиссер', 'writer': u'сценарий',
                'genre': u'жанр', 'runtime': u'время'}
        for tag in tags:
            value = ', '.join([str(x.string) for x in lst[tags[tag]].findAll('a')])
            if not value:
                value = ', '.join([str(x.string) for x in lst[tags[tag]]])
            Info[tag] = value.replace(', ...', '').replace(', слова', '')
        return True, Info
def fileManager(self, href, folder):
    """Return the cached (or freshly scraped) listing of *folder* under *href*.

    Every sub-folder listing discovered while parsing is cached too; for the
    root folder ('0') the site's "blocked" flag is kept on ``self.isBlocked``.
    """

    def walk(node, collected):
        # Convert one <ul> level into a list of entry dictionaries;
        # nested listings are pushed onto *collected* as they are found.
        entries = []
        for li in node.findAll('li', recursive=False):
            inner_list = li.find('ul')
            if inner_list:
                inner_list.extract()  # keep searches scoped to this level
            if not li.find('a'):
                continue
            # -------------------------------------------------------------
            parent_ids = re.compile("parent_id:\s?'?([\d]+)").findall(li.a['rel'])
            entry = {}
            if parent_ids:  # folder rows
                parent = parent_ids[0]
                entry['parent'] = parent
                try:
                    entry['title'] = li.a.b.string
                except:
                    pieces = li.a.contents
                    if len(pieces) > 1:
                        entry['title'] = pieces[0] + BeautifulSoup(str(pieces[1])).find('font').string
                    else:
                        entry['title'] = pieces[0]
                qualities = re.compile("quality_list:\s?'([^']+)").findall(li.a['rel'])
                entry['quality_list'] = qualities[0] if qualities else None
                langs = re.compile('\sm\-(\w+)\s').findall(li.a['class'])
                if langs:
                    entry['lang'] = langs[0].upper() + ' '
                if li.find('a', 'folder-filelist') == None:
                    entry['next'] = 'folder'
                else:
                    entry['next'] = 'filelist'
                entry['folder'] = 'folder'
            else:  # file rows
                try:
                    entry['qual'] = li.find('span', 'video-qulaity').string
                except:
                    pass
                entry['parent'] = ''
                entry['folder'] = 'file'
                name_span = li.find('span', 'b-file-new__material-filename-text')
                if name_span == None:
                    name_span = li.find('span', 'b-file-new__link-material-filename-text')
                entry['title'] = name_span.string
                view_link = li.find('a', 'b-file-new__link-material')
                entry['href'] = view_link['href'] if view_link else ''
                dl_link = li.find('a', 'b-file-new__link-material-download')
                entry['only_download'] = 'only-download' in dl_link['class']
                entry['href_dl'] = dl_link['href']
                entry['size'] = dl_link.span.string
            # -------------------------------------------------------------
            entries.append(entry)
            nested = li.find('ul', recursive=False)
            if nested:
                collected.append({entry['parent']: walk(nested, collected)})
        return entries

    url = self.site_url + href + '?ajax&folder='
    cache = CacheToDb('fileManager.db', 0.1)
    listing = cache.get(url + folder, '')
    if not listing:
        page = Get_url(url + folder, Cookie=True)
        soup = BeautifulSoup(page)
        blocked = soup.find('div', id='file-block-text') != None
        root_ul = soup.find('ul', recursive=False)
        parsed = []
        if folder == '0':
            top = {folder: [walk(root_ul, parsed), blocked]}
        else:
            top = {folder: walk(root_ul, parsed)}
        parsed.append(top)
        # Store each parsed listing for half an hour; the requested folder
        # was appended last, so `listing` ends up as its entries.
        for item in parsed:
            key = item.keys()[0]
            cache.get(url + key, lambda x: [30 * 60, x], item[key])
            listing = item[key]
    if folder == '0':
        # Root listings are stored as [entries, blocked-flag].
        self.isBlocked = listing[1]
        return listing[0]
    return listing
class kinopoisk:
    """Looks up film metadata and ratings on kinopoisk.ru, backed by a DB cache."""

    def __init__(self):
        # One cache holds both cxz page records ('cxz:' keys) and
        # kinopoisk film records ('kp:' keys).
        self.cache = CacheToDb('kinopoisk.db', 0.1)

    def GetLocalInfo(self, cxzid):
        """Cache-only variant of GetInfo: never hits the network."""
        cxz_info = self.cache.get('cxz:' + cxzid, None)
        if not cxz_info:
            return None
        kp_info = None
        if cxz_info['kpid']:
            kp_info = self.cache.get('kp:' + cxz_info['kpid'], None, cxz_info['kpid'])
        return {'cxz': cxz_info, 'kinopoisk': kp_info}

    def GetInfo(self, cxzid):
        """Return combined cxz + kinopoisk info, scraping anything missing."""
        cxz_info = self.cache.get('cxz:' + cxzid, self._cxz, cxzid)
        if not cxz_info:
            return None
        kp_info = None
        if cxz_info['kpid']:
            kp_info = self.cache.get('kp:' + cxz_info['kpid'], self._getinfo, cxz_info['kpid'])
        return {'cxz': cxz_info, 'kinopoisk': kp_info}

    def _cxz(self, href):
        """Build the cacheable record for a cxz page, with its kinopoisk id."""
        page = cxz()
        page.contententPage(href)
        found_id = self._search(str(page.contententPageInfo['title_origin']),
                                str(page.contententPageInfo['year']))
        page.contententPage(href)
        record = page.contententPageInfo
        record['kpid'] = found_id
        # TODO: storage lifetime for cxz records
        return True, record

    def _search(self, title, year='', director=''):
        """Ask the kinopoisk suggest handler for the film id matching *title*.

        A suggestion whose year equals *year* wins; otherwise the first
        suggestion is used.  Returns None when nothing came back.
        """
        p = '1.9.1'
        # jQuery-style JSONP callback name so the handler responds normally.
        params = {
            'callback': 'jQuery' + (p + '{:.17}'.format(random.random())).replace('.', '') + '_' + str(time.time() * 101),
            'q': title,
            'query_id': random.random(),
            'type': 'jsonp',
            'topsuggest': 'true',
        }
        url = 'http://www.kinopoisk.ru/handler_search.php?%s' % urllib.urlencode(params)
        headers = {
            'Accept': '*/*',
            'Accept-Encoding': 'gzip, deflate, sdch',
            'Accept-Language': 'ru-RU,ru;q=0.8,en-US;q=0.6,en;q=0.4',
            'Connection': 'keep-alive',
            'Referer': 'http://www.kinopoisk.ru/',
            'User-Agent': User_Agent,
        }
        raw = Get_url(url, headers=headers)
        # Peel off the "<callback>(...)" JSONP wrapper before parsing.
        suggestions = json.loads(raw.replace(params['callback'], '')[1:-1])
        del suggestions['query_id']
        if not suggestions:
            return None
        for key in sorted(suggestions):
            years = re.compile('(\d{4})').findall(suggestions[key]['year'])
            if years:
                if years[0] == year:
                    return suggestions[key]['id']
        return suggestions['0']['id']

    def GetRating(self, id):
        """Return (kp_rating, kp_votes, imdb_rating, imdb_votes); '-' = missing."""
        self.GetInfo(id)
        Data = Get_url('http://rating.kinopoisk.ru/' + str(id) + '.xml')
        if not Data:
            return '-', '-', '-', '-'
        xml = BeautifulStoneSoup(Data)
        try:
            node = xml.find('kp_rating')
            r_kp = node.string.encode('UTF-8')
            v_kp = node['num_vote'].encode('UTF-8')
        except:
            r_kp, v_kp = '-', '-'
        try:
            node = xml.find('imdb_rating')
            r_imdb = node.string.encode('UTF-8')
            v_imdb = node['num_vote'].encode('UTF-8')
        except:
            r_imdb, v_imdb = '-', '-'
        return r_kp, v_kp, r_imdb, v_imdb

    def _getinfo(self, id):
        """Scrape the film page for *id*; return (True, info) or None."""
        headers = {
            'User-Agent': User_Agent,
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3',
            'Cache-Control': 'no-cache',
            'Referer': 'http://www.kinopoisk.ru/level/7/',
        }
        target = 'http://www.kinopoisk.ru/film/' + id + '/'
        html = Get_url(target, headers=headers).decode('windows-1251')
        if not html:
            return None
        html = re.sub('<br[^>]*', '\n', html)  # preserve <br> as newlines
        page = BeautifulSoup(html, convertEntities=BeautifulSoup.HTML_ENTITIES)
        wrapper = page.find('div', id='viewFilmInfoWrapper')
        if not wrapper:
            return None
        film = {}
        film['title'] = str(wrapper.find('div', id='headerFilm').h1.string)
        film['originaltitle'] = str(wrapper.find('span', itemprop='alternativeHeadline').string)
        description = page.find('div', itemprop='description')
        film['plot'] = str(description.string) if description else ''
        table_rows = wrapper.find('table', 'info').findAll('tr')
        film['cast'] = []
        try:
            actor_items = wrapper.find('div', id='actorList').ul.findAll('li')
        except:
            pass
        else:
            film['cast'] = [str(a.find('a').string) for a in actor_items if not '...' in str(a)]
        # Map each row's label cell to its value cell.
        cells = {}
        for row in table_rows:
            tds = row.findAll('td')
            cells[tds[0].string] = tds[1]
        labels = {'year': u'год', 'country': u'страна', 'tagline': u'слоган',
                  'director': u'режиссер', 'writer': u'сценарий',
                  'genre': u'жанр', 'runtime': u'время'}
        for key in labels:
            text = ', '.join([str(x.string) for x in cells[labels[key]].findAll('a')])
            if not text:
                text = ', '.join([str(x.string) for x in cells[labels[key]]])
            film[key] = text.replace(', ...', '').replace(', слова', '')
        return True, film