Example #1
0
	def fileManager(self, href, folder):
		"""Fetch and parse the site's AJAX file-manager listing for *folder*.

		Downloads ``self.site_url + href + '?ajax&folder=' + folder``,
		parses it with BeautifulSoup and returns a list of entry dicts
		(folders and files). Parsed results are cached in
		'fileManager.db'; for the root folder ('0') a blocked-content
		flag is additionally stored on ``self.isBlocked`` as a side
		effect before returning.
		"""
		def recur(data, result):
			# Convert one <ul> level into a list of entry dicts.  Nested
			# levels are appended to *result* as {parent_id: entries}
			# so each sub-folder can be cached under its own key.
			rez =[]
			li_ = data.findAll('li', recursive=False)
			for l in li_:
				li = l
				# Detach any nested <ul> so the find() calls below only
				# see this entry's own markup.
				if li.find('ul'):li.find('ul').extract()

				# Entries without a link carry no useful data.
				if not li.find('a'):continue
				#-------------------------------------------------------------

				# A parent_id in the link's rel attribute marks a folder.
				rel = re.compile("parent_id:\s?'?([\d]+)").findall(li.a['rel'])
				info = {}
				if rel:# Folders
					info['parent']= rel = rel[0]

					try:
						info['title'] = li.a.b.string
					except:
						# No <b> wrapper: title is plain text, possibly
						# followed by a <font> suffix re-parsed separately.
						title_ = li.a.contents
						if len(title_)>1:
							info['title'] = title_[0]+BeautifulSoup(str(title_[1])).find('font').string
						else:
							info['title'] = title_[0]

					quality_list= re.compile("quality_list:\s?'([^']+)").findall(li.a['rel'])
					if quality_list:
						info['quality_list'] = quality_list[0]
					else:
						info['quality_list'] = None

					# Language code is encoded in the link class as ' m-XX '.
					lang = li.a['class']
					lang = re.compile('\sm\-(\w+)\s').findall(lang)
					if lang:
						info['lang']=lang[0].upper()+' '

					# 'filelist' folders open directly into a file list.
					info['next'] = 'folder' if (li.find('a', 'folder-filelist'))== None else 'filelist'
					info['folder'] = 'folder'
				else:# Files
					try:
						info['qual'] = li.find('span', 'video-qulaity').string
					except:
						pass
					info['parent'] =  '' #re.compile('(series-.*)').findall(li['class'])[0]
					info['folder'] = 'file'

					# Filename lives in one of two span classes depending
					# on the markup variant.
					title = li.find('span', 'b-file-new__material-filename-text')
					if title == None:
						title = li.find('span', 'b-file-new__link-material-filename-text')
					info['title']=title.string

					# Streaming link (may be absent for download-only files).
					a= li.find('a', 'b-file-new__link-material')
					info['href'] =''
					if a:
						info['href']= a['href']

					# Download link always carries the size in its <span>.
					a= li.find('a', 'b-file-new__link-material-download')
					info['only_download'] = 'only-download' in a['class']
					info['href_dl'] = a['href']
					info['size'] = a.span.string

				#---------------------------------------------------------------------
				rez.append(info)
				# NOTE(review): the nested <ul> was already extract()ed
				# above, so this branch looks unreachable — confirm.
				ul = li.find('ul', recursive=False)
				if ul:
					js = {info['parent']:recur(ul, result)}
					result.append(js)
			return rez

		url=self.site_url+href+'?ajax&folder='
		cache = CacheToDb('fileManager.db', 0.1)
		result = cache.get(url+folder, '')
		if not result:
			# Cache miss: fetch and parse the listing.
			Data =Get_url(url+folder, Cookie=True)
			Soup = BeautifulSoup(Data)
			isBlocked = Soup.find('div', id='file-block-text')!=None
			ul = Soup.find('ul', recursive=False)
			result =[]
			if folder =='0':
				# The root folder additionally records the blocked flag.
				js = {folder:[recur(ul,result),isBlocked]}
			else:
				js = {folder:recur(ul,result)}
			result.append(js)
			# Seed the cache (30 min TTL) for every folder parsed.
			for r in result:
				cache.get(url+r.keys()[0], lambda x:[30*60,x], r[r.keys()[0]])
			# NOTE(review): relies on *r* leaking from the loop above, so
			# this keeps the last appended entry's payload — confirm intended.
			result = r[r.keys()[0]]

		if folder == '0':
			self.isBlocked = result[1]
			return result[0]
		return  result
Example #2
0
    def fileManager(self, href, folder):
        """Fetch and parse the site's AJAX file-manager listing for *folder*.

        Downloads ``self.site_url + href + '?ajax&folder=' + folder``,
        parses it with BeautifulSoup and returns a list of entry dicts
        (folders and files). Parsed results are cached in
        'fileManager.db'; for the root folder ('0') a blocked-content
        flag is additionally stored on ``self.isBlocked`` as a side
        effect before returning.
        """
        def recur(data, result):
            # Convert one <ul> level into a list of entry dicts.  Nested
            # levels are appended to *result* as {parent_id: entries}
            # so each sub-folder can be cached under its own key.
            rez = []
            li_ = data.findAll('li', recursive=False)
            for l in li_:
                li = l
                # Detach any nested <ul> so the find() calls below only
                # see this entry's own markup.
                if li.find('ul'): li.find('ul').extract()

                # Entries without a link carry no useful data.
                if not li.find('a'): continue
                #-------------------------------------------------------------

                # A parent_id in the link's rel attribute marks a folder.
                rel = re.compile("parent_id:\s?'?([\d]+)").findall(li.a['rel'])
                info = {}
                if rel:  # Folders
                    info['parent'] = rel = rel[0]

                    try:
                        info['title'] = li.a.b.string
                    except:
                        # No <b> wrapper: title is plain text, possibly
                        # followed by a <font> suffix re-parsed separately.
                        title_ = li.a.contents
                        if len(title_) > 1:
                            info['title'] = title_[0] + BeautifulSoup(
                                str(title_[1])).find('font').string
                        else:
                            info['title'] = title_[0]

                    quality_list = re.compile(
                        "quality_list:\s?'([^']+)").findall(li.a['rel'])
                    if quality_list:
                        info['quality_list'] = quality_list[0]
                    else:
                        info['quality_list'] = None

                    # Language code is encoded in the link class as ' m-XX '.
                    lang = li.a['class']
                    lang = re.compile('\sm\-(\w+)\s').findall(lang)
                    if lang:
                        info['lang'] = lang[0].upper() + ' '

                    # 'filelist' folders open directly into a file list.
                    info['next'] = 'folder' if (li.find(
                        'a', 'folder-filelist')) == None else 'filelist'
                    info['folder'] = 'folder'
                else:  # Files
                    try:
                        info['qual'] = li.find('span', 'video-qulaity').string
                    except:
                        pass
                    info[
                        'parent'] = ''  #re.compile('(series-.*)').findall(li['class'])[0]
                    info['folder'] = 'file'

                    # Filename lives in one of two span classes depending
                    # on the markup variant.
                    title = li.find('span',
                                    'b-file-new__material-filename-text')
                    if title == None:
                        title = li.find(
                            'span', 'b-file-new__link-material-filename-text')
                    info['title'] = title.string

                    # Streaming link (may be absent for download-only files).
                    a = li.find('a', 'b-file-new__link-material')
                    info['href'] = ''
                    if a:
                        info['href'] = a['href']

                    # Download link always carries the size in its <span>.
                    a = li.find('a', 'b-file-new__link-material-download')
                    info['only_download'] = 'only-download' in a['class']
                    info['href_dl'] = a['href']
                    info['size'] = a.span.string

                #---------------------------------------------------------------------
                rez.append(info)
                # NOTE(review): the nested <ul> was already extract()ed
                # above, so this branch looks unreachable — confirm.
                ul = li.find('ul', recursive=False)
                if ul:
                    js = {info['parent']: recur(ul, result)}
                    result.append(js)
            return rez

        url = self.site_url + href + '?ajax&folder='
        cache = CacheToDb('fileManager.db', 0.1)
        result = cache.get(url + folder, '')
        if not result:
            # Cache miss: fetch and parse the listing.
            Data = Get_url(url + folder, Cookie=True)
            Soup = BeautifulSoup(Data)
            isBlocked = Soup.find('div', id='file-block-text') != None
            ul = Soup.find('ul', recursive=False)
            result = []
            if folder == '0':
                # The root folder additionally records the blocked flag.
                js = {folder: [recur(ul, result), isBlocked]}
            else:
                js = {folder: recur(ul, result)}
            result.append(js)
            # Seed the cache (30 min TTL) for every folder parsed.
            for r in result:
                cache.get(url + r.keys()[0], lambda x: [30 * 60, x],
                          r[r.keys()[0]])
            # NOTE(review): relies on *r* leaking from the loop above, so
            # this keeps the last appended entry's payload — confirm intended.
            result = r[r.keys()[0]]

        if folder == '0':
            self.isBlocked = result[1]
            return result[0]
        return result
Example #3
0
	def __init__(self):
		"""Open the kinopoisk metadata cache backed by 'kinopoisk.db'."""
		self.cache = CacheToDb('kinopoisk.db', 0.1)
Example #4
0
class kinopoisk:
	"""Scraper for kinopoisk.ru film metadata and ratings.

	Results are cached in 'kinopoisk.db' via CacheToDb; cxz content
	pages are mapped to Kinopoisk ids, and full film info is fetched
	on demand.
	"""
	def __init__(self):
		# Shared cache for both 'cxz:*' and 'kp:*' entries.
		self.cache = CacheToDb('kinopoisk.db', 0.1)

	def GetLocalInfo(self, cxzid):
		"""Return cached {'cxz':..., 'kinopoisk':...} for *cxzid*, or None.

		Cache-only: no fetch callback is passed, so nothing is downloaded.
		"""
		res = self.cache.get('cxz:'+cxzid, None)
		if not res:
			return None
		info = None
		if res['kpid']:
			info  = self.cache.get('kp:'+res['kpid'], None, res['kpid'])
		return {'cxz':res, 'kinopoisk':info}

	def GetInfo(self, cxzid):
		"""Return {'cxz':..., 'kinopoisk':...} for *cxzid*, fetching on a cache miss."""
		res = self.cache.get('cxz:'+cxzid, self._cxz, cxzid)
		if not res:
			return None
		info = None
		if res['kpid']:
			info  = self.cache.get('kp:'+res['kpid'], self._getinfo, res['kpid'])
		return {'cxz':res, 'kinopoisk':info}

	def _cxz(self, href):
		"""Resolve a cxz content page to its Kinopoisk id.

		Returns (True, info); the leading flag presumably tells CacheToDb
		to store the entry — confirm against CacheToDb.
		"""
		cxz_data = cxz()
		cxz_data.contententPage(href)
		res = self._search(str(cxz_data.contententPageInfo['title_origin']), str(cxz_data.contententPageInfo['year']))

		# NOTE(review): the page is fetched a second time here — confirm
		# whether the refresh is required or redundant.
		cxz_data.contententPage(href)
		cxzInfo = cxz_data.contententPageInfo
		cxzInfo['kpid'] = res
		#TODO cache lifetime for cxz info
		return True, cxzInfo

	def _search(self, title, year='', director=''):
		"""Search kinopoisk.ru for *title*; prefer an exact *year* match.

		Uses the site's JSONP suggest endpoint. Falls back to the top
		suggestion when no year matches. *director* is currently unused.
		"""
		# Mimic the browser's jQuery JSONP callback naming scheme.
		p = '1.9.1'
		g = {'callback':'jQuery'+(p+'{:.17}'.format(random.random())).replace('.', '')+'_'+str(time.time()*101),
			 'q':title,
			 'query_id':random.random(),
			 'type':'jsonp',
			 'topsuggest':'true'
			}
		url = 'http://www.kinopoisk.ru/handler_search.php?%s'%urllib.urlencode(g)
		headers = {'Accept':'*/*',
					'Accept-Encoding':'gzip, deflate, sdch',
					'Accept-Language':'ru-RU,ru;q=0.8,en-US;q=0.6,en;q=0.4',
					'Connection':'keep-alive',
					'Referer':'http://www.kinopoisk.ru/',
					'User-Agent':User_Agent
				  }
		Data = Get_url(url, headers=headers)
		# Strip the JSONP callback wrapper, leaving bare JSON.
		js=json.loads(Data.replace(g['callback'],'')[1:-1])
		del(js['query_id'])
		if not js: return None
		for j in sorted(js):
			jsyear = re.compile('(\d{4})').findall(js[j]['year'])
			if jsyear:
				if jsyear[0]==year:
					id = js[j]['id']
					return id
		# No year match: fall back to the first suggestion.
		return js['0']['id']

	def GetRating(self,id):
		"""Return (kp_rating, kp_votes, imdb_rating, imdb_votes) as strings.

		Missing or unparsable values are reported as '-'.
		"""
		# Make sure the film info is cached before querying ratings.
		self.GetInfo(id)

		url = 'http://rating.kinopoisk.ru/'+str(id)+'.xml'
		Data = Get_url(url)
		if Data:
			xml = BeautifulStoneSoup(Data)
			try:
				kp = xml.find('kp_rating')
				r_kp = kp.string.encode('UTF-8')
				v_kp = kp['num_vote'].encode('UTF-8')
			except:
				r_kp = '-'
				v_kp = '-'
			try:
				imdb = xml.find('imdb_rating')
				r_imdb =  imdb.string.encode('UTF-8')
				v_imdb = imdb['num_vote'].encode('UTF-8')
			except:
				r_imdb =  '-'
				v_imdb =  '-'
			return r_kp, v_kp, r_imdb, v_imdb
		else:
			return '-', '-', '-', '-'


	def _getinfo(self,id):
		"""Scrape the film page for *id* into an info dict.

		Returns (True, Info) on success, or None when the page has no
		film-info wrapper (e.g. missing/blocked).
		"""
		headers = {'User-Agent': User_Agent,
				   'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
				   'Accept-Language': 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3',
				   'Cache-Control': 'no-cache',
				   'Referer': 'http://www.kinopoisk.ru/level/7/'
				  }
		url = 'http://www.kinopoisk.ru/film/' + id + '/'
		Data = Get_url(url,headers=headers).decode('windows-1251')
		if not Data: return None
		# Replace <br> tags with newlines before parsing.
		Data = re.sub('<br[^>]*','\n',Data)
		Soup = BeautifulSoup(Data, convertEntities=BeautifulSoup.HTML_ENTITIES)

		FilmInfo = Soup.find('div', id='viewFilmInfoWrapper')
		if not FilmInfo : return None

		Info = {}
		Info['title']         = str(FilmInfo.find('div', id='headerFilm').h1.string)
		Info['originaltitle'] = str(FilmInfo.find('span', itemprop='alternativeHeadline').string)

		plot =Soup.find('div', itemprop='description')
		Info['plot'] =''
		if plot:
			Info['plot'] = str(plot.string)

		info_film   = FilmInfo.find('table', 'info').findAll('tr')

		# Cast list may be absent on some pages; '...' rows are fillers.
		Info['cast'] =[]
		try:
			info_actors = FilmInfo.find('div', id='actorList').ul.findAll('li')
		except:
			pass
		else:
			Info['cast'] = [str(x.find('a').string) for x in info_actors if not '...' in str(x)]

		# Index the info table rows by their (Russian) label cell.
		lst={}
		for i in info_film:
			td = i.findAll('td')
			lst[td[0].string] = td[1]

		# Map output keys to the Russian row labels in the info table.
		tags ={'year':u'год','country':u'страна','tagline':u'слоган','director':u'режиссер','writer':u'сценарий','genre':u'жанр',
			   'runtime':u'время'}

		for tag in tags:
			# Prefer linked values; fall back to the cell's raw contents.
			value = ', '.join([str(x.string) for x in lst[tags[tag]].findAll('a')])
			if not value:
				value= ', '.join([str(x.string) for x in lst[tags[tag]]])
			Info[tag] = value.replace(', ...','').replace(', слова', '')

		return True, Info
Example #5
0
 def __init__(self):
     """Open the kinopoisk metadata cache backed by 'kinopoisk.db'."""
     self.cache = CacheToDb('kinopoisk.db', 0.1)
Example #6
0
class kinopoisk:
    """Scraper for kinopoisk.ru film metadata and ratings.

    Results are cached in 'kinopoisk.db' via CacheToDb; cxz content
    pages are mapped to Kinopoisk ids, and full film info is fetched
    on demand.
    """

    def __init__(self):
        # Shared cache for both 'cxz:*' and 'kp:*' entries.
        self.cache = CacheToDb('kinopoisk.db', 0.1)

    def GetLocalInfo(self, cxzid):
        """Return cached {'cxz':..., 'kinopoisk':...} for *cxzid*, or None.

        Cache-only: no fetch callback is passed, so nothing is downloaded.
        """
        res = self.cache.get('cxz:' + cxzid, None)
        if not res:
            return None
        info = None
        if res['kpid']:
            info = self.cache.get('kp:' + res['kpid'], None, res['kpid'])
        return {'cxz': res, 'kinopoisk': info}

    def GetInfo(self, cxzid):
        """Return {'cxz':..., 'kinopoisk':...}, fetching on a cache miss."""
        res = self.cache.get('cxz:' + cxzid, self._cxz, cxzid)
        if not res:
            return None
        info = None
        if res['kpid']:
            info = self.cache.get('kp:' + res['kpid'], self._getinfo,
                                  res['kpid'])
        return {'cxz': res, 'kinopoisk': info}

    def _cxz(self, href):
        """Resolve a cxz content page to its Kinopoisk id.

        Returns (True, info); the leading flag presumably tells CacheToDb
        to store the entry — confirm against CacheToDb.
        """
        cxz_data = cxz()
        cxz_data.contententPage(href)
        res = self._search(str(cxz_data.contententPageInfo['title_origin']),
                           str(cxz_data.contententPageInfo['year']))

        # NOTE(review): the page is fetched a second time here — confirm
        # whether the refresh is required or redundant.
        cxz_data.contententPage(href)
        cxzInfo = cxz_data.contententPageInfo
        cxzInfo['kpid'] = res
        #TODO cache lifetime for cxz info
        return True, cxzInfo

    def _search(self, title, year='', director=''):
        """Search kinopoisk.ru for *title*; prefer an exact *year* match.

        Uses the site's JSONP suggest endpoint. Falls back to the top
        suggestion when no year matches. *director* is currently unused.
        """
        # Mimic the browser's jQuery JSONP callback naming scheme.
        p = '1.9.1'
        g = {
            'callback':
            'jQuery' +
            (p + '{:.17}'.format(random.random())).replace('.', '') + '_' +
            str(time.time() * 101),
            'q':
            title,
            'query_id':
            random.random(),
            'type':
            'jsonp',
            'topsuggest':
            'true'
        }
        url = 'http://www.kinopoisk.ru/handler_search.php?%s' % urllib.urlencode(
            g)
        headers = {
            'Accept': '*/*',
            'Accept-Encoding': 'gzip, deflate, sdch',
            'Accept-Language': 'ru-RU,ru;q=0.8,en-US;q=0.6,en;q=0.4',
            'Connection': 'keep-alive',
            'Referer': 'http://www.kinopoisk.ru/',
            'User-Agent': User_Agent
        }
        Data = Get_url(url, headers=headers)
        # Strip the JSONP callback wrapper, leaving bare JSON.
        js = json.loads(Data.replace(g['callback'], '')[1:-1])
        del (js['query_id'])
        if not js: return None
        for j in sorted(js):
            jsyear = re.compile('(\d{4})').findall(js[j]['year'])
            if jsyear:
                if jsyear[0] == year:
                    id = js[j]['id']
                    return id
        # No year match: fall back to the first suggestion.
        return js['0']['id']

    def GetRating(self, id):
        """Return (kp_rating, kp_votes, imdb_rating, imdb_votes) as strings.

        Missing or unparsable values are reported as '-'.
        """
        # Make sure the film info is cached before querying ratings.
        self.GetInfo(id)

        url = 'http://rating.kinopoisk.ru/' + str(id) + '.xml'
        Data = Get_url(url)
        if Data:
            xml = BeautifulStoneSoup(Data)
            try:
                kp = xml.find('kp_rating')
                r_kp = kp.string.encode('UTF-8')
                v_kp = kp['num_vote'].encode('UTF-8')
            except:
                r_kp = '-'
                v_kp = '-'
            try:
                imdb = xml.find('imdb_rating')
                r_imdb = imdb.string.encode('UTF-8')
                v_imdb = imdb['num_vote'].encode('UTF-8')
            except:
                r_imdb = '-'
                v_imdb = '-'
            return r_kp, v_kp, r_imdb, v_imdb
        else:
            return '-', '-', '-', '-'

    def _getinfo(self, id):
        """Scrape the film page for *id* into an info dict.

        Returns (True, Info) on success, or None when the page has no
        film-info wrapper (e.g. missing/blocked).
        """
        headers = {
            'User-Agent': User_Agent,
            'Accept':
            'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3',
            'Cache-Control': 'no-cache',
            'Referer': 'http://www.kinopoisk.ru/level/7/'
        }
        url = 'http://www.kinopoisk.ru/film/' + id + '/'
        Data = Get_url(url, headers=headers).decode('windows-1251')
        if not Data: return None
        # Replace <br> tags with newlines before parsing.
        Data = re.sub('<br[^>]*', '\n', Data)
        Soup = BeautifulSoup(Data, convertEntities=BeautifulSoup.HTML_ENTITIES)

        FilmInfo = Soup.find('div', id='viewFilmInfoWrapper')
        if not FilmInfo: return None

        Info = {}
        Info['title'] = str(FilmInfo.find('div', id='headerFilm').h1.string)
        Info['originaltitle'] = str(
            FilmInfo.find('span', itemprop='alternativeHeadline').string)

        plot = Soup.find('div', itemprop='description')
        Info['plot'] = ''
        if plot:
            Info['plot'] = str(plot.string)

        info_film = FilmInfo.find('table', 'info').findAll('tr')

        # Cast list may be absent on some pages; '...' rows are fillers.
        Info['cast'] = []
        try:
            info_actors = FilmInfo.find('div', id='actorList').ul.findAll('li')
        except:
            pass
        else:
            Info['cast'] = [
                str(x.find('a').string) for x in info_actors
                if not '...' in str(x)
            ]

        # Index the info table rows by their (Russian) label cell.
        lst = {}
        for i in info_film:
            td = i.findAll('td')
            lst[td[0].string] = td[1]

        # Map output keys to the Russian row labels in the info table.
        tags = {
            'year': u'год',
            'country': u'страна',
            'tagline': u'слоган',
            'director': u'режиссер',
            'writer': u'сценарий',
            'genre': u'жанр',
            'runtime': u'время'
        }

        for tag in tags:
            # Prefer linked values; fall back to the cell's raw contents.
            value = ', '.join(
                [str(x.string) for x in lst[tags[tag]].findAll('a')])
            if not value:
                value = ', '.join([str(x.string) for x in lst[tags[tag]]])
            Info[tag] = value.replace(', ...', '').replace(', слова', '')

        return True, Info