def search(self, kubo_item, search_params):
    """Search Google for the Douban subject id that matches a Kubo item.

    The query is built from the item's title, its year (when set) and its
    first star (when there is one). The resulting URL is stored on
    ``self.url`` before the request is sent.

    :param kubo_item: mapping with ``title``, ``year`` and ``stars`` keys.
    :param search_params: object exposing ``user_agent`` (or ``None``),
        ``proxy`` (``None`` or a mapping with ``schema`` and ``url`` keys)
        and ``timeout``.
    :return: the first Douban subject id found in the result page (a string
        of digits), or ``None`` when nothing matched or the HTTP status was
        not 200.
    """
    # NOTE(review): the query parts are interpolated without explicit
    # percent-encoding; confirm titles never contain '&' or '#'.
    self.url = 'https://www.google.com.tw/search?q=%s %s %s' % (
        kubo_item['title'],
        kubo_item['year'] if kubo_item['year'] else '',
        kubo_item['stars'][0] if len(kubo_item['stars']) > 0 else ''
    )

    headers = None
    if search_params.user_agent is not None:
        headers = {'User-Agent': search_params.user_agent}

    proxies = None
    proxy_url = None
    if search_params.proxy is not None:
        proxy_url = search_params.proxy['url']
        proxies = {search_params.proxy['schema']: proxy_url}

    response = requests.get(self.url, headers=headers, proxies=proxies,
                            timeout=search_params.timeout)
    # set correct encoding
    fix_response_encoding(response)

    if response.status_code != 200:
        # proxy_url stays None when no proxy is configured; the original
        # code indexed search_params.proxy here and crashed in that case.
        logging.warning(
            'url: %s, http status: %d, proxy: %s'
            % (self.url, response.status_code, proxy_url)
        )
        return None

    # Raw string so that \d reaches the regex engine unchanged.
    pattern = r'<cite>movie.douban.com/subject/(\d+)/</cite>'
    results = re.findall(pattern, response.text)
    if results:
        return results[0]
    return None
def search(self, kubo_item, search_params):
    """Query the Douban movie search API for a Kubo item.

    The query is built from the item's title, its year (when set) and its
    first star (when there is one). The resulting URL is stored on
    ``self.url`` before the request is sent.

    :param kubo_item: mapping with ``title``, ``year`` and ``stars`` keys.
    :param search_params: object exposing ``user_agent`` (or ``None``),
        ``proxy`` (``None`` or a mapping with ``schema`` and ``url`` keys)
        and ``timeout``.
    :return: whatever ``self._get_vid(result, kubo_item)`` returns for the
        decoded JSON response (presumably the matching Douban id -- confirm
        against ``_get_vid``), or ``None`` when the HTTP status was not 200.
    """
    # NOTE(review): the query parts are interpolated without explicit
    # percent-encoding; confirm titles never contain '&' or '#'.
    self.url = 'https://api.douban.com/v2/movie/search?q=%s %s %s' % (
        kubo_item['title'],
        kubo_item['year'] if kubo_item['year'] else '',
        kubo_item['stars'][0] if len(kubo_item['stars']) > 0 else ''
    )

    headers = None
    if search_params.user_agent is not None:
        headers = {'User-Agent': search_params.user_agent}

    proxies = None
    proxy_url = None
    if search_params.proxy is not None:
        proxy_url = search_params.proxy['url']
        proxies = {search_params.proxy['schema']: proxy_url}

    response = requests.get(self.url, headers=headers, proxies=proxies,
                            timeout=search_params.timeout)
    # set correct encoding
    fix_response_encoding(response)

    if response.status_code != 200:
        # proxy_url stays None when no proxy is configured; the original
        # code indexed search_params.proxy here and crashed in that case.
        logging.warning(
            'url: %s, http status: %d, proxy: %s'
            % (self.url, response.status_code, proxy_url)
        )
        return None

    result = json.loads(response.text)
    return self._get_vid(result, kubo_item)
def get_playdata(self, url):
    """
    Fetch the video list that Kubo hides inside the page's JavaScript.

    The page is downloaded with a 30 second timeout and its text is handed
    to ``self.jsexecutor.get_playdata``; the ``'Data'`` entry of that result
    is returned.

    :param url: address of the page to download.
    :return: the play data, or an empty list when the HTTP status is not
        200. The expected shape is a list of sources such as::

            [{
                "servername": null,
                "playname": "xigua",
                "playurls": [
                    ["<episode title>",
                     "ftp://a.gbl.114s.com:20320/4422/<file>.mkv",
                     "/vod-play-id-82698-sid-0-pid-1.html"],
                    ...
                ]
            }, {
                "servername": null,
                "playname": "bj58",
                "playurls": [
                    ["<episode title>",
                     "fun58_756xPfUP%2FBnVcLgrsGABxA9JAVH7vs1Is2QVrFm2Xqo%3D",
                     "/vod-play-id-82698-sid-2-pid-1.html"],
                    ...
                ]
            }]
    """
    page = requests.get(url, timeout=30)
    # set correct encoding
    fix_response_encoding(page)

    if page.status_code == 200:
        extracted = self.jsexecutor.get_playdata(page.text)
        return extracted['Data']

    logging.warning('url: %s, http status: %d' % (url, page.status_code))
    return []
def get_performers(self, performers):
    """Return the performer names from DMM's ajax-performer endpoint.

    ``self.html`` is scanned for an ``ajax-performer/=/data=...`` reference.
    When one is found, the corresponding DMM URL is fetched and the text of
    every ``<a>`` element in the response is returned.

    :param performers: fallback value returned unchanged when no reference
        is found, the request times out, or the HTTP status is not 200.
    :return: list of link texts from the ajax response, or ``performers``.
    """
    found = re.search(".*(?:ajax-performer/=/data=([^']*))", self.html)
    if not found:
        return performers

    ajax_url = (
        'http://www.dmm.co.jp/digital/videoa/-/detail/ajax-performer/=/data=%s'
        % found.group(1)
    )
    try:
        reply = requests.get(ajax_url, timeout=30)
        # set correct encoding
        fix_response_encoding(reply)
        if reply.status_code != 200:
            logging.warning('url: %s, status: %d' % (ajax_url, reply.status_code))
        else:
            links = Selector(text=reply.text).xpath("//a/text()")
            return links.extract()
    except requests.exceptions.Timeout:
        logging.warning('url: %s timeout' % ajax_url)

    return performers
def get_albuminfos(self, data):
    """
    Fetch album information from Tudou for the items in ``data``.

    Each item's ``playUrl`` is converted with ``self.get_id`` and the
    results are sent, comma separated, as the ``acodes`` query value.

    :param data: mapping whose ``items`` entry is a list of mappings, each
        with a ``playUrl`` key.
    :return: the decoded JSON response, or ``None`` when ``items`` is empty.
    """
    items = data['items']
    if len(items) == 0:
        return

    codes = (self.get_id(entry['playUrl']) for entry in items)
    url = 'http://www.tudou.com/crp/getAlbumInfos.action?acodes=' + ','.join(codes)

    reply = requests.get(url, timeout=30)
    # set correct encoding
    fix_response_encoding(reply)
    return json.loads(reply.text)
def get_albuminfos(self, data):
    """
    Look up Tudou album details for every item listed in ``data``.

    The album codes are obtained by passing each item's ``playUrl`` to
    ``self.get_id``; they are joined with commas and appended to the
    ``getAlbumInfos.action`` URL.

    :param data: mapping whose ``items`` entry is a list of mappings, each
        with a ``playUrl`` key.
    :return: the decoded JSON response, or ``None`` when ``items`` is empty.
    """
    entries = data['items']
    if len(entries) == 0:
        return

    acodes = ','.join(self.get_id(entry['playUrl']) for entry in entries)
    endpoint = 'http://www.tudou.com/crp/getAlbumInfos.action?acodes=' + acodes

    response = requests.get(endpoint, timeout=30)
    # set correct encoding
    fix_response_encoding(response)
    return json.loads(response.text)
def get_videos(self, term):
    """
    Return the first page of YouTube results for ``term`` plus "ktv".

    :param term: free-form search text, e.g. a song title and artist.
    :return: list of lxml elements matching
        ``//div[contains(@class, 'yt-lockup-video')]``, or ``None`` when the
        response is not OK.
    """
    # "Some Song A-Lin" => "Some Song A-Lin ktv"
    query_term = '%s ktv' % term

    # Let requests build the query string: spaces still become '+', and
    # characters such as '&', '#', '+' or '=' inside the term are now
    # percent-encoded instead of corrupting the query.
    response = requests.get(
        'https://www.youtube.com/results',
        params=[
            ('lclk', 'video'),
            ('filters', 'video'),
            ('search_query', query_term),
        ],
        timeout=30
    )
    # set correct encoding
    fix_response_encoding(response)

    if not response.ok:
        return None

    body = lxml.html.fromstring(response.text)
    return body.xpath("//div[contains(@class, 'yt-lockup-video')]")
def get_performers(self, performers):
    """Look up performer names through DMM's ajax-performer endpoint.

    ``self.html`` is searched for an ``ajax-performer/=/data=...``
    reference. If present, the matching DMM URL is requested and the text
    of each ``<a>`` element in the reply is returned.

    :param performers: value returned unchanged when there is no reference,
        the request times out, or the HTTP status is not 200.
    :return: list of link texts from the ajax reply, or ``performers``.
    """
    match = re.search(".*(?:ajax-performer/=/data=([^']*))", self.html)
    if not match:
        return performers

    data_token = match.group(1)
    ajax_url = 'http://www.dmm.co.jp/digital/videoa/-/detail/ajax-performer/=/data=%s' % data_token

    try:
        response = requests.get(ajax_url, timeout=30)
        # set correct encoding
        fix_response_encoding(response)
        if response.status_code != 200:
            logging.warning('url: %s, status: %d' % (ajax_url, response.status_code))
        else:
            selector = Selector(text=response.text)
            return selector.xpath("//a/text()").extract()
    except requests.exceptions.Timeout:
        logging.warning('url: %s timeout' % ajax_url)

    return performers
def get_posterurl_by_width(self, imdbpymovie, width):
    """Return the TMDB poster URL for this item at the requested width.

    When ``self._tmdbresult`` is not cached yet and enough of the item's
    fields are populated, TMDB's ``find`` endpoint is queried by
    ``self._imdbid`` and the decoded JSON is stored on ``self._tmdbresult``.
    A non-200 reply triggers a 5 second sleep and a warning instead.

    :param imdbpymovie: not used by this method.
    :param width: integer pixel width inserted into the image URL
        (``w<width>``).
    :return: the poster URL when the cached result holds exactly one movie
        with a non-empty ``poster_path``; otherwise ``None``.
    """
    if self._tmdbresult is None:
        # FIXME: if the lookup should run for every item, change this
        # condition. The lookup runs only when at least two checks pass.
        # NOTE(review): 'stars' is checked twice, so a non-empty 'stars'
        # alone satisfies the threshold -- likely a copy-paste slip; confirm
        # which field the third check was meant to test.
        checks = (
            self['directors'] != [],
            self['stars'] != [],
            self['stars'] != [],
        )
        passed = sum(1 for check in checks if check)

        if passed > 1:
            url = 'http://api.themoviedb.org/3/find/%s?external_source=imdb_id&api_key=%s' % (
                self._imdbid, self._tmdbapikey)
            response = requests.get(url, timeout=30)
            # set correct encoding
            fix_response_encoding(response)
            if response.status_code != 200:
                time.sleep(5)
                logger.warning('url: %s, status: %d' % (url, response.status_code))
            else:
                self._tmdbresult = json.loads(response.text)

    tmdb = self._tmdbresult
    if tmdb is None:
        return None

    if len(tmdb['movie_results']) != 1:
        return None

    poster_path = tmdb['movie_results'][0]['poster_path']
    if not poster_path:
        return None

    return 'http://image.tmdb.org/t/p/w%d%s' % (width, poster_path)