def search(self, entry, config):
    """Search elitetorrent.net for each of the entry's search strings.

    :param entry: task entry; ``search_strings`` (or ``title``) supplies queries
    :param config: plugin config (unused here)
    :return: set of result Entries with ``title`` and ``url`` fields
    """
    session = Session()
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        # BUGFIX: the original encoded to bytes first and then called
        # str-based replace/concatenation on the result, which raises
        # TypeError on Python 3. Keep the query as text; '+' joins words
        # in the site's URL path scheme.
        query = normalize_unicode(clean_title(search_string)).replace(' ', '+')
        url = 'http://www.elitetorrent.net/busqueda/' + query
        log.debug('Fetching URL for `%s`: %s' % (search_string, url))
        page = session.get(url).content
        soup = get_soup(page)
        # Each result anchor carries the display title and a relative href
        # whose second path component is the torrent slug.
        for result in soup.findAll('a', 'nombre'):
            # Use a fresh name instead of shadowing the `entry` parameter.
            result_entry = Entry()
            result_entry['title'] = result['title']
            result_entry['url'] = 'http://www.elitetorrent.net/get-torrent/' + result['href'].split('/')[2]
            log.debug('Adding entry `%s`: %s' % (result_entry['title'], result_entry['url']))
            entries.add(result_entry)
    return entries
class T411RestClient(object):
    """A REST client for the T411 API.

    Handles token-based authentication (token sent via the ``Authorization``
    header) and the JSON endpoints used by the plugin: category tree, term
    tree, search and torrent details.
    """

    @staticmethod
    def template_url(url_scheme='http'):
        # URL template with a single '%s' placeholder for an API path.
        return url_scheme + '://' + T411API_DOMAIN_URL + '%s'

    @staticmethod
    def download_url(torrent_id, url_scheme='http'):
        # Full download URL for the given torrent id.
        return (T411RestClient.template_url(url_scheme) % T411API_DOWNLOAD_PATH) + str(torrent_id)

    def __init__(self, username=None, password=None, url_scheme='http'):
        self.credentials = {'username': username, 'password': password}
        self.api_token = None
        self.api_template_url = url_scheme + '://' + T411API_DOMAIN_URL + '%s'
        self.web_session = Session()

    def auth(self):
        """
        Request server to obtain an api token. Obtained token
        will be set for future usage of the client instance
        :return:
        """
        auth_url = self.api_template_url % T411API_AUTH_PATH
        response = self.web_session.post(auth_url, self.credentials)
        json_response = response.json()
        error_description = json_response.get('error', None)
        if error_description:
            log.error('%d - %s', json_response.get('code'), error_description)
        else:
            self.set_api_token(json_response.get('token'))

    def set_api_token(self, api_token):
        """
        Set the client to use an api token for all future requests.
        :param api_token: token string returned by the auth endpoint
        :return:
        """
        self.api_token = api_token
        self.web_session.headers.update({'Authorization': self.api_token})

    def is_authenticated(self):
        """
        :return: True if an api token is set.
        Note that the client doesn't check if the token
        is valid (expired or wrong).
        """
        return self.api_token is not None

    @staticmethod
    def raise_on_fail_response(json_response):
        """
        Raise an ApiError if the server returned an error payload.
        :param json_response: decoded JSON payload, or None
        :raises ApiError: when the payload contains an 'error' field
        """
        if json_response is None:
            # BUGFIX: the original used `pass` here and then fell through
            # to attribute access on None, raising AttributeError. A missing
            # payload is simply nothing to check.
            return
        error_name = json_response.get('error', None)
        error_code = json_response.get('code', None)
        if error_name is not None:
            raise ApiError(error_code, error_name)

    def get_json(self, path, params=None):
        """
        Common method for requesting a JSON response.
        :param path: API path substituted into the URL template
        :param params: optional query-string parameters
        :return: decoded JSON payload
        """
        url = self.api_template_url % path
        request = self.web_session.get(url, params=params)
        try:
            result = request.json()
        except ValueError:
            # Some responses carry leading garbage; the JSON document may
            # still be recoverable from the last line of the body.
            log.debug("Response from %s was not JSON encoded. Attempting deep inspection...", path)
            try:
                last_line = request.text.splitlines()[-1]
                result = json.loads(last_line)
            except (ValueError, IndexError):
                log.warning("Server response doesn't contains any JSON encoded response.")
                raise
        T411RestClient.raise_on_fail_response(result)
        return result

    @auth_required
    def retrieve_category_tree(self):
        """
        Request T411 API for retrieving categories and their subcategories.
        :return: dict keyed by category id
        """
        return self.get_json(T411API_CATEGORY_TREE_PATH)

    @auth_required
    def retrieve_terms_tree(self):
        """
        Request T411 API for retrieving term types and terms.
        :return: dict keyed by category id
        """
        return self.get_json(T411API_TERMS_PATH)

    @auth_required
    def search(self, query):
        """
        Search torrent
        :param query: dict
        :param query['category_id']: Int optional
        :param query['result_per_page']: Int optional
        :param query['page_index']: Int optional
        :param query['terms']: iterable of (term type id, term id) pairs
        :return: dict
        """
        url = T411API_SEARCH_PATH
        if query.get('expression') is not None:
            url += query['expression']

        url_params = {}
        if query.get('category_id') is not None:
            # using cat or cid will do the same result
            # but using cid without query expression will not broke
            # results
            url_params['cid'] = query['category_id']
        if query.get('result_per_page') is not None:
            url_params['limit'] = query['result_per_page']
        if query.get('page_index') is not None:
            url_params['offset'] = query['page_index']
        if query.get('terms') is not None:
            for (term_type_id, term_id) in query['terms']:
                # Terms of the same type accumulate under one repeated key.
                term_type_key_param = 'term[%s][]' % term_type_id
                url_params.setdefault(term_type_key_param, []).append(term_id)

        return self.get_json(url, params=url_params)

    @auth_required
    def details(self, torrent_id):
        # Fetch the full detail record for a single torrent.
        url = T411API_DETAILS_PATH + str(torrent_id)
        return self.get_json(url)
class ImdbEntrySet(MutableSet):
    """Mutable-set view over a user's IMDB list (watchlist or named list).

    Authenticates against IMDB's web login, caches credentials/list ids in
    the local DB, and reads list contents through the CSV export endpoint.
    """

    schema = {
        'type': 'object',
        'properties': {
            'login': {'type': 'string'},
            'password': {'type': 'string'},
            'list': {'type': 'string'},
            'force_language': {'type': 'string', 'default': 'en-us'},
        },
        'additionalProperties': False,
        'required': ['login', 'password', 'list'],
    }

    def __init__(self, config):
        self.config = config
        self._session = RequestSession()
        self._session.add_domain_limiter(TimedLimiter('imdb.com', '5 seconds'))
        self._session.headers.update({'Accept-Language': config.get('force_language', 'en-us')})
        self.user_id = None
        self.list_id = None
        self.cookies = None
        # Hidden anti-CSRF form value scraped from the profile page; required
        # for add/remove POSTs.
        self.hidden_value = None
        self._items = None
        self._authenticated = False

    @property
    def session(self):
        # Lazily authenticate on first real use of the HTTP session.
        if not self._authenticated:
            self.authenticate()
        return self._session

    def get_user_id_and_hidden_value(self, cookies=None):
        """Fetch the profile page; return the 'ur…' user id or None.

        Also stores the hidden '49e6c' form value needed for modifications.
        :param cookies: optional cached cookie dict to install on the session
        """
        try:
            if cookies:
                self._session.cookies = cookiejar_from_dict(cookies)
            # We need to allow for redirects here as it performs 1-2 redirects
            # before reaching the real profile url
            response = self._session.get('https://www.imdb.com/profile', allow_redirects=True)
        except RequestException as e:
            raise PluginError(str(e))

        user_id_match = re.search(r'ur\d+(?!\d)', response.url)
        if user_id_match:
            # extract the hidden form value that we need to do post requests later on
            try:
                soup = get_soup(response.text)
                self.hidden_value = soup.find('input', attrs={'id': '49e6c'})['value']
            except Exception as e:
                log.warning(
                    'Unable to locate the hidden form value '
                    '49e6c'
                    '. Without it, you might not be able to '
                    'add or remove items. %s',
                    e,
                )
        return user_id_match.group() if user_id_match else None

    def authenticate(self):
        """Authenticates a session with IMDB, and grabs any IDs needed for getting/modifying list."""
        cached_credentials = False
        with Session() as session:
            user = (
                session.query(IMDBListUser)
                .filter(IMDBListUser.user_name == self.config.get('login'))
                .one_or_none()
            )
            if user and user.cookies and user.user_id:
                log.debug('login credentials found in cache, testing')
                self.user_id = user.user_id
                if not self.get_user_id_and_hidden_value(cookies=user.cookies):
                    log.debug('cache credentials expired')
                    user.cookies = None
                    self._session.cookies.clear()
                else:
                    self.cookies = user.cookies
                    cached_credentials = True

            if not cached_credentials:
                log.debug('user credentials not found in cache or outdated, fetching from IMDB')
                url_credentials = (
                    'https://www.imdb.com/ap/signin?openid.return_to=https%3A%2F%2Fwww.imdb.com%2Fap-signin-'
                    'handler&openid.identity=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&'
                    'openid.assoc_handle=imdb_mobile_us&openid.mode=checkid_setup&openid.claimed_id=http%3A%'
                    '2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.ns=http%3A%2F%2Fspecs.ope'
                    'nid.net%2Fauth%2F2.0'
                )
                try:
                    # we need to get some cookies first
                    self._session.get('https://www.imdb.com')
                    r = self._session.get(url_credentials)
                except RequestException as e:
                    raise PluginError(e.args[0])
                soup = get_soup(r.content)
                form = soup.find('form', attrs={'name': 'signIn'})
                inputs = form.select('input')
                data = dict((i['name'], i.get('value')) for i in inputs if i.get('name'))
                data['email'] = self.config['login']
                data['password'] = self.config['password']
                action = form.get('action')
                log.debug('email=%s, password=%s', data['email'], data['password'])
                self._session.headers.update({'Referer': url_credentials})
                self._session.post(action, data=data)
                self._session.headers.update({'Referer': 'https://www.imdb.com/'})

                self.user_id = self.get_user_id_and_hidden_value()
                if not self.user_id:
                    raise plugin.PluginError('Login to IMDB failed. Check your credentials.')
                self.cookies = self._session.cookies.get_dict(domain='.imdb.com')

            # Get list ID
            if user:
                # NOTE: renamed loop variable from `list` to avoid shadowing
                # the builtin.
                for cached_list in user.lists:
                    if self.config['list'] == cached_list.list_name:
                        log.debug(
                            'found list ID %s matching list name %s in cache',
                            cached_list.list_id,
                            cached_list.list_name,
                        )
                        self.list_id = cached_list.list_id
            if not self.list_id:
                log.debug('could not find list ID in cache, fetching from IMDB')
                if self.config['list'] == 'watchlist':
                    data = {'consts[]': 'tt0133093', 'tracking_tag': 'watchlistRibbon'}
                    wl_data = self._session.post(
                        'https://www.imdb.com/list/_ajax/watchlist_has',
                        data=data,
                        cookies=self.cookies,
                    ).json()
                    try:
                        self.list_id = wl_data['list_id']
                    except KeyError:
                        raise PluginError(
                            'No list ID could be received. Please initialize list by '
                            'manually adding an item to it and try again'
                        )
                elif self.config['list'] in IMMUTABLE_LISTS or self.config['list'].startswith('ls'):
                    self.list_id = self.config['list']
                else:
                    data = {'tconst': 'tt0133093'}
                    list_data = self._session.post(
                        'https://www.imdb.com/list/_ajax/wlb_dropdown',
                        data=data,
                        cookies=self.cookies,
                    ).json()
                    for li in list_data['items']:
                        if li['wlb_text'] == self.config['list']:
                            self.list_id = li['data_list_id']
                            break
                    else:
                        raise plugin.PluginError('Could not find list %s' % self.config['list'])

            # Persist refreshed credentials and list id back to the cache.
            user = IMDBListUser(self.config['login'], self.user_id, self.cookies)
            imdb_list = IMDBListList(self.list_id, self.config['list'], self.user_id)
            user.lists.append(imdb_list)
            session.merge(user)

        self._authenticated = True

    def invalidate_cache(self):
        self._items = None

    @property
    def items(self):
        """Entries parsed from the list's CSV export (cached after first fetch)."""
        if self._items is None:
            log.debug('fetching items from IMDB')
            try:
                r = self.session.get(
                    'https://www.imdb.com/list/export?list_id=%s&author_id=%s'
                    % (self.list_id, self.user_id),
                    cookies=self.cookies,
                )
                lines = list(r.iter_lines(decode_unicode=True))
            except RequestException as e:
                raise PluginError(e.args[0])
            # Normalize headers to lowercase
            lines[0] = lines[0].lower()
            self._items = []
            for row in csv.DictReader(lines):
                log.debug('parsing line from csv: %s', row)
                try:
                    item_type = row['title type'].lower()
                    name = row['title']
                    year = int(row['year']) if row['year'] != '????' else None
                    created = (
                        datetime.strptime(row['created'], '%Y-%m-%d') if row.get('created') else None
                    )
                    modified = (
                        datetime.strptime(row['modified'], '%Y-%m-%d') if row.get('modified') else None
                    )
                    entry = Entry(
                        {
                            # BUGFIX: `year` is already int or None here, so the
                            # old `year != '????'` test was always true and
                            # produced "Title (None)" for missing years.
                            'title': '%s (%s)' % (name, year) if year is not None else name,
                            'url': row['url'],
                            'imdb_id': row['const'],
                            'imdb_url': row['url'],
                            'imdb_list_position': int(row['position']) if 'position' in row else None,
                            'imdb_list_created': created,
                            'imdb_list_modified': modified,
                            'imdb_list_description': row.get('description'),
                            'imdb_name': name,
                            'imdb_year': year,
                            'imdb_user_score': float(row['imdb rating']) if row['imdb rating'] else None,
                            'imdb_votes': int(row['num votes']) if row['num votes'] else None,
                            'imdb_genres': [genre.strip() for genre in row['genres'].split(',')],
                        }
                    )
                except ValueError as e:
                    log.debug('no movie row detected, skipping. %s. Exception: %s', row, e)
                    continue
                if item_type in MOVIE_TYPES:
                    entry['movie_name'] = name
                    entry['movie_year'] = year
                elif item_type in SERIES_TYPES:
                    entry['series_name'] = name
                    entry['series_year'] = year
                elif item_type in OTHER_TYPES:
                    entry['title'] = name
                else:
                    log.verbose('Unknown IMDB type entry received: %s. Skipping', item_type)
                    continue
                self._items.append(entry)
        return self._items

    @property
    def immutable(self):
        if self.config['list'] in IMMUTABLE_LISTS:
            return '%s list is not modifiable' % self.config['list']

    def _from_iterable(cls, it):
        # TODO: is this the right answer? the returned object won't have our
        # custom __contains__ logic
        return set(it)

    def __contains__(self, entry):
        return self.get(entry) is not None

    def __iter__(self):
        return iter(self.items)

    def discard(self, entry):
        """Remove the entry (matched by imdb_id) from the remote list."""
        if self.config['list'] in IMMUTABLE_LISTS:
            raise plugin.PluginError('%s lists are not modifiable' % ' and '.join(IMMUTABLE_LISTS))
        if 'imdb_id' not in entry:
            log.warning(
                'Cannot remove %s from imdb_list because it does not have an imdb_id',
                entry['title'],
            )
            return
        # Get the list item id
        item_ids = None
        urls = []
        if self.config['list'] == 'watchlist':
            method = 'delete'
            data = {'consts[]': entry['imdb_id'], 'tracking_tag': 'watchlistRibbon'}
            status = self.session.post(
                'https://www.imdb.com/list/_ajax/watchlist_has', data=data, cookies=self.cookies
            ).json()
            item_ids = status.get('has', {}).get(entry['imdb_id'])
            urls = ['https://www.imdb.com/watchlist/%s' % entry['imdb_id']]
        else:
            method = 'post'
            data = {'tconst': entry['imdb_id']}
            status = self.session.post(
                'https://www.imdb.com/list/_ajax/wlb_dropdown', data=data, cookies=self.cookies
            ).json()
            for a_list in status['items']:
                if a_list['data_list_id'] == self.list_id:
                    item_ids = a_list['data_list_item_ids']
                    break
            # BUGFIX: guard before iterating — the original looped over
            # item_ids while it could still be None (list not found),
            # raising TypeError before the warning below could fire.
            if item_ids:
                for item_id in item_ids:
                    urls.append('https://www.imdb.com/list/%s/li%s/delete' % (self.list_id, item_id))
        if not item_ids:
            log.warning('%s is not in list %s, cannot be removed', entry['imdb_id'], self.list_id)
            return

        for url in urls:
            log.debug(
                'found movie %s with ID %s in list %s, removing',
                entry['title'],
                entry['imdb_id'],
                self.list_id,
            )
            self.session.request(method, url, data={'49e6c': self.hidden_value}, cookies=self.cookies)
        # We don't need to invalidate our cache if we remove the item
        self._items = (
            [i for i in self._items if i['imdb_id'] != entry['imdb_id']] if self._items else None
        )

    def _add(self, entry):
        """Submit a new movie to imdb. (does not update cache)"""
        if self.config['list'] in IMMUTABLE_LISTS:
            raise plugin.PluginError('%s lists are not modifiable' % ' and '.join(IMMUTABLE_LISTS))
        if 'imdb_id' not in entry:
            log.warning('Cannot add %s to imdb_list because it does not have an imdb_id', entry['title'])
            return
        # Manually calling authenticate to fetch list_id and cookies and hidden form value
        self.authenticate()
        if self.config['list'] == 'watchlist':
            method = 'put'
            url = 'https://www.imdb.com/watchlist/%s' % entry['imdb_id']
        else:
            method = 'post'
            url = 'https://www.imdb.com/list/%s/%s/add' % (self.list_id, entry['imdb_id'])

        log.debug('adding title %s with ID %s to imdb %s', entry['title'], entry['imdb_id'], self.list_id)
        self.session.request(method, url, cookies=self.cookies, data={'49e6c': self.hidden_value})

    def add(self, entry):
        self._add(entry)
        # Invalidate the cache so that we get the canonical entry from the imdb list
        self.invalidate_cache()

    def __ior__(self, entries):
        # Bulk add: invalidate the cache only once at the end.
        for entry in entries:
            self._add(entry)
        self.invalidate_cache()
        return self

    def __len__(self):
        return len(self.items)

    @property
    def online(self):
        """ Set the online status of the plugin, online plugin should be
        treated differently in certain situations, like test mode"""
        return True

    def get(self, entry):
        """Return the cached list entry matching this entry's imdb_id, or None."""
        if not entry.get('imdb_id'):
            log.debug('entry %s does not have imdb_id, cannot compare to imdb list items', entry)
            return None
        log.debug('finding %s in imdb list', entry['imdb_id'])
        for e in self.items:
            if e['imdb_id'] == entry['imdb_id']:
                return e
        log.debug('could not find %s in imdb list items', entry['imdb_id'])
        return None
class UrlRewriteDescargas2020(object):
    """Descargas2020 urlrewriter and search."""

    schema = {'type': 'boolean', 'default': False}

    def __init__(self):
        # Lazily-created, rate-limited HTTP session (see session()).
        self.requests = None

    # urlrewriter API
    def url_rewritable(self, task, entry):
        """Return truthy when the entry URL belongs to one of the site's mirrors."""
        url = entry['url']
        rewritable_regex = r'^http:\/\/(www.)?(descargas2020|tvsinpagar|tumejortorrent|torrentlocura|torrentrapid).com\/.*'
        return re.match(rewritable_regex, url) and not url.endswith('.torrent')

    def session(self):
        # TODO: This is not used for all requests even ..
        if self.requests is None:
            self.requests = Session()
            # BUGFIX: the original configured the module-level `requests`
            # name instead of the newly created session, so the UA header
            # and domain limiter were never applied (and the calls raised
            # AttributeError on the module).
            self.requests.headers.update(
                {'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
            )
            self.requests.add_domain_limiter(TimedLimiter('descargas2020.com', '2 seconds'))
        return self.requests

    # urlrewriter API
    def url_rewrite(self, task, entry):
        entry['url'] = self.parse_download_page(entry['url'], task)

    @plugin.internet(log)
    def parse_download_page(self, url, task):
        """Scrape the torrent id out of a detail page and build the .torrent URL.

        :raises UrlRewritingError: on network/parse failure or missing id
        """
        log.verbose('Descargas2020 URL: %s', url)

        try:
            # BUGFIX: go through session() so the session exists and is
            # configured; self.requests is None until first use.
            page = self.session().get(url)
        except requests.RequestException as e:
            raise UrlRewritingError(e)
        try:
            soup = get_soup(page.text)
        except Exception as e:
            raise UrlRewritingError(e)

        torrent_id = None
        url_format = DESCARGAS2020_TORRENT_FORMAT

        # First attempt: the id lives in an inline JS `parametros` object.
        # (second fragment made a raw string: \s/\d are regex escapes)
        torrent_id_prog = re.compile(
            r"(?:parametros\s*=\s*\n?)\s*{\s*\n(?:\s*'\w+'\s*:.*\n)+\s*'(?:torrentID|id)"
            r"'\s*:\s*'(\d+)'"
        )
        torrent_ids = soup.findAll(text=torrent_id_prog)
        if torrent_ids:
            match = torrent_id_prog.search(torrent_ids[0])
            if match:
                torrent_id = match.group(1)

        # Fallback: extract it from the openTorrent() redirect target.
        if not torrent_id:
            log.debug('torrent ID not found, searching openTorrent script')
            match = re.search(
                r'function openTorrent.*\n.*\{.*(\n.*)+window\.location\.href =\s*\".*\/(\d+.*)\";',
                page.text,
                re.MULTILINE,
            )
            if match:
                torrent_id = match.group(2).rstrip('/')

        if not torrent_id:
            raise UrlRewritingError('Unable to locate torrent ID from url %s' % url)

        return url_format.format(torrent_id)

    def search(self, task, entry, config=None):
        """Search descargas2020.com; returns a set of result Entries."""
        if not config:
            log.debug('Descargas2020 disabled')
            return set()
        log.debug('Search Descargas2020')
        url_search = 'http://descargas2020.com/buscar'
        results = set()
        for search_string in entry.get('search_strings', [entry['title']]):
            query = normalize_unicode(search_string)
            # Strip a trailing "(YYYY)" year tag before querying.
            query = re.sub(r' \(\d\d\d\d\)$', '', query)
            log.debug('Searching Descargas2020 %s', query)
            # Site search chokes on accents; fold to plain ASCII.
            query = unicodedata.normalize('NFD', query).encode('ascii', 'ignore')
            data = {'q': query}
            try:
                response = task.requests.post(url_search, data=data)
            except requests.RequestException as e:
                log.error('Error searching Descargas2020: %s', e)
                return results
            content = response.content
            soup = get_soup(content)
            soup2 = soup.find('ul', attrs={'class': 'buscar-list'})
            children = soup2.findAll('a', href=True)
            for child in children:
                # Fresh name instead of shadowing the `entry` parameter.
                result = Entry()
                result['url'] = child['href']
                entry_title = child.find('h2')
                if entry_title is None:
                    log.debug('Ignore empty entry')
                    continue
                entry_title = entry_title.text
                if not entry_title:
                    continue
                try:
                    # Quality/language tag is the first bracketed group at the end.
                    entry_quality_lan = re.search(
                        r'.+ \[([^\]]+)\](\[[^\]]+\])+$', entry_title
                    ).group(1)
                except AttributeError:
                    log.debug('Quality not found')
                    continue
                entry_title = re.sub(r' \[.+]$', '', entry_title)
                result['title'] = entry_title + ' ' + entry_quality_lan
                results.add(result)
        log.debug('Finish search Descargas2020 with %d entries', len(results))
        return results
class T411RestClient(object):
    """A REST client for the T411 API (https-default variant).

    Provides token authentication and JSON accessors for the category tree,
    term tree, search and torrent-detail endpoints.
    """

    @staticmethod
    def template_url(url_scheme='https'):
        # URL template with a single '%s' slot for an API path.
        return url_scheme + '://' + T411API_DOMAIN_URL + '%s'

    @staticmethod
    def download_url(torrent_id, url_scheme='https'):
        # Full download URL for the given torrent id.
        return (T411RestClient.template_url(url_scheme) % T411API_DOWNLOAD_PATH) + str(torrent_id)

    def __init__(self, username=None, password=None, url_scheme='https'):
        self.credentials = {'username': username, 'password': password}
        self.api_token = None
        self.api_template_url = url_scheme + '://' + T411API_DOMAIN_URL + '%s'
        self.web_session = Session()

    def auth(self):
        """
        Request server to obtain an api token. Obtained token
        will be set for future usage of the client instance
        :return:
        """
        auth_url = self.api_template_url % T411API_AUTH_PATH
        response = self.web_session.post(auth_url, self.credentials)
        json_response = response.json()
        error_description = json_response.get('error', None)
        if error_description:
            log.error('%d - %s', json_response.get('code'), error_description)
        else:
            self.set_api_token(json_response.get('token'))

    def set_api_token(self, api_token):
        """
        Set the client to use an api token for subsequent requests.
        :param api_token: token string from the auth endpoint
        :return:
        """
        self.api_token = api_token
        self.web_session.headers.update({'Authorization': self.api_token})

    def is_authenticated(self):
        """
        :return: True if an api token is set.
        Note that the client doesn't check if the token
        is valid (expired or wrong).
        """
        return self.api_token is not None

    @staticmethod
    def raise_on_fail_response(json_response):
        """
        Raise an ApiError when the server payload carries an error message.
        :param json_response: decoded JSON payload, or None
        :raises ApiError: when an 'error' field is present
        """
        if json_response is None:
            # BUGFIX: was `pass`, which fell through to `.get()` on None and
            # raised AttributeError. Nothing to validate — return early.
            return
        error_name = json_response.get('error', None)
        error_code = json_response.get('code', None)
        if error_name is not None:
            raise ApiError(error_code, error_name)

    def get_json(self, path, params=None):
        """
        Common method for requesting a JSON response.
        :param path: API path substituted into the URL template
        :param params: optional query-string parameters
        :return: decoded JSON payload
        """
        url = self.api_template_url % path
        request = self.web_session.get(url, params=params)
        try:
            result = request.json()
        except ValueError:
            # The JSON document may still be salvageable from the last
            # line of a polluted response body.
            log.debug(
                "Response from %s was not JSON encoded. Attempting deep inspection...", path
            )
            try:
                last_line = request.text.splitlines()[-1]
                result = json.loads(last_line)
            except (ValueError, IndexError):
                log.warning(
                    "Server response doesn't contains any JSON encoded response."
                )
                raise
        T411RestClient.raise_on_fail_response(result)
        return result

    @auth_required
    def retrieve_category_tree(self):
        """
        Request T411 API for retrieving categories and their subcategories.
        :return: dict keyed by category id
        """
        return self.get_json(T411API_CATEGORY_TREE_PATH)

    @auth_required
    def retrieve_terms_tree(self):
        """
        Request T411 API for retrieving term types and terms.
        :return: dict keyed by category id
        """
        return self.get_json(T411API_TERMS_PATH)

    @auth_required
    def search(self, query):
        """
        Search torrent
        :param query: dict
        :param query['category_id']: Int optional
        :param query['result_per_page']: Int optional
        :param query['page_index']: Int optional
        :param query['terms']: iterable of (term type id, term id) pairs
        :return: dict
        """
        url = T411API_SEARCH_PATH
        if query.get('expression') is not None:
            url += query['expression']

        url_params = {}
        if query.get('category_id') is not None:
            # using cat or cid will do the same result
            # but using cid without query expression will not broke
            # results
            url_params['cid'] = query['category_id']
        if query.get('result_per_page') is not None:
            url_params['limit'] = query['result_per_page']
        if query.get('page_index') is not None:
            url_params['offset'] = query['page_index']
        if query.get('terms') is not None:
            for (term_type_id, term_id) in query['terms']:
                # Terms of the same type accumulate under one repeated key.
                term_type_key_param = 'term[%s][]' % term_type_id
                url_params.setdefault(term_type_key_param, []).append(term_id)

        return self.get_json(url, params=url_params)

    @auth_required
    def details(self, torrent_id):
        # Fetch the full detail record for a single torrent.
        url = T411API_DETAILS_PATH + str(torrent_id)
        return self.get_json(url)
class ImdbEntrySet(MutableSet):
    # Mutable-set view over an IMDB user list, backed by IMDB's web login,
    # AJAX list endpoints, and the CSV export.
    # NOTE(review): this is legacy Python-2 code — see the `unicode(...)`
    # call in `items` below; it will NameError on Python 3.

    schema = {
        'type': 'object',
        'properties': {
            'login': {'type': 'string'},
            'password': {'type': 'string'},
            'list': {'type': 'string'},
            'force_language': {'type': 'string', 'default': 'en-us'}
        },
        'additionalProperties': False,
        'required': ['login', 'password', 'list']
    }

    def __init__(self, config):
        self.config = config
        self._session = Session()
        # Throttle requests to imdb.com to one per five seconds.
        self._session.add_domain_limiter(TimedLimiter('imdb.com', '5 seconds'))
        self._session.headers = {'Accept-Language': config.get('force_language', 'en-us')}
        self.user_id = None
        self.list_id = None
        self._items = None
        self._authenticated = False

    @property
    def session(self):
        # Lazily authenticate on first real use of the HTTP session.
        if not self._authenticated:
            self.authenticate()
        return self._session

    def authenticate(self):
        """Authenticates a session with imdb, and grabs any IDs needed for getting/modifying list."""
        try:
            # Load the OpenID sign-in form (mobile flow).
            r = self._session.get(
                'https://www.imdb.com/ap/signin?openid.return_to=https%3A%2F%2Fwww.imdb.com%2Fap-signin-'
                'handler&openid.identity=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&'
                'openid.assoc_handle=imdb_mobile_us&openid.mode=checkid_setup&openid.claimed_id=http%3A%'
                '2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.ns=http%3A%2F%2Fspecs.ope'
                'nid.net%2Fauth%2F2.0')
        except ConnectionError as e:
            raise PluginError(e.args[0])
        soup = get_soup(r.content)
        # Collect all hidden form fields, then overlay the credentials.
        inputs = soup.select('form#ap_signin_form input')
        data = dict((i['name'], i.get('value')) for i in inputs if i.get('name'))
        data['email'] = self.config['login']
        data['password'] = self.config['password']
        # NOTE(review): `d` (the login response) is never used afterwards.
        d = self._session.post('https://www.imdb.com/ap/signin', data=data)
        # Get user id by extracting from redirect url
        r = self._session.head('http://www.imdb.com/profile', allow_redirects=False)
        # A redirect back to a login page means the credentials were rejected.
        if not r.headers.get('location') or 'login' in r.headers['location']:
            raise plugin.PluginError('Login to imdb failed. Check your credentials.')
        # The profile redirect URL embeds the 'ur<digits>' user id.
        self.user_id = re.search('ur\d+(?!\d)', r.headers['location']).group()
        # Get list ID
        if self.config['list'] == 'watchlist':
            # Probe the watchlist endpoint with a known title (tt0133093,
            # The Matrix) just to get the list id back in the response.
            data = {'consts[]': 'tt0133093', 'tracking_tag': 'watchlistRibbon'}
            wl_data = self._session.post('http://www.imdb.com/list/_ajax/watchlist_has',
                                         data=data).json()
            try:
                self.list_id = wl_data['list_id']
            except KeyError:
                raise PluginError('No list ID could be received. Please initialize list by '
                                  'manually adding an item to it and try again')
        elif self.config['list'] in IMMUTABLE_LISTS or self.config['list'].startswith('ls'):
            # 'ls…' values are already raw list ids.
            self.list_id = self.config['list']
        else:
            # Resolve a named list via the add-to-list dropdown contents.
            data = {'tconst': 'tt0133093'}
            list_data = self._session.post('http://www.imdb.com/list/_ajax/wlb_dropdown',
                                           data=data).json()
            for li in list_data['items']:
                if li['wlb_text'] == self.config['list']:
                    self.list_id = li['data_list_id']
                    break
            else:
                raise plugin.PluginError('Could not find list %s' % self.config['list'])

        self._authenticated = True

    def invalidate_cache(self):
        # Drop the cached items so the next access refetches from IMDB.
        self._items = None

    @property
    def items(self):
        # Entries parsed from the list's CSV export; cached after first fetch.
        if self._items is None:
            try:
                r = self.session.get('http://www.imdb.com/list/export?list_id=%s&author_id=%s' %
                                     (self.list_id, self.user_id))
            except HTTPError as e:
                raise PluginError(e.args[0])
            lines = r.iter_lines()
            # Throw away first line with headers
            next(lines)
            self._items = []
            for row in csv.reader(lines):
                # Py2-style decode of each CSV cell; breaks on Python 3.
                row = [unicode(cell, 'utf-8') for cell in row]
                log.debug('parsing line from csv: %s', ', '.join(row))
                # The legacy export format has exactly 16 columns; anything
                # else is not a movie row.
                if not len(row) == 16:
                    log.debug('no movie row detected, skipping. %s', ', '.join(row))
                    continue
                # Column layout (legacy export): 0=position, 1=const,
                # 2=created, 3=modified, 4=description, 5=title, 8=user
                # rating, 9=IMDB rating, 11=year, 12=genres, 13=votes, 15=url.
                entry = Entry({
                    'title': '%s (%s)' % (row[5], row[11]) if row[11] != '????' else '%s' % row[5],
                    'url': row[15],
                    'imdb_id': row[1],
                    'imdb_url': row[15],
                    'imdb_list_position': int(row[0]),
                    'imdb_list_created': datetime.strptime(row[2], '%a %b %d %H:%M:%S %Y') if row[2] else None,
                    'imdb_list_modified': datetime.strptime(row[3], '%a %b %d %H:%M:%S %Y') if row[3] else None,
                    'imdb_list_description': row[4],
                    'imdb_name': row[5],
                    'movie_name': row[5],
                    'imdb_year': int(row[11]) if row[11] != '????' else None,
                    'movie_year': int(row[11]) if row[11] != '????' else None,
                    'imdb_score': float(row[9]) if row[9] else None,
                    'imdb_user_score': float(row[8]) if row[8] else None,
                    'imdb_votes': int(row[13]) if row[13] else None,
                    'imdb_genres': [genre.strip() for genre in row[12].split(',')]
                })
                self._items.append(entry)
        return self._items

    @property
    def immutable(self):
        # Non-None return value marks the list as read-only.
        if self.config['list'] in IMMUTABLE_LISTS:
            return '%s list is not modifiable' % self.config['list']

    def _from_iterable(cls, it):
        # TODO: is this the right answer? the returned object won't have our custom __contains__ logic
        return set(it)

    def __contains__(self, entry):
        # Membership is defined purely by imdb_id equality.
        if not entry.get('imdb_id'):
            log.debug('entry %s does not have imdb_id, skipping', entry)
            return False
        return any(e['imdb_id'] == entry['imdb_id'] for e in self.items)

    def __iter__(self):
        return iter(self.items)

    def discard(self, entry):
        # Remove the entry (matched by imdb_id) from the remote list.
        if self.config['list'] in IMMUTABLE_LISTS:
            raise plugin.PluginError('%s lists are not modifiable' % ' and '.join(IMMUTABLE_LISTS))
        if 'imdb_id' not in entry:
            log.warning('Cannot remove %s from imdb_list because it does not have an imdb_id',
                        entry['title'])
            return
        # Get the list item id
        item_ids = None
        if self.config['list'] == 'watchlist':
            data = {'consts[]': entry['imdb_id'], 'tracking_tag': 'watchlistRibbon'}
            status = self.session.post('http://www.imdb.com/list/_ajax/watchlist_has',
                                       data=data).json()
            item_ids = status.get('has', {}).get(entry['imdb_id'])
        else:
            data = {'tconst': entry['imdb_id']}
            status = self.session.post('http://www.imdb.com/list/_ajax/wlb_dropdown',
                                       data=data).json()
            for a_list in status['items']:
                if a_list['data_list_id'] == self.list_id:
                    item_ids = a_list['data_list_item_ids']
                    break
        if not item_ids:
            log.warning('%s is not in list %s, cannot be removed', entry['imdb_id'], self.list_id)
            return
        data = {
            'action': 'delete',
            'list_id': self.list_id,
            'ref_tag': 'title'
        }
        for item_id in item_ids:
            # One delete request per list-item id.
            self.session.post('http://www.imdb.com/list/_ajax/edit',
                              data=dict(data, list_item_id=item_id))
        # We don't need to invalidate our cache if we remove the item
        self._items = [i for i in self._items if i['imdb_id'] != entry['imdb_id']] if self._items else None

    def add(self, entry):
        # Add the entry (by imdb_id) to the remote list.
        if self.config['list'] in IMMUTABLE_LISTS:
            raise plugin.PluginError('%s lists are not modifiable' % ' and '.join(IMMUTABLE_LISTS))
        if 'imdb_id' not in entry:
            log.warning('Cannot add %s to imdb_list because it does not have an imdb_id',
                        entry['title'])
            return
        data = {
            'const': entry['imdb_id'],
            'list_id': self.list_id,
            'ref_tag': 'title'
        }
        self.session.post('http://www.imdb.com/list/_ajax/edit', data=data)
        # Invalidate cache so that new movie info will be grabbed
        self.invalidate_cache()

    def __len__(self):
        return len(self.items)

    @property
    def online(self):
        """ Set the online status of the plugin, online plugin should be
        treated differently in certain situations, like test mode"""
        return True
class ImdbEntrySet(MutableSet):
    """Mutable-set view over a user's IMDB list.

    Authenticates lazily on first use (cookies are cached in the local DB via
    ``IMDBListUser``) and caches the fetched list items until
    :meth:`invalidate_cache` is called.
    """

    schema = {
        'type': 'object',
        'properties': {
            'login': {'type': 'string'},
            'password': {'type': 'string'},
            'list': {'type': 'string'},
            'force_language': {'type': 'string', 'default': 'en-us'}
        },
        'additionalProperties': False,
        'required': ['login', 'password', 'list']
    }

    def __init__(self, config):
        self.config = config
        self._session = RequestSession()
        # Be polite to imdb.com: at most one request every 5 seconds.
        self._session.add_domain_limiter(TimedLimiter('imdb.com', '5 seconds'))
        self._session.headers.update({'Accept-Language': config.get('force_language', 'en-us')})
        self.user_id = None
        self.list_id = None
        self.cookies = None
        self._items = None  # cached list of Entry objects; None means "not fetched yet"
        self._authenticated = False

    @property
    def session(self):
        """Return the request session, authenticating on first access."""
        if not self._authenticated:
            self.authenticate()
        return self._session

    def authenticate(self):
        """Authenticate a session with IMDB, and grab any IDs needed for getting/modifying list."""
        cached_credentials = False
        with Session() as session:
            user = session.query(IMDBListUser).filter(
                IMDBListUser.user_name == self.config.get('login')).one_or_none()
            if user and user.cookies and user.user_id:
                log.debug('login credentials found in cache, testing')
                self.cookies = user.cookies
                self.user_id = user.user_id
                # A profile request redirects to the login page when the cached
                # cookies are no longer valid.
                r = self._session.head('http://www.imdb.com/profile', allow_redirects=False,
                                       cookies=self.cookies)
                if not r.headers.get('location') or 'login' in r.headers['location']:
                    log.debug('cache credentials expired')
                else:
                    cached_credentials = True
            if not cached_credentials:
                log.debug('user credentials not found in cache or outdated, fetching from IMDB')
                url_credentials = (
                    'https://www.imdb.com/ap/signin?openid.return_to=https%3A%2F%2Fwww.imdb.com%2Fap-signin-'
                    'handler&openid.identity=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&'
                    'openid.assoc_handle=imdb_mobile_us&openid.mode=checkid_setup&openid.claimed_id=http%3A%'
                    '2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.ns=http%3A%2F%2Fspecs.ope'
                    'nid.net%2Fauth%2F2.0'
                )
                try:
                    r = self._session.get(url_credentials)
                except RequestException as e:
                    raise PluginError(e.args[0])
                soup = get_soup(r.content)
                inputs = soup.select('form#ap_signin_form input')
                data = dict((i['name'], i.get('value')) for i in inputs if i.get('name'))
                data['email'] = self.config['login']
                data['password'] = self.config['password']
                action = soup.find('form', id='ap_signin_form').get('action')
                # SECURITY FIX: never write the password to the debug log.
                log.debug('email=%s', data['email'])
                self._session.headers.update({'Referer': url_credentials})
                login_response = self._session.post(action, data=data)
                self._session.headers.update({'Referer': 'http://www.imdb.com/'})
                # Get user id by extracting from redirect url
                r = self._session.head('http://www.imdb.com/profile', allow_redirects=False)
                if not r.headers.get('location') or 'login' in r.headers['location']:
                    raise plugin.PluginError('Login to IMDB failed. Check your credentials.')
                # raw string: '\d' in a plain literal is an invalid escape sequence
                self.user_id = re.search(r'ur\d+(?!\d)', r.headers['location']).group()
                self.cookies = dict(login_response.cookies)
            # Get list ID
            if user:
                for cached_list in user.lists:  # renamed: don't shadow builtin `list`
                    if self.config['list'] == cached_list.list_name:
                        log.debug('found list ID %s matching list name %s in cache',
                                  cached_list.list_id, cached_list.list_name)
                        self.list_id = cached_list.list_id
            if not self.list_id:
                log.debug('could not find list ID in cache, fetching from IMDB')
                if self.config['list'] == 'watchlist':
                    # Probe with a known tconst just to get the watchlist id back.
                    data = {'consts[]': 'tt0133093', 'tracking_tag': 'watchlistRibbon'}
                    wl_data = self._session.post('http://www.imdb.com/list/_ajax/watchlist_has',
                                                 data=data, cookies=self.cookies).json()
                    try:
                        self.list_id = wl_data['list_id']
                    except KeyError:
                        raise PluginError('No list ID could be received. Please initialize list by '
                                          'manually adding an item to it and try again')
                elif self.config['list'] in IMMUTABLE_LISTS or self.config['list'].startswith('ls'):
                    self.list_id = self.config['list']
                else:
                    data = {'tconst': 'tt0133093'}
                    list_data = self._session.post('http://www.imdb.com/list/_ajax/wlb_dropdown',
                                                   data=data, cookies=self.cookies).json()
                    for li in list_data['items']:
                        if li['wlb_text'] == self.config['list']:
                            self.list_id = li['data_list_id']
                            break
                    else:
                        raise plugin.PluginError('Could not find list %s' % self.config['list'])

            user = IMDBListUser(self.config['login'], self.user_id, self.cookies)
            imdb_list = IMDBListList(self.list_id, self.config['list'], self.user_id)
            user.lists.append(imdb_list)
            session.merge(user)

        self._authenticated = True

    def invalidate_cache(self):
        """Drop cached items so they are re-fetched on next access."""
        self._items = None

    @property
    def items(self):
        """Return (and cache) all entries parsed from the IMDB list CSV export."""
        if self._items is None:
            log.debug('fetching items from IMDB')
            try:
                r = self.session.get('http://www.imdb.com/list/export?list_id=%s&author_id=%s' %
                                     (self.list_id, self.user_id), cookies=self.cookies)
            except RequestException as e:
                raise PluginError(e.args[0])
            lines = r.iter_lines(decode_unicode=True)
            # Throw away first line with headers
            next(lines)
            self._items = []
            for row in csv_reader(lines):
                log.debug('parsing line from csv: %s', ', '.join(row))
                if not len(row) == 16:
                    log.debug('no movie row detected, skipping. %s', ', '.join(row))
                    continue
                entry = Entry({
                    'title': '%s (%s)' % (row[5], row[11]) if row[11] != '????' else '%s' % row[5],
                    'url': row[15],
                    'imdb_id': row[1],
                    'imdb_url': row[15],
                    'imdb_list_position': int(row[0]),
                    'imdb_list_created': datetime.strptime(row[2], '%a %b %d %H:%M:%S %Y') if row[2] else None,
                    'imdb_list_modified': datetime.strptime(row[3], '%a %b %d %H:%M:%S %Y') if row[3] else None,
                    'imdb_list_description': row[4],
                    'imdb_name': row[5],
                    'imdb_year': int(row[11]) if row[11] != '????' else None,
                    'imdb_score': float(row[9]) if row[9] else None,
                    'imdb_user_score': float(row[8]) if row[8] else None,
                    'imdb_votes': int(row[13]) if row[13] else None,
                    'imdb_genres': [genre.strip() for genre in row[12].split(',')]
                })
                item_type = row[6].lower()
                name = row[5]
                year = int(row[11]) if row[11] != '????' else None
                # Map the IMDB item type onto the fields downstream plugins expect.
                if item_type in MOVIE_TYPES:
                    entry['movie_name'] = name
                    entry['movie_year'] = year
                elif item_type in SERIES_TYPES:
                    entry['series_name'] = name
                    entry['series_year'] = year
                elif item_type in OTHER_TYPES:
                    entry['title'] = name
                else:
                    log.verbose('Unknown IMDB type entry received: %s. Skipping', item_type)
                    continue
                self._items.append(entry)
        return self._items

    @property
    def immutable(self):
        if self.config['list'] in IMMUTABLE_LISTS:
            return '%s list is not modifiable' % self.config['list']

    # NOTE(review): MutableSet declares _from_iterable as a classmethod; this
    # override is only ever called bound to an instance, so the bare function
    # works, but confirm before adding @classmethod.
    def _from_iterable(cls, it):
        # TODO: is this the right answer? the returned object won't have our custom __contains__ logic
        return set(it)

    def _find_movie(self, entry):
        """Return the cached list item matching entry's imdb_id, or None."""
        log.debug('trying to match %s to existing list items', entry['imdb_id'])
        for e in self.items:
            if e['imdb_id'] == entry['imdb_id']:
                return e
        log.debug('could not match %s to existing list items', entry['imdb_id'])

    def __contains__(self, entry):
        if not entry.get('imdb_id'):
            log.debug('entry %s does not have imdb_id, skipping', entry)
            return False
        return self._find_movie(entry) is not None

    def __iter__(self):
        return iter(self.items)

    def discard(self, entry):
        """Remove *entry* (matched by imdb_id) from the remote IMDB list."""
        if self.config['list'] in IMMUTABLE_LISTS:
            raise plugin.PluginError('%s lists are not modifiable' % ' and '.join(IMMUTABLE_LISTS))
        if 'imdb_id' not in entry:
            log.warning('Cannot remove %s from imdb_list because it does not have an imdb_id',
                        entry['title'])
            return
        # Get the list item id
        item_ids = None
        if self.config['list'] == 'watchlist':
            data = {'consts[]': entry['imdb_id'], 'tracking_tag': 'watchlistRibbon'}
            status = self.session.post('http://www.imdb.com/list/_ajax/watchlist_has', data=data,
                                       cookies=self.cookies).json()
            item_ids = status.get('has', {}).get(entry['imdb_id'])
        else:
            data = {'tconst': entry['imdb_id']}
            status = self.session.post('http://www.imdb.com/list/_ajax/wlb_dropdown', data=data,
                                       cookies=self.cookies).json()
            for a_list in status['items']:
                if a_list['data_list_id'] == self.list_id:
                    item_ids = a_list['data_list_item_ids']
                    break
        if not item_ids:
            log.warning('%s is not in list %s, cannot be removed', entry['imdb_id'], self.list_id)
            return
        data = {
            'action': 'delete',
            'list_id': self.list_id,
            'ref_tag': 'title'
        }
        for item_id in item_ids:
            log.debug('found movie %s with ID %s in list %s, removing', entry['title'],
                      entry['imdb_id'], self.list_id)
            self.session.post('http://www.imdb.com/list/_ajax/edit',
                              data=dict(data, list_item_id=item_id), cookies=self.cookies)
        # We don't need to invalidate our cache if we remove the item
        self._items = [i for i in self._items if i['imdb_id'] != entry['imdb_id']] if self._items else None

    def _add(self, entry):
        """Submit a new movie to imdb. (does not update cache)"""
        if self.config['list'] in IMMUTABLE_LISTS:
            raise plugin.PluginError('%s lists are not modifiable' % ' and '.join(IMMUTABLE_LISTS))
        if 'imdb_id' not in entry:
            log.warning('Cannot add %s to imdb_list because it does not have an imdb_id',
                        entry['title'])
            return
        # Manually calling authenticate to fetch list_id and cookies
        self.authenticate()
        data = {
            'const': entry['imdb_id'],
            'list_id': self.list_id,
            'ref_tag': 'title'
        }
        log.debug('adding title %s with ID %s to imdb %s', entry['title'], entry['imdb_id'],
                  self.list_id)
        self.session.post('http://www.imdb.com/list/_ajax/edit', data=data, cookies=self.cookies)

    def add(self, entry):
        self._add(entry)
        # Invalidate the cache so that we get the canonical entry from the imdb list
        self.invalidate_cache()

    def __ior__(self, entries):
        for entry in entries:
            self._add(entry)
        self.invalidate_cache()
        return self

    def __len__(self):
        return len(self.items)

    @property
    def online(self):
        """ Set the online status of the plugin, online plugin should be treated
        differently in certain situations, like test mode"""
        return True

    def get(self, entry):
        return self._find_movie(entry)
class ImdbEntrySet(MutableSet):
    """Mutable-set view over a user's IMDB list.

    Authenticates lazily on first use (cookies are cached in the local DB via
    ``IMDBListUser``) and caches the fetched list items until
    :meth:`invalidate_cache` is called.
    """

    schema = {
        'type': 'object',
        'properties': {
            'login': {'type': 'string'},
            'password': {'type': 'string'},
            'list': {'type': 'string'},
            'force_language': {'type': 'string', 'default': 'en-us'}
        },
        'additionalProperties': False,
        'required': ['login', 'password', 'list']
    }

    def __init__(self, config):
        self.config = config
        self._session = RequestSession()
        # Be polite to imdb.com: at most one request every 5 seconds.
        self._session.add_domain_limiter(TimedLimiter('imdb.com', '5 seconds'))
        self._session.headers.update({'Accept-Language': config.get('force_language', 'en-us')})
        self.user_id = None
        self.list_id = None
        self.cookies = None
        self._items = None  # cached list of Entry objects; None means "not fetched yet"
        self._authenticated = False

    @property
    def session(self):
        """Return the request session, authenticating on first access."""
        if not self._authenticated:
            self.authenticate()
        return self._session

    def authenticate(self):
        """Authenticates a session with IMDB, and grabs any IDs needed for getting/modifying list."""
        cached_credentials = False
        with Session() as session:
            user = session.query(IMDBListUser).filter(
                IMDBListUser.user_name == self.config.get('login')).one_or_none()
            if user and user.cookies and user.user_id:
                log.debug('login credentials found in cache, testing')
                self.cookies = user.cookies
                self.user_id = user.user_id
                # A profile request redirects to the login page when the cached
                # cookies are no longer valid.
                r = self._session.head('http://www.imdb.com/profile', allow_redirects=False,
                                       cookies=self.cookies)
                if not r.headers.get('location') or 'login' in r.headers['location']:
                    log.debug('cache credentials expired')
                else:
                    cached_credentials = True
            if not cached_credentials:
                log.debug('user credentials not found in cache or outdated, fetching from IMDB')
                url_credentials = (
                    'https://www.imdb.com/ap/signin?openid.return_to=https%3A%2F%2Fwww.imdb.com%2Fap-signin-'
                    'handler&openid.identity=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&'
                    'openid.assoc_handle=imdb_mobile_us&openid.mode=checkid_setup&openid.claimed_id=http%3A%'
                    '2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.ns=http%3A%2F%2Fspecs.ope'
                    'nid.net%2Fauth%2F2.0'
                )
                try:
                    r = self._session.get(url_credentials)
                except RequestException as e:
                    raise PluginError(e.args[0])
                soup = get_soup(r.content)
                inputs = soup.select('form#ap_signin_form input')
                data = dict((i['name'], i.get('value')) for i in inputs if i.get('name'))
                data['email'] = self.config['login']
                data['password'] = self.config['password']
                action = soup.find('form', id='ap_signin_form').get('action')
                # SECURITY FIX: never write the password to the debug log.
                log.debug('email=%s', data['email'])
                self._session.headers.update({'Referer': url_credentials})
                login_response = self._session.post(action, data=data)
                self._session.headers.update({'Referer': 'http://www.imdb.com/'})
                # Get user id by extracting from redirect url
                r = self._session.head('http://www.imdb.com/profile', allow_redirects=False)
                if not r.headers.get('location') or 'login' in r.headers['location']:
                    raise plugin.PluginError('Login to IMDB failed. Check your credentials.')
                # raw string: '\d' in a plain literal is an invalid escape sequence
                self.user_id = re.search(r'ur\d+(?!\d)', r.headers['location']).group()
                self.cookies = dict(login_response.cookies)
            # Get list ID
            if user:
                for cached_list in user.lists:  # renamed: don't shadow builtin `list`
                    if self.config['list'] == cached_list.list_name:
                        log.debug('found list ID %s matching list name %s in cache',
                                  cached_list.list_id, cached_list.list_name)
                        self.list_id = cached_list.list_id
            if not self.list_id:
                log.debug('could not find list ID in cache, fetching from IMDB')
                if self.config['list'] == 'watchlist':
                    # Probe with a known tconst just to get the watchlist id back.
                    data = {'consts[]': 'tt0133093', 'tracking_tag': 'watchlistRibbon'}
                    wl_data = self._session.post('http://www.imdb.com/list/_ajax/watchlist_has',
                                                 data=data, cookies=self.cookies).json()
                    try:
                        self.list_id = wl_data['list_id']
                    except KeyError:
                        raise PluginError('No list ID could be received. Please initialize list by '
                                          'manually adding an item to it and try again')
                elif self.config['list'] in IMMUTABLE_LISTS or self.config['list'].startswith('ls'):
                    self.list_id = self.config['list']
                else:
                    data = {'tconst': 'tt0133093'}
                    list_data = self._session.post('http://www.imdb.com/list/_ajax/wlb_dropdown',
                                                   data=data, cookies=self.cookies).json()
                    for li in list_data['items']:
                        if li['wlb_text'] == self.config['list']:
                            self.list_id = li['data_list_id']
                            break
                    else:
                        raise plugin.PluginError('Could not find list %s' % self.config['list'])

            user = IMDBListUser(self.config['login'], self.user_id, self.cookies)
            imdb_list = IMDBListList(self.list_id, self.config['list'], self.user_id)
            user.lists.append(imdb_list)
            session.merge(user)

        self._authenticated = True

    def invalidate_cache(self):
        """Drop cached items so they are re-fetched on next access."""
        self._items = None

    @property
    def items(self):
        """Return (and cache) all entries parsed from the IMDB list CSV export."""
        if self._items is None:
            log.debug('fetching items from IMDB')
            try:
                r = self.session.get('http://www.imdb.com/list/export?list_id=%s&author_id=%s' %
                                     (self.list_id, self.user_id), cookies=self.cookies)
            except RequestException as e:
                raise PluginError(e.args[0])
            lines = r.iter_lines(decode_unicode=True)
            # Throw away first line with headers
            next(lines)
            self._items = []
            for row in csv_reader(lines):
                log.debug('parsing line from csv: %s', ', '.join(row))
                if not len(row) == 16:
                    log.debug('no movie row detected, skipping. %s', ', '.join(row))
                    continue
                entry = Entry({
                    'title': '%s (%s)' % (row[5], row[11]) if row[11] != '????' else '%s' % row[5],
                    'url': row[15],
                    'imdb_id': row[1],
                    'imdb_url': row[15],
                    'imdb_list_position': int(row[0]),
                    'imdb_list_created': datetime.strptime(row[2], '%a %b %d %H:%M:%S %Y') if row[2] else None,
                    'imdb_list_modified': datetime.strptime(row[3], '%a %b %d %H:%M:%S %Y') if row[3] else None,
                    'imdb_list_description': row[4],
                    'imdb_name': row[5],
                    'imdb_year': int(row[11]) if row[11] != '????' else None,
                    'imdb_score': float(row[9]) if row[9] else None,
                    'imdb_user_score': float(row[8]) if row[8] else None,
                    'imdb_votes': int(row[13]) if row[13] else None,
                    'imdb_genres': [genre.strip() for genre in row[12].split(',')]
                })
                item_type = row[6].lower()
                name = row[5]
                year = int(row[11]) if row[11] != '????' else None
                # Map the IMDB item type onto the fields downstream plugins expect.
                if item_type in MOVIE_TYPES:
                    entry['movie_name'] = name
                    entry['movie_year'] = year
                elif item_type in SERIES_TYPES:
                    entry['series_name'] = name
                    entry['series_year'] = year
                elif item_type in OTHER_TYPES:
                    entry['title'] = name
                else:
                    log.verbose('Unknown IMDB type entry received: %s. Skipping', item_type)
                    continue
                self._items.append(entry)
        return self._items

    @property
    def immutable(self):
        if self.config['list'] in IMMUTABLE_LISTS:
            return '%s list is not modifiable' % self.config['list']

    # NOTE(review): MutableSet declares _from_iterable as a classmethod; this
    # override is only ever called bound to an instance, so the bare function
    # works, but confirm before adding @classmethod.
    def _from_iterable(cls, it):
        # TODO: is this the right answer? the returned object won't have our custom __contains__ logic
        return set(it)

    def _find_movie(self, entry):
        """Return the cached list item matching entry's imdb_id, or None."""
        log.debug('trying to match %s to existing list items', entry['imdb_id'])
        for e in self.items:
            if e['imdb_id'] == entry['imdb_id']:
                return e
        log.debug('could not match %s to existing list items', entry['imdb_id'])

    def __contains__(self, entry):
        if not entry.get('imdb_id'):
            log.debug('entry %s does not have imdb_id, skipping', entry)
            return False
        return self._find_movie(entry) is not None

    def __iter__(self):
        return iter(self.items)

    def discard(self, entry):
        """Remove *entry* (matched by imdb_id) from the remote IMDB list."""
        if self.config['list'] in IMMUTABLE_LISTS:
            raise plugin.PluginError('%s lists are not modifiable' % ' and '.join(IMMUTABLE_LISTS))
        if 'imdb_id' not in entry:
            log.warning('Cannot remove %s from imdb_list because it does not have an imdb_id',
                        entry['title'])
            return
        # Get the list item id
        item_ids = None
        if self.config['list'] == 'watchlist':
            data = {'consts[]': entry['imdb_id'], 'tracking_tag': 'watchlistRibbon'}
            status = self.session.post('http://www.imdb.com/list/_ajax/watchlist_has', data=data,
                                       cookies=self.cookies).json()
            item_ids = status.get('has', {}).get(entry['imdb_id'])
        else:
            data = {'tconst': entry['imdb_id']}
            status = self.session.post('http://www.imdb.com/list/_ajax/wlb_dropdown', data=data,
                                       cookies=self.cookies).json()
            for a_list in status['items']:
                if a_list['data_list_id'] == self.list_id:
                    item_ids = a_list['data_list_item_ids']
                    break
        if not item_ids:
            log.warning('%s is not in list %s, cannot be removed', entry['imdb_id'], self.list_id)
            return
        data = {
            'action': 'delete',
            'list_id': self.list_id,
            'ref_tag': 'title'
        }
        for item_id in item_ids:
            log.debug('found movie %s with ID %s in list %s, removing', entry['title'],
                      entry['imdb_id'], self.list_id)
            self.session.post('http://www.imdb.com/list/_ajax/edit',
                              data=dict(data, list_item_id=item_id), cookies=self.cookies)
        # We don't need to invalidate our cache if we remove the item
        self._items = [i for i in self._items if i['imdb_id'] != entry['imdb_id']] if self._items else None

    def _add(self, entry):
        """Submit a new movie to imdb. (does not update cache)"""
        if self.config['list'] in IMMUTABLE_LISTS:
            raise plugin.PluginError('%s lists are not modifiable' % ' and '.join(IMMUTABLE_LISTS))
        if 'imdb_id' not in entry:
            log.warning('Cannot add %s to imdb_list because it does not have an imdb_id',
                        entry['title'])
            return
        # Manually calling authenticate to fetch list_id and cookies
        self.authenticate()
        data = {
            'const': entry['imdb_id'],
            'list_id': self.list_id,
            'ref_tag': 'title'
        }
        log.debug('adding title %s with ID %s to imdb %s', entry['title'], entry['imdb_id'],
                  self.list_id)
        self.session.post('http://www.imdb.com/list/_ajax/edit', data=data, cookies=self.cookies)

    def add(self, entry):
        self._add(entry)
        # Invalidate the cache so that we get the canonical entry from the imdb list
        self.invalidate_cache()

    def __ior__(self, entries):
        for entry in entries:
            self._add(entry)
        self.invalidate_cache()
        return self

    def __len__(self):
        return len(self.items)

    @property
    def online(self):
        """ Set the online status of the plugin, online plugin should be
        treated differently in certain situations, like test mode"""
        return True

    def get(self, entry):
        return self._find_movie(entry)
episode.update() except LookupError, e: log.warning( 'Error while updating from tvdb (%s), using cached data.' % e.message) else: log.debug('Using episode info for %s from cache.' % ep_description) else: if only_cached: raise LookupError('Episode %s not found from cache' % ep_description) # There was no episode found in the cache, do a lookup from tvdb log.debug('Episode %s not found in cache, looking up from tvdb.' % ep_description) try: raw_data = requests.get(url).content data = BeautifulStoneSoup( raw_data, convertEntities=BeautifulStoneSoup.HTML_ENTITIES).data if data: error = data.find('error') if error: raise LookupError( 'Error lookuing up episode from TVDb (%s)' % error.string) ep_data = data.find('episode') if ep_data: # Check if this episode id is already in our db episode = session.query(TVDBEpisode).filter( TVDBEpisode.id == ep_data.id.string).first() if episode:
if episode: if episode.expired and not only_cached: log.info('Data for %r has expired, refreshing from tvdb' % episode) try: episode.update() except LookupError, e: log.warning('Error while updating from tvdb (%s), using cached data.' % e.message) else: log.debug('Using episode info for %s from cache.' % ep_description) else: if only_cached: raise LookupError('Episode %s not found from cache' % ep_description) # There was no episode found in the cache, do a lookup from tvdb log.debug('Episode %s not found in cache, looking up from tvdb.' % ep_description) try: raw_data = requests.get(url).content data = BeautifulStoneSoup(raw_data, convertEntities=BeautifulStoneSoup.HTML_ENTITIES).data if data: error = data.find('error') if error: raise LookupError('Error lookuing up episode from TVDb (%s)' % error.string) ep_data = data.find('episode') if ep_data: # Check if this episode id is already in our db episode = session.query(TVDBEpisode).filter(TVDBEpisode.id == ep_data.id.string).first() if episode: episode.update_from_bss(ep_data) else: episode = TVDBEpisode(ep_data) series.episodes.append(episode) session.merge(series)
def search(self, entry, config=None):
    """
    Search for entries on SceneAccess.

    :param entry: entry to search for (uses `search_strings` or `title`)
    :param config: plugin config with credentials, categories and optional
        `gravity_multiplier`
    :return: set of matching Entry objects
    """
    try:
        multip = int(config['gravity_multiplier'])
    except KeyError:
        multip = 1

    # Login...
    params = {'username': config['username'],
              'password': config['password'],
              'submit': 'come on in'}

    session = Session()
    # BUGFIX: the header name must be 'User-Agent'; the previous
    # 'User agent' key is not a valid HTTP header and was ignored.
    session.headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:27.0) Gecko/20100101 Firefox/27.0'}
    log.debug('Logging in to %s...' % URL)
    session.post(URL + 'login', data=params)

    # Prepare one base query URL per configured category.
    base_urls = list()
    entries = set()
    for category in self.processCategories(config):
        base_urls.append(URL + '%(url_path)s?method=2%(category_url_string)s' % category)

    # Search...
    for search_string in entry.get('search_strings', [entry['title']]):
        search_string_normalized = normalize_unicode(clean_title(search_string))
        search_string_url_fragment = '&search=' + quote(search_string_normalized.encode('utf8'))

        for url in base_urls:
            url += search_string_url_fragment
            log.debug('Search URL for `%s`: %s' % (search_string, url))

            page = session.get(url).content
            soup = get_soup(page)

            for result in soup.findAll('tr', attrs={'class': 'tt_row'}):
                entry = Entry()
                entry['title'] = result.find('a', href=re.compile(r'details\?id=\d+'))['title']
                entry['url'] = URL + result.find('a', href=re.compile(r'.torrent$'))['href']
                entry['torrent_seeds'] = result.find('td', attrs={'class': 'ttr_seeders'}).string
                entry['torrent_leeches'] = result.find('td', attrs={'class': 'ttr_leechers'}).string
                entry['search_sort'] = torrent_availability(entry['torrent_seeds'],
                                                            entry['torrent_leeches']) * multip

                size = result.find('td', attrs={'class': 'ttr_size'}).next
                size = re.search(r'(\d+(?:[.,]\d+)*)\s?([KMG]B)', size)

                if size:
                    # BUGFIX: the regex accepts thousands separators ('1,234 MB')
                    # but float() does not; strip commas before converting
                    # (matches the sibling search implementation).
                    amount = float(size.group(1).replace(',', ''))
                    if size.group(2) == 'GB':
                        entry['content_size'] = int(amount * 1000 ** 3 / 1024 ** 2)
                    elif size.group(2) == 'MB':
                        entry['content_size'] = int(amount * 1000 ** 2 / 1024 ** 2)
                    elif size.group(2) == 'KB':
                        entry['content_size'] = int(amount * 1000 / 1024 ** 2)
                    else:
                        # Unreachable with the current pattern ([KMG]B), kept as
                        # a fallback: treat the value as plain bytes.
                        entry['content_size'] = int(amount / 1024 ** 2)
                entries.add(entry)

    return entries
def search(self, entry, config=None):
    """
    Search the tracker for entries matching `entry`.

    :param entry: entry to search for (uses `search_strings` or `title`)
    :param config: plugin config with credentials, categories and optional
        `gravity_multiplier`
    :return: set of matching Entry objects
    """
    try:
        multip = int(config['gravity_multiplier'])
    except KeyError:
        multip = 1

    # Resolve configured categories to their numeric ids. Work on a local
    # list instead of rewriting config['category'] in place, so the caller's
    # config dict is not mutated.
    categories = config['category']
    if not isinstance(categories, list):
        categories = [categories]
    categories_id = list()
    for category in categories:
        if not isinstance(category, int):
            categories_id.append(CATEGORIES.get(category))
        else:
            categories_id.append(category)
    category_url_fragment = ''.join(
        ['&' + quote('filter_cat[%s]' % cat_id) + '=1' for cat_id in categories_id])

    params = {
        'username': config['username'],
        'password': config['password'],
        'keeplogged': '1',
        'login': '******'
    }

    session = Session()
    log.debug('Logging in to %s...' % URL)
    session.post(URL + 'login.php', data=params)

    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        search_string_normalized = normalize_unicode(clean_title(search_string))
        search_string_url_fragment = 'searchstr=' + quote(search_string_normalized.encode('utf8'))

        url = URL + 'torrents.php?' + search_string_url_fragment + category_url_fragment
        log.debug('Fetching URL for `%s`: %s' % (search_string, url))

        page = session.get(url).content
        soup = get_soup(page)

        for result in soup.findAll('tr', attrs={'class': 'torrent'}):
            entry = Entry()
            entry['title'] = result.find('span', attrs={'class': 'torrent_name_link'}).string
            entry['url'] = URL + result.find('a', href=re.compile(r'torrents.php\?action=download'),
                                             attrs={'title': 'Download'})['href']
            entry['torrent_seeds'] = result.findAll('td')[-3].string
            entry['torrent_leeches'] = result.findAll('td')[-2].string
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'],
                                                        entry['torrent_leeches']) * multip

            size = result.findAll('td')[-5].string
            size = re.search(r'(\d+(?:[.,]\d+)*)\s?([KMG]B)', size)
            if size:
                # Strip thousands separators before converting to float.
                amount = float(size.group(1).replace(',', ''))
                if size.group(2) == 'GB':
                    entry['content_size'] = int(amount * 1000 ** 3 / 1024 ** 2)
                elif size.group(2) == 'MB':
                    entry['content_size'] = int(amount * 1000 ** 2 / 1024 ** 2)
                elif size.group(2) == 'KB':
                    entry['content_size'] = int(amount * 1000 / 1024 ** 2)
                else:
                    # Unreachable with the current pattern ([KMG]B), kept as
                    # a fallback: treat the value as plain bytes.
                    entry['content_size'] = int(amount / 1024 ** 2)
            entries.add(entry)

    return entries
class ImdbEntrySet(MutableSet):
    """Mutable-set view over a user's IMDB list ('watchlist' or a named/ls* list).

    Handles authentication (with DB-cached cookies), lazily fetches the list via
    IMDB's CSV export, and supports add/discard through IMDB's ajax endpoints.
    """

    schema = {
        'type': 'object',
        'properties': {
            'login': {'type': 'string'},
            'password': {'type': 'string'},
            'list': {'type': 'string'},
            'force_language': {'type': 'string', 'default': 'en-us'}
        },
        'additionalProperties': False,
        'required': ['login', 'password', 'list']
    }

    def __init__(self, config):
        self.config = config
        self._session = RequestSession()
        # Be polite to imdb.com: at most one request every 5 seconds.
        self._session.add_domain_limiter(TimedLimiter('imdb.com', '5 seconds'))
        self._session.headers.update({'Accept-Language': config.get('force_language', 'en-us')})
        self.user_id = None
        self.list_id = None
        self.cookies = None
        self.hidden_value = None
        self._items = None          # cached parsed entries; None = not fetched yet
        self._authenticated = False

    @property
    def session(self):
        """Return the HTTP session, authenticating on first use."""
        if not self._authenticated:
            self.authenticate()
        return self._session

    def get_user_id_and_hidden_value(self, cookies=None):
        """Fetch the profile page; return the 'ur…' user id (or None) and cache
        the hidden '49e6c' form value needed for add/remove POSTs."""
        try:
            if cookies:
                self._session.cookies = cookiejar_from_dict(cookies)
            # We need to allow for redirects here as it performs 1-2 redirects
            # before reaching the real profile url.
            response = self._session.get('https://www.imdb.com/profile', allow_redirects=True)
        except RequestException as e:
            raise PluginError(str(e))
        user_id_match = re.search(r'ur\d+(?!\d)', response.url)
        if user_id_match:
            # extract the hidden form value that we need to do post requests later on
            try:
                soup = get_soup(response.text)
                self.hidden_value = soup.find('input', attrs={'id': '49e6c'})['value']
            except Exception as e:
                # BUGFIX: the old message used '' '' implicit concatenation and
                # rendered without the intended quotes around 49e6c.
                log.warning("Unable to locate the hidden form value '49e6c'. Without it, you might not be able to "
                            'add or remove items. %s', e)
        return user_id_match.group() if user_id_match else None

    def authenticate(self):
        """Authenticates a session with IMDB, and grabs any IDs needed for getting/modifying list."""
        cached_credentials = False
        with Session() as session:
            user = session.query(IMDBListUser).filter(
                IMDBListUser.user_name == self.config.get('login')).one_or_none()
            if user and user.cookies and user.user_id:
                log.debug('login credentials found in cache, testing')
                self.user_id = user.user_id
                if not self.get_user_id_and_hidden_value(cookies=user.cookies):
                    log.debug('cache credentials expired')
                    user.cookies = None
                    self._session.cookies.clear()
                else:
                    self.cookies = user.cookies
                    cached_credentials = True
            if not cached_credentials:
                log.debug('user credentials not found in cache or outdated, fetching from IMDB')
                url_credentials = (
                    'https://www.imdb.com/ap/signin?openid.return_to=https%3A%2F%2Fwww.imdb.com%2Fap-signin-'
                    'handler&openid.identity=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&'
                    'openid.assoc_handle=imdb_mobile_us&openid.mode=checkid_setup&openid.claimed_id=http%3A%'
                    '2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.ns=http%3A%2F%2Fspecs.ope'
                    'nid.net%2Fauth%2F2.0'
                )
                try:
                    # we need to get some cookies first
                    self._session.get('https://www.imdb.com')
                    r = self._session.get(url_credentials)
                except RequestException as e:
                    raise PluginError(e.args[0])
                soup = get_soup(r.content)
                form = soup.find('form', attrs={'name': 'signIn'})
                inputs = form.select('input')
                data = dict((i['name'], i.get('value')) for i in inputs if i.get('name'))
                data['email'] = self.config['login']
                data['password'] = self.config['password']
                action = form.get('action')
                # SECURITY FIX: never write the plaintext password to the log.
                log.debug('email=%s', data['email'])
                self._session.headers.update({'Referer': url_credentials})
                self._session.post(action, data=data)
                self._session.headers.update({'Referer': 'https://www.imdb.com/'})
                self.user_id = self.get_user_id_and_hidden_value()
                if not self.user_id:
                    raise plugin.PluginError('Login to IMDB failed. Check your credentials.')
                self.cookies = self._session.cookies.get_dict(domain='.imdb.com')
            # Get list ID (renamed local from `list` to avoid shadowing the builtin)
            if user:
                for user_list in user.lists:
                    if self.config['list'] == user_list.list_name:
                        log.debug('found list ID %s matching list name %s in cache',
                                  user_list.list_id, user_list.list_name)
                        self.list_id = user_list.list_id
            if not self.list_id:
                log.debug('could not find list ID in cache, fetching from IMDB')
                if self.config['list'] == 'watchlist':
                    data = {'consts[]': 'tt0133093', 'tracking_tag': 'watchlistRibbon'}
                    wl_data = self._session.post('https://www.imdb.com/list/_ajax/watchlist_has',
                                                 data=data, cookies=self.cookies).json()
                    try:
                        self.list_id = wl_data['list_id']
                    except KeyError:
                        raise PluginError('No list ID could be received. Please initialize list by '
                                          'manually adding an item to it and try again')
                elif self.config['list'] in IMMUTABLE_LISTS or self.config['list'].startswith('ls'):
                    self.list_id = self.config['list']
                else:
                    data = {'tconst': 'tt0133093'}
                    list_data = self._session.post('https://www.imdb.com/list/_ajax/wlb_dropdown',
                                                   data=data, cookies=self.cookies).json()
                    for li in list_data['items']:
                        if li['wlb_text'] == self.config['list']:
                            self.list_id = li['data_list_id']
                            break
                    else:
                        raise plugin.PluginError('Could not find list %s' % self.config['list'])

            user = IMDBListUser(self.config['login'], self.user_id, self.cookies)
            user_list = IMDBListList(self.list_id, self.config['list'], self.user_id)
            user.lists.append(user_list)
            session.merge(user)

        self._authenticated = True

    def invalidate_cache(self):
        """Drop the cached item list so the next access re-fetches from IMDB."""
        self._items = None

    @property
    def items(self):
        """Lazily fetch and parse the list's CSV export into Entry objects."""
        if self._items is None:
            log.debug('fetching items from IMDB')
            try:
                r = self.session.get('https://www.imdb.com/list/export?list_id=%s&author_id=%s' %
                                     (self.list_id, self.user_id), cookies=self.cookies)
                lines = list(r.iter_lines(decode_unicode=True))
            except RequestException as e:
                raise PluginError(e.args[0])
            # Normalize headers to lowercase
            lines[0] = lines[0].lower()
            self._items = []
            for row in csv_dictreader(lines):
                log.debug('parsing line from csv: %s', row)
                try:
                    item_type = row['title type'].lower()
                    name = row['title']
                    year = int(row['year']) if row['year'] != '????' else None
                    created = datetime.strptime(row['created'], '%Y-%m-%d') if row.get('created') else None
                    modified = datetime.strptime(row['modified'], '%Y-%m-%d') if row.get('modified') else None
                    entry = Entry({
                        # BUGFIX: `year` is already int-or-None here, so the old
                        # `year != '????'` test was always true and produced
                        # titles like "Name (None)" for unknown years.
                        'title': '%s (%s)' % (name, year) if year is not None else name,
                        'url': row['url'],
                        'imdb_id': row['const'],
                        'imdb_url': row['url'],
                        'imdb_list_position': int(row['position']) if 'position' in row else None,
                        'imdb_list_created': created,
                        'imdb_list_modified': modified,
                        'imdb_list_description': row.get('description'),
                        'imdb_name': name,
                        'imdb_year': year,
                        'imdb_user_score': float(row['imdb rating']) if row['imdb rating'] else None,
                        'imdb_votes': int(row['num votes']) if row['num votes'] else None,
                        'imdb_genres': [genre.strip() for genre in row['genres'].split(',')]
                    })
                except ValueError as e:
                    log.debug('no movie row detected, skipping. %s. Exception: %s', row, e)
                    continue
                if item_type in MOVIE_TYPES:
                    entry['movie_name'] = name
                    entry['movie_year'] = year
                elif item_type in SERIES_TYPES:
                    entry['series_name'] = name
                    entry['series_year'] = year
                elif item_type in OTHER_TYPES:
                    entry['title'] = name
                else:
                    log.verbose('Unknown IMDB type entry received: %s. Skipping', item_type)
                    continue
                self._items.append(entry)
        return self._items

    @property
    def immutable(self):
        """Return a reason string if the configured list cannot be modified."""
        if self.config['list'] in IMMUTABLE_LISTS:
            return '%s list is not modifiable' % self.config['list']

    def _from_iterable(cls, it):
        # TODO: is this the right answer? the returned object won't have our
        # custom __contains__ logic
        return set(it)

    def __contains__(self, entry):
        return self.get(entry) is not None

    def __iter__(self):
        return iter(self.items)

    def discard(self, entry):
        """Remove *entry* (matched by imdb_id) from the remote list and cache."""
        if self.config['list'] in IMMUTABLE_LISTS:
            raise plugin.PluginError('%s lists are not modifiable' % ' and '.join(IMMUTABLE_LISTS))
        if 'imdb_id' not in entry:
            log.warning('Cannot remove %s from imdb_list because it does not have an imdb_id',
                        entry['title'])
            return
        # Get the list item id
        item_ids = None
        urls = []
        if self.config['list'] == 'watchlist':
            method = 'delete'
            data = {'consts[]': entry['imdb_id'], 'tracking_tag': 'watchlistRibbon'}
            status = self.session.post('https://www.imdb.com/list/_ajax/watchlist_has', data=data,
                                       cookies=self.cookies).json()
            item_ids = status.get('has', {}).get(entry['imdb_id'])
            urls = ['https://www.imdb.com/watchlist/%s' % entry['imdb_id']]
        else:
            method = 'post'
            data = {'tconst': entry['imdb_id']}
            status = self.session.post('https://www.imdb.com/list/_ajax/wlb_dropdown', data=data,
                                       cookies=self.cookies).json()
            for a_list in status['items']:
                if a_list['data_list_id'] == self.list_id:
                    item_ids = a_list['data_list_item_ids']
                    break
            # BUGFIX: item_ids stays None when the title is in no list; the old
            # code iterated it unconditionally and raised TypeError before the
            # "not in list" warning below could run.
            for item_id in item_ids or []:
                urls.append('https://www.imdb.com/list/%s/li%s/delete' % (self.list_id, item_id))
        if not item_ids:
            log.warning('%s is not in list %s, cannot be removed', entry['imdb_id'], self.list_id)
            return
        for url in urls:
            log.debug('found movie %s with ID %s in list %s, removing',
                      entry['title'], entry['imdb_id'], self.list_id)
            self.session.request(method, url, data={'49e6c': self.hidden_value}, cookies=self.cookies)
            # We don't need to invalidate our cache if we remove the item
            self._items = [i for i in self._items if i['imdb_id'] != entry['imdb_id']] if self._items else None

    def _add(self, entry):
        """Submit a new movie to imdb. (does not update cache)"""
        if self.config['list'] in IMMUTABLE_LISTS:
            raise plugin.PluginError('%s lists are not modifiable' % ' and '.join(IMMUTABLE_LISTS))
        if 'imdb_id' not in entry:
            log.warning('Cannot add %s to imdb_list because it does not have an imdb_id',
                        entry['title'])
            return
        # Manually calling authenticate to fetch list_id and cookies and hidden form value
        self.authenticate()
        if self.config['list'] == 'watchlist':
            method = 'put'
            url = 'https://www.imdb.com/watchlist/%s' % entry['imdb_id']
        else:
            method = 'post'
            url = 'https://www.imdb.com/list/%s/%s/add' % (self.list_id, entry['imdb_id'])
        log.debug('adding title %s with ID %s to imdb %s', entry['title'], entry['imdb_id'],
                  self.list_id)
        self.session.request(method, url, cookies=self.cookies, data={'49e6c': self.hidden_value})

    def add(self, entry):
        self._add(entry)
        # Invalidate the cache so that we get the canonical entry from the imdb list
        self.invalidate_cache()

    def __ior__(self, entries):
        for entry in entries:
            self._add(entry)
        self.invalidate_cache()
        return self

    def __len__(self):
        return len(self.items)

    @property
    def online(self):
        """ Set the online status of the plugin, online plugin should be treated
        differently in certain situations, like test mode"""
        return True

    def get(self, entry):
        """Return our cached Entry matching *entry*'s imdb_id, or None."""
        if not entry.get('imdb_id'):
            log.debug('entry %s does not have imdb_id, cannot compare to imdb list items', entry)
            return None
        log.debug('finding %s in imdb list', entry['imdb_id'])
        for e in self.items:
            if e['imdb_id'] == entry['imdb_id']:
                return e
        log.debug('could not find %s in imdb list items', entry['imdb_id'])
        return None