def verify(self, provider, name, size):
    """ Main filtering method to match torrent names, resolutions, release types and size filters

    Args:
        provider (str): Provider ID
        name     (str): Torrent name
        size     (str): Arbitrary torrent size to be parsed

    Returns:
        bool: ``True`` if torrent name passed filtering, ``False`` otherwise.
    """
    if not name:
        self.reason = '[%s] %s' % (provider, '*** Empty name ***')
        return False

    name = normalize_string(name)
    if self.filter_title and self.title:
        self.title = normalize_string(self.title)

    self.reason = "[%s] %70s ***" % (provider, name)

    if self.filter_resolutions and get_setting('require_resolution', bool):
        resolution = self.determine_resolution(name)[0]
        if resolution not in self.resolutions_allow:
            self.reason += " Resolution not allowed ({0})".format(resolution)
            return False

    if self.filter_title:
        if not all(map(lambda match: match in name, re.split(r'\s', self.title))):
            self.reason += " Name mismatch"
            return False

    if self.require_keywords and get_setting('require_keywords', bool):
        for required in self.require_keywords:
            if not self.included(name, keys=[required]):
                self.reason += " Missing required keyword"
                return False

    if not self.included_rx(name, keys=self.releases_allow) and get_setting('require_release_type', bool):
        self.reason += " Missing release type keyword"
        return False

    if self.included_rx(name, keys=self.releases_deny) and get_setting('require_release_type', bool):
        self.reason += " Blocked by release type keyword"
        return False

    if size and not self.in_size_range(size) and get_setting('require_size', bool):
        self.reason += " Size out of range ({0})".format(size)
        return False

    return True
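# Usage sketch (illustrative, not part of the original module): `verify` is
# meant to be called once per search result, and `self.reason` carries a
# human-readable explanation when a result is rejected. The instance name
# `filtering` and the sample values below are assumptions for illustration.
#
#   if filtering.verify('provider_id', 'Some.Show.S01E02.1080p.WEB-DL.x264', '1.4 GB'):
#       accept_result(...)           # hypothetical downstream handler
#   else:
#       log.debug(filtering.reason)  # explains which filter rejected the name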
def process_keywords(self, provider, text):
    """ Processes the query payload from a provider's keyword definitions

    Args:
        provider (str): Provider ID
        text     (str): Keyword placeholders from definitions, e.g. ``{title}``

    Returns:
        str: Processed query keywords
    """
    keywords = self.read_keywords(text)
    replacing = get_setting("filter_quotes", bool)

    for keyword in keywords:
        keyword = keyword.lower()
        if 'title' in keyword:
            title = self.info["title"]
            language = definitions[provider]['language']
            use_language = None
            if ':' in keyword:
                use_language = keyword.split(':')[1].lower()
            if provider not in self.language_exceptions and \
               (use_language or self.kodi_language) and \
               'titles' in self.info and self.info['titles']:
                try:
                    if self.kodi_language and self.kodi_language in self.info['titles']:
                        use_language = self.kodi_language
                    if use_language not in self.info['titles']:
                        use_language = language
                        if 'original' in self.info['titles']:
                            title = self.info['titles']['original']
                    if use_language in self.info['titles'] and self.info['titles'][use_language]:
                        title = self.info['titles'][use_language]
                        title = normalize_string(title)
                        log.info("[%s] Using translated '%s' title %s" % (provider, use_language, repr(title)))
                        log.debug("[%s] Translated titles from Elementum: %s" % (provider, repr(self.info['titles'])))
                except Exception as e:
                    import traceback
                    log.error("%s failed with: %s" % (provider, repr(e)))
                    for line in traceback.format_exc().split("\n"):
                        log.debug(line)
            text = text.replace('{%s}' % keyword, title)

        if 'year' in keyword:
            text = text.replace('{%s}' % keyword, str(self.info["year"]))

        if 'season' in keyword:
            if '+' in keyword:
                keys = keyword.split('+')
                season = str(self.info["season"] + get_int(keys[1]))
            elif ':' in keyword:
                keys = keyword.split(':')
                season = ('%%.%sd' % keys[1]) % self.info["season"]
            else:
                season = '%s' % self.info["season"]
            text = text.replace('{%s}' % keyword, season)

        if 'episode' in keyword:
            if '+' in keyword:
                keys = keyword.split('+')
                episode = str(self.info["episode"] + get_int(keys[1]))
            elif ':' in keyword:
                keys = keyword.split(':')
                episode = ('%%.%sd' % keys[1]) % self.info["episode"]
            else:
                episode = '%s' % self.info["episode"]
            text = text.replace('{%s}' % keyword, episode)

    if replacing:
        text = text.replace(u"'", '')

    return text
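# Illustrative expansion of the placeholder grammar handled above (sample
# values assumed, not from the original source). With
# self.info = {'title': 'Some Show', 'year': 2020, 'season': 1, 'episode': 2},
# a definitions template expands as follows:
#
#   '{title} {year}'                   ->  'Some Show 2020'
#   '{title} s{season:2}e{episode:2}'  ->  'Some Show s01e02'  ({n:2} zero-pads to 2 digits)
#   '{title} s{season+1}'              ->  'Some Show s2'      ({n+k} adds an offset)
#
# Note that keywords are matched after lower-casing, so templates are expected
# to use lowercase placeholder names.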
def open(cls, url='', language='en', post_data=None, get_data=None, use_cache=True, charset='utf-8'):
    """ Opens a web page and reads its contents

    :param url: url address of the web page
    :type url: str or unicode
    :param language: language encoding of the web page
    :type language: str
    :param post_data: parameters for a POST request
    :type post_data: dict
    :param get_data: parameters for a GET request
    :type get_data: dict
    :param use_cache: whether to use the information stored in the cache
    :type use_cache: bool
    :param charset: character set used to decode the response
    :type charset: str
    :return: ``True`` if the web page was opened successfully, ``False`` otherwise.
    :rtype: bool
    """
    if len(url) == 0:
        cls.status = 404
        cls.raw_content = ''
        logger.debug('Empty url')
        return False

    # Check the cache
    cache_file = quote_plus(repr(url)) + '.cache'
    if use_cache:
        cache = Storage.open(cache_file, ttl=15)
        if 'content' in cache:
            cls.status = 200
            cls.content = cache['content']
            cls.raw_content = cache['raw_content']
            cls.headers = cache['headers']
            logger.debug('Using cache for %s' % url)
            cache.close()
            logger.debug("Status: " + str(cls.status))
            logger.debug(repr(cls.content))
            return True

    # Creating the request
    if post_data is None:
        post_data = {}
    if get_data is not None:
        url += '?' + urlencode(get_data)

    logger.debug(url)
    result = True
    cls.status = 200
    data = urlencode(post_data) if len(post_data) > 0 else None
    req = urllib2.Request(url, data)

    # Cookies and cloudhole info
    cls._read_cookies(url)
    logger.debug("Cookies: %s" % repr(cls._cookies))

    # Open the cookie jar
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cls._cookies))

    # Headers
    req.add_header('User-Agent', cls.user_agent)
    req.add_header('Content-Language', language)
    req.add_header("Accept-Encoding", "gzip")

    try:
        cls._good_spider()
        # Send cookies and open the url
        with closing(opener.open(req)) as response:
            cls.headers = response.headers
            cls._save_cookies()
            # Borrowed from Steeve's provider.py
            if response.headers.get('Content-Encoding', '') == 'gzip':
                import zlib
                cls.raw_content = zlib.decompressobj(16 + zlib.MAX_WBITS).decompress(response.read())
            else:
                cls.raw_content = response.read()

        logger.debug("Raw Content:")
        logger.debug(repr(cls.raw_content))

        if 'charset' in cls.raw_content:
            match = re.search(r'charset=(\S+)"', cls.raw_content, re.IGNORECASE)
            if match:
                charset = match.group(1)
                logger.debug('charset=' + charset)

    except urllib2.HTTPError as e:
        result = False
        cls.status = e.code
        logger.warning("Status: " + str(cls.status))
        if e.code == 403:
            logger.warning("CloudFlared at %s" % url)

    except urllib2.URLError as e:
        result = False
        cls.status = e.reason
        logger.warning("Status: " + str(cls.status))

    except Exception as e:
        result = False
        logger.error("Error in the browser: %s" % repr(e))

    if result:
        # Normalization
        try:
            cls.content = cls.raw_content.decode(charset)
        except UnicodeDecodeError:
            cls.content = normalize_string(cls.raw_content, charset=charset)
        except LookupError:
            cls.content = normalize_string(cls.raw_content)

        cls.content = cls.content.replace('<![CDATA[', '').replace(']]', '')
        cls.content = HTMLParser().unescape(cls.content)

        # Save in the cache
        if use_cache:
            cache = Storage.open(cache_file, ttl=15)
            cache['content'] = cls.content
            cache['raw_content'] = cls.raw_content
            cache['headers'] = cls.headers
            cache.close()

    # Status
    logger.debug("Status: " + str(cls.status))
    logger.debug(repr(cls.content))

    return result
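# Hedged usage sketch (not in the original file): assuming this method lives on
# a `Browser`-like class, a GET request with query parameters could look like
# the following; on success the decoded page text is left on the class.
#
#   if Browser.open('http://example.com/search', get_data={'q': 'ubuntu'}):
#       html = Browser.content             # decoded, CDATA-stripped, unescaped text
#   else:
#       logger.warning("Request failed with status %s" % Browser.status)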