def verify(self, provider, name, size):
        """ Check a torrent name against all configured filters.

        The name is run through a chain of filters in order: empty-name
        rejection, resolution whitelist, title token matching, required
        keywords, release-type allow/deny lists and finally the size
        range. The first failing filter records its reason in
        ``self.reason`` and rejects the torrent.

        Args:
            provider (str): Provider ID
            name     (str): Torrent name
            size     (str): Arbitrary torrent size to be parsed

        Returns:
            bool: ``True`` if torrent name passed filtering, ``False`` otherwise.
        """
        # Reject unnamed torrents outright.
        if not name:
            self.reason = '[%s] %s' % (provider, '*** Empty name ***')
            return False

        name = normalize_string(name)
        if self.filter_title and self.title:
            self.title = normalize_string(self.title)

        # Base reason string; each failed filter appends its own suffix.
        self.reason = "[%s] %70s ***" % (provider, name)

        # Resolution whitelist.
        if self.filter_resolutions and get_setting('require_resolution', bool):
            found = self.determine_resolution(name)[0]
            if found not in self.resolutions_allow:
                self.reason += " Resolution not allowed ({0})".format(found)
                return False

        # Every whitespace-separated token of the title must appear in the name.
        if self.filter_title:
            for token in re.split(r'\s', self.title):
                if token not in name:
                    self.reason += " Name mismatch"
                    return False

        # Each required keyword must be present individually.
        if self.require_keywords and get_setting('require_keywords', bool):
            for required in self.require_keywords:
                if not self.included(name, keys=[required]):
                    self.reason += " Missing required keyword"
                    return False

        # Release-type allow list.
        if not self.included_rx(name, keys=self.releases_allow):
            if get_setting('require_release_type', bool):
                self.reason += " Missing release type keyword"
                return False

        # Release-type deny list.
        if self.included_rx(name, keys=self.releases_deny):
            if get_setting('require_release_type', bool):
                self.reason += " Blocked by release type keyword"
                return False

        # Size range filter (only when a size string was supplied).
        if size and not self.in_size_range(size):
            if get_setting('require_size', bool):
                self.reason += " Size out of range ({0})".format(size)
                return False

        return True
    def process_keywords(self, provider, text):
        """ Processes the query payload from a provider's keyword definitions

        Replaces ``{title}``, ``{year}``, ``{season}`` and ``{episode}``
        placeholders using ``self.info``. ``{title:xx}`` requests the
        title translated to language ``xx``; ``{season:N}`` /
        ``{episode:N}`` zero-pad to ``N`` digits; ``{season+N}`` /
        ``{episode+N}`` add an integer offset.

        Args:
            provider (str): Provider ID
            text     (str): Keyword placeholders from definitions, ie. {title}

        Returns:
            str: Processed query keywords
        """
        keywords = self.read_keywords(text)
        replacing = get_setting("filter_quotes", bool)

        for keyword in keywords:
            keyword = keyword.lower()
            if 'title' in keyword:
                title = self.info["title"]
                language = definitions[provider]['language']
                use_language = None
                # '{title:xx}' forces language 'xx' for the title lookup.
                if ':' in keyword:
                    use_language = keyword.split(':')[1].lower()
                if provider not in self.language_exceptions and \
                   (use_language or self.kodi_language) and \
                   'titles' in self.info and self.info['titles']:
                    try:
                        # Kodi UI language wins when a translation exists.
                        if self.kodi_language and self.kodi_language in self.info[
                                'titles']:
                            use_language = self.kodi_language
                        if use_language not in self.info['titles']:
                            # Fall back to the provider's language, seeding
                            # the title with the original one when available.
                            use_language = language
                            if 'original' in self.info['titles']:
                                title = self.info['titles']['original']
                        if use_language in self.info['titles'] and self.info[
                                'titles'][use_language]:
                            title = self.info['titles'][use_language]
                            title = normalize_string(title)
                            log.info("[%s] Using translated '%s' title %s" %
                                     (provider, use_language, repr(title)))
                            log.debug(
                                "[%s] Translated titles from Elementum: %s" %
                                (provider, repr(self.info['titles'])))
                    except Exception as e:
                        import traceback
                        log.error("%s failed with: %s" % (provider, repr(e)))
                        # FIX: map() is lazy on Python 3, so the previous
                        # map(log.debug, ...) never actually emitted the
                        # traceback; iterate explicitly instead.
                        for line in traceback.format_exc().split("\n"):
                            log.debug(line)
                text = text.replace('{%s}' % keyword, title)

            if 'year' in keyword:
                text = text.replace('{%s}' % keyword, str(self.info["year"]))

            if 'season' in keyword:
                if '+' in keyword:
                    # '{season+N}': add integer offset N.
                    keys = keyword.split('+')
                    season = str(self.info["season"] + get_int(keys[1]))
                elif ':' in keyword:
                    # '{season:N}': zero-pad to N digits.
                    keys = keyword.split(':')
                    season = ('%%.%sd' % keys[1]) % self.info["season"]
                else:
                    season = '%s' % self.info["season"]
                text = text.replace('{%s}' % keyword, season)

            if 'episode' in keyword:
                if '+' in keyword:
                    # '{episode+N}': add integer offset N.
                    keys = keyword.split('+')
                    episode = str(self.info["episode"] + get_int(keys[1]))
                elif ':' in keyword:
                    # '{episode:N}': zero-pad to N digits.
                    keys = keyword.split(':')
                    episode = ('%%.%sd' % keys[1]) % self.info["episode"]
                else:
                    episode = '%s' % self.info["episode"]
                text = text.replace('{%s}' % keyword, episode)

        # Optionally strip single quotes from the final query.
        if replacing:
            text = text.replace(u"'", '')

        return text
# Example #3
    def open(cls,
             url='',
             language='en',
             post_data=None,
             get_data=None,
             use_cache=True,
             charset='utf-8'):
        """
        Open a web page and return whether it loaded successfully.

        On success the response is stored on the class: ``cls.status``,
        ``cls.headers``, ``cls.raw_content`` (undecoded body) and
        ``cls.content`` (decoded, CDATA-stripped, HTML-unescaped body).
        Responses are cached for 15 minutes keyed on the URL.

        :param charset: fallback charset used to decode the body
        :type charset: str
        :param use_cache: if it uses the information stored in the cache
        :type use_cache: bool
        :param url: url address web page
        :type url: str or unicode
        :param language: language encoding web page
        :type language: str
        :param post_data: parameters for POST request
        :type post_data: dict
        :param get_data: parameters for GET request
        :type get_data: dict
        :return: True if the web page was opened successfully. False, otherwise.
        """
        if len(url) == 0:
            cls.status = 404
            cls.raw_content = ''
            logger.debug('Empty url')
            return False

        # Check the cache
        cache_file = quote_plus(repr(url)) + '.cache'
        if use_cache:
            cache = Storage.open(cache_file, ttl=15)
            if 'content' in cache:
                cls.status = 200
                cls.content = cache['content']
                cls.raw_content = cache['raw_content']
                cls.headers = cache['headers']
                logger.debug('Using cache for %s' % url)
                cache.close()
                logger.debug("Status: " + str(cls.status))
                logger.debug(repr(cls.content))
                return True
            # FIX: the cache handle previously leaked on a cache miss —
            # it was only closed on the hit path above.
            cache.close()

        # Creating request
        if post_data is None:
            post_data = {}
        if get_data is not None:
            url += '?' + urlencode(get_data)

        logger.debug(url)
        result = True
        cls.status = 200
        # POST when post_data given, otherwise GET (data=None).
        data = urlencode(post_data) if len(post_data) > 0 else None
        req = urllib2.Request(url, data)
        # Cookies and cloudhole info
        cls._read_cookies(url)
        logger.debug("Cookies: %s" % repr(cls._cookies))
        # open cookie jar
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(
            cls._cookies))
        # Headers
        req.add_header('User-Agent', cls.user_agent)
        req.add_header('Content-Language', language)
        req.add_header("Accept-Encoding", "gzip")
        try:
            # Rate-limit before hitting the site.
            cls._good_spider()
            # send cookies and open url
            with closing(opener.open(req)) as response:
                cls.headers = response.headers
                cls._save_cookies()
                # borrow from provider.py Steeve
                if response.headers.get('Content-Encoding', '') == 'gzip':
                    import zlib
                    # 16 + MAX_WBITS tells zlib to expect a gzip header.
                    cls.raw_content = zlib.decompressobj(
                        16 + zlib.MAX_WBITS).decompress(response.read())

                else:
                    cls.raw_content = response.read()

                logger.debug("Raw Content:")
                logger.debug(repr(cls.raw_content))
                # Sniff the declared charset from the body itself.
                # NOTE(review): this assumes Python 2 str bodies — on
                # Python 3 raw_content is bytes and the 'in'/re.search
                # against a str pattern would raise TypeError.
                if 'charset' in cls.raw_content:
                    # FIX: raw string for the regex (r'...') — '\S' in a
                    # plain string is an invalid escape sequence.
                    match = re.search(r'charset=(\S+)"', cls.raw_content,
                                      re.IGNORECASE)
                    if match:
                        charset = match.group(1)

                logger.debug('charset=' + charset)

        except urllib2.HTTPError as e:
            result = False
            cls.status = e.code
            logger.warning("Status: " + str(cls.status))
            if e.code == 403:
                logger.warning("CloudFlared at %s" % url)

        except urllib2.URLError as e:
            result = False
            cls.status = e.reason
            logger.warning("Status: " + str(cls.status))

        except Exception as e:
            result = False
            logger.error("Error in the browser: %s" % repr(e))

        if result:
            # Normalization: decode with the sniffed charset, falling back
            # to normalize_string on decode/unknown-codec errors.
            try:
                cls.content = cls.raw_content.decode(charset)

            except UnicodeDecodeError:
                cls.content = normalize_string(cls.raw_content,
                                               charset=charset)

            except LookupError:
                cls.content = normalize_string(cls.raw_content)

            cls.content = cls.content.replace('<![CDATA[',
                                              '').replace(']]', '')
            cls.content = HTMLParser().unescape(cls.content)
            # Save in the cache
            if use_cache:
                cache = Storage.open(cache_file, ttl=15)
                cache['content'] = cls.content
                cache['raw_content'] = cls.raw_content
                cache['headers'] = cls.headers
                cache.close()

            # status
            logger.debug("Status: " + str(cls.status))
            logger.debug(repr(cls.content))

        return result