def search(self, task, entry, config=None):
    """
    Search for entries on SceneAccess.

    Logs in with the configured credentials when the shared session has no
    cookies yet, builds one search URL per configured category, then scrapes
    each result row into an Entry.

    :param task: currently running task (unused here, part of the plugin API)
    :param entry: entry to search for; uses ``search_strings`` or ``title``
    :param config: dict with ``username``, ``password`` and optional
        ``gravity_multiplier`` (weight applied to ``search_sort``)
    :return: set of Entry objects found
    """
    if not session.cookies:
        log.debug('Logging in to %s...' % URL)
        params = {'username': config['username'],
                  'password': config['password'],
                  'submit': 'come on in'}
        session.post(URL + 'login', data=params)

    # dict.has_key() was removed in Python 3; membership test replaces it.
    if 'gravity_multiplier' in config:
        multip = config['gravity_multiplier']
    else:
        multip = 1

    # Prepare one base search URL per configured category.
    base_urls = list()
    entries = set()
    for category in self.processCategories(config):
        base_urls.append(URL + '%(url_path)s?method=2%(category_url_string)s' % category)

    # Search...
    for search_string in entry.get('search_strings', [entry['title']]):
        search_string_normalized = normalize_unicode(clean_title(search_string))
        search_string_url_fragment = '&search=' + quote(search_string_normalized.encode('utf8'))

        for url in base_urls:
            url += search_string_url_fragment
            log.debug('Search URL for `%s`: %s' % (search_string, url))

            page = session.get(url).content
            soup = get_soup(page)

            for result in soup.findAll('tr', attrs={'class': 'tt_row'}):
                entry = Entry()
                entry['title'] = result.find('a', href=re.compile(r'details\?id=\d+'))['title']
                entry['url'] = URL + result.find('a', href=re.compile(r'.torrent$'))['href']
                entry['torrent_seeds'] = result.find('td', attrs={'class': 'ttr_seeders'}).text
                entry['torrent_leeches'] = result.find('td', attrs={'class': 'ttr_leechers'}).text
                entry['search_sort'] = torrent_availability(entry['torrent_seeds'],
                                                            entry['torrent_leeches']) * multip

                size = result.find('td', attrs={'class': 'ttr_size'}).next
                # Raw string: '\d'/'\s' are invalid escape sequences in a
                # plain Python 3 string literal.
                size = re.search(r'(\d+(?:[.,]\d+)*)\s?([KMG]B)', size)
                if size:
                    # Site reports decimal (SI) units; stored as MiB.
                    if size.group(2) == 'GB':
                        entry['content_size'] = int(float(size.group(1)) * 1000 ** 3 / 1024 ** 2)
                    elif size.group(2) == 'MB':
                        entry['content_size'] = int(float(size.group(1)) * 1000 ** 2 / 1024 ** 2)
                    elif size.group(2) == 'KB':
                        entry['content_size'] = int(float(size.group(1)) * 1000 / 1024 ** 2)
                    else:
                        entry['content_size'] = int(float(size.group(1)) / 1024 ** 2)

                entries.add(entry)

    return entries
def search(self, task, entry, config=None):
    """
    Search for entries on SceneAccess.

    Uses the task's request session (rate-limited to one request per
    7 seconds for the site), logging in first if no cookies are present.

    :param task: currently running task; supplies the requests session
    :param entry: entry to search for; uses ``search_strings`` or ``title``
    :param config: dict with ``username``, ``password`` and optional
        ``gravity_multiplier``
    :return: set of Entry objects found
    """
    session = task.requests
    # Throttle requests so the site is hit at most once every 7 seconds.
    if 'sceneaccess.eu' not in session.domain_limiters:
        session.add_domain_limiter(TimedLimiter('sceneaccess.eu', '7 seconds'))

    if not session.cookies:
        log.debug('Logging in to %s...' % URL)
        params = {'username': config['username'],
                  'password': config['password'],
                  'submit': 'come on in'}
        session.post(URL + 'login', data=params)

    if 'gravity_multiplier' in config:
        multip = config['gravity_multiplier']
    else:
        multip = 1

    # Prepare one base search URL per configured category.
    base_urls = list()
    entries = set()
    for category in self.process_categories(config):
        base_urls.append(URL + '%(url_path)s?method=2%(category_url_string)s' % category)

    # Search...
    for search_string in entry.get('search_strings', [entry['title']]):
        search_string_normalized = normalize_unicode(clean_title(search_string))
        search_string_url_fragment = '&search=' + quote(search_string_normalized.encode('utf8'))

        for url in base_urls:
            url += search_string_url_fragment
            log.debug('Search URL for `%s`: %s' % (search_string, url))

            page = session.get(url).content
            soup = get_soup(page)

            for result in soup.findAll('tr', attrs={'class': 'tt_row'}):
                entry = Entry()
                entry['title'] = result.find('a', href=re.compile(r'details\?id=\d+'))['title']
                entry['url'] = URL + result.find('a', href=re.compile(r'.torrent$'))['href']
                entry['torrent_seeds'] = result.find('td', attrs={'class': 'ttr_seeders'}).text
                entry['torrent_leeches'] = result.find('td', attrs={'class': 'ttr_leechers'}).text
                entry['search_sort'] = torrent_availability(entry['torrent_seeds'],
                                                            entry['torrent_leeches']) * multip

                size = result.find('td', attrs={'class': 'ttr_size'}).text
                size = re.search(r'(\d+(?:[.,]\d+)*)\s?([KMG]B)', size)
                # Guard: re.search returns None when the size cell doesn't
                # match; the original dereferenced .group(0) unconditionally
                # and would raise AttributeError on an unparsable cell.
                if size:
                    entry['content_size'] = parse_filesize(size.group(0))

                entries.add(entry)

    return entries
def search(self, task, entry, config):
    """
    Search KAT (kat.cr) for matching entries via its RSS feed.

    Each normalized search string is fetched under two sort orders
    (newest first, then most seeded). A sorter whose feed yields fewer
    than 25 items ends the sorter loop early, since the alternate
    ordering would only repeat the same results.

    :param task: currently running task; supplies the requests session
    :param entry: entry to search for; uses ``search_strings`` or ``title``
    :param config: dict with optional ``verified`` and ``category`` keys
    :return: set of Entry objects found
    """
    queries = [normalize_unicode(text).lower()
               for text in entry.get('search_strings', [entry['title']])]
    found = set()
    for query in queries:
        query = clean_title(query)
        url_fragment = query
        params = {'rss': 1}
        if config.get('verified'):
            url_fragment += ' verified:1'
        url = 'https://kat.cr/usearch/%s/' % quote(url_fragment.encode('utf-8'))
        if config.get('category', 'all') != 'all':
            params['category'] = config['category']

        # Newest first, then most seeded.
        for sorter in ({'field': 'time_add', 'sorder': 'desc'},
                       {'field': 'seeders', 'sorder': 'desc'}):
            params.update(sorter)
            log.debug('requesting: %s' % url)
            try:
                response = task.requests.get(url, params=params, raise_status=False)
            except RequestException as err:
                log.warning('Search resulted in: %s' % err)
                continue
            if not response.content:
                log.debug('No content returned from search.')
                continue
            elif response.status_code != 200:
                log.warning('Search returned %s response code' % response.status_code)
                continue

            feed = feedparser.parse(response.content)
            if feed.get('bozo_exception', False):
                log.warning('Got bozo_exception (bad feed)')
                continue

            for item in feed.entries:
                entry = Entry()
                entry['title'] = item.title
                if not item.get('enclosures'):
                    log.warning('Could not get url for entry from KAT. Maybe plugin needs updated?')
                    continue
                entry['url'] = item.enclosures[0]['url']
                entry['torrent_seeds'] = int(item.torrent_seeds)
                entry['torrent_leeches'] = int(item.torrent_peers)
                entry['search_sort'] = torrent_availability(entry['torrent_seeds'],
                                                            entry['torrent_leeches'])
                entry['content_size'] = int(item.torrent_contentlength) / 1024 / 1024
                entry['torrent_info_hash'] = item.torrent_infohash
                found.add(entry)

            if len(feed.entries) < 25:
                break

    return found
def search(self, task, entry, config=None):
    """
    Search a Gazelle-style tracker (torrents.php) for matching entries.

    Logs in if the shared session has no cookies, builds category/type
    URL parameters from the prepared config, then scrapes result rows.

    :param task: currently running task (unused here, part of the plugin API)
    :param entry: entry to search for; uses ``search_strings`` or ``title``
    :param config: plugin config; normalized via ``prepare_config`` and
        expected to contain ``username``, ``password``, ``category``,
        ``type`` and ``gravity_multiplier``
    :return: set of Entry objects found
    """
    config = self.prepare_config(config)

    if not session.cookies:
        log.debug('Logging in to %s...' % URL)
        params = {'username': config['username'],
                  'password': config['password'],
                  'keeplogged': '1',
                  'login': '******'}
        session.post(URL + 'login.php', data=params)

    # Build "&filter_cat[<id>]=1" fragments plus the release-type filter.
    cat = ''.join(['&' + ('filter_cat[%s]' % id) + '=1' for id in config['category']])
    rls = 'release_type=' + config['type']
    url_params = rls + cat
    multip = config['gravity_multiplier']

    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        srch = normalize_unicode(clean_title(search_string))
        srch = '&searchstr=' + quote(srch.encode('utf8'))

        url = URL + 'torrents.php?' + url_params + srch
        log.debug('Fetching URL for `%s`: %s' % (search_string, url))

        page = session.get(url).content
        soup = get_soup(page)

        for result in soup.findAll('tr', attrs={'class': 'torrent'}):
            entry = Entry()
            entry['title'] = result.find('span', attrs={'class': 'torrent_name_link'}).text
            entry['url'] = URL + result.find('a', href=re.compile(r'torrents\.php\?action=download')).get('href')
            entry['torrent_seeds'], entry['torrent_leeches'] = [r.text for r in result.findAll('td')[-2:]]
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'],
                                                        entry['torrent_leeches']) * multip

            size = result.findAll('td')[-4].text
            size = re.search(r'(\d+(?:[.,]\d+)*)\s?([KMG]B)', size)
            # Guard: re.search returns None when the size column doesn't
            # match; the original dereferenced .group(0) unconditionally.
            if size:
                entry['content_size'] = parse_filesize(size.group(0))

            entries.add(entry)

    return entries
def search(self, task, entry, config=None):
    """
    Search a Gazelle-style tracker (torrents.php) for matching entries.

    Variant that converts the scraped size column to MiB manually instead
    of delegating to ``parse_filesize``.

    :param task: currently running task (unused here, part of the plugin API)
    :param entry: entry to search for; uses ``search_strings`` or ``title``
    :param config: plugin config; normalized via ``prepare_config`` and
        expected to contain ``username``, ``password``, ``category``,
        ``type`` and ``gravity_multiplier``
    :return: set of Entry objects found
    """
    config = self.prepare_config(config)

    if not session.cookies:
        log.debug('Logging in to %s...' % URL)
        params = {'username': config['username'],
                  'password': config['password'],
                  'keeplogged': '1',
                  'login': '******'}
        session.post(URL + 'login.php', data=params)

    # Build "&filter_cat[<id>]=1" fragments plus the release-type filter.
    cat = ''.join(['&' + ('filter_cat[%s]' % id) + '=1' for id in config['category']])
    rls = 'release_type=' + config['type']
    url_params = rls + cat
    multip = config['gravity_multiplier']

    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        srch = normalize_unicode(clean_title(search_string))
        srch = '&searchstr=' + quote(srch.encode('utf8'))

        url = URL + 'torrents.php?' + url_params + srch
        log.debug('Fetching URL for `%s`: %s' % (search_string, url))

        page = session.get(url).content
        soup = get_soup(page)

        for result in soup.findAll('tr', attrs={'class': 'torrent'}):
            entry = Entry()
            entry['title'] = result.find('span', attrs={'class': 'torrent_name_link'}).text
            # Raw strings: '\.', '\?', '\d', '\s' are invalid escape
            # sequences in plain Python 3 string literals.
            entry['url'] = URL + result.find('a', href=re.compile(r'torrents\.php\?action=download')).get('href')
            entry['torrent_seeds'], entry['torrent_leeches'] = [r.text for r in result.findAll('td')[-2:]]
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'],
                                                        entry['torrent_leeches']) * multip

            size = result.findAll('td')[-4].text
            size = re.search(r'(\d+(?:[.,]\d+)*)\s?([KMG]B)', size)
            if size:
                # Site reports decimal (SI) units; convert to MiB.
                if size.group(2) == 'GB':
                    entry['content_size'] = int(float(size.group(1).replace(',', '')) * 1000 ** 3 / 1024 ** 2)
                elif size.group(2) == 'MB':
                    entry['content_size'] = int(float(size.group(1).replace(',', '')) * 1000 ** 2 / 1024 ** 2)
                elif size.group(2) == 'KB':
                    entry['content_size'] = int(float(size.group(1).replace(',', '')) * 1000 / 1024 ** 2)
                else:
                    entry['content_size'] = int(float(size.group(1).replace(',', '')) / 1024 ** 2)

            entries.add(entry)

    return entries
def search(self, entry, config):
    """
    Search elitetorrent.net for matching entries.

    Scrapes the site's search page and builds get-torrent URLs from the
    result links.

    :param entry: entry to search for; uses ``search_strings`` or ``title``
    :param config: plugin config (currently unused by this searcher)
    :return: set of Entry objects found
    """
    session = Session()
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        # Replace spaces on the *str* before any encoding: the original
        # encoded to bytes first and then called .replace(' ', '+'),
        # which raises TypeError on Python 3 (str args to bytes.replace),
        # and concatenating bytes into the URL str would also fail.
        search_string_normalized = normalize_unicode(clean_title(search_string)).replace(' ', '+')
        url = 'http://www.elitetorrent.net/busqueda/' + search_string_normalized
        log.debug('Fetching URL for `%s`: %s' % (search_string, url))

        page = session.get(url).content
        soup = get_soup(page)

        for result in soup.findAll('a', 'nombre'):
            entry = Entry()
            entry['title'] = result['title']
            # href looks like /<type>/<id>/...; the id segment feeds the
            # get-torrent endpoint.
            entry['url'] = 'http://www.elitetorrent.net/get-torrent/' + result['href'].split('/')[2]
            log.debug('Adding entry `%s`: %s' % (entry['title'], entry['url']))
            entries.add(entry)

    return entries
def search(self, task, entry, config=None):
    """
    Search for entries on SceneAccess.

    Uses the task's request session (rate-limited to one request per
    7 seconds), logging in first if no cookies are present, and converts
    the scraped size column to MiB manually.

    :param task: currently running task; supplies the requests session
    :param entry: entry to search for; uses ``search_strings`` or ``title``
    :param config: dict with ``username``, ``password`` and optional
        ``gravity_multiplier``
    :return: set of Entry objects found
    """
    session = task.requests
    # Throttle requests so the site is hit at most once every 7 seconds.
    if 'sceneaccess.eu' not in session.domain_limiters:
        session.add_domain_limiter(TimedLimiter('sceneaccess.eu', '7 seconds'))

    if not session.cookies:
        log.debug('Logging in to %s...' % URL)
        params = {'username': config['username'],
                  'password': config['password'],
                  'submit': 'come on in'}
        session.post(URL + 'login', data=params)

    if 'gravity_multiplier' in config:
        multip = config['gravity_multiplier']
    else:
        multip = 1

    # Prepare one base search URL per configured category.
    # (Renamed from BASE_URLS: it's a plain local, not a module constant.)
    base_urls = list()
    entries = set()
    for category in self.processCategories(config):
        base_urls.append(URL + '%(url_path)s?method=2%(category_url_string)s' % category)

    # Search...
    for search_string in entry.get('search_strings', [entry['title']]):
        search_string_normalized = normalize_unicode(clean_title(search_string))
        search_string_url_fragment = '&search=' + quote(search_string_normalized.encode('utf8'))

        for url in base_urls:
            url += search_string_url_fragment
            log.debug('Search URL for `%s`: %s' % (search_string, url))

            page = session.get(url).content
            soup = get_soup(page)

            for result in soup.findAll('tr', attrs={'class': 'tt_row'}):
                entry = Entry()
                entry['title'] = result.find('a', href=re.compile(r'details\?id=\d+'))['title']
                entry['url'] = URL + result.find('a', href=re.compile(r'.torrent$'))['href']
                entry['torrent_seeds'] = result.find('td', attrs={'class': 'ttr_seeders'}).text
                entry['torrent_leeches'] = result.find('td', attrs={'class': 'ttr_leechers'}).text
                entry['search_sort'] = torrent_availability(entry['torrent_seeds'],
                                                            entry['torrent_leeches']) * multip

                size = result.find('td', attrs={'class': 'ttr_size'}).text
                # Raw string: '\d'/'\s' are invalid escape sequences in a
                # plain Python 3 string literal.
                size = re.search(r'(\d+(?:[.,]\d+)*)\s?([KMG]B)', size)
                if size:
                    # Site reports decimal (SI) units; stored as MiB.
                    if size.group(2) == 'GB':
                        entry['content_size'] = int(float(size.group(1)) * 1000 ** 3 / 1024 ** 2)
                    elif size.group(2) == 'MB':
                        entry['content_size'] = int(float(size.group(1)) * 1000 ** 2 / 1024 ** 2)
                    elif size.group(2) == 'KB':
                        entry['content_size'] = int(float(size.group(1)) * 1000 / 1024 ** 2)
                    else:
                        entry['content_size'] = int(float(size.group(1)) / 1024 ** 2)

                entries.add(entry)

    return entries
def search(self, entry, config=None):
    """
    Search a Gazelle-style tracker (torrents.php) for matching entries.

    Logs in if the shared session has no cookies, optionally narrows the
    search with category filters, and scrapes result rows into Entries.

    :param entry: entry to search for; uses ``search_strings`` or ``title``
    :param config: dict with ``username``, ``password`` and optional
        ``category`` (name or id, scalar or list) and ``gravity_multiplier``
    :return: set of Entry objects found
    """
    if not session.cookies:
        log.debug('Logging in to %s...' % URL)
        params = {'username': config['username'],
                  'password': config['password'],
                  'keeplogged': '1',
                  'login': '******'}
        session.post(URL + 'login.php', data=params)

    # dict.has_key() was removed in Python 3; membership test replaces it.
    if 'category' in config:
        if not isinstance(config['category'], list):
            config['category'] = [config['category']]
        # Map category names to their numeric ids; ints pass through.
        categories_id = list()
        for category in config['category']:
            if not isinstance(category, int):
                categories_id.append(CATEGORIES.get(category))
            else:
                categories_id.append(category)
        category_url_fragment = ''.join(
            ['&' + quote('filter_cat[%s]' % id) + '=1' for id in categories_id])
    else:
        category_url_fragment = ''

    if 'gravity_multiplier' in config:
        multip = config['gravity_multiplier']
    else:
        multip = 1

    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        search_string_normalized = normalize_unicode(clean_title(search_string))
        search_string_url_fragment = 'searchstr=' + quote(search_string_normalized.encode('utf8'))

        url = URL + 'torrents.php?' + search_string_url_fragment + category_url_fragment
        log.debug('Fetching URL for `%s`: %s' % (search_string, url))

        page = session.get(url).content
        soup = get_soup(page)

        for result in soup.findAll('tr', attrs={'class': 'torrent'}):
            entry = Entry()
            entry['title'] = result.find('span', attrs={'class': 'torrent_name_link'}).text
            entry['url'] = URL + result.find('a', href=re.compile(r'torrents\.php\?action=download')).get('href')
            entry['torrent_seeds'], entry['torrent_leeches'] = [r.text for r in result.findAll('td')[-2:]]
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'],
                                                        entry['torrent_leeches']) * multip

            size = result.findAll('td')[-4].text
            size = re.search(r'(\d+(?:[.,]\d+)*)\s?([KMG]B)', size)
            if size:
                # Site reports decimal (SI) units; convert to MiB.
                if size.group(2) == 'GB':
                    entry['content_size'] = int(float(size.group(1).replace(',', '')) * 1000 ** 3 / 1024 ** 2)
                elif size.group(2) == 'MB':
                    entry['content_size'] = int(float(size.group(1).replace(',', '')) * 1000 ** 2 / 1024 ** 2)
                elif size.group(2) == 'KB':
                    entry['content_size'] = int(float(size.group(1).replace(',', '')) * 1000 / 1024 ** 2)
                else:
                    entry['content_size'] = int(float(size.group(1).replace(',', '')) / 1024 ** 2)

            entries.add(entry)

    return entries
def search(self, task, entry, config):
    """
    Search KAT (kat.cr) via its RSS search feed.

    Each search string is queried twice — sorted by add-time and by
    seeders — and the loop over sorters stops early when a feed page
    comes back with fewer than 25 items (nothing more to gain from the
    second ordering).

    :param task: currently running task; supplies the requests session
    :param entry: entry to search for; uses ``search_strings`` or ``title``
    :param config: dict with optional ``verified`` and ``category`` keys
    :return: set of Entry objects found
    """
    entries = set()
    sorters = ({'field': 'time_add', 'sorder': 'desc'},
               {'field': 'seeders', 'sorder': 'desc'})

    for raw in entry.get('search_strings', [entry['title']]):
        query = clean_title(normalize_unicode(raw).lower())
        fragment = query + (' verified:1' if config.get('verified') else '')
        url = 'https://kat.cr/usearch/%s/' % quote(fragment.encode('utf-8'))

        params = {'rss': 1}
        chosen_category = config.get('category', 'all')
        if chosen_category != 'all':
            params['category'] = chosen_category

        for sorter in sorters:
            params.update(sorter)
            log.debug('requesting: %s' % url)
            try:
                r = task.requests.get(url, params=params, raise_status=False)
            except RequestException as e:
                log.warning('Search resulted in: %s' % e)
                continue
            if not r.content:
                log.debug('No content returned from search.')
                continue
            elif r.status_code != 200:
                log.warning('Search returned %s response code' % r.status_code)
                continue

            rss = feedparser.parse(r.content)
            ex = rss.get('bozo_exception', False)
            if ex:
                log.warning('Got bozo_exception (bad feed)')
                continue

            for item in rss.entries:
                entry = Entry()
                entry['title'] = item.title
                if not item.get('enclosures'):
                    log.warning('Could not get url for entry from KAT. Maybe plugin needs updated?')
                    continue
                entry['url'] = item.enclosures[0]['url']
                entry['torrent_seeds'] = int(item.torrent_seeds)
                entry['torrent_leeches'] = int(item.torrent_peers)
                entry['search_sort'] = torrent_availability(entry['torrent_seeds'],
                                                            entry['torrent_leeches'])
                entry['content_size'] = int(item.torrent_contentlength) / 1024 / 1024
                entry['torrent_info_hash'] = item.torrent_infohash
                entries.add(entry)

            if len(rss.entries) < 25:
                break

    return entries
def search(self, entry, config=None):
    """
    Search for entries on SceneAccess.

    Creates its own requests Session with a browser User-Agent, logs in
    on every call, then scrapes results per configured category.

    :param entry: entry to search for; uses ``search_strings`` or ``title``
    :param config: dict with ``username``, ``password`` and optional
        ``gravity_multiplier``
    :return: set of Entry objects found
    """
    try:
        multip = int(config['gravity_multiplier'])
    except KeyError:
        multip = 1

    # Login...
    params = {'username': config['username'],
              'password': config['password'],
              'submit': 'come on in'}
    session = Session()
    # Bug fix: the header key was 'User agent' (with a space), which is
    # not the HTTP User-Agent header, so the spoofed UA was never sent.
    session.headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:27.0) Gecko/20100101 Firefox/27.0'}
    log.debug('Logging in to %s...' % URL)
    session.post(URL + 'login', data=params)

    # Prepare one base search URL per configured category.
    base_urls = list()
    entries = set()
    for category in self.processCategories(config):
        base_urls.append(URL + '%(url_path)s?method=2%(category_url_string)s' % category)

    # Search...
    for search_string in entry.get('search_strings', [entry['title']]):
        search_string_normalized = normalize_unicode(clean_title(search_string))
        search_string_url_fragment = '&search=' + quote(search_string_normalized.encode('utf8'))

        for url in base_urls:
            url += search_string_url_fragment
            log.debug('Search URL for `%s`: %s' % (search_string, url))

            page = session.get(url).content
            soup = get_soup(page)

            for result in soup.findAll('tr', attrs={'class': 'tt_row'}):
                entry = Entry()
                entry['title'] = result.find('a', href=re.compile(r'details\?id=\d+'))['title']
                entry['url'] = URL + result.find('a', href=re.compile(r'.torrent$'))['href']
                entry['torrent_seeds'] = result.find('td', attrs={'class': 'ttr_seeders'}).string
                entry['torrent_leeches'] = result.find('td', attrs={'class': 'ttr_leechers'}).string
                entry['search_sort'] = torrent_availability(entry['torrent_seeds'],
                                                            entry['torrent_leeches']) * multip

                size = result.find('td', attrs={'class': 'ttr_size'}).next
                # Raw string: '\d'/'\s' are invalid escape sequences in a
                # plain Python 3 string literal.
                size = re.search(r'(\d+(?:[.,]\d+)*)\s?([KMG]B)', size)
                if size:
                    # Site reports decimal (SI) units; stored as MiB.
                    if size.group(2) == 'GB':
                        entry['content_size'] = int(float(size.group(1)) * 1000 ** 3 / 1024 ** 2)
                    elif size.group(2) == 'MB':
                        entry['content_size'] = int(float(size.group(1)) * 1000 ** 2 / 1024 ** 2)
                    elif size.group(2) == 'KB':
                        entry['content_size'] = int(float(size.group(1)) * 1000 / 1024 ** 2)
                    else:
                        entry['content_size'] = int(float(size.group(1)) / 1024 ** 2)

                entries.add(entry)

    return entries
def search(self, entry, config=None):
    """
    Search a Gazelle-style tracker (torrents.php) for matching entries.

    Creates its own requests Session, logs in on every call, optionally
    narrows the search with category filters, and scrapes result rows.

    :param entry: entry to search for; uses ``search_strings`` or ``title``
    :param config: dict with ``username``, ``password``, ``category``
        (name or id, scalar or list) and optional ``gravity_multiplier``
    :return: set of Entry objects found
    """
    try:
        multip = int(config['gravity_multiplier'])
    except KeyError:
        multip = 1

    # NOTE(review): config['category'] is accessed unguarded here, unlike
    # the sibling searcher that checks for its presence — presumably the
    # config schema makes it mandatory for this plugin; confirm.
    if not isinstance(config['category'], list):
        config['category'] = [config['category']]
    # Map category names to their numeric ids; ints pass through.
    categories_id = list()
    for category in config['category']:
        if not isinstance(category, int):
            categories_id.append(CATEGORIES.get(category))
        else:
            categories_id.append(category)
    category_url_fragment = ''.join(
        ['&' + quote('filter_cat[%s]' % id) + '=1' for id in categories_id])

    params = {'username': config['username'],
              'password': config['password'],
              'keeplogged': '1',
              'login': '******'}
    session = Session()
    log.debug('Logging in to %s...' % URL)
    session.post(URL + 'login.php', data=params)

    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        search_string_normalized = normalize_unicode(clean_title(search_string))
        search_string_url_fragment = 'searchstr=' + quote(search_string_normalized.encode('utf8'))

        url = URL + 'torrents.php?' + search_string_url_fragment + category_url_fragment
        log.debug('Fetching URL for `%s`: %s' % (search_string, url))

        page = session.get(url).content
        soup = get_soup(page)

        for result in soup.findAll('tr', attrs={'class': 'torrent'}):
            entry = Entry()
            entry['title'] = result.find('span', attrs={'class': 'torrent_name_link'}).string
            entry['url'] = URL + result.find('a', href=re.compile(r'torrents.php\?action=download'),
                                             attrs={'title': 'Download'})['href']
            entry['torrent_seeds'] = result.findAll('td')[-3].string
            entry['torrent_leeches'] = result.findAll('td')[-2].string
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'],
                                                        entry['torrent_leeches']) * multip

            size = result.findAll('td')[-5].string
            # Raw string: '\d'/'\s' are invalid escape sequences in a
            # plain Python 3 string literal.
            size = re.search(r'(\d+(?:[.,]\d+)*)\s?([KMG]B)', size)
            if size:
                # Site reports decimal (SI) units; convert to MiB.
                if size.group(2) == 'GB':
                    entry['content_size'] = int(float(size.group(1).replace(',', '')) * 1000 ** 3 / 1024 ** 2)
                elif size.group(2) == 'MB':
                    entry['content_size'] = int(float(size.group(1).replace(',', '')) * 1000 ** 2 / 1024 ** 2)
                elif size.group(2) == 'KB':
                    entry['content_size'] = int(float(size.group(1).replace(',', '')) * 1000 / 1024 ** 2)
                else:
                    entry['content_size'] = int(float(size.group(1).replace(',', '')) / 1024 ** 2)

            entries.add(entry)

    return entries