def seen_search(options, session=None):
    """CLI: search the seen database and print matching entries as a table."""
    value = options.search_term
    if is_imdb_url(value):
        console('IMDB url detected, parsing ID')
        parsed = extract_id(value)
        if parsed:
            value = parsed
        else:
            console("Could not parse IMDB ID")
    else:
        value = '%' + options.search_term + '%'

    matches = db.search(value=value, status=None, session=session)
    porcelain = options.table_type == 'porcelain'

    rows = []
    for item in matches.all():
        rows.append(['Title', item.title])
        for field in item.fields:
            if field.field.lower() == 'title':
                continue
            rows.append(['{}'.format(field.field.upper()), str(field.value)])
        rows.append(['Task', item.task])
        rows.append(['Added', item.added.strftime('%Y-%m-%d %H:%M')])
        if not porcelain:
            # blank separator row between entries
            rows.append(['', ''])

    if not rows:
        console('No results found for search')
        return
    if not porcelain:
        # drop trailing separator row
        del rows[-1]

    try:
        table = TerminalTable(options.table_type, rows, wrap_columns=[1])
        table.table.inner_heading_row_border = False
        console(table.output)
    except TerminalTableError as e:
        console('ERROR: %s' % str(e))
def upgrade(ver, session):
    """Schema migration for the seen plugin tables.

    :param ver: current schema version (None for a fresh / pre-versioned db)
    :param session: SQLAlchemy session bound to the database being upgraded
    :return: the schema version after applying all pending migration steps
    """
    if ver is None:
        log.info('Converting seen imdb_url to imdb_id for seen movies.')
        field_table = table_schema('seen_field', session)
        for row in session.execute(
            select([field_table.c.id, field_table.c.value], field_table.c.field == 'imdb_url')
        ):
            # Replace each stored imdb_url field with the bare imdb_id.
            new_values = {'field': 'imdb_id', 'value': extract_id(row['value'])}
            session.execute(update(field_table, field_table.c.id == row['id'], new_values))
        ver = 1
    if ver == 1:
        field_table = table_schema('seen_field', session)
        log.info('Adding index to seen_field table.')
        Index('ix_seen_field_seen_entry_id', field_table.c.seen_entry_id).create(bind=session.bind)
        ver = 2
    if ver == 2:
        log.info('Adding local column to seen_entry table')
        table_add_column('seen_entry', 'local', Boolean, session, default=False)
        ver = 3
    if ver == 3:
        # setting the default to False in the last migration was broken, fix the data
        log.info('Repairing seen table')
        entry_table = table_schema('seen_entry', session)
        session.execute(update(entry_table, entry_table.c.local == None, {'local': False}))
        ver = 4
    return ver
def on_task_metainfo(self, task, config):
    """Populate imdb_id/imdb_url on entries by scanning their description for imdb links."""
    # check if disabled (value set to false)
    if 'scan_imdb' in task.config:
        if not task.config['scan_imdb']:
            return
    for entry in task.entries:
        # Don't override already populated imdb_ids
        if entry.get('imdb_id', eval_lazy=False):
            continue
        if not entry.get('description'):
            continue
        # Fix: escape the dot so 'imdb.com' no longer matches e.g. 'imdbxcom'.
        urls = re.findall(r'\bimdb\.com/title/tt\d+\b', entry['description'])
        # Find unique imdb ids
        imdb_ids = [_f for _f in set(extract_id(url) for url in urls) if _f]
        if not imdb_ids:
            continue
        if len(imdb_ids) > 1:
            logger.debug('Found multiple imdb ids; not using any of: {}', ' '.join(imdb_ids))
            continue
        entry['imdb_id'] = imdb_ids[0]
        entry['imdb_url'] = make_url(entry['imdb_id'])
        logger.debug('Found imdb url in description {}', entry['imdb_url'])
def on_task_metainfo(self, task, config):
    """Populate imdb_id/imdb_url on entries by scanning their description for imdb links."""
    # check if disabled (value set to false)
    if 'scan_imdb' in task.config:
        if not task.config['scan_imdb']:
            return
    for entry in task.entries:
        # Don't override already populated imdb_ids
        if entry.get('imdb_id', eval_lazy=False):
            continue
        if 'description' not in entry:
            continue
        # Fix: escape the dot so 'imdb.com' no longer matches e.g. 'imdbxcom'.
        urls = re.findall(r'\bimdb\.com/title/tt\d+\b', entry['description'])
        # Find unique imdb ids
        imdb_ids = [_f for _f in set(extract_id(url) for url in urls) if _f]
        if not imdb_ids:
            continue
        if len(imdb_ids) > 1:
            log.debug('Found multiple imdb ids; not using any of: %s' % ' '.join(imdb_ids))
            continue
        entry['imdb_id'] = imdb_ids[0]
        entry['imdb_url'] = make_url(entry['imdb_id'])
        log.debug('Found imdb url in description %s' % entry['imdb_url'])
def parse_site(self, url, task):
    """Parse configured url and return releases array.

    :param url: page url to fetch and scrape
    :param task: Task instance, used for HTTP requests and url-rewriter checks
    :return: list of release dicts with 'title', optional 'imdb_id' and 'url'
    :raises PluginError: when the input page cannot be fetched
    """
    try:
        page = task.requests.get(url).content
    except RequestException as e:
        raise plugin.PluginError('Error getting input page: %s' % e)
    soup = get_soup(page)

    releases = []
    for entry in soup.find_all('div', attrs={'class': 'entry'}):
        release = {}
        title = entry.find('h2')
        if not title:
            log.debug('No h2 entrytitle')
            continue
        release['title'] = title.a.contents[0].strip()

        log.debug('Processing title %s' % (release['title']))

        for link in entry.find_all('a'):
            # no content in the link
            if not link.contents:
                continue
            link_name = link.contents[0]
            if link_name is None:
                continue
            # skip links whose first child is a tag rather than text
            if not isinstance(link_name, NavigableString):
                continue
            link_name = link_name.strip().lower()
            if link.has_attr('href'):
                link_href = link['href']
            else:
                continue
            log.debug('found link %s -> %s' % (link_name, link_href))
            # handle imdb link
            if link_name.lower() == 'imdb':
                log.debug('found imdb link %s' % link_href)
                release['imdb_id'] = extract_id(link_href)

            # test if entry with this url would be rewritable by known plugins (ie. downloadable)
            temp = {}
            temp['title'] = release['title']
            temp['url'] = link_href
            urlrewriting = plugin.get('urlrewriting', self)
            if urlrewriting.url_rewritable(task, temp):
                release['url'] = link_href
                log.trace('--> accepting %s (resolvable)' % link_href)
            else:
                log.trace('<-- ignoring %s (non-resolvable)' % link_href)

        # reject if no torrent link
        if 'url' not in release:
            from flexget.utils.log import log_once

            log_once(
                '%s skipped due to missing or unsupported (unresolvable) download link'
                % (release['title']),
                log,
            )
        else:
            releases.append(release)

    return releases
def seen_search(options, session=None):
    """Search the seen database and render matches as a two-column table."""
    term = options.search_term
    if is_imdb_url(term):
        console('IMDB url detected, parsing ID')
        parsed_id = extract_id(term)
        if parsed_id:
            term = parsed_id
        else:
            console("Could not parse IMDB ID")
    else:
        term = '%' + options.search_term + '%'

    found = db.search(value=term, status=None, session=session)
    table = TerminalTable('Field', 'Value', table_type=options.table_type)
    for item in found.all():
        table.add_row('Title', item.title)
        for field in item.fields:
            if field.field.lower() == 'title':
                continue
            table.add_row('{}'.format(field.field.upper()), str(field.value))
        table.add_row('Task', item.task)
        table.add_row('Added', item.added.strftime('%Y-%m-%d %H:%M'), end_section=True)

    if not table.rows:
        console('No results found for search')
        return
    console(table)
def seen_add(manager: Manager, options):
    """CLI: mark a value as seen, optionally scoped to a single task.

    Bug fix: the original `if options.task and not options.task in manager.tasks ... else`
    routed a missing --task option through the else branch, setting task to None
    (and local to True) instead of keeping the default task.
    """
    DEFAULT_TASK = 'cli_add'

    seen_name = options.add_value
    if is_imdb_url(seen_name):
        console('IMDB url detected, try to parse ID')
        imdb_id = extract_id(seen_name)
        if imdb_id:
            seen_name = imdb_id
        else:
            console("Could not parse IMDB ID")

    task = DEFAULT_TASK
    local = None
    if options.task:
        if options.task not in manager.tasks:
            console(f"Task `{options.task}` not in config")
            return
        task = options.task
        local = True

    db.add(seen_name, task, {'cli_add': seen_name}, local=local)
    if task == DEFAULT_TASK:
        console(f'Added `{seen_name}` as seen. This will affect all tasks.')
    else:
        console(f'Added `{seen_name}` as seen. This will affect `{task}` task.')
def seen_forget(manager: Manager, options):
    """Remove matching entries from the seen database."""
    name = options.forget_value
    if is_imdb_url(name):
        parsed = extract_id(name)
        if parsed:
            name = parsed

    selected_tasks = None
    if options.tasks:
        selected_tasks = []
        for pattern in options.tasks:
            try:
                matched = manager.matching_tasks(pattern)
            except ValueError as e:
                console(e)
                continue
            selected_tasks.extend(t for t in matched if t not in selected_tasks)

    # If tasks are specified it should use pattern matching as search
    if selected_tasks:
        # escape literal SQL wildcards, then map shell-style wildcards to SQL ones
        name = name.replace("%", "\\%").replace("_", "\\_")
        name = name.replace("*", "%").replace("?", "_")

    count, fcount = db.forget(name, tasks=selected_tasks, test=options.test)
    console(f'Removed {count} titles ({fcount} fields)')
    manager.config_changed()
def parse_html_list(self, task, config, url, params, headers):
    """Scrape an imdb list's HTML pages into entries.

    :param task: Task instance (provides the requests session via fetch_page)
    :param config: plugin configuration; config['list'] used for log messages
    :param url: imdb list url
    :param params: query params dict; 'page' is mutated while paginating
    :param headers: HTTP headers for the requests
    :return: list of entries, or None when the list is empty
    :raises PluginError: when the item count cannot be parsed as a number
    """
    page = self.fetch_page(task, url, params, headers)
    soup = get_soup(page.text)
    try:
        item_text = soup.find('div', class_='lister-total-num-results').string.split()
        total_item_count = int(item_text[0].replace(',', ''))
        log.verbose('imdb list contains %d items', total_item_count)
    except AttributeError:
        # the results-count div is missing entirely -> treat as an empty list
        total_item_count = 0
    except (ValueError, TypeError) as e:
        # TODO Something is wrong if we get a ValueError, I think
        raise plugin.PluginError(
            'Received invalid movie count: %s ; %s'
            % (soup.find('div', class_='lister-total-num-results').string, e)
        )

    if not total_item_count:
        log.verbose('No movies were found in imdb list: %s', config['list'])
        return

    entries = []
    items_processed = 0
    page_no = 1
    while items_processed < total_item_count:
        # Fetch the next page unless we've just begun
        if items_processed:
            page_no += 1
            params['page'] = page_no
            page = self.fetch_page(task, url, params, headers)
            soup = get_soup(page.text)

        items = soup.find_all('div', class_='lister-item')
        if not items:
            log.debug('no items found on page: %s, aborting.', url)
            break
        log.debug('%d items found on page %d', len(items), page_no)

        for item in items:
            items_processed += 1
            a = item.find('h3', class_='lister-item-header').find('a')
            if not a:
                log.debug('no title link found for row, skipping')
                continue

            link = ('http://www.imdb.com' + a.get('href')).rstrip('/')
            entry = Entry()
            entry['title'] = a.text
            try:
                year = int(item.find('span', class_='lister-item-year').text)
                entry['title'] += ' (%s)' % year
                entry['imdb_year'] = year
            except (ValueError, TypeError):
                # year span missing or not a plain integer; leave title as-is
                pass
            entry['url'] = link
            entry['imdb_id'] = extract_id(link)
            entry['imdb_name'] = entry['title']
            entries.append(entry)

    return entries
def seen_forget(manager, options):
    """Remove all seen entries matching the given value."""
    target = options.forget_value
    if is_imdb_url(target):
        parsed = extract_id(target)
        if parsed:
            target = parsed

    count, fcount = db.forget(target)
    console('Removed %s titles (%s fields)' % (count, fcount))
    manager.config_changed()
def seen_add(options):
    """Mark a value as seen; applies to all tasks."""
    name = options.add_value
    if is_imdb_url(name):
        console('IMDB url detected, try to parse ID')
        parsed = extract_id(name)
        if parsed:
            name = parsed
        else:
            console("Could not parse IMDB ID")

    db.add(name, 'cli_add', {'cli_add': name})
    console('Added %s as seen. This will affect all tasks.' % name)
def search(self, task, entry, config):
    """Search PirateTheNet for releases matching the entry.

    Logs in on first use (module-level session), then queries once per search
    string, filtering results by imdb_id when the entry carries one.

    :return: set of candidate entries
    :raises PluginError: when login fails
    """
    if not session.cookies:
        try:
            login_params = {
                'username': config['username'],
                'password': config['password'],
                'loginkey': config['login_key'],
            }
            r = session.post(
                'https://piratethenet.org/takelogin.php', data=login_params, verify=False
            )
        except requests.RequestException as e:
            log.error('Error while logging in to PtN: %s', e)
            raise plugin.PluginError('Could not log in to PtN')
        passkey = re.search(r'passkey=([\d\w]+)"', r.text)
        if not passkey:
            log.error("It doesn't look like PtN login worked properly.")
            raise plugin.PluginError('PTN cookie info invalid')

    search_params = default_search_params.copy()
    if 'movie_name' in entry:
        if 'movie_year' in entry:
            search_params['advancedsearchparameters'] = '[year=%s]' % entry['movie_year']
        searches = [entry['movie_name']]
    else:
        searches = entry.get('search_strings', [entry['title']])

    results = set()
    for search in searches:
        search_params['searchstring'] = search
        try:
            r = session.get('http://piratethenet.org/torrentsutils.php', params=search_params)
        except requests.RequestException as e:
            log.error('Error searching ptn: %s' % e)
            continue
        # html5parser doesn't work properly for some reason
        soup = get_soup(r.text, parser='html.parser')
        for movie in soup.select('.torrentstd'):
            # Fix: raw string — '\.' in a non-raw literal is a deprecated escape sequence.
            imdb_id = movie.find('a', href=re.compile(r'.*imdb\.com/title/tt'))
            if imdb_id:
                imdb_id = extract_id(imdb_id['href'])
            if imdb_id and 'imdb_id' in entry and imdb_id != entry['imdb_id']:
                continue
            results.update(self.create_entries(movie, imdb_id=imdb_id))
    return results
def search(self, task, entry, config):
    """Search PirateTheNet for releases matching the entry.

    :return: set of candidate entries
    :raises PluginError: when login fails
    """
    if not session.cookies:
        try:
            login_params = {
                'username': config['username'],
                'password': config['password'],
                'loginkey': config['login_key'],
            }
            r = session.post(
                'https://piratethenet.org/takelogin.php', data=login_params, verify=False
            )
        except requests.RequestException as e:
            log.error('Error while logging in to PtN: %s', e)
            raise plugin.PluginError('Could not log in to PtN')
        passkey = re.search(r'passkey=([\d\w]+)"', r.text)
        if not passkey:
            log.error("It doesn't look like PtN login worked properly.")
            raise plugin.PluginError('PTN cookie info invalid')

    search_params = default_search_params.copy()
    if 'movie_name' in entry:
        if 'movie_year' in entry:
            search_params['advancedsearchparameters'] = '[year=%s]' % entry['movie_year']
        searches = [entry['movie_name']]
    else:
        searches = entry.get('search_strings', [entry['title']])

    results = set()
    for search in searches:
        search_params['searchstring'] = search
        try:
            r = session.get('http://piratethenet.org/torrentsutils.php', params=search_params)
        except requests.RequestException as e:
            log.error('Error searching ptn: %s' % e)
            continue
        # html5parser doesn't work properly for some reason
        soup = get_soup(r.text, parser='html.parser')
        for movie in soup.select('.torrentstd'):
            # Fix: raw string — '\.' in a non-raw literal is a deprecated escape sequence.
            imdb_id = movie.find('a', href=re.compile(r'.*imdb\.com/title/tt'))
            if imdb_id:
                imdb_id = extract_id(imdb_id['href'])
            if imdb_id and 'imdb_id' in entry and imdb_id != entry['imdb_id']:
                continue
            results.update(self.create_entries(movie, imdb_id=imdb_id))
    return results
def parse_rlslog(self, rlslog_url, task): """ :param rlslog_url: Url to parse from :param task: Task instance :return: List of release dictionaries """ # BeautifulSoup doesn't seem to work if data is already decoded to unicode :/ soup = get_soup(task.requests.get(rlslog_url, timeout=25).content) releases = [] for entry in soup.find_all('div', attrs={'class': 'entry'}): release = {} h3 = entry.find('h3', attrs={'class': 'entrytitle'}) if not h3: logger.debug('FAIL: No h3 entrytitle') continue release['title'] = h3.a.contents[0].strip() entrybody = entry.find('div', attrs={'class': 'entrybody'}) if not entrybody: logger.debug('FAIL: No entrybody') continue logger.trace('Processing title {}', release['title']) # find imdb url link_imdb = entrybody.find('a', text=re.compile(r'imdb', re.IGNORECASE)) if link_imdb: release['imdb_id'] = extract_id(link_imdb['href']) release['imdb_url'] = link_imdb['href'] # find google search url google = entrybody.find('a', href=re.compile(r'google', re.IGNORECASE)) if google: release['url'] = google['href'] releases.append(release) else: log_once( '%s skipped due to missing or unsupported download link' % (release['title']), logger, ) return releases
def lazy_loader(self, entry, language):
    """Look up TMDB info for the entry and populate its fields."""
    tmdb_lookup = plugin.get('api_tmdb', self).lookup
    # prefer an explicit imdb_id; otherwise try to pull one out of imdb_url
    imdb_id = entry.get('imdb_id', eval_lazy=False) or extract_id(
        entry.get('imdb_url', eval_lazy=False)
    )
    try:
        with Session() as session:
            movie_info = tmdb_lookup(
                smart_match=entry['title'],
                tmdb_id=entry.get('tmdb_id', eval_lazy=False),
                imdb_id=imdb_id,
                language=language,
                session=session,
            )
            entry.update_using_map(self.field_map, movie_info)
    except LookupError:
        log_once('TMDB lookup failed for %s' % entry['title'], log, logging.WARN)
def parse_rlslog(self, rlslog_url, task): """ :param rlslog_url: Url to parse from :param task: Task instance :return: List of release dictionaries """ # BeautifulSoup doesn't seem to work if data is already decoded to unicode :/ soup = get_soup(task.requests.get(rlslog_url, timeout=25).content) releases = [] for entry in soup.find_all('div', attrs={'class': 'entry'}): release = {} h3 = entry.find('h3', attrs={'class': 'entrytitle'}) if not h3: log.debug('FAIL: No h3 entrytitle') continue release['title'] = h3.a.contents[0].strip() entrybody = entry.find('div', attrs={'class': 'entrybody'}) if not entrybody: log.debug('FAIL: No entrybody') continue log.trace('Processing title %s' % (release['title'])) # find imdb url link_imdb = entrybody.find('a', text=re.compile(r'imdb', re.IGNORECASE)) if link_imdb: release['imdb_id'] = extract_id(link_imdb['href']) release['imdb_url'] = link_imdb['href'] # find google search url google = entrybody.find('a', href=re.compile(r'google', re.IGNORECASE)) if google: release['url'] = google['href'] releases.append(release) else: log_once( '%s skipped due to missing or unsupported download link' % (release['title']), log, ) return releases
def lazy_loader(self, entry, language):
    """Look up TMDB info for the entry and populate its fields."""
    tmdb_lookup = plugin.get('api_tmdb', self).lookup
    # prefer an explicit imdb_id; otherwise try to pull one out of imdb_url
    imdb_id = entry.get('imdb_id', eval_lazy=False) or extract_id(
        entry.get('imdb_url', eval_lazy=False)
    )
    try:
        with Session() as session:
            movie_info = tmdb_lookup(
                smart_match=entry['title'],
                tmdb_id=entry.get('tmdb_id', eval_lazy=False),
                imdb_id=imdb_id,
                language=language,
                session=session,
            )
            entry.update_using_map(self.field_map, movie_info)
    except LookupError:
        log_once('TMDB lookup failed for %s' % entry['title'], log, logging.WARN)
def seen_search(manager: Manager, options, session=None):
    """Search the seen database, optionally restricted to tasks, and print results."""
    term = options.search_term
    if is_imdb_url(term):
        console('IMDB url detected, parsing ID')
        parsed_id = extract_id(term)
        if parsed_id:
            term = parsed_id
        else:
            console("Could not parse IMDB ID")
    else:
        # escape literal SQL wildcards, then map shell-style wildcards to SQL ones
        term = term.replace("%", "\\%").replace("_", "\\_")
        term = term.replace("*", "%").replace("?", "_")

    selected_tasks = None
    if options.tasks:
        selected_tasks = []
        for pattern in options.tasks:
            try:
                matched = manager.matching_tasks(pattern)
            except ValueError as e:
                console(e)
                continue
            selected_tasks.extend(t for t in matched if t not in selected_tasks)

    found = db.search(value=term, status=None, tasks=selected_tasks, session=session)
    table = TerminalTable('Field', 'Value', table_type=options.table_type)
    for item in found.all():
        table.add_row('Title', item.title)
        for field in item.fields:
            if field.field.lower() == 'title':
                continue
            table.add_row('{}'.format(field.field.upper()), str(field.value))
        table.add_row('Task', item.task)
        if item.local:
            table.add_row('Local', 'Yes')
        table.add_row('Added', item.added.strftime('%Y-%m-%d %H:%M'), end_section=True)

    if not table.rows:
        console('No results found for search')
        return
    console(table)
class ImdbLookup:
    """
    Retrieves imdb information for entries.

    Also provides imdb lookup functionality to all other imdb related plugins.

    Example::

        imdb_lookup: yes
    """

    # Maps entry field names to attributes (or callables) of the cached db.Movie.
    field_map = {
        'imdb_url': 'url',
        'imdb_id': lambda movie: extract_id(movie.url),
        'imdb_name': 'title',
        'imdb_original_name': 'original_title',
        'imdb_photo': 'photo',
        'imdb_plot_outline': 'plot_outline',
        'imdb_score': 'score',
        'imdb_votes': 'votes',
        'imdb_meta_score': 'meta_score',
        'imdb_year': 'year',
        'imdb_genres': lambda movie: [genre.name for genre in movie.genres],
        'imdb_languages': lambda movie: [lang.language.name for lang in movie.languages],
        'imdb_actors': lambda movie: dict(
            (actor.imdb_id, actor.name) for actor in movie.actors),
        'imdb_directors': lambda movie: dict(
            (director.imdb_id, director.name) for director in movie.directors),
        'imdb_writers': lambda movie: dict(
            (writer.imdb_id, writer.name) for writer in movie.writers),
        'imdb_mpaa_rating': 'mpaa_rating',
        # Generic fields filled by all movie lookup plugins:
        'movie_name': 'title',
        'movie_year': 'year',
    }

    schema = {'type': 'boolean'}

    @plugin.priority(130)
    def on_task_metainfo(self, task, config):
        # Register lazy imdb fields on every entry unless the plugin is disabled.
        if not config:
            return
        for entry in task.entries:
            self.register_lazy_fields(entry)

    def register_lazy_fields(self, entry):
        # All field_map keys will be resolved on first access via lazy_loader.
        entry.register_lazy_func(self.lazy_loader, self.field_map)

    def lazy_loader(self, entry):
        """Does the lookup for this entry and populates the entry fields."""
        try:
            self.lookup(entry)
        except plugin.PluginError as e:
            log_once(str(e.value).capitalize(), logger=log)

    @with_session
    def imdb_id_lookup(self, movie_title=None, movie_year=None, raw_title=None, session=None):
        """
        Perform faster lookup providing just imdb_id.
        Falls back to using basic lookup if data cannot be found from cache.

        .. note:: API will be changed, it's dumb to return None on errors AND
           raise PluginError on some else

        :param movie_title: Name of the movie
        :param raw_title: Raw entry title
        :return: imdb id or None
        :raises PluginError: Failure reason
        """
        if movie_title:
            log.debug('imdb_id_lookup: trying with title: %s' % movie_title)
            query = session.query(
                db.Movie).filter(db.Movie.title == movie_title)
            if movie_year is not None:
                query = query.filter(db.Movie.year == movie_year)
            movie = query.first()
            if movie:
                log.debug('--> success! got %s returning %s' % (movie, movie.imdb_id))
                return movie.imdb_id
        if raw_title:
            log.debug('imdb_id_lookup: trying cache with: %s' % raw_title)
            result = (session.query(db.SearchResult).filter(
                db.SearchResult.title == raw_title).first())
            if result:
                # this title is hopeless, give up ..
                if result.fails:
                    return None
                log.debug('--> success! got %s returning %s' % (result, result.imdb_id))
                return result.imdb_id
        if raw_title:
            # last hope with hacky lookup
            fake_entry = Entry(raw_title, '')
            self.lookup(fake_entry)
            return fake_entry['imdb_id']

    @plugin.internet(log)
    @with_session
    def lookup(self, entry, search_allowed=True, session=None):
        """
        Perform imdb lookup for entry.

        :param entry: Entry instance
        :param search_allowed: Allow fallback to search
        :raises PluginError: Failure reason
        """
        from flexget.manager import manager

        if entry.get('imdb_id', eval_lazy=False):
            log.debug('No title passed. Lookup for %s' % entry['imdb_id'])
        elif entry.get('imdb_url', eval_lazy=False):
            log.debug('No title passed. Lookup for %s' % entry['imdb_url'])
        elif entry.get('title', eval_lazy=False):
            log.debug('lookup for %s' % entry['title'])
        else:
            raise plugin.PluginError(
                'looking up IMDB for entry failed, no title, imdb_url or imdb_id passed.'
            )

        # if imdb_id is included, build the url.
        if entry.get('imdb_id', eval_lazy=False) and not entry.get(
                'imdb_url', eval_lazy=False):
            entry['imdb_url'] = make_url(entry['imdb_id'])

        # make sure imdb url is valid
        if entry.get('imdb_url', eval_lazy=False):
            imdb_id = extract_id(entry['imdb_url'])
            if imdb_id:
                entry['imdb_url'] = make_url(imdb_id)
            else:
                log.debug('imdb url %s is invalid, removing it' % entry['imdb_url'])
                entry['imdb_url'] = ''

        # no imdb_url, check if there is cached result for it or if the
        # search is known to fail
        if not entry.get('imdb_url', eval_lazy=False):
            result = (session.query(db.SearchResult).filter(
                db.SearchResult.title == entry['title']).first())
            if result:
                # TODO: 1.2 this should really be checking task.options.retry
                if result.fails and not manager.options.execute.retry:
                    # this movie cannot be found, not worth trying again ...
                    log.debug('%s will fail lookup' % entry['title'])
                    raise plugin.PluginError('IMDB lookup failed for %s' % entry['title'])
                else:
                    if result.url:
                        log.trace('Setting imdb url for %s from db' % entry['title'])
                        entry['imdb_id'] = result.imdb_id
                        entry['imdb_url'] = result.url

        # no imdb url, but information required, try searching
        if not entry.get('imdb_url', eval_lazy=False) and search_allowed:
            log.verbose('Searching from imdb `%s`' % entry['title'])
            search = ImdbSearch()
            search_name = entry.get('movie_name', entry['title'], eval_lazy=False)
            search_result = search.smart_match(search_name)
            if search_result:
                entry['imdb_url'] = search_result['url']
                # store url for this movie, so we don't have to search on every run
                result = db.SearchResult(entry['title'], entry['imdb_url'])
                session.add(result)
                session.commit()
                log.verbose('Found %s' % (entry['imdb_url']))
            else:
                log_once(
                    'IMDB lookup failed for %s' % entry['title'],
                    log,
                    logging.WARN,
                    session=session,
                )
                # store FAIL for this title
                result = db.SearchResult(entry['title'])
                result.fails = True
                session.add(result)
                session.commit()
                raise plugin.PluginError('Title `%s` lookup failed' % entry['title'])

        # check if this imdb page has been parsed & cached
        movie = session.query(
            db.Movie).filter(db.Movie.url == entry['imdb_url']).first()

        # If we have a movie from cache, we are done
        if movie and not movie.expired:
            entry.update_using_map(self.field_map, movie)
            return

        # Movie was not found in cache, or was expired
        if movie is not None:
            if movie.expired:
                log.verbose('Movie `%s` details expired, refreshing ...' % movie.title)
            # Remove the old movie, we'll store another one later.
            session.query(db.MovieLanguage).filter(
                db.MovieLanguage.movie_id == movie.id).delete()
            session.query(
                db.Movie).filter(db.Movie.url == entry['imdb_url']).delete()
            session.commit()

        # search and store to cache
        if 'title' in entry:
            log.verbose('Parsing imdb for `%s`' % entry['title'])
        else:
            log.verbose('Parsing imdb for `%s`' % entry['imdb_id'])
        try:
            movie = self._parse_new_movie(entry['imdb_url'], session)
        except UnicodeDecodeError:
            log.error(
                'Unable to determine encoding for %s. Installing chardet library may help.'
                % entry['imdb_url'])
            # store cache so this will not be tried again
            movie = db.Movie()
            movie.url = entry['imdb_url']
            session.add(movie)
            session.commit()
            raise plugin.PluginError('UnicodeDecodeError')
        except ValueError as e:
            # TODO: might be a little too broad catch, what was this for anyway? ;P
            if manager.options.debug:
                log.exception(e)
            raise plugin.PluginError(
                'Invalid parameter: %s' % entry['imdb_url'], log)

        for att in [
            'title',
            'score',
            'votes',
            'meta_score',
            'year',
            'genres',
            'languages',
            'actors',
            'directors',
            'writers',
            'mpaa_rating',
        ]:
            log.trace('movie.%s: %s' % (att, getattr(movie, att)))

        # Update the entry fields
        entry.update_using_map(self.field_map, movie)

    def _parse_new_movie(self, imdb_url, session):
        """
        Get Movie object by parsing imdb page and save movie into the database.

        :param imdb_url: IMDB url
        :param session: Session to be used
        :return: Newly added Movie
        """
        parser = ImdbParser()
        parser.parse(imdb_url)
        # store to database
        movie = db.Movie()
        movie.photo = parser.photo
        movie.title = parser.name
        movie.original_title = parser.original_name
        movie.score = parser.score
        movie.votes = parser.votes
        movie.meta_score = parser.meta_score
        movie.year = parser.year
        movie.mpaa_rating = parser.mpaa_rating
        movie.plot_outline = parser.plot_outline
        movie.url = imdb_url
        for name in parser.genres:
            genre = session.query(
                db.Genre).filter(db.Genre.name == name).first()
            if not genre:
                genre = db.Genre(name)
            movie.genres.append(genre)  # pylint:disable=E1101
        for index, name in enumerate(parser.languages):
            language = session.query(
                db.Language).filter(db.Language.name == name).first()
            if not language:
                language = db.Language(name)
            # prominence preserves the order languages appeared on the imdb page
            movie.languages.append(db.MovieLanguage(language, prominence=index))
        for imdb_id, name in parser.actors.items():
            actor = session.query(
                db.Actor).filter(db.Actor.imdb_id == imdb_id).first()
            if not actor:
                actor = db.Actor(imdb_id, name)
            movie.actors.append(actor)  # pylint:disable=E1101
        for imdb_id, name in parser.directors.items():
            director = session.query(
                db.Director).filter(db.Director.imdb_id == imdb_id).first()
            if not director:
                director = db.Director(imdb_id, name)
            movie.directors.append(director)  # pylint:disable=E1101
        for imdb_id, name in parser.writers.items():
            writer = session.query(
                db.Writer).filter(db.Writer.imdb_id == imdb_id).first()
            if not writer:
                writer = db.Writer(imdb_id, name)
            movie.writers.append(writer)  # pylint:disable=E1101
        # so that we can track how long since we've updated the info later
        movie.updated = datetime.now()
        session.add(movie)
        return movie

    @property
    def movie_identifier(self):
        """Returns the plugin main identifier type"""
        return 'imdb_id'
def lookup(self, entry, search_allowed=True, session=None):
    """
    Perform imdb lookup for entry.

    :param entry: Entry instance
    :param search_allowed: Allow fallback to search
    :raises PluginError: Failure reason
    """
    from flexget.manager import manager

    if entry.get('imdb_id', eval_lazy=False):
        log.debug('No title passed. Lookup for %s' % entry['imdb_id'])
    elif entry.get('imdb_url', eval_lazy=False):
        log.debug('No title passed. Lookup for %s' % entry['imdb_url'])
    elif entry.get('title', eval_lazy=False):
        log.debug('lookup for %s' % entry['title'])
    else:
        raise plugin.PluginError(
            'looking up IMDB for entry failed, no title, imdb_url or imdb_id passed.'
        )

    # if imdb_id is included, build the url.
    if entry.get('imdb_id', eval_lazy=False) and not entry.get(
            'imdb_url', eval_lazy=False):
        entry['imdb_url'] = make_url(entry['imdb_id'])

    # make sure imdb url is valid
    if entry.get('imdb_url', eval_lazy=False):
        imdb_id = extract_id(entry['imdb_url'])
        if imdb_id:
            entry['imdb_url'] = make_url(imdb_id)
        else:
            log.debug('imdb url %s is invalid, removing it' % entry['imdb_url'])
            entry['imdb_url'] = ''

    # no imdb_url, check if there is cached result for it or if the
    # search is known to fail
    if not entry.get('imdb_url', eval_lazy=False):
        result = (session.query(db.SearchResult).filter(
            db.SearchResult.title == entry['title']).first())
        if result:
            # TODO: 1.2 this should really be checking task.options.retry
            if result.fails and not manager.options.execute.retry:
                # this movie cannot be found, not worth trying again ...
                log.debug('%s will fail lookup' % entry['title'])
                raise plugin.PluginError('IMDB lookup failed for %s' % entry['title'])
            else:
                if result.url:
                    log.trace('Setting imdb url for %s from db' % entry['title'])
                    entry['imdb_id'] = result.imdb_id
                    entry['imdb_url'] = result.url

    # no imdb url, but information required, try searching
    if not entry.get('imdb_url', eval_lazy=False) and search_allowed:
        log.verbose('Searching from imdb `%s`' % entry['title'])
        search = ImdbSearch()
        search_name = entry.get('movie_name', entry['title'], eval_lazy=False)
        search_result = search.smart_match(search_name)
        if search_result:
            entry['imdb_url'] = search_result['url']
            # store url for this movie, so we don't have to search on every run
            result = db.SearchResult(entry['title'], entry['imdb_url'])
            session.add(result)
            session.commit()
            log.verbose('Found %s' % (entry['imdb_url']))
        else:
            log_once(
                'IMDB lookup failed for %s' % entry['title'],
                log,
                logging.WARN,
                session=session,
            )
            # store FAIL for this title
            result = db.SearchResult(entry['title'])
            result.fails = True
            session.add(result)
            session.commit()
            raise plugin.PluginError('Title `%s` lookup failed' % entry['title'])

    # check if this imdb page has been parsed & cached
    movie = session.query(
        db.Movie).filter(db.Movie.url == entry['imdb_url']).first()

    # If we have a movie from cache, we are done
    if movie and not movie.expired:
        entry.update_using_map(self.field_map, movie)
        return

    # Movie was not found in cache, or was expired
    if movie is not None:
        if movie.expired:
            log.verbose('Movie `%s` details expired, refreshing ...' % movie.title)
        # Remove the old movie, we'll store another one later.
        session.query(db.MovieLanguage).filter(
            db.MovieLanguage.movie_id == movie.id).delete()
        session.query(
            db.Movie).filter(db.Movie.url == entry['imdb_url']).delete()
        session.commit()

    # search and store to cache
    if 'title' in entry:
        log.verbose('Parsing imdb for `%s`' % entry['title'])
    else:
        log.verbose('Parsing imdb for `%s`' % entry['imdb_id'])
    try:
        movie = self._parse_new_movie(entry['imdb_url'], session)
    except UnicodeDecodeError:
        log.error(
            'Unable to determine encoding for %s. Installing chardet library may help.'
            % entry['imdb_url'])
        # store cache so this will not be tried again
        movie = db.Movie()
        movie.url = entry['imdb_url']
        session.add(movie)
        session.commit()
        raise plugin.PluginError('UnicodeDecodeError')
    except ValueError as e:
        # TODO: might be a little too broad catch, what was this for anyway? ;P
        if manager.options.debug:
            log.exception(e)
        raise plugin.PluginError(
            'Invalid parameter: %s' % entry['imdb_url'], log)

    for att in [
        'title',
        'score',
        'votes',
        'meta_score',
        'year',
        'genres',
        'languages',
        'actors',
        'directors',
        'writers',
        'mpaa_rating',
    ]:
        log.trace('movie.%s: %s' % (att, getattr(movie, att)))

    # Update the entry fields
    entry.update_using_map(self.field_map, movie)
def imdb_id(self):
    """IMDB identifier parsed out of this record's url."""
    source_url = self.url
    return extract_id(source_url)
def parse_html_list(self, task, config, url, params, headers):
    """Scrape an imdb list's HTML pages into entries.

    :param task: Task instance (provides the requests session via fetch_page)
    :param config: plugin configuration; config['list'] used for log messages
    :param url: imdb list url
    :param params: query params dict; 'page' is mutated while paginating
    :param headers: HTTP headers for the requests
    :return: list of entries, or None when the list is empty
    :raises PluginError: when the item count cannot be parsed as a number
    """
    page = self.fetch_page(task, url, params, headers)
    soup = get_soup(page.text)
    try:
        item_text = soup.find(
            'div', class_='lister-total-num-results').string.split()
        total_item_count = int(item_text[0].replace(',', ''))
        logger.verbose('imdb list contains {} items', total_item_count)
    except AttributeError:
        # the results-count div is missing entirely -> treat as an empty list
        total_item_count = 0
    except (ValueError, TypeError) as e:
        # TODO Something is wrong if we get a ValueError, I think
        raise plugin.PluginError(
            'Received invalid movie count: %s ; %s' % (soup.find(
                'div', class_='lister-total-num-results').string, e))

    if not total_item_count:
        logger.verbose('No movies were found in imdb list: {}', config['list'])
        return

    entries = []
    items_processed = 0
    page_no = 1
    while items_processed < total_item_count:
        # Fetch the next page unless we've just begun
        if items_processed:
            page_no += 1
            params['page'] = page_no
            page = self.fetch_page(task, url, params, headers)
            soup = get_soup(page.text)

        items = soup.find_all('div', class_='lister-item')
        if not items:
            logger.debug('no items found on page: {}, aborting.', url)
            break
        logger.debug('{} items found on page {}', len(items), page_no)

        for item in items:
            items_processed += 1
            a = item.find('h3', class_='lister-item-header').find('a')
            if not a:
                logger.debug('no title link found for row, skipping')
                continue

            link = ('http://www.imdb.com' + a.get('href')).rstrip('/')
            entry = Entry()
            entry['title'] = a.text
            try:
                year = int(
                    item.find('span', class_='lister-item-year').text)
                entry['title'] += ' (%s)' % year
                entry['imdb_year'] = year
            except (ValueError, TypeError):
                # year span missing or not a plain integer; leave title as-is
                pass
            entry['url'] = link
            entry['imdb_id'] = extract_id(link)
            entry['imdb_name'] = entry['title']
            entries.append(entry)

    return entries
def lookup(self, entry, search_allowed=True, session=None):
    """
    Perform imdb lookup for entry.

    Resolution order: a usable ``imdb_url`` is derived from ``imdb_id`` or
    validated from the entry; failing that, a cached search result is
    consulted; failing that (and if allowed), an IMDB title search is run
    and its outcome — success or failure — is cached. Finally the movie
    details are read from the local cache or freshly parsed from IMDB.

    :param entry: Entry instance
    :param search_allowed: Allow fallback to search
    :param session: SQLAlchemy session used for all cache reads/writes
    :raises PluginError: Failure reason
    """
    # Imported here (not at module top); presumably avoids a circular import — TODO confirm.
    from flexget.manager import manager

    # Log what we have to work with; with none of the three fields the lookup cannot proceed.
    if entry.get('imdb_id', eval_lazy=False):
        log.debug('No title passed. Lookup for %s' % entry['imdb_id'])
    elif entry.get('imdb_url', eval_lazy=False):
        log.debug('No title passed. Lookup for %s' % entry['imdb_url'])
    elif entry.get('title', eval_lazy=False):
        log.debug('lookup for %s' % entry['title'])
    else:
        raise plugin.PluginError(
            'looking up IMDB for entry failed, no title, imdb_url or imdb_id passed.'
        )

    # if imdb_id is included, build the url.
    if entry.get('imdb_id', eval_lazy=False) and not entry.get('imdb_url', eval_lazy=False):
        entry['imdb_url'] = make_url(entry['imdb_id'])

    # make sure imdb url is valid: round-trip it through extract_id/make_url to
    # canonicalize, and discard it entirely when no id can be extracted.
    if entry.get('imdb_url', eval_lazy=False):
        imdb_id = extract_id(entry['imdb_url'])
        if imdb_id:
            entry['imdb_url'] = make_url(imdb_id)
        else:
            log.debug('imdb url %s is invalid, removing it' % entry['imdb_url'])
            entry['imdb_url'] = ''

    # no imdb_url, check if there is cached result for it or if the
    # search is known to fail
    if not entry.get('imdb_url', eval_lazy=False):
        result = (
            session.query(db.SearchResult)
            .filter(db.SearchResult.title == entry['title'])
            .first()
        )
        if result:
            # TODO: 1.2 this should really be checking task.options.retry
            if result.fails and not manager.options.execute.retry:
                # this movie cannot be found, not worth trying again ...
                log.debug('%s will fail lookup' % entry['title'])
                raise plugin.PluginError('IMDB lookup failed for %s' % entry['title'])
            else:
                if result.url:
                    log.trace('Setting imdb url for %s from db' % entry['title'])
                    entry['imdb_id'] = result.imdb_id
                    entry['imdb_url'] = result.url

    # no imdb url, but information required, try searching
    if not entry.get('imdb_url', eval_lazy=False) and search_allowed:
        log.verbose('Searching from imdb `%s`' % entry['title'])
        search = ImdbSearch()
        # Prefer an explicit movie_name over the raw entry title for searching.
        search_name = entry.get('movie_name', entry['title'], eval_lazy=False)
        search_result = search.smart_match(search_name)
        if search_result:
            entry['imdb_url'] = search_result['url']
            # store url for this movie, so we don't have to search on every run
            result = db.SearchResult(entry['title'], entry['imdb_url'])
            session.add(result)
            session.commit()
            log.verbose('Found %s' % (entry['imdb_url']))
        else:
            log_once(
                'IMDB lookup failed for %s' % entry['title'],
                log,
                logging.WARN,
                session=session,
            )
            # store FAIL for this title so future runs can skip the search
            result = db.SearchResult(entry['title'])
            result.fails = True
            session.add(result)
            session.commit()
            raise plugin.PluginError('Title `%s` lookup failed' % entry['title'])

    # check if this imdb page has been parsed & cached
    movie = session.query(db.Movie).filter(db.Movie.url == entry['imdb_url']).first()

    # If we have a movie from cache, we are done
    if movie and not movie.expired:
        entry.update_using_map(self.field_map, movie)
        return

    # Movie was not found in cache, or was expired
    if movie is not None:
        if movie.expired:
            log.verbose('Movie `%s` details expired, refreshing ...' % movie.title)
        # Remove the old movie, we'll store another one later.
        session.query(db.MovieLanguage).filter(db.MovieLanguage.movie_id == movie.id).delete()
        session.query(db.Movie).filter(db.Movie.url == entry['imdb_url']).delete()
        session.commit()

    # search and store to cache
    if 'title' in entry:
        log.verbose('Parsing imdb for `%s`' % entry['title'])
    else:
        log.verbose('Parsing imdb for `%s`' % entry['imdb_id'])
    try:
        movie = self._parse_new_movie(entry['imdb_url'], session)
    except UnicodeDecodeError:
        log.error(
            'Unable to determine encoding for %s. Installing chardet library may help.'
            % entry['imdb_url']
        )
        # store cache so this will not be tried again
        movie = db.Movie()
        movie.url = entry['imdb_url']
        session.add(movie)
        session.commit()
        raise plugin.PluginError('UnicodeDecodeError')
    except ValueError as e:
        # TODO: might be a little too broad catch, what was this for anyway? ;P
        if manager.options.debug:
            log.exception(e)
        raise plugin.PluginError('Invalid parameter: %s' % entry['imdb_url'], log)

    # Trace-dump the parsed attributes for debugging.
    for att in [
        'title',
        'score',
        'votes',
        'meta_score',
        'year',
        'genres',
        'languages',
        'actors',
        'directors',
        'writers',
        'mpaa_rating',
    ]:
        log.trace('movie.%s: %s' % (att, getattr(movie, att)))

    # Update the entry fields
    entry.update_using_map(self.field_map, movie)
def parse_site(self, url, task):
    """Parse configured url and return releases array"""
    try:
        page = task.requests.get(url).content
    except RequestException as e:
        raise plugin.PluginError('Error getting input page: %s' % e)
    soup = get_soup(page)

    releases = []
    for entry_div in soup.find_all('div', attrs={'class': 'entry'}):
        heading = entry_div.find('h2')
        if not heading:
            log.debug('No h2 entrytitle')
            continue
        release = {'title': heading.a.contents[0].strip()}
        log.debug('Processing title %s' % (release['title']))

        for anchor in entry_div.find_all('a'):
            # skip anchors with no content at all
            if not anchor.contents:
                continue
            text = anchor.contents[0]
            # only plain text anchors count (None isn't a NavigableString either)
            if not isinstance(text, NavigableString):
                continue
            text = text.strip().lower()
            # an anchor without a target is useless here
            if not anchor.has_attr('href'):
                continue
            href = anchor['href']
            log.debug('found link %s -> %s' % (text, href))

            # handle imdb link
            if text.lower() == 'imdb':
                log.debug('found imdb link %s' % href)
                release['imdb_id'] = extract_id(href)

            # test if entry with this url would be rewritable by known plugins (ie. downloadable)
            candidate = {'title': release['title'], 'url': href}
            urlrewriting = plugin.get('urlrewriting', self)
            if urlrewriting.url_rewritable(task, candidate):
                release['url'] = href
                log.trace('--> accepting %s (resolvable)' % href)
            else:
                log.trace('<-- ignoring %s (non-resolvable)' % href)

        if 'url' in release:
            releases.append(release)
        else:
            # reject if no torrent link
            from flexget.utils.log import log_once

            log_once(
                '%s skipped due to missing or unsupported (unresolvable) download link'
                % (release['title']),
                log,
            )
    return releases