Example #1
    def parse_what(self, what):
        """Given an imdb id or movie title, looks up from imdb and returns a dict with imdb_id and title keys"""
        imdb_id = extract_id(what)
        title = what

        if imdb_id:
            # Given an imdb id, find title
            parser = ImdbParser()
            try:
                parser.parse('http://www.imdb.com/title/%s' % imdb_id)
            except Exception:
                raise QueueError('Error parsing info from imdb for %s' %
                                 imdb_id)
            if parser.name:
                title = parser.name
        else:
            # Given a title, try to do imdb search for id
            console('Searching imdb for %s' % what)
            search = ImdbSearch()
            result = search.smart_match(what)
            if not result:
                raise QueueError(
                    'ERROR: Unable to find any such movie from imdb, use imdb url instead.'
                )
            imdb_id = extract_id(result['url'])
            title = result['name']

        self.options['imdb_id'] = imdb_id
        self.options['title'] = title
        return {'title': title, 'imdb_id': imdb_id}
Example #2
    def on_task_metainfo(self, task, config):
        # check if disabled (value set to false)
        if 'scan_imdb' in task.config:
            if not task.config['scan_imdb']:
                return

        for entry in task.entries:
            # Don't override already populated imdb_ids
            if entry.get('imdb_id', eval_lazy=False):
                continue
            if 'description' not in entry:
                continue
            urls = re.findall(r'\bimdb\.com/title/tt\d+\b', entry['description'])
            # Find unique imdb ids
            imdb_ids = [_f for _f in set(extract_id(url) for url in urls) if _f]
            if not imdb_ids:
                continue

            if len(imdb_ids) > 1:
                log.debug('Found multiple imdb ids; not using any of: %s' % ' '.join(imdb_ids))
                continue

            entry['imdb_id'] = imdb_ids[0]
            entry['imdb_url'] = make_url(entry['imdb_id'])
            log.debug('Found imdb url in description %s' % entry['imdb_url'])
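A standalone illustration of the description-scanning step above; the sample description string is made up, and only the regex plus deduplication are exercised:

import re

description = 'Review: http://www.imdb.com/title/tt0068646/ vs http://www.imdb.com/title/tt0068646/'
urls = re.findall(r'\bimdb\.com/title/tt\d+\b', description)
# Deduplicate the hits, mirroring the set() in the plugin code
print(sorted(set(urls)))  # ['imdb.com/title/tt0068646']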
Example #3
def seen_search(options, session=None):
    search_term = options.search_term
    if is_imdb_url(search_term):
        console('IMDB url detected, parsing ID')
        imdb_id = extract_id(search_term)
        if imdb_id:
            search_term = imdb_id
        else:
            console("Could not parse IMDB ID")
    else:
        search_term = '%' + options.search_term + '%'
    seen_entries = seen.search(value=search_term, status=None, session=session)
    table_data = []
    for se in seen_entries.all():
        table_data.append(['Title', se.title])
        for sf in se.fields:
            if sf.field.lower() == 'title':
                continue
            table_data.append(['{}'.format(sf.field.upper()), str(sf.value)])
        table_data.append(['Task', se.task])
        table_data.append(['Added', se.added.strftime('%Y-%m-%d %H:%M')])
        if options.table_type != 'porcelain':
            table_data.append(['', ''])
    if not table_data:
        console('No results found for search')
        return
    if options.table_type != 'porcelain':
        del table_data[-1]

    try:
        table = TerminalTable(options.table_type, table_data, wrap_columns=[1])
        table.table.inner_heading_row_border = False
        console(table.output)
    except TerminalTableError as e:
        console('ERROR: %s' % str(e))
Example #4
def parse_what(what, lookup=True, session=None):
    """
    Determines what information was provided by the search string `what`.
    If `lookup` is true, will fill in other information from tmdb.

    :param what: Can be one of:
      <Movie Title>: Search based on title
      imdb_id=<IMDB id>: search based on imdb id
      tmdb_id=<TMDB id>: search based on tmdb id
    :param bool lookup: Whether missing info should be filled in from tmdb.
    :param session: An existing session that will be used for lookups if provided.
    :rtype: dict
    :return: A dictionary with 'title', 'imdb_id' and 'tmdb_id' keys
    """

    tmdb_lookup = get_plugin_by_name('api_tmdb').instance.lookup

    result = {'title': None, 'imdb_id': None, 'tmdb_id': None}
    result['imdb_id'] = extract_id(what)
    if not result['imdb_id'] and what.startswith('tmdb_id='):
        result['tmdb_id'] = what[8:]
    else:
        result['title'] = what

    if not lookup:
        # If not doing an online lookup we can return here
        return result

    try:
        result['session'] = session
        movie = tmdb_lookup(**result)
    except LookupError as e:
        raise QueueError(str(e))
Example #5
def upgrade(ver, session):
    if ver is None:
        log.info('Converting seen imdb_url to imdb_id for seen movies.')
        field_table = table_schema('seen_field', session)
        for row in session.execute(select([field_table.c.id, field_table.c.value], field_table.c.field == 'imdb_url')):
            new_values = {'field': 'imdb_id', 'value': extract_id(row['value'])}
            session.execute(update(field_table, field_table.c.id == row['id'], new_values))
        ver = 1
    if ver == 1:
        field_table = table_schema('seen_field', session)
        log.info('Adding index to seen_field table.')
        Index('ix_seen_field_seen_entry_id', field_table.c.seen_entry_id).create(bind=session.bind)
        ver = 2
    if ver == 2:
        log.info('Adding local column to seen_entry table')
        table_add_column('seen_entry', 'local', Boolean, session, default=False)
        ver = 3
    if ver == 3:
        # setting the default to False in the last migration was broken, fix the data
        log.info('Repairing seen table')
        entry_table = table_schema('seen_entry', session)
        session.execute(update(entry_table, entry_table.c.local == None, {'local': False}))
        ver = 4

    return ver
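Worth noting about the migration above: each `if` block performs one schema step and then bumps `ver`, so a database at any older version falls through every remaining step in order. A toy sketch of the pattern (illustrative only, not flexget code):

def chain(ver):
    """Toy model of the fall-through migration chain used above."""
    if ver is None:
        ver = 1  # the 0 -> 1 migration work would run here
    if ver == 1:
        ver = 2  # the 1 -> 2 migration work would run here
    if ver == 2:
        ver = 3  # the 2 -> 3 migration work would run here
    return ver

assert chain(None) == 3  # fresh database: every step runs
assert chain(2) == 3     # partially upgraded: only the last step runs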
Example #6
def parse_what(what, lookup=True, session=None):
    """
    Determines what information was provided by the search string `what`.
    If `lookup` is true, will fill in other information from tmdb.

    :param what: Can be one of:
      <Movie Title>: Search based on title
      imdb_id=<IMDB id>: search based on imdb id
      tmdb_id=<TMDB id>: search based on tmdb id
    :param bool lookup: Whether missing info should be filled in from tmdb.
    :param session: An existing session that will be used for lookups if provided.
    :rtype: dict
    :return: A dictionary with 'title', 'imdb_id' and 'tmdb_id' keys
    """

    tmdb_lookup = get_plugin_by_name('api_tmdb').instance.lookup

    result = {'title': None, 'imdb_id': None, 'tmdb_id': None}
    result['imdb_id'] = extract_id(what)
    if not result['imdb_id'] and what.startswith('tmdb_id='):
        result['tmdb_id'] = what[8:]
    else:
        result['title'] = what

    if not lookup:
        # If not doing an online lookup we can return here
        return result

    try:
        result['session'] = session
        movie = tmdb_lookup(**result)
    except LookupError as e:
        raise QueueError(str(e))
Example #7
    def on_task_metainfo(self, task, config):
        # check if disabled (value set to false)
        if 'scan_imdb' in task.config:
            if not task.config['scan_imdb']:
                return

        for entry in task.entries:
            # Don't override already populated imdb_ids
            if entry.get('imdb_id', eval_lazy=False):
                continue
            if 'description' not in entry:
                continue
            urls = re.findall(r'\bimdb\.com/title/tt\d+\b', entry['description'])
            # Find unique imdb ids
            imdb_ids = list(filter(None, set(extract_id(url) for url in urls)))
            if not imdb_ids:
                continue

            if len(imdb_ids) > 1:
                log.debug('Found multiple imdb ids; not using any of: %s' %
                          ' '.join(imdb_ids))
                continue

            entry['imdb_id'] = imdb_ids[0]
            entry['imdb_url'] = make_url(entry['imdb_id'])
            log.debug('Found imdb url in description %s' % entry['imdb_url'])
Example #8
def upgrade(ver, session):
    if ver is None:
        log.info("Converting seen imdb_url to imdb_id for seen movies.")
        field_table = table_schema("seen_field", session)
        for row in session.execute(select([field_table.c.id, field_table.c.value], field_table.c.field == "imdb_url")):
            new_values = {"field": "imdb_id", "value": extract_id(row["value"])}
            session.execute(update(field_table, field_table.c.id == row["id"], new_values))
        ver = 1
    if ver == 1:
        field_table = table_schema("seen_field", session)
        log.info("Adding index to seen_field table.")
        Index("ix_seen_field_seen_entry_id", field_table.c.seen_entry_id).create(bind=session.bind)
        ver = 2
    if ver == 2:
        log.info("Adding local column to seen_entry table")
        table_add_column("seen_entry", "local", Boolean, session, default=False)
        ver = 3
    if ver == 3:
        # setting the default to False in the last migration was broken, fix the data
        log.info("Repairing seen table")
        entry_table = table_schema("seen_entry", session)
        session.execute(update(entry_table, entry_table.c.local == None, {"local": False}))
        ver = 4

    return ver
Example #9
def seen_search(options, session=None):
    search_term = options.search_term
    if is_imdb_url(search_term):
        console('IMDB url detected, parsing ID')
        imdb_id = extract_id(search_term)
        if imdb_id:
            search_term = imdb_id
        else:
            console("Could not parse IMDB ID")
    else:
        search_term = '%' + options.search_term + '%'
    seen_entries = plugin_seen.search(value=search_term, status=None, session=session)
    table_data = []
    for se in seen_entries.all():
        table_data.append(['Title', se.title])
        for sf in se.fields:
            if sf.field.lower() == 'title':
                continue
            table_data.append(['{}'.format(sf.field.upper()), str(sf.value)])
        table_data.append(['Task', se.task])
        table_data.append(['Added', se.added.strftime('%Y-%m-%d %H:%M')])
        if options.table_type != 'porcelain':
            table_data.append(['', ''])
    if not table_data:
        console('No results found for search')
        return
    if options.table_type != 'porcelain':
        del table_data[-1]

    try:
        table = TerminalTable(options.table_type, table_data, wrap_columns=[1])
        table.table.inner_heading_row_border = False
        console(table.output)
    except TerminalTableError as e:
        console('ERROR: %s' % str(e))
Example #10
    def __setitem__(self, key, value):
        # Enforce unicode compatibility. Check for all subclasses of basestring, so that NavigableStrings are also cast
        if isinstance(value, basestring) and not type(value) == unicode:
            try:
                value = unicode(value)
            except UnicodeDecodeError:
                raise EntryUnicodeError(key, value)

        # url and original_url handling
        if key == 'url':
            if not isinstance(value, basestring):
                raise PluginError('Tried to set %r url to %r' % (self.get('title'), value))
            if not 'original_url' in self:
                self['original_url'] = value

        # title handling
        if key == 'title':
            if not isinstance(value, basestring):
                raise PluginError('Tried to set title to %r' % value)

        # TODO: HACK! Implement via plugin once #348 (entry events) is implemented
        # enforces imdb_url in same format
        if key == 'imdb_url' and isinstance(value, basestring):
            imdb_id = extract_id(value)
            if imdb_id:
                value = make_url(imdb_id)
            else:
                log.debug('Tried to set imdb_id to invalid imdb url: %s' % value)
                value = None

        try:
            log.trace('ENTRY SET: %s = %r' % (key, value))
        except Exception as e:
            log.debug('trying to debug key `%s` value threw exception: %s' % (key, e))

        dict.__setitem__(self, key, value)
Example #11
    def on_process_start(self, feed):
        if not feed.manager.options.repair_seen_movies:
            return

        feed.manager.disable_feeds()

        from progressbar import ProgressBar, Percentage, Bar, ETA
        from flexget.manager import Session
        from seen import SeenField
        from flexget.utils.imdb import extract_id

        session = Session()

        index = 0
        count = 0
        total = session.query(SeenField).filter(SeenField.field == u'imdb_url').count()

        widgets = ['Repairing: ', ETA(), ' ', Percentage(), ' ', Bar(left='[', right=']')]
        bar = ProgressBar(widgets=widgets, maxval=total).start()

        for seen in session.query(SeenField).filter(SeenField.field == u'imdb_url').all():
            index += 1
            if index % 5 == 0:
                bar.update(index)
            value = u'http://www.imdb.com/title/%s/' % extract_id(seen.value)
            if value != seen.value:
                count += 1
                seen.value = value
                seen.field = unicode('imdb_url')

        bar.finish()
        session.commit()

        print('Fixed %s/%s URLs' % (count, total))
Example #12
    def __setitem__(self, key, value):
        # Enforce unicode compatibility. Check for all subclasses of basestring, so that NavigableStrings are also cast
        if isinstance(value, basestring) and not type(value) == unicode:
            try:
                value = unicode(value)
            except UnicodeDecodeError:
                raise EntryUnicodeError(key, value)

        # url and original_url handling
        if key == 'url':
            if not isinstance(value, basestring):
                raise PluginError('Tried to set %r url to %r' % (self.get('title'), value))
            self.setdefault('original_url', value)

        # title handling
        if key == 'title':
            if not isinstance(value, basestring):
                raise PluginError('Tried to set title to %r' % value)

        # TODO: HACK! Implement via plugin once #348 (entry events) is implemented
        # enforces imdb_url in same format
        if key == 'imdb_url' and isinstance(value, basestring):
            imdb_id = extract_id(value)
            if imdb_id:
                value = make_url(imdb_id)
            else:
                log.debug('Tried to set imdb_id to invalid imdb url: %s' % value)
                value = None

        try:
            log.trace('ENTRY SET: %s = %r' % (key, value))
        except Exception as e:
            log.debug('trying to debug key `%s` value threw exception: %s' % (key, e))

        dict.__setitem__(self, key, value)
Example #13
    def parse_site(self, url, task):
        """Parse configured url and return releases array"""

        try:
            page = task.requests.get(url).content
        except RequestException as e:
            raise plugin.PluginError('Error getting input page: %s' % e)
        soup = get_soup(page)

        releases = []
        for entry in soup.find_all('div', attrs={'class': 'entry'}):
            release = {}
            title = entry.find('h2')
            if not title:
                log.debug('No h2 entrytitle')
                continue
            release['title'] = title.a.contents[0].strip()

            log.debug('Processing title %s' % (release['title']))

            for link in entry.find_all('a'):
                # no content in the link
                if not link.contents:
                    continue
                link_name = link.contents[0]
                if link_name is None:
                    continue
                if not isinstance(link_name, NavigableString):
                    continue
                link_name = link_name.strip().lower()
                if link.has_attr('href'):
                    link_href = link['href']
                else:
                    continue
                log.debug('found link %s -> %s' % (link_name, link_href))
                # handle imdb link
                if link_name.lower() == 'imdb':
                    log.debug('found imdb link %s' % link_href)
                    release['imdb_id'] = extract_id(link_href)

                # test if entry with this url would be rewritable by known plugins (ie. downloadable)
                temp = {}
                temp['title'] = release['title']
                temp['url'] = link_href
                urlrewriting = plugin.get_plugin_by_name('urlrewriting')
                if urlrewriting['instance'].url_rewritable(task, temp):
                    release['url'] = link_href
                    log.trace('--> accepting %s (resolvable)' % link_href)
                else:
                    log.trace('<-- ignoring %s (non-resolvable)' % link_href)

            # reject if no torrent link
            if 'url' not in release:
                from flexget.utils.log import log_once
                log_once('%s skipped due to missing or unsupported (unresolvable) download link' % (release['title']),
                         log)
            else:
                releases.append(release)

        return releases
Example #14
    def on_task_input(self, task, config):
        log.verbose('Retrieving list %s ...' % config['list'])

        # Get the imdb list in RSS format
        if config['list'] in ['watchlist', 'ratings', 'checkins']:
            url = 'http://rss.imdb.com/user/%s/%s' % (config['user_id'], config['list'])
        else:
            url = 'http://rss.imdb.com/list/%s' % config['list']
        log.debug('Requesting %s' % url)
        try:
            rss = feedparser.parse(url)
        except LookupError as e:
            raise plugin.PluginError('Failed to parse RSS feed for list `%s` correctly: %s' % (config['list'], e))
        if rss.status == 404:
            raise plugin.PluginError('Unable to get imdb list. Either list is private or does not exist.')

        # Create an Entry for each movie in the list
        entries = []
        title_re = re.compile(r'(.*) \((\d{4})?.*?\)$')
        for entry in rss.entries:
            try:
                # IMDb puts some extra stuff in the titles, e.g. "Battlestar Galactica (2004 TV Series)"
                # Strip out everything but the date
                match = title_re.match(entry.title)
                title = match.group(1)
                if match.group(2):
                    title += ' (%s)' % match.group(2)
                entries.append(
                    Entry(title=title, url=entry.link, imdb_id=extract_id(entry.link), imdb_name=match.group(1)))
            except IndexError:
                log.critical('IndexError! Unable to handle RSS entry: %s' % entry)
        return entries
Example #15
def upgrade(ver, session):
    if ver is None:
        log.info('Converting seen imdb_url to imdb_id for seen movies.')
        field_table = table_schema('seen_field', session)
        for row in session.execute(select([field_table.c.id, field_table.c.value], field_table.c.field == 'imdb_url')):
            new_values = {'field': 'imdb_id', 'value': extract_id(row['value'])}
            session.execute(update(field_table, field_table.c.id == row['id'], new_values))
        ver = 1
    if ver == 1:
        field_table = table_schema('seen_field', session)
        log.info('Adding index to seen_field table.')
        Index('ix_seen_field_seen_entry_id', field_table.c.seen_entry_id).create(bind=session.bind)
        ver = 2
    if ver == 2:
        log.info('Adding local column to seen_entry table')
        table_add_column('seen_entry', 'local', Boolean, session, default=False)
        ver = 3
    if ver == 3:
        # setting the default to False in the last migration was broken, fix the data
        log.info('Repairing seen table')
        entry_table = table_schema('seen_entry', session)
        session.execute(update(entry_table, entry_table.c.local == None, {'local': False}))
        ver = 4

    return ver
Example #16
    def parse_site(self, url, task):
        """Parse configured url and return releases array"""

        try:
            page = task.requests.get(url).content
        except RequestException as e:
            raise plugin.PluginError('Error getting input page: %s' % e)
        soup = get_soup(page)

        releases = []
        for entry in soup.find_all('div', attrs={'class': 'entry'}):
            release = {}
            title = entry.find('h2')
            if not title:
                log.debug('No h2 entrytitle')
                continue
            release['title'] = title.a.contents[0].strip()

            log.debug('Processing title %s' % (release['title']))

            for link in entry.find_all('a'):
                # no content in the link
                if not link.contents:
                    continue
                link_name = link.contents[0]
                if link_name is None:
                    continue
                if not isinstance(link_name, NavigableString):
                    continue
                link_name = link_name.strip().lower()
                if link.has_attr('href'):
                    link_href = link['href']
                else:
                    continue
                log.debug('found link %s -> %s' % (link_name, link_href))
                # handle imdb link
                if link_name.lower() == 'imdb':
                    log.debug('found imdb link %s' % link_href)
                    release['imdb_id'] = extract_id(link_href)

                # test if entry with this url would be rewritable by known plugins (ie. downloadable)
                temp = {}
                temp['title'] = release['title']
                temp['url'] = link_href
                urlrewriting = plugin.get_plugin_by_name('urlrewriting')
                if urlrewriting['instance'].url_rewritable(task, temp):
                    release['url'] = link_href
                    log.trace('--> accepting %s (resolvable)' % link_href)
                else:
                    log.trace('<-- ignoring %s (non-resolvable)' % link_href)

            # reject if no torrent link
            if 'url' not in release:
                from flexget.utils.log import log_once
                log_once('%s skipped due to missing or unsupported (unresolvable) download link' % (release['title']), log)
            else:
                releases.append(release)

        return releases
Example #17
def seen_add(options):
    seen_name = options.add_value
    if is_imdb_url(seen_name):
        imdb_id = extract_id(seen_name)
        if imdb_id:
            seen_name = imdb_id
    seen.add(seen_name, "cli_add", {"cli_add": seen_name})
    console("Added %s as seen. This will affect all tasks." % seen_name)
Example #18
def seen_add(options):
    seen_name = options.add_value
    if is_imdb_url(seen_name):
        imdb_id = extract_id(seen_name)
        if imdb_id:
            seen_name = imdb_id
    seen.add(seen_name, 'cli_add', {'cli_add': seen_name})
    console('Added %s as seen. This will affect all tasks.' % seen_name)
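The seen_add/seen_forget helpers above all share one normalization step: when the user passes an imdb url, the bare tt-id is stored instead of the url. A minimal sketch of that pattern, assuming the flexget helpers used throughout these examples:

from flexget.utils.imdb import extract_id, is_imdb_url

def normalize_value(value):
    """Collapse an imdb url to its tt-id; leave plain titles unchanged."""
    if is_imdb_url(value):
        imdb_id = extract_id(value)
        if imdb_id:
            return imdb_id
    return value

# normalize_value('http://www.imdb.com/title/tt0111161/') -> 'tt0111161'
# normalize_value('The Shawshank Redemption') -> 'The Shawshank Redemption'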
Example #19
    def search(self, task, entry, config):
        if not session.cookies or not session.passkey:
            try:
                login_params = {
                    'username': config['username'],
                    'password': config['password'],
                    'loginkey': config['login_key']
                }
                r = session.post('https://piratethenet.org/takelogin.php',
                                 data=login_params,
                                 verify=False)
            except requests.RequestException as e:
                log.error('Error while logging in to PtN: %s', e)
                raise plugin.PluginError('Could not log in to PtN')

            # Sorta hacky, we'll just store the passkey on the session
            passkey = re.search(r'passkey=([\d\w]+)"', r.text)
            if passkey:
                session.passkey = passkey.group(1)
            else:
                log.error('PtN cookie info invalid')
                raise plugin.PluginError('PTN cookie info invalid')

        search_params = default_search_params.copy()
        if 'movie_name' in entry:
            if 'movie_year' in entry:
                search_params['advancedsearchparameters'] = '[year=%s]' % entry['movie_year']
            searches = [entry['movie_name']]
        else:
            searches = entry.get('search_strings', [entry['title']])

        results = set()
        for search in searches:
            search_params['searchstring'] = search
            try:
                r = session.get('http://piratethenet.org/torrentsutils.php',
                                params=search_params)
            except requests.RequestException as e:
                log.error('Error searching ptn: %s' % e)
                continue
            # html5parser doesn't work properly for some reason
            soup = get_soup(r.text, parser='html.parser')
            for movie in soup.select('.torrentstd'):
                imdb_id = movie.find('a', href=re.compile(r'.*imdb\.com/title/tt'))
                if imdb_id:
                    imdb_id = extract_id(imdb_id['href'])
                if imdb_id and 'imdb_id' in entry and imdb_id != entry['imdb_id']:
                    continue
                results.update(
                    self.create_entries(movie,
                                        passkey=session.passkey,
                                        imdb_id=imdb_id))

        return results
Example #20
    def parse_html_list(self, task, config, url, params, headers):
        page = self.fetch_page(task, url, params, headers)
        soup = get_soup(page.text)
        try:
            item_text = soup.find('div', class_='lister-total-num-results').string.split()
            total_item_count = int(item_text[0].replace(',', ''))
            log.verbose('imdb list contains %d items', total_item_count)
        except AttributeError:
            total_item_count = 0
        except (ValueError, TypeError) as e:
            # TODO Something is wrong if we get a ValueError, I think
            raise plugin.PluginError('Received invalid movie count: %s ; %s' %
                                     (soup.find('div', class_='lister-total-num-results').string, e))

        if not total_item_count:
            log.verbose('No movies were found in imdb list: %s', config['list'])
            return

        entries = []
        items_processed = 0
        page_no = 1
        while items_processed < total_item_count:
            # Fetch the next page unless we've just begun
            if items_processed:
                page_no += 1
                params['page'] = page_no
                page = self.fetch_page(task, url, params, headers)
                soup = get_soup(page.text)

            items = soup.find_all('div', class_='lister-item')
            if not items:
                log.debug('no items found on page: %s, aborting.', url)
                break
            log.debug('%d items found on page %d', len(items), page_no)

            for item in items:
                items_processed += 1
                a = item.find('h3', class_='lister-item-header').find('a')
                if not a:
                    log.debug('no title link found for row, skipping')
                    continue

                link = ('http://www.imdb.com' + a.get('href')).rstrip('/')
                entry = Entry()
                entry['title'] = a.text
                try:
                    year = int(item.find('span', class_='lister-item-year').text)
                    entry['title'] += ' (%s)' % year
                    entry['imdb_year'] = year
                except (ValueError, TypeError):
                    pass
                entry['url'] = link
                entry['imdb_id'] = extract_id(link)
                entry['imdb_name'] = entry['title']
                entries.append(entry)

        return entries
Example #21
File: seen.py Project: kop1/flexget
def upgrade(ver, session):
    if ver is None:
        log.info('Converting seen imdb_url to imdb_id for seen movies.')
        field_table = table_schema('seen_field', session)
        for row in session.execute(select([field_table.c.id, field_table.c.value], field_table.c.field == 'imdb_url')):
            session.execute(update(field_table, field_table.c.id == row['id'],
                    {'field': 'imdb_id', 'value': extract_id(row['value'])}))
        ver = 1
    return ver
Example #22
def seen_forget(manager, options):
    forget_name = options.forget_value
    if is_imdb_url(forget_name):
        imdb_id = extract_id(forget_name)
        if imdb_id:
            forget_name = imdb_id

    count, fcount = plugin_seen.forget(forget_name)
    console('Removed %s titles (%s fields)' % (count, fcount))
    manager.config_changed()
Example #23
def seen_forget(manager, options):
    forget_name = options.forget_value
    if is_imdb_url(forget_name):
        imdb_id = extract_id(forget_name)
        if imdb_id:
            forget_name = imdb_id

    count, fcount = forget(forget_name)
    console("Removed %s titles (%s fields)" % (count, fcount))
    manager.config_changed()
Example #24
def seen_add(options):
    seen_name = options.add_value
    if is_imdb_url(seen_name):
        console('IMDB url detected, try to parse ID')
        imdb_id = extract_id(seen_name)
        if imdb_id:
            seen_name = imdb_id
        else:
            console("Could not parse IMDB ID")
    plugin_seen.add(seen_name, 'cli_add', {'cli_add': seen_name})
    console('Added %s as seen. This will affect all tasks.' % seen_name)
Example #25
def seen_add(options):
    seen_name = options.add_value
    if is_imdb_url(seen_name):
        console('IMDB url detected, try to parse ID')
        imdb_id = extract_id(seen_name)
        if imdb_id:
            seen_name = imdb_id
        else:
            console("Could not parse IMDB ID")
    seen.add(seen_name, 'cli_add', {'cli_add': seen_name})
    console('Added %s as seen. This will affect all tasks.' % seen_name)
Example #26
class ImdbLookup(object):
    """
        Retrieves imdb information for entries.

        Example:

        imdb_lookup: yes

        Also provides imdb lookup functionality to all other imdb related plugins.
    """

    field_map = {
        'imdb_url': 'url',
        'imdb_id': lambda movie: extract_id(movie.url),
        'imdb_name': 'title',
        'imdb_photo': 'photo',
        'imdb_plot_outline': 'plot_outline',
        'imdb_score': 'score',
        'imdb_votes': 'votes',
        'imdb_year': 'year',
        'imdb_genres': lambda movie: [genre.name for genre in movie.genres],
        'imdb_languages': lambda movie: [lang.language.name for lang in movie.languages],
        'imdb_actors': lambda movie: dict((actor.imdb_id, actor.name) for actor in movie.actors),
        'imdb_directors': lambda movie: dict((director.imdb_id, director.name) for director in movie.directors),
        'imdb_mpaa_rating': 'mpaa_rating',
        # Generic fields filled by all movie lookup plugins:
        'movie_name': 'title',
        'movie_year': 'year'}

    def validator(self):
        from flexget import validator
        return validator.factory('boolean')

    @priority(130)
    def on_task_metainfo(self, task, config):
        if not config:
            return
        for entry in task.entries:
            self.register_lazy_fields(entry)

    def register_lazy_fields(self, entry):
        entry.register_lazy_fields(self.field_map, self.lazy_loader)

    def lazy_loader(self, entry, field):
        """Does the lookup for this entry and populates the entry fields."""
        try:
            self.lookup(entry)
        except PluginError as e:
            log_once(e.value.capitalize(), logger=log)
            # Set all of our fields to None if the lookup failed
            entry.unregister_lazy_fields(self.field_map, self.lazy_loader)
        return entry[field]
Example #27
 def lazy_loader(self, entry):
     """Does the lookup for this entry and populates the entry fields."""
     imdb_id = (entry.get('imdb_id', eval_lazy=False) or
                imdb.extract_id(entry.get('imdb_url', eval_lazy=False)))
     try:
         with Session() as session:
             movie = lookup(smart_match=entry['title'],
                            tmdb_id=entry.get('tmdb_id', eval_lazy=False),
                            imdb_id=imdb_id,
                            session=session)
             entry.update_using_map(self.field_map, movie)
     except LookupError:
         log_once('TMDB lookup failed for %s' % entry['title'], log, logging.WARN)
Example #28
def seen_add(options):
    seen_name = options.add_value
    if is_imdb_url(seen_name):
        imdb_id = extract_id(seen_name)
        if imdb_id:
            seen_name = imdb_id

    with Session() as session:
        se = SeenEntry(seen_name, "cli_seen")
        sf = SeenField("cli_seen", seen_name)
        se.fields.append(sf)
        session.add(se)
    console("Added %s as seen. This will affect all tasks." % seen_name)
Example #29
 def lazy_loader(self, entry, field):
     """Does the lookup for this entry and populates the entry fields."""
     imdb_id = entry.get('imdb_id', eval_lazy=False) or \
               imdb.extract_id(entry.get('imdb_url', eval_lazy=False))
     try:
         movie = lookup(smart_match=entry['title'],
                        tmdb_id=entry.get('tmdb_id', eval_lazy=False),
                        imdb_id=imdb_id)
         entry.update_using_map(self.field_map, movie)
     except LookupError as e:
         log.debug(u'Tmdb lookup for %s failed: %s' % (entry['title'], e))
         # Set all of our fields to None if the lookup failed
         entry.unregister_lazy_fields(self.field_map, self.lazy_loader)
Example #30
 def lazy_loader(self, entry):
     """Does the lookup for this entry and populates the entry fields."""
     imdb_id = (entry.get('imdb_id', eval_lazy=False) or
                imdb.extract_id(entry.get('imdb_url', eval_lazy=False)))
     try:
         with Session() as session:
             movie = lookup(smart_match=entry['title'],
                            tmdb_id=entry.get('tmdb_id', eval_lazy=False),
                            imdb_id=imdb_id,
                            session=session)
             entry.update_using_map(self.field_map, movie)
     except LookupError:
         log_once('TMDB lookup failed for %s' % entry['title'], log, logging.WARN)
Example #31
def seen_add(options):
    seen_name = options.add_value
    if is_imdb_url(seen_name):
        imdb_id = extract_id(seen_name)
        if imdb_id:
            seen_name = imdb_id

    with Session() as session:
        se = SeenEntry(seen_name, 'cli_seen')
        sf = SeenField('cli_seen', seen_name)
        se.fields.append(sf)
        session.add(se)
    console('Added %s as seen. This will affect all tasks.' % seen_name)
Example #32
def seen_add(options):
    seen_name = options.add_value
    if is_imdb_url(seen_name):
        imdb_id = extract_id(seen_name)
        if imdb_id:
            seen_name = imdb_id

    with contextlib.closing(Session()) as session:
        se = SeenEntry(seen_name, 'cli_seen')
        sf = SeenField('cli_seen', seen_name)
        se.fields.append(sf)
        session.add(se)
        session.commit()
    console('Added %s as seen. This will affect all tasks.' % seen_name)
Example #33
def upgrade(ver, session):
    if ver is None:
        log.info('Converting seen imdb_url to imdb_id for seen movies.')
        field_table = table_schema('seen_field', session)
        for row in session.execute(select([field_table.c.id, field_table.c.value], field_table.c.field == 'imdb_url')):
            new_values = {'field': 'imdb_id', 'value': extract_id(row['value'])}
            session.execute(update(field_table, field_table.c.id == row['id'], new_values))
        ver = 1
    if ver == 1:
        field_table = table_schema('seen_field', session)
        log.info('Adding index to seen_field table.')
        Index('ix_seen_field_seen_entry_id', field_table.c.seen_entry_id).create(bind=session.bind)
        ver = 2
    return ver
Example #34
    def on_process_start(self, feed):
        if not feed.manager.options.forget:
            return

        feed.manager.disable_feeds()

        forget_name = unicode(feed.manager.options.forget)
        if is_imdb_url(forget_name):
            imdb_id = extract_id(forget_name)
            if imdb_id:
                forget_name = imdb_id

        count, fcount = forget(forget_name)
        log.info('Removed %s titles (%s fields)' % (count, fcount))
Example #35
 def lazy_loader(self, entry, field):
     """Does the lookup for this entry and populates the entry fields."""
     imdb_id = (entry.get('imdb_id', eval_lazy=False) or
                imdb.extract_id(entry.get('imdb_url', eval_lazy=False)))
     try:
         movie = lookup(smart_match=entry['title'],
                        tmdb_id=entry.get('tmdb_id', eval_lazy=False),
                        imdb_id=imdb_id)
         entry.update_using_map(self.field_map, movie)
     except LookupError:
         log_once('TMDB lookup failed for %s' % entry['title'], log, logging.WARN)
         # Set all of our fields to None if the lookup failed
         entry.unregister_lazy_fields(self.field_map, self.lazy_loader)
     return entry[field]
Example #36
 def lazy_loader(self, entry, field):
     """Does the lookup for this entry and populates the entry fields."""
     imdb_id = entry.get('imdb_id', eval_lazy=False) or \
               imdb.extract_id(entry.get('imdb_url', eval_lazy=False))
     try:
         movie = lookup(smart_match=entry['title'],
                        tmdb_id=entry.get('tmdb_id', eval_lazy=False),
                        imdb_id=imdb_id)
         entry.update_using_map(self.field_map, movie)
     except LookupError as e:
         log.debug(u'Tmdb lookup for %s failed: %s' % (entry['title'], e))
         # Set all of our fields to None if the lookup failed
         entry.unregister_lazy_fields(self.field_map, self.lazy_loader)
Example #37
 def lazy_loader(self, entry, field):
     """Does the lookup for this entry and populates the entry fields."""
     imdb_id = entry.get('imdb_id', eval_lazy=False) or \
         imdb.extract_id(entry.get('imdb_url', eval_lazy=False))
     try:
         movie = lookup(smart_match=entry['title'],
                        tmdb_id=entry.get('tmdb_id', eval_lazy=False),
                        imdb_id=imdb_id)
         entry.update_using_map(self.field_map, movie)
     except LookupError:
         log_once('TMDB lookup failed for %s' % entry['title'], log, logging.WARN)
         # Set all of our fields to None if the lookup failed
         entry.unregister_lazy_fields(self.field_map, self.lazy_loader)
     return entry[field]
Example #38
def parse_what(what, lookup=True, session=None):
    """
    Determines what information was provided by the search string `what`.
    If `lookup` is true, will fill in other information from tmdb.

    :param what: Can be one of:
      <Movie Title>: Search based on title
      imdb_id=<IMDB id>: search based on imdb id
      tmdb_id=<TMDB id>: search based on tmdb id
    :param bool lookup: Whether missing info should be filled in from tmdb.
    :param session: An existing session that will be used for lookups if provided.
    :rtype: dict
    :return: A dictionary with 'title', 'imdb_id' and 'tmdb_id' keys
    """

    tmdb_lookup = plugin.get_plugin_by_name('api_tmdb').instance.lookup

    result = {'title': None, 'imdb_id': None, 'tmdb_id': None}
    result['imdb_id'] = extract_id(what)
    if not result['imdb_id']:
        if isinstance(what, int):
            result['tmdb_id'] = what
        elif what.startswith('tmdb_id='):
            result['tmdb_id'] = what[8:]
        else:
            result['title'] = what

    if not lookup:
        # If not doing an online lookup we can return here
        return result

    search_entry = Entry(title=result['title'] or '')
    for field in ['imdb_id', 'tmdb_id']:
        if result.get(field):
            search_entry[field] = result[field]
    # Put lazy lookup fields on the search entry
    plugin.get_plugin_by_name('imdb_lookup').instance.register_lazy_fields(
        search_entry)
    plugin.get_plugin_by_name('tmdb_lookup').instance.lookup(search_entry)

    try:
        # Both ids are optional, but if movie_name was populated at least one of them will be there
        return {
            'title': search_entry['movie_name'],
            'imdb_id': search_entry.get('imdb_id'),
            'tmdb_id': search_entry.get('tmdb_id')
        }
    except KeyError as e:
        raise QueueError(str(e))
Example #39
    def on_process_start(self, task):
        if not task.manager.options.forget:
            return

        task.manager.disable_tasks()

        forget_name = task.manager.options.forget
        if is_imdb_url(forget_name):
            imdb_id = extract_id(forget_name)
            if imdb_id:
                forget_name = imdb_id

        count, fcount = forget(forget_name)
        log.info('Removed %s titles (%s fields)' % (count, fcount))
        task.manager.config_changed()
Example #40
    def on_process_start(self, task):
        if not task.manager.options.forget:
            return

        task.manager.disable_tasks()

        forget_name = task.manager.options.forget
        if is_imdb_url(forget_name):
            imdb_id = extract_id(forget_name)
            if imdb_id:
                forget_name = imdb_id

        count, fcount = forget(forget_name)
        log.info('Removed %s titles (%s fields)' % (count, fcount))
        task.manager.config_changed()
Example #41
def parse_what(what, lookup=True, session=None):
    """
    Determines what information was provided by the search string `what`.
    If `lookup` is true, will fill in other information from tmdb.

    :param what: Can be one of:
      <Movie Title>: Search based on title
      imdb_id=<IMDB id>: search based on imdb id
      tmdb_id=<TMDB id>: search based on tmdb id
    :param bool lookup: Whether missing info should be filled in from tmdb.
    :param session: An existing session that will be used for lookups if provided.
    :rtype: dict
    :return: A dictionary with 'title', 'imdb_id' and 'tmdb_id' keys
    """

    tmdb_lookup = plugin.get_plugin_by_name("api_tmdb").instance.lookup

    result = {"title": None, "imdb_id": None, "tmdb_id": None}
    result["imdb_id"] = extract_id(what)
    if not result["imdb_id"]:
        if isinstance(what, int):
            result["tmdb_id"] = what
        elif what.startswith("tmdb_id="):
            result["tmdb_id"] = what[8:]
        else:
            result["title"] = what

    if not lookup:
        # If not doing an online lookup we can return here
        return result

    search_entry = Entry(title=result["title"] or "")
    for field in ["imdb_id", "tmdb_id"]:
        if result.get(field):
            search_entry[field] = result[field]
    # Put lazy lookup fields on the search entry
    plugin.get_plugin_by_name("imdb_lookup").instance.register_lazy_fields(search_entry)
    plugin.get_plugin_by_name("tmdb_lookup").instance.lookup(search_entry)

    try:
        # Both ids are optional, but if movie_name was populated at least one of them will be there
        return {
            "title": search_entry["movie_name"],
            "imdb_id": search_entry.get("imdb_id"),
            "tmdb_id": search_entry.get("tmdb_id"),
        }
    except KeyError as e:
        raise QueueError(str(e))
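Tracing the branches above, the no-lookup path (lookup=False) shows how the search string is classified; the expected values below follow directly from the code, assuming extract_id only matches imdb-style ids and urls:

parse_what('tmdb_id=603', lookup=False)
# -> {'title': None, 'imdb_id': None, 'tmdb_id': '603'}
parse_what('http://www.imdb.com/title/tt0133093/', lookup=False)
# -> {'title': None, 'imdb_id': 'tt0133093', 'tmdb_id': None}
parse_what('The Matrix', lookup=False)
# -> {'title': 'The Matrix', 'imdb_id': None, 'tmdb_id': None}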
Example #42
    def search(self, task, entry, config):
        if not session.cookies or not session.passkey:
            try:
                login_params = {'username': config['username'],
                                'password': config['password'],
                                'loginkey': config['login_key']}
                r = session.post('https://piratethenet.org/takelogin.php', data=login_params, verify=False)
            except requests.RequestException as e:
                log.error('Error while logging in to PtN: %s', e)
                raise plugin.PluginError('Could not log in to PtN')

            # Sorta hacky, we'll just store the passkey on the session
            passkey = re.search(r'passkey=([\d\w]+)"', r.text)
            if passkey:
                session.passkey = passkey.group(1)
            else:
                log.error('PtN cookie info invalid')
                raise plugin.PluginError('PTN cookie info invalid')

        search_params = default_search_params.copy()
        if 'movie_name' in entry:
            if 'movie_year' in entry:
                search_params['advancedsearchparameters'] = '[year=%s]' % entry['movie_year']
            searches = [entry['movie_name']]
        else:
            searches = entry.get('search_strings', [entry['title']])

        results = set()
        for search in searches:
            search_params['searchstring'] = search
            try:
                r = session.get('http://piratethenet.org/torrentsutils.php', params=search_params)
            except requests.RequestException as e:
                log.error('Error searching ptn: %s' % e)
                continue
            # html5parser doesn't work properly for some reason
            soup = get_soup(r.text, parser='html.parser')
            for movie in soup.select('.torrentstd'):
                imdb_id = movie.find('a', href=re.compile(r'.*imdb\.com/title/tt'))
                if imdb_id:
                    imdb_id = extract_id(imdb_id['href'])
                if imdb_id and 'imdb_id' in entry and imdb_id != entry['imdb_id']:
                    continue
                results.update(self.create_entries(movie, passkey=session.passkey, imdb_id=imdb_id))

        return results
Example #43
    def parse_rlslog(self, rlslog_url, task):
        """
        :param rlslog_url: Url to parse from
        :param task: Task instance
        :return: List of release dictionaries
        """

        # BeautifulSoup doesn't seem to work if data is already decoded to unicode :/
        soup = get_soup(task.requests.get(rlslog_url, timeout=25).content)

        releases = []
        for entry in soup.find_all('div', attrs={'class': 'entry'}):
            release = {}
            h3 = entry.find('h3', attrs={'class': 'entrytitle'})
            if not h3:
                log.debug('FAIL: No h3 entrytitle')
                continue
            release['title'] = h3.a.contents[0].strip()
            entrybody = entry.find('div', attrs={'class': 'entrybody'})
            if not entrybody:
                log.debug('FAIL: No entrybody')
                continue

            log.trace('Processing title %s' % (release['title']))

            # find imdb url
            link_imdb = entrybody.find('a',
                                       text=re.compile(r'imdb', re.IGNORECASE))
            if link_imdb:
                release['imdb_id'] = extract_id(link_imdb['href'])
                release['imdb_url'] = link_imdb['href']

            # find google search url
            google = entrybody.find('a',
                                    href=re.compile(r'google', re.IGNORECASE))
            if google:
                release['url'] = google['href']
                releases.append(release)
            else:
                log_once(
                    '%s skipped due to missing or unsupported download link' %
                    (release['title']), log)

        return releases
Example #44
def parse_what(what, session=None):
    """Parses needed movie information for a given search string.

    Search string can be one of:
        <Movie Title>: Search based on title
        imdb_id=<IMDB id>: search based on imdb id
        tmdb_id=<TMDB id>: search based on tmdb id"""

    tmdb_lookup = get_plugin_by_name('api_tmdb').instance.lookup

    imdb_id = extract_id(what)
    try:
        if imdb_id:
            movie = tmdb_lookup(imdb_id=imdb_id, session=session)
        elif what.startswith('tmdb_id='):
            movie = tmdb_lookup(tmdb_id=what[8:], session=session)
        else:
            movie = tmdb_lookup(title=what, session=session)
    except LookupError as e:
        raise QueueError(str(e))
Example #45
    def on_process_start(self, task):
        if not task.manager.options.seen:
            return

        task.manager.disable_tasks()

        seen_name = task.manager.options.seen
        if is_imdb_url(seen_name):
            imdb_id = extract_id(seen_name)
            if imdb_id:
                seen_name = imdb_id

        session = Session()
        se = SeenEntry(u'--seen', unicode(task.name))
        sf = SeenField(u'--seen', seen_name)
        se.fields.append(sf)
        session.add(se)
        session.commit()

        log.info('Added %s as seen. This will affect all tasks.' % seen_name)
Example #46
    def on_process_start(self, task):
        if not task.manager.options.seen:
            return

        task.manager.disable_tasks()

        seen_name = task.manager.options.seen
        if is_imdb_url(seen_name):
            imdb_id = extract_id(seen_name)
            if imdb_id:
                seen_name = imdb_id

        session = Session()
        se = SeenEntry(u'--seen', unicode(task.name))
        sf = SeenField(u'--seen', seen_name)
        se.fields.append(sf)
        session.add(se)
        session.commit()

        log.info('Added %s as seen. This will affect all tasks.' % seen_name)
Example #47
    def on_task_input(self, task, config):
        log.verbose('Retrieving list %s ...' % config['list'])

        # Get the imdb list in RSS format
        if config['list'] in ['watchlist', 'ratings', 'checkins']:
            url = 'http://rss.imdb.com/user/%s/%s' % (config['user_id'],
                                                      config['list'])
        else:
            url = 'http://rss.imdb.com/list/%s' % config['list']
        log.debug('Requesting %s' % url)
        try:
            rss = feedparser.parse(url)
        except LookupError as e:
            raise plugin.PluginError(
                'Failed to parse RSS feed for list `%s` correctly: %s' %
                (config['list'], e))
        if rss.status == 404:
            raise plugin.PluginError(
                'Unable to get imdb list. Either list is private or does not exist.'
            )

        # Create an Entry for each movie in the list
        entries = []
        title_re = re.compile(r'(.*) \((\d{4})?.*?\)$')
        for entry in rss.entries:
            try:
                # IMDb puts some extra stuff in the titles, e.g. "Battlestar Galactica (2004 TV Series)"
                # Strip out everything but the date
                match = title_re.match(entry.title)
                title = match.group(1)
                if match.group(2):
                    title += ' (%s)' % match.group(2)
                entries.append(
                    Entry(title=title,
                          url=entry.link,
                          imdb_id=extract_id(entry.link),
                          imdb_name=match.group(1)))
            except IndexError:
                log.critical('IndexError! Unable to handle RSS entry: %s' %
                             entry)
        return entries
Example #48
    def matches(self, feed, config, entry):
        # make sure the entry has IMDB fields filled
        try:
            get_plugin_by_name('imdb_lookup').instance.lookup(feed, entry)
        except PluginError:
            # no IMDB data, can't do anything
            return

        imdb_id = None
        if entry.get('imdb_id'):
            imdb_id = entry['imdb_id']
        elif entry.get('imdb_url'):
            imdb_id = extract_id(entry['imdb_url'])

        if not imdb_id:
            log.warning("No imdb id could be determined for %s" %
                        entry['title'])
            return

        return feed.session.query(QueuedMovie).filter(QueuedMovie.imdb_id == imdb_id).\
                                               filter(QueuedMovie.downloaded == None).first()
Example #49
    def parse_rlslog(self, rlslog_url, task):
        """
        :param rlslog_url: Url to parse from
        :param task: Task instance
        :return: List of release dictionaries
        """

        # BeautifulSoup doesn't seem to work if data is already decoded to unicode :/
        soup = get_soup(task.requests.get(rlslog_url, timeout=25).content)

        releases = []
        for entry in soup.find_all('div', attrs={'class': 'entry'}):
            release = {}
            h3 = entry.find('h3', attrs={'class': 'entrytitle'})
            if not h3:
                log.debug('FAIL: No h3 entrytitle')
                continue
            release['title'] = h3.a.contents[0].strip()
            entrybody = entry.find('div', attrs={'class': 'entrybody'})
            if not entrybody:
                log.debug('FAIL: No entrybody')
                continue

            log.trace('Processing title %s' % (release['title']))

            # find imdb url
            link_imdb = entrybody.find('a', text=re.compile(r'imdb', re.IGNORECASE))
            if link_imdb:
                release['imdb_id'] = extract_id(link_imdb['href'])
                release['imdb_url'] = link_imdb['href']

            # find google search url
            google = entrybody.find('a', href=re.compile(r'google', re.IGNORECASE))
            if google:
                release['url'] = google['href']
                releases.append(release)
            else:
                log_once('%s skipped due to missing or unsupported download link' % (release['title']), log)

        return releases
Example #50
    def lookup(self, entry, search_allowed=True):
        """
        Perform Rotten Tomatoes lookup for entry.

        :param entry: Entry instance
        :param search_allowed: Allow fallback to search
        :raises PluginError: Failure reason
        """
        imdb_id = entry.get('imdb_id', eval_lazy=False) or \
                  imdb.extract_id(entry.get('imdb_url', eval_lazy=False))
        if imdb_id:
            movie = lookup_movie(title=entry.get('imdb_name'),
                                 year=entry.get('imdb_year'),
                                 rottentomatoes_id=entry.get('rt_id', eval_lazy=False),
                                 imdb_id=imdb_id,
                                 only_cached=(not search_allowed))
        else:
            movie = lookup_movie(smart_match=entry['title'],
                                 rottentomatoes_id=entry.get('rt_id', eval_lazy=False),
                                 only_cached=(not search_allowed))
        log.debug(u'Got movie: %s' % movie)
        entry.update_using_map(self.field_map, movie)
Example #51
    def on_task_input(self, task, config):
        log.verbose('Retrieving list %s ...' % config['list'])

        # Get the imdb list in RSS format
        if config['list'] in ['watchlist', 'ratings', 'checkins']:
            url = 'http://rss.imdb.com/user/%s/%s' % (config['user_id'], config['list'])
        else:
            url = 'http://rss.imdb.com/list/%s' % config['list']
        log.debug('Requesting %s' % url)
        try:
            rss = feedparser.parse(url)
        except LookupError as e:
            raise plugin.PluginError('Failed to parse RSS feed for list `%s` correctly: %s' % (config['list'], e))
        if rss.status == 404:
            raise plugin.PluginError('Unable to get imdb list. Either list is private or does not exist.')

        # Create an Entry for each movie in the list
        entries = []
        for entry in rss.entries:
            try:
                entries.append(Entry(title=entry.title, url=entry.link, imdb_id=extract_id(entry.link), imdb_name=entry.title))
            except IndexError:
                log.critical('IndexError! Unable to handle RSS entry: %s' % entry)
        return entries
Example #52
    def parse_html_list(self, task, config, url, params, headers):
        page = self.fetch_page(task, url, params, headers)
        soup = get_soup(page.text)
        try:
            item_text = soup.find(
                'div', class_='lister-total-num-results').string.split()
            total_item_count = int(item_text[0])
            log.verbose('imdb list contains %d items', total_item_count)
        except AttributeError:
            total_item_count = 0
        except (ValueError, TypeError) as e:
            # TODO Something is wrong if we get a ValueError, I think
            raise plugin.PluginError(
                'Received invalid movie count: %s ; %s' % (soup.find(
                    'div', class_='lister-total-num-results').string, e))

        if not total_item_count:
            log.verbose('No movies were found in imdb list: %s',
                        config['list'])
            return

        entries = []
        items_processed = 0
        page_no = 1
        while items_processed < total_item_count:
            # Fetch the next page unless we've just begun
            if items_processed:
                page_no += 1
                params['page'] = page_no
                page = self.fetch_page(task, url, params, headers)
                soup = get_soup(page.text)

            items = soup.find_all('div', class_='lister-item')
            if not items:
                log.debug('no items found on page: %s, aborting.', url)
                break
            log.debug('%d items found on page %d', len(items), page_no)

            for item in items:
                items_processed += 1
                a = item.find('h3', class_='lister-item-header').find('a')
                if not a:
                    log.debug('no title link found for row, skipping')
                    continue

                link = ('http://www.imdb.com' + a.get('href')).rstrip('/')
                entry = Entry()
                entry['title'] = a.text
                try:
                    # the year span typically renders as '(2014)'; strip the
                    # parentheses before converting
                    year = int(item.find(
                        'span', class_='lister-item-year').text.strip('()'))
                    entry['title'] += ' (%s)' % year
                    entry['imdb_year'] = year
                except (ValueError, TypeError):
                    pass
                entry['url'] = link
                entry['imdb_id'] = extract_id(link)
                entry['imdb_name'] = entry['title']
                entries.append(entry)

        return entries
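A hedged walk-through of the page counter above; the per-page cap of 100 is illustrative, since the loop only depends on total_item_count:

    total_item_count, items_processed, page_no = 250, 0, 1
    while items_processed < total_item_count:
        if items_processed:
            page_no += 1  # fetch the next page, as in the loop above
        items_processed += min(100, total_item_count - items_processed)
    print(page_no)  # 3 pages for 250 items at an assumed 100 items per page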
Example #53
0
    def imdb_id(self):
        return extract_id(self.url)
Example #55
0
class ImdbLookup(object):
    """
        Retrieves imdb information for entries.

        Example:

        imdb_lookup: yes

        Also provides imdb lookup functionality to all other imdb related plugins.
    """

    field_map = {
        'imdb_url': 'url',
        'imdb_id': lambda movie: extract_id(movie.url),
        'imdb_name': 'title',
        'imdb_original_name': 'original_title',
        'imdb_photo': 'photo',
        'imdb_plot_outline': 'plot_outline',
        'imdb_score': 'score',
        'imdb_votes': 'votes',
        'imdb_year': 'year',
        'imdb_genres': lambda movie: [genre.name for genre in movie.genres],
        'imdb_languages': lambda movie: [lang.language.name for lang in movie.languages],
        'imdb_actors': lambda movie: dict(
            (actor.imdb_id, actor.name) for actor in movie.actors),
        'imdb_directors': lambda movie: dict(
            (director.imdb_id, director.name) for director in movie.directors),
        'imdb_mpaa_rating': 'mpaa_rating',
        # Generic fields filled by all movie lookup plugins:
        'movie_name': 'title',
        'movie_year': 'year',
    }

    schema = {'type': 'boolean'}

    @plugin.priority(130)
    def on_task_metainfo(self, task, config):
        if not config:
            return
        for entry in task.entries:
            self.register_lazy_fields(entry)

    def register_lazy_fields(self, entry):
        entry.register_lazy_func(self.lazy_loader, self.field_map)

    def lazy_loader(self, entry):
        """Does the lookup for this entry and populates the entry fields."""
        try:
            self.lookup(entry)
        except plugin.PluginError as e:
            log_once(str(e.value).capitalize(), logger=log)

    @with_session
    def imdb_id_lookup(self, movie_title=None, raw_title=None, session=None):
        """
        Perform faster lookup providing just imdb_id.
        Falls back to using basic lookup if data cannot be found from cache.

        .. note::

           API will be changed, it's dumb to return None on errors AND
           raise PluginError on some else

        :param movie_title: Name of the movie
        :param raw_title: Raw entry title
        :return: imdb id or None
        :raises PluginError: Failure reason
        """
        if movie_title:
            log.debug('imdb_id_lookup: trying with title: %s' % movie_title)
            movie = session.query(Movie).filter(
                Movie.title == movie_title).first()
            if movie:
                log.debug('--> success! got %s returning %s' %
                          (movie, movie.imdb_id))
                return movie.imdb_id
        if raw_title:
            log.debug('imdb_id_lookup: trying cache with: %s' % raw_title)
            result = session.query(SearchResult).filter(
                SearchResult.title == raw_title).first()
            if result:
                # this title is hopeless, give up ..
                if result.fails:
                    return None
                log.debug('--> success! got %s returning %s' %
                          (result, result.imdb_id))
                return result.imdb_id
        if raw_title:
            # last hope with hacky lookup
            fake_entry = Entry(raw_title, '')
            self.lookup(fake_entry)
            return fake_entry['imdb_id']

    @plugin.internet(log)
    @with_session
    def lookup(self, entry, search_allowed=True, session=None):
        """
        Perform imdb lookup for entry.

        :param entry: Entry instance
        :param search_allowed: Allow fallback to search
        :raises PluginError: Failure reason
        """

        from flexget.manager import manager

        if entry.get('imdb_id', eval_lazy=False):
            log.debug('No title passed. Lookup for %s' % entry['imdb_id'])
        elif entry.get('imdb_url', eval_lazy=False):
            log.debug('No title passed. Lookup for %s' % entry['imdb_url'])
        elif entry.get('title', eval_lazy=False):
            log.debug('lookup for %s' % entry['title'])
        else:
            raise plugin.PluginError(
                'looking up IMDB for entry failed, no title, imdb_url or imdb_id passed.'
            )

        # if imdb_id is included, build the url.
        if entry.get('imdb_id', eval_lazy=False) and not entry.get(
                'imdb_url', eval_lazy=False):
            entry['imdb_url'] = make_url(entry['imdb_id'])

        # make sure imdb url is valid
        if entry.get('imdb_url', eval_lazy=False):
            imdb_id = extract_id(entry['imdb_url'])
            if imdb_id:
                entry['imdb_url'] = make_url(imdb_id)
            else:
                log.debug('imdb url %s is invalid, removing it' %
                          entry['imdb_url'])
                del entry['imdb_url']

        # no imdb_url, check if there is cached result for it or if the
        # search is known to fail
        if not entry.get('imdb_url', eval_lazy=False):
            result = session.query(SearchResult).filter(
                SearchResult.title == entry['title']).first()
            if result:
                # TODO: 1.2 this should really be checking task.options.retry
                if result.fails and not manager.options.execute.retry:
                    # this movie cannot be found, not worth trying again ...
                    log.debug('%s will fail lookup' % entry['title'])
                    raise plugin.PluginError('IMDB lookup failed for %s' %
                                             entry['title'])
                else:
                    if result.url:
                        log.trace('Setting imdb url for %s from db' %
                                  entry['title'])
                        entry['imdb_id'] = result.imdb_id
                        entry['imdb_url'] = result.url

        movie = None

        # no imdb url, but information required, try searching
        if not entry.get('imdb_url', eval_lazy=False) and search_allowed:
            log.verbose('Searching from imdb `%s`' % entry['title'])
            search = ImdbSearch()
            search_name = entry.get('movie_name',
                                    entry['title'],
                                    eval_lazy=False)
            search_result = search.smart_match(search_name)
            if search_result:
                entry['imdb_url'] = search_result['url']
                # store url for this movie, so we don't have to search on every run
                result = SearchResult(entry['title'], entry['imdb_url'])
                session.add(result)
                session.commit()
                log.verbose('Found %s' % (entry['imdb_url']))
            else:
                log_once('IMDB lookup failed for %s' % entry['title'],
                         log,
                         logging.WARN,
                         session=session)
                # store FAIL for this title
                result = SearchResult(entry['title'])
                result.fails = True
                session.add(result)
                session.commit()
                raise plugin.PluginError('Title `%s` lookup failed' %
                                         entry['title'])

        # check if this imdb page has been parsed & cached
        movie = session.query(Movie).filter(
            Movie.url == entry['imdb_url']).first()

        # If we have a movie from cache, we are done
        if movie and not movie.expired:
            entry.update_using_map(self.field_map, movie)
            return

        # Movie was not found in cache, or was expired
        if movie is not None:
            if movie.expired:
                log.verbose('Movie `%s` details expired, refreshing ...' %
                            movie.title)
            # Remove the old movie, we'll store another one later.
            session.query(MovieLanguage).filter(
                MovieLanguage.movie_id == movie.id).delete()
            session.query(Movie).filter(
                Movie.url == entry['imdb_url']).delete()
            session.commit()

        # search and store to cache
        if 'title' in entry:
            log.verbose('Parsing imdb for `%s`' % entry['title'])
        else:
            log.verbose('Parsing imdb for `%s`' % entry['imdb_id'])
        try:
            movie = self._parse_new_movie(entry['imdb_url'], session)
        except UnicodeDecodeError:
            log.error(
                'Unable to determine encoding for %s. Installing chardet library may help.'
                % entry['imdb_url'])
            # store cache so this will not be tried again
            movie = Movie()
            movie.url = entry['imdb_url']
            session.add(movie)
            session.commit()
            raise plugin.PluginError('UnicodeDecodeError')
        except ValueError as e:
            # TODO: might be a little too broad catch, what was this for anyway? ;P
            if manager.options.debug:
                log.exception(e)
            raise plugin.PluginError(
                'Invalid parameter: %s' % entry['imdb_url'], log)

        for att in [
                'title', 'score', 'votes', 'year', 'genres', 'languages',
                'actors', 'directors', 'mpaa_rating'
        ]:
            log.trace('movie.%s: %s' % (att, getattr(movie, att)))

        # Update the entry fields
        entry.update_using_map(self.field_map, movie)

    def _parse_new_movie(self, imdb_url, session):
        """
        Get Movie object by parsing imdb page and save movie into the database.

        :param imdb_url: IMDB url
        :param session: Session to be used
        :return: Newly added Movie
        """
        parser = ImdbParser()
        parser.parse(imdb_url)
        # store to database
        movie = Movie()
        movie.photo = parser.photo
        movie.title = parser.name
        movie.original_title = parser.original_name
        movie.score = parser.score
        movie.votes = parser.votes
        movie.year = parser.year
        movie.mpaa_rating = parser.mpaa_rating
        movie.plot_outline = parser.plot_outline
        movie.url = imdb_url
        for name in parser.genres:
            genre = session.query(Genre).filter(Genre.name == name).first()
            if not genre:
                genre = Genre(name)
            movie.genres.append(genre)  # pylint:disable=E1101
        for index, name in enumerate(parser.languages):
            language = session.query(Language).filter(
                Language.name == name).first()
            if not language:
                language = Language(name)
            movie.languages.append(MovieLanguage(language, prominence=index))
        for imdb_id, name in parser.actors.items():
            actor = session.query(Actor).filter(
                Actor.imdb_id == imdb_id).first()
            if not actor:
                actor = Actor(imdb_id, name)
            movie.actors.append(actor)  # pylint:disable=E1101
        for imdb_id, name in parser.directors.items():
            director = session.query(Director).filter(
                Director.imdb_id == imdb_id).first()
            if not director:
                director = Director(imdb_id, name)
            movie.directors.append(director)  # pylint:disable=E1101
        # so that we can track how long since we've updated the info later
        movie.updated = datetime.now()
        session.add(movie)
        return movie
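A hedged usage sketch of the imdb_id_lookup fast path defined above; the titles are hypothetical, and the @with_session decorator supplies a session when none is passed:

    lookup_plugin = ImdbLookup()
    imdb_id = lookup_plugin.imdb_id_lookup(
        movie_title='The Matrix',                  # hypothetical movie title
        raw_title='The.Matrix.1999.1080p.BluRay')  # hypothetical raw entry title
    # expected: a cached 'tt...' id, None for a title known to fail, or the
    # result of the fake-entry fallback to the full lookup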
Example #56
0
    def on_task_input(self, task, config):
        # Create movie entries by parsing imdb list page(s) html using beautifulsoup
        log.verbose('Retrieving imdb list: %s' % config['list'])

        params = {'view': 'compact'}
        if config['list'] in ['watchlist', 'ratings', 'checkins']:
            url = 'http://www.imdb.com/user/%s/%s' % (config['user_id'], config['list'])
        else:
            url = 'http://www.imdb.com/list/%s' % config['list']

        log.debug('Requesting: %s' % url)
        page = task.requests.get(url, params=params)
        if page.status_code != 200:
            raise plugin.PluginError('Unable to get imdb list. Either list is private or does not exist.')

        soup = get_soup(page.text)
        # TODO: Something is messed up with the html5lib parser and imdb, have to get to our subsection without
        # recursion before doing a regular find. Repeated in the loop below as well.
        soup = soup.find('div', id='root').find('div', id='pagecontent', recursive=False)
        div = soup.find('div', class_='desc')
        if div:
            total_movie_count = int(div.get('data-size'))
        else:
            total_movie_count = 0

        if total_movie_count == 0:
            log.verbose('No movies were found in imdb list: %s' % config['list'])
            return

        entries = []
        start = 1
        while start < total_movie_count:
            if start == 1:
                trs = soup.find_all(attrs={'data-item-id': True})
            else:
                params['start'] = start
                page = task.requests.get(url, params=params)
                if page.status_code != 200:
                    raise plugin.PluginError('Unable to get imdb list.')
                soup = get_soup(page.text)
                # TODO: This is a hack, see above
                soup = soup.find('div', id='root').find('div', id='pagecontent', recursive=False)
                trs = soup.find_all(attrs={'data-item-id': True})

            if not trs:
                # guard against an empty page; otherwise start never advances
                # and the loop would never terminate
                log.debug('no rows found on page, aborting')
                break

            for tr in trs:
                a = tr.find('td', class_='title').find('a')
                if not a:
                    log.debug('no title link found for row, skipping')
                    continue
                link = ('http://www.imdb.com' + a.get('href')).rstrip('/')
                entry = Entry()
                entry['title'] = a.string
                try:
                    year = int(tr.find('td', class_='year').string)
                    entry['title'] += ' (%s)' % year
                    entry['imdb_year'] = year
                except (ValueError, TypeError):  # year cell may be empty
                    pass
                entry['url'] = link
                entry['imdb_id'] = extract_id(link)
                entry['imdb_name'] = entry['title']
                entries.append(entry)

            start = len(entries) + 1

        return entries
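A hedged sketch of the start-based pagination above; the 250 rows per page is an assumed cap for illustration:

    total_movie_count = 260
    start, fetched = 1, 0
    while start < total_movie_count:
        fetched += min(250, total_movie_count - fetched)  # rows parsed from this page
        start = fetched + 1  # mirrors start = len(entries) + 1 above
    print(fetched)  # 260: a full page of 250 rows, then a final page of 10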
Example #57
0
    def search(self, entry, config):
        login_sess = requests.Session()
        login_params = {
            'username': config['username'],
            'password': config['password'],
            'loginkey': config['login_key']
        }
        try:
            login_sess.post('https://piratethenet.org/takelogin.php',
                            data=login_params,
                            verify=False)
        except requests.RequestException as e:
            log.error('Error while logging in to PtN: %s', e)

        download_auth = CookieAuth(login_sess.cookies)
        # Default to searching by title (0=title 3=imdb_id)
        search_by = 0
        if 'imdb_id' in entry:
            searches = [entry['imdb_id']]
            search_by = 3
        elif 'movie_name' in entry:
            search = entry['movie_name']
            if 'movie_year' in entry:
                search += ' %s' % entry['movie_year']
            searches = [search]
        else:
            searches = entry.get('search_strings', [entry['title']])

        params = {'_by': search_by}
        if config.get('categories'):
            for cat in config['categories']:
                params[categories[cat]] = 1
        results = set()
        for search in searches:
            params['search'] = search
            try:
                r = login_sess.get('http://piratethenet.org/browse.php',
                                   params=params)
            except requests.RequestException as e:
                log.error('Error searching ptn: %s' % e)
                continue
            soup = get_soup(r.text)
            if 'login' in soup.head.title.text.lower():
                log.error('PtN cookie info invalid')
                raise plugin.PluginError('PTN cookie info invalid')
            try:
                results_table = soup.find_all('table',
                                              attrs={'class': 'main'},
                                              limit=2)[1]
            except IndexError:
                log.debug('no results found for `%s`' % search)
                continue
            for row in results_table.find_all('tr')[1:]:
                columns = row.find_all('td')
                entry = Entry()
                links = columns[1].find_all('a', recursive=False, limit=2)
                entry['title'] = links[0].text
                if len(links) > 1:
                    entry['imdb_id'] = extract_id(links[1].get('href'))
                entry['url'] = 'http://piratethenet.org/' + columns[2].a.get(
                    'href')
                entry['download_auth'] = download_auth
                entry['torrent_seeds'] = int(columns[8].text)
                entry['torrent_leeches'] = int(columns[9].text)
                entry['search_sort'] = torrent_availability(
                    entry['torrent_seeds'], entry['torrent_leeches'])
                size = columns[6].find('br').previous_sibling
                unit = columns[6].find('br').next_sibling
                if unit == 'GB':
                    entry['content_size'] = int(float(size) * 1024)
                elif unit == 'MB':
                    entry['content_size'] = int(float(size))
                elif unit == 'KB':
                    entry['content_size'] = int(float(size) / 1024)
                results.add(entry)
        return results
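A hedged recap of the size normalisation above, which stores content_size in megabytes:

    for size, unit in ((1.5, 'GB'), (700, 'MB'), (512, 'KB')):
        if unit == 'GB':
            mb = int(float(size) * 1024)
        elif unit == 'MB':
            mb = int(float(size))
        else:  # KB
            mb = int(float(size) / 1024)
        print('%s %s -> %d MB' % (size, unit, mb))  # 1536 MB, 700 MB, 0 MB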