Example #1
0
    def __setitem__(self, key, value):
        """
        Validate and normalise ``value`` before it is assigned to ``key``.

        * Any ``basestring`` that is not exactly ``unicode`` is cast to ``unicode``.
        * ``url`` must be a string; the first url assigned is also kept as ``original_url``.
        * ``title`` must be a string.
        * A string ``imdb_url`` is rebuilt with ``make_url`` from the id that ``extract_id``
          finds in it, or replaced by ``None`` when no id is found.

        :param key: Name of the field being assigned
        :param value: Value being assigned
        :raises EntryUnicodeError: When a string value cannot be decoded to unicode
        :raises PluginError: When ``url`` or ``title`` is set to a non-string value
        """
        # Enforce unicode compatibility. Check for all subclasses of basestring, so that NavigableStrings are also cast
        if isinstance(value, basestring) and type(value) is not unicode:
            try:
                value = unicode(value)
            except UnicodeDecodeError:
                raise EntryUnicodeError(key, value)

        # url and original_url handling
        if key == 'url':
            if not isinstance(value, basestring):
                raise PluginError('Tried to set %r url to %r' % (self.get('title'), value))
            # only the very first url is remembered as the original one
            if 'original_url' not in self:
                self['original_url'] = value

        # title handling
        if key == 'title':
            if not isinstance(value, basestring):
                raise PluginError('Tried to set title to %r' % value)

        # TODO: HACK! Implement via plugin once #348 (entry events) is implemented
        # enforces imdb_url in same format
        if key == 'imdb_url' and isinstance(value, basestring):
            imdb_id = extract_id(value)
            if imdb_id:
                value = make_url(imdb_id)
            else:
                log.debug('Tried to set imdb_id to invalid imdb url: %s' % value)
                value = None

        # Logging must never prevent the assignment: %r calls the value's repr, which may raise
        try:
            log.trace('ENTRY SET: %s = %r' % (key, value))
        except Exception as e:
            log.debug('trying to debug key `%s` value threw exception: %s' % (key, e))
        # NOTE(review): as shown, this method validates ``value`` but never stores it;
        # presumably the actual assignment (e.g. ``dict.__setitem__``) follows - confirm.
Example #2
0
    def on_task_metainfo(self, task, config):
        """
        Populate ``imdb_id`` and ``imdb_url`` from an imdb link in the entry description.

        Nothing happens when the task config contains ``scan_imdb`` with a false value.
        Entries that already have an ``imdb_id``, have no ``description``, or whose
        description yields zero or several distinct imdb ids are left untouched.

        :param task: Task whose ``config`` and ``entries`` are read
        :param config: Plugin configuration (not used by this method)
        """
        # check if disabled (value set to false)
        if 'scan_imdb' in task.config and not task.config['scan_imdb']:
            return

        for entry in task.entries:
            # An already populated imdb_id wins; without a description there is nothing to scan
            if entry.get('imdb_id', eval_lazy=False) or 'description' not in entry:
                continue

            found = re.findall(r'\bimdb.com/title/tt\d+\b', entry['description'])
            # Reduce the matches to the distinct, non-empty imdb ids
            imdb_ids = [imdb_id for imdb_id in set(map(extract_id, found)) if imdb_id]
            if not imdb_ids:
                continue

            if len(imdb_ids) == 1:
                entry['imdb_id'] = imdb_ids[0]
                entry['imdb_url'] = make_url(entry['imdb_id'])
                log.debug('Found imdb url in description %s' % entry['imdb_url'])
            else:
                # Ambiguous: refuse to pick one of several candidates
                log.debug('Found multiple imdb ids; not using any of: %s' % ' '.join(imdb_ids))
Example #3
0
    def __setitem__(self, key, value):
        """
        Store ``value`` under ``key`` after validating and normalising it.

        * Any ``basestring`` that is not exactly ``unicode`` is cast to ``unicode``.
        * ``url`` must be a string; ``original_url`` is set to it unless already present.
        * ``title`` must be a string.
        * A string ``imdb_url`` is rebuilt with ``make_url`` from the id that ``extract_id``
          finds in it, or replaced by ``None`` when no id is found.

        :param key: Name of the field being assigned
        :param value: Value being assigned
        :raises EntryUnicodeError: When a string value cannot be decoded to unicode
        :raises PluginError: When ``url`` or ``title`` is set to a non-string value
        """
        # Strings of any basestring subclass (NavigableString included) are cast to plain unicode.
        # The cast keeps the value a string, so this flag stays valid for the checks below.
        is_text = isinstance(value, basestring)
        if is_text and type(value) is not unicode:
            try:
                value = unicode(value)
            except UnicodeDecodeError:
                raise EntryUnicodeError(key, value)

        # url and original_url handling
        if key == 'url':
            if is_text:
                self.setdefault('original_url', value)
            else:
                raise PluginError('Tried to set %r url to %r' % (self.get('title'), value))

        # title handling
        if key == 'title' and not is_text:
            raise PluginError('Tried to set title to %r' % value)

        # TODO: HACK! Implement via plugin once #348 (entry events) is implemented
        # keeps every imdb_url in the same format
        if key == 'imdb_url' and is_text:
            imdb_id = extract_id(value)
            if not imdb_id:
                log.debug('Tried to set imdb_id to invalid imdb url: %s' % value)
                value = None
            else:
                value = make_url(imdb_id)

        # A failing repr of the value must not block the assignment
        try:
            log.trace('ENTRY SET: %s = %r' % (key, value))
        except Exception as e:
            log.debug('trying to debug key `%s` value threw exception: %s' % (key, e))

        dict.__setitem__(self, key, value)
Example #4
0
    def on_task_metainfo(self, task, config):
        """
        Fill in ``imdb_id`` and ``imdb_url`` from an imdb link found in the entry description.

        Does nothing when the task config contains ``scan_imdb`` with a false value.
        Entries are skipped when they already have an ``imdb_id``, have no ``description``,
        or when the description yields zero or more than one distinct imdb id.

        :param task: Task whose ``config`` and ``entries`` are read
        :param config: Plugin configuration (not used by this method)
        """
        # check if disabled (value set to false)
        if 'scan_imdb' in task.config:
            if not task.config['scan_imdb']:
                return

        for entry in task.entries:
            # Don't override already populated imdb_ids
            if entry.get('imdb_id', eval_lazy=False):
                continue
            if 'description' not in entry:
                continue
            urls = re.findall(r'\bimdb.com/title/tt\d+\b',
                              entry['description'])
            # Find unique imdb ids. This has to be a real list: it is tested for
            # emptiness, measured with len() and indexed below, none of which work
            # on the lazy iterator that filter() returns on Python 3.
            imdb_ids = [imdb_id
                        for imdb_id in set(extract_id(url) for url in urls)
                        if imdb_id]
            if not imdb_ids:
                continue

            if len(imdb_ids) > 1:
                log.debug('Found multiple imdb ids; not using any of: %s' %
                          ' '.join(imdb_ids))
                continue

            entry['imdb_id'] = imdb_ids[0]
            entry['imdb_url'] = make_url(entry['imdb_id'])
            log.debug('Found imdb url in description %s' % entry['imdb_url'])
Example #5
0
    def lookup(self, entry, search_allowed=True, session=None):
        """
        Look up imdb details for ``entry`` and copy them onto the entry.

        The imdb url is taken from the entry itself (``imdb_url``, or built from
        ``imdb_id``), then from a stored search result for the title, then from a
        fresh imdb search. Movie details come from the database when a non-expired
        row exists for that url; otherwise the page is parsed again.

        :param entry: Entry instance
        :param search_allowed: Allow fallback to search
        :param session: Database session used for all queries and commits (the ``None``
            default is presumably filled in by a decorator outside this view - confirm)
        :raises PluginError: Failure reason
        """

        from flexget.manager import manager

        # Log which field the lookup starts from; with none of them there is nothing to do
        for probe, message in (
                ('imdb_id', 'No title passed. Lookup for %s'),
                ('imdb_url', 'No title passed. Lookup for %s'),
                ('title', 'lookup for %s'),
        ):
            if entry.get(probe, eval_lazy=False):
                log.debug(message % entry[probe])
                break
        else:
            raise plugin.PluginError(
                'looking up IMDB for entry failed, no title, imdb_url or imdb_id passed.'
            )

        # an imdb_id without an accompanying url is enough to build the url
        if entry.get('imdb_id', eval_lazy=False) and not entry.get('imdb_url', eval_lazy=False):
            entry['imdb_url'] = make_url(entry['imdb_id'])

        # normalise a valid imdb url, drop an invalid one
        if entry.get('imdb_url', eval_lazy=False):
            parsed_id = extract_id(entry['imdb_url'])
            if not parsed_id:
                log.debug('imdb url %s is invalid, removing it' % entry['imdb_url'])
                del entry['imdb_url']
            else:
                entry['imdb_url'] = make_url(parsed_id)

        # still no imdb_url: consult the stored search results, which also
        # remember titles whose search is known to fail
        if not entry.get('imdb_url', eval_lazy=False):
            stored_query = session.query(SearchResult)
            stored = stored_query.filter(SearchResult.title == entry['title']).first()
            if stored:
                # TODO: 1.2 this should really be checking task.options.retry
                if stored.fails and not manager.options.execute.retry:
                    # this movie cannot be found, not worth trying again ...
                    log.debug('%s will fail lookup' % entry['title'])
                    raise plugin.PluginError('IMDB lookup failed for %s' % entry['title'])
                if stored.url:
                    log.trace('Setting imdb url for %s from db' % entry['title'])
                    entry['imdb_id'] = stored.imdb_id
                    entry['imdb_url'] = stored.url

        movie = None

        # no imdb url, but information required, try searching
        if not entry.get('imdb_url', eval_lazy=False) and search_allowed:
            log.verbose('Searching from imdb `%s`' % entry['title'])
            searcher = ImdbSearch()
            search_name = entry.get('movie_name', entry['title'], eval_lazy=False)
            match = searcher.smart_match(search_name)
            if not match:
                log_once('IMDB lookup failed for %s' % entry['title'], log, logging.WARN, session=session)
                # remember the failure for this title
                failure = SearchResult(entry['title'])
                failure.fails = True
                session.add(failure)
                session.commit()
                raise plugin.PluginError('Title `%s` lookup failed' % entry['title'])

            entry['imdb_url'] = match['url']
            # remember the url for this title, so the search is not repeated on every run
            success = SearchResult(entry['title'], entry['imdb_url'])
            session.add(success)
            session.commit()
            log.verbose('Found %s' % (entry['imdb_url']))

        # has this imdb page been parsed & cached already?
        movie_query = session.query(Movie)
        movie = movie_query.filter(Movie.url == entry['imdb_url']).first()

        # A cached, unexpired movie is all that is needed
        if movie and not movie.expired:
            entry.update_using_map(self.field_map, movie)
            return

        # Either nothing was cached, or what was cached has expired
        if movie is not None:
            if movie.expired:
                log.verbose('Movie `%s` details expired, refreshing ...' % movie.title)
            # Drop the stale rows, a fresh movie is stored further down.
            session.query(MovieLanguage).filter(MovieLanguage.movie_id == movie.id).delete()
            session.query(Movie).filter(Movie.url == entry['imdb_url']).delete()
            session.commit()

        # parse the page and store the result to cache
        parse_label = entry['title'] if 'title' in entry else entry['imdb_id']
        log.verbose('Parsing imdb for `%s`' % parse_label)
        try:
            movie = self._parse_new_movie(entry['imdb_url'], session)
        except UnicodeDecodeError:
            log.error('Unable to determine encoding for %s. Installing chardet library may help.'
                      % entry['imdb_url'])
            # cache an empty movie for this url so the parse is not attempted again
            placeholder = Movie()
            placeholder.url = entry['imdb_url']
            session.add(placeholder)
            session.commit()
            raise plugin.PluginError('UnicodeDecodeError')
        except ValueError as e:
            # TODO: might be a little too broad catch, what was this for anyway? ;P
            if manager.options.debug:
                log.exception(e)
            raise plugin.PluginError('Invalid parameter: %s' % entry['imdb_url'], log)

        for attribute in ('title', 'score', 'votes', 'year', 'genres', 'languages',
                          'actors', 'directors', 'mpaa_rating'):
            log.trace('movie.%s: %s' % (attribute, getattr(movie, attribute)))

        # Copy the movie details onto the entry
        entry.update_using_map(self.field_map, movie)
Example #6
0
    def lookup(self, entry, search_allowed=True):
        """
        Resolve imdb details for ``entry`` and write them onto the entry.

        The imdb url is taken from the entry itself (``imdb_url``, or built from
        ``imdb_id``), then from a stored search result for the title, then from a
        fresh imdb search. Movie details come from the database when a non-expired
        row exists for that url; otherwise the page is parsed again. The session
        opened here is committed on every exit path.

        :param entry: Entry instance
        :param search_allowed: Allow fallback to search
        :raises PluginError: Failure reason
        """

        from flexget.manager import manager

        # Log which field the lookup starts from; with none of them there is nothing to do
        for probe, message in (
                ('imdb_url', 'No title passed. Lookup for %s'),
                ('imdb_id', 'No title passed. Lookup for %s'),
                ('title', 'lookup for %s')):
            if entry.get(probe, eval_lazy=False):
                log.debug(message % entry[probe])
                break
        else:
            raise PluginError('looking up IMDB for entry failed, no title, imdb_url or imdb_id passed.')

        session = Session()

        try:
            # entry sanity checks: these fields, when set, have to be numeric
            for field in ('imdb_votes', 'imdb_score'):
                if not entry.get(field, eval_lazy=False):
                    continue
                if not isinstance(entry[field], (int, float)):
                    raise PluginError('Entry field %s should be a number!' % field)

            # an imdb_id without an accompanying url is enough to build the url
            if entry.get('imdb_id', eval_lazy=False) and not entry.get('imdb_url', eval_lazy=False):
                entry['imdb_url'] = make_url(entry['imdb_id'])

            # normalise a valid imdb url, drop an invalid one
            if entry.get('imdb_url', eval_lazy=False):
                parsed_id = extract_id(entry['imdb_url'])
                if not parsed_id:
                    log.debug('imdb url %s is invalid, removing it' % entry['imdb_url'])
                    del entry['imdb_url']
                else:
                    entry['imdb_url'] = make_url(parsed_id)

            # still no imdb_url: consult the stored search results, which also
            # remember titles whose search is known to fail
            if not entry.get('imdb_url', eval_lazy=False):
                stored_query = session.query(SearchResult)
                stored = stored_query.filter(SearchResult.title == entry['title']).first()
                if stored:
                    if stored.fails and not manager.options.retry:
                        # this movie cannot be found, not worth trying again ...
                        log.debug('%s will fail lookup' % entry['title'])
                        raise PluginError('Title `%s` lookup fails' % entry['title'])
                    if stored.url:
                        log.trace('Setting imdb url for %s from db' % entry['title'])
                        entry['imdb_url'] = stored.url

            # no imdb url, but information required, try searching
            if not entry.get('imdb_url', eval_lazy=False) and search_allowed:
                log.verbose('Searching from imdb `%s`' % entry['title'])

                searcher = ImdbSearch()
                search_name = entry.get('movie_name', entry['title'], eval_lazy=False)
                match = searcher.smart_match(search_name)
                if not match:
                    log_once('Imdb lookup failed for %s' % entry['title'], log)
                    # remember the failure for this title
                    failure = SearchResult(entry['title'])
                    failure.fails = True
                    session.add(failure)
                    raise PluginError('Title `%s` lookup failed' % entry['title'])

                entry['imdb_url'] = match['url']
                # remember the url for this title, so the search is not
                # repeated on every run
                session.add(SearchResult(entry['title'], entry['imdb_url']))
                log.verbose('Found %s' % (entry['imdb_url']))

            # has this imdb page been parsed & cached already?
            movie_query = session.query(Movie).options(
                joinedload_all(Movie.genres),
                joinedload_all(Movie.languages),
                joinedload_all(Movie.actors),
                joinedload_all(Movie.directors))
            movie = movie_query.filter(Movie.url == entry['imdb_url']).first()

            # parsing is needed when nothing is cached or the cached details expired
            if not movie or movie.expired:
                if movie is not None:
                    if movie.expired:
                        log.verbose('Movie `%s` details expired, refreshing ...' % movie.title)
                    # Drop the stale rows, a fresh movie is stored further down.
                    session.query(MovieLanguage).filter(MovieLanguage.movie_id == movie.id).delete()
                    session.query(Movie).filter(Movie.url == entry['imdb_url']).delete()

                # parse the page and store the result to cache
                parse_label = entry['title'] if 'title' in entry else entry['imdb_id']
                log.verbose('Parsing imdb for `%s`' % parse_label)
                try:
                    movie = self._parse_new_movie(entry['imdb_url'], session)
                except UnicodeDecodeError:
                    log.error('Unable to determine encoding for %s. Installing chardet library may help.' %
                              entry['imdb_url'])
                    # cache an empty movie for this url so the parse is not attempted again
                    placeholder = Movie()
                    placeholder.url = entry['imdb_url']
                    session.add(placeholder)
                    raise PluginError('UnicodeDecodeError')
                except ValueError as e:
                    # TODO: might be a little too broad catch, what was this for anyway? ;P
                    if manager.options.debug:
                        log.exception(e)
                    raise PluginError('Invalid parameter: %s' % entry['imdb_url'], log)

            for attribute in ('title', 'score', 'votes', 'year', 'genres', 'languages',
                              'actors', 'directors', 'mpaa_rating'):
                log.trace('movie.%s: %s' % (attribute, getattr(movie, attribute)))

            # copy the movie details onto the entry
            entry.update_using_map(self.field_map, movie)
        finally:
            # runs on success and on failure alike, so rows added before a raise are kept
            log.trace('committing session')
            session.commit()
Example #7
0
    def on_task_input(self, task, config):
        """
        Create entries from an imdb list that is downloaded as a CSV export.

        When both ``username`` and ``password`` are configured, logs in to imdb first.
        If ``user_id`` is then still missing from the config, it is read from the url the
        watchlist request ends up at and written back into ``config``.

        :param task: Task being executed (not used by this method)
        :param config: Plugin configuration; reads ``username``, ``password``, ``user_id``
            and ``list``
        :return: List of entries, one per usable row of the export
        :raises plugin.PluginError: On a failed login or download request, when ``user_id``
            is neither configured nor detectable, or when the response is not ``text/csv``
        """
        sess = requests.Session()
        if config.get('username') and config.get('password'):

            log.verbose('Logging in ...')

            # Log in to imdb with our handler
            params = {'login': config['username'], 'password': config['password']}
            try:
                # First get the login page so we can get the hidden input value
                soup = get_soup(sess.get('https://secure.imdb.com/register-imdb/login').content)

                # Fix for bs4 bug. see #2313 and github#118
                auxsoup = soup.find('div', id='nb20').next_sibling.next_sibling
                tag = auxsoup.find('input', attrs={'name': '49e6c'})
                if tag:
                    params['49e6c'] = tag['value']
                else:
                    log.warning('Unable to find required info for imdb login, maybe their login method has changed.')
                # Now we do the actual login with appropriate parameters
                # (the response itself is not inspected, see the note below)
                sess.post('https://secure.imdb.com/register-imdb/login', data=params, raise_status=False)
            except requests.RequestException as e:
                # Format the exception itself: the ``message`` attribute is deprecated
                # and does not exist on Python 3 exceptions.
                raise plugin.PluginError('Unable to login to imdb: %s' % e)

            # IMDb redirects us upon a successful login.
            # removed - doesn't happen always?
            # if r.status_code != 302:
            #     log.warning('It appears logging in to IMDb was unsuccessful.')

            # try to automatically figure out user_id from watchlist redirect url
            if 'user_id' not in config:
                log.verbose('Getting user_id ...')
                try:
                    response = sess.get('http://www.imdb.com/list/watchlist')
                except requests.RequestException as e:
                    log.error('Error retrieving user ID from imdb: %s' % e)
                    # an empty id fails the pattern check below and ends in a PluginError
                    user_id = ''
                else:
                    log.debug('redirected to %s' % response.url)
                    # second to last path segment of the final url
                    user_id = response.url.split('/')[-2]
                if re.match(USER_ID_RE, user_id):
                    config['user_id'] = user_id
                else:
                    raise plugin.PluginError('Couldn\'t figure out user_id, please configure it manually.')

        if 'user_id' not in config:
            raise plugin.PluginError('Configuration option `user_id` required.')

        log.verbose('Retrieving list %s ...' % config['list'])

        # Get the imdb list in csv format
        try:
            url = 'http://www.imdb.com/list/export'
            params = {'list_id': config['list'], 'author_id': config['user_id']}
            log.debug('Requesting %s' % url)
            opener = sess.get(url, params=params)
            mime_type = parse_header(opener.headers['content-type'])[0]
            log.debug('mime_type: %s' % mime_type)
            if mime_type != 'text/csv':
                raise plugin.PluginError('Didn\'t get CSV export as response. Probably specified list `%s` '
                                         'does not exist.' % config['list'])
            csv_rows = csv.reader(opener.iter_lines())
        except requests.RequestException as e:
            raise plugin.PluginError('Unable to get imdb list: %s' % e)

        # Create an Entry for each movie in the list
        entries = []
        for row in csv_rows:
            if not row or row[0] == 'position':
                # Don't use blank rows or the headings row
                continue
            try:
                # column 1 is used as the imdb id, column 5 as the title
                title = decode_html(row[5]).decode('utf-8')
                entries.append(Entry(title=title, url=make_url(row[1]), imdb_id=row[1], imdb_name=title))
            except IndexError:
                log.critical('IndexError! Unable to handle row: %s' % row)
        return entries
Example #8
0
    def on_task_input(self, task, config):
        """
        Build entries from the CSV export of an imdb list.

        With both ``username`` and ``password`` configured, an imdb login is performed
        first. If ``user_id`` is then still absent from the config, it is taken from the
        url the watchlist request ends up at and stored back into ``config``.

        :param task: Task being executed (not used by this method)
        :param config: Plugin configuration; reads ``username``, ``password``, ``user_id``
            and ``list``
        :return: List of entries, one per usable row of the export
        :raises plugin.PluginError: On a failed login or download request, when ``user_id``
            is neither configured nor detectable, or when the response is not ``text/csv``
        """
        sess = requests.Session()
        if config.get('username') and config.get('password'):

            log.verbose('Logging in ...')

            # Log in to imdb with our handler
            params = {
                'login': config['username'],
                'password': config['password']
            }
            try:
                # First get the login page so we can get the hidden input value
                soup = get_soup(
                    sess.get(
                        'https://secure.imdb.com/register-imdb/login').content)

                # Fix for bs4 bug. see #2313 and github#118
                auxsoup = soup.find('div', id='nb20').next_sibling.next_sibling
                tag = auxsoup.find('input', attrs={'name': '49e6c'})
                if tag:
                    params['49e6c'] = tag['value']
                else:
                    log.warning(
                        'Unable to find required info for imdb login, maybe their login method has changed.'
                    )
                # Now we do the actual login with appropriate parameters
                # (the response itself is not inspected, see the note below)
                sess.post('https://secure.imdb.com/register-imdb/login',
                          data=params,
                          raise_status=False)
            except requests.RequestException as e:
                # Format the exception itself: the ``message`` attribute is
                # deprecated and does not exist on Python 3 exceptions.
                raise plugin.PluginError('Unable to login to imdb: %s' % e)

            # IMDb redirects us upon a successful login.
            # removed - doesn't happen always?
            # if r.status_code != 302:
            #     log.warning('It appears logging in to IMDb was unsuccessful.')

            # try to automatically figure out user_id from watchlist redirect url
            if 'user_id' not in config:
                log.verbose('Getting user_id ...')
                try:
                    response = sess.get('http://www.imdb.com/list/watchlist')
                except requests.RequestException as e:
                    log.error('Error retrieving user ID from imdb: %s' % e)
                    # an empty id fails the pattern check below and ends in
                    # a PluginError
                    user_id = ''
                else:
                    log.debug('redirected to %s' % response.url)
                    # second to last path segment of the final url
                    user_id = response.url.split('/')[-2]
                if re.match(USER_ID_RE, user_id):
                    config['user_id'] = user_id
                else:
                    raise plugin.PluginError(
                        'Couldn\'t figure out user_id, please configure it manually.'
                    )

        if 'user_id' not in config:
            raise plugin.PluginError(
                'Configuration option `user_id` required.')

        log.verbose('Retrieving list %s ...' % config['list'])

        # Get the imdb list in csv format
        try:
            url = 'http://www.imdb.com/list/export'
            params = {
                'list_id': config['list'],
                'author_id': config['user_id']
            }
            log.debug('Requesting %s' % url)
            opener = sess.get(url, params=params)
            mime_type = parse_header(opener.headers['content-type'])[0]
            log.debug('mime_type: %s' % mime_type)
            if mime_type != 'text/csv':
                raise plugin.PluginError(
                    'Didn\'t get CSV export as response. Probably specified list `%s` '
                    'does not exist.' % config['list'])
            csv_rows = csv.reader(opener.iter_lines())
        except requests.RequestException as e:
            raise plugin.PluginError('Unable to get imdb list: %s' % e)

        # Create an Entry for each movie in the list
        entries = []
        for row in csv_rows:
            if not row or row[0] == 'position':
                # Don't use blank rows or the headings row
                continue
            try:
                # column 1 is used as the imdb id, column 5 as the title
                title = decode_html(row[5]).decode('utf-8')
                entries.append(
                    Entry(title=title,
                          url=make_url(row[1]),
                          imdb_id=row[1],
                          imdb_name=title))
            except IndexError:
                log.critical('IndexError! Unable to handle row: %s' % row)
        return entries
Example #9
0
    def lookup(self, entry, search_allowed=True):
        """
        Perform imdb lookup for entry.

        The imdb url is taken from the entry itself (``imdb_url``, or built from
        ``imdb_id``), then from a stored search result for the title, then from a
        fresh imdb search. Movie details come from the database when a non-expired
        row exists for that url; otherwise the page is parsed again. After a search
        or a parse the method sleeps for three seconds, unless the manager is in
        debug or unit test mode.

        :param entry: Entry instance
        :param search_allowed: Allow fallback to search
        :raises PluginError: Failure reason
        """

        from flexget.manager import manager

        if entry.get("imdb_url", eval_lazy=False):
            log.debug("No title passed. Lookup for %s" % entry["imdb_url"])
        elif entry.get("imdb_id", eval_lazy=False):
            log.debug("No title passed. Lookup for %s" % entry["imdb_id"])
        elif entry.get("title", eval_lazy=False):
            log.debug("lookup for %s" % entry["title"])
        else:
            raise PluginError("looking up IMDB for entry failed, no title, imdb_url or imdb_id passed.")

        # set as soon as a search or a page parse is attempted
        take_a_break = False
        session = Session()

        try:
            # entry sanity checks
            for field in ["imdb_votes", "imdb_score"]:
                if entry.get(field, eval_lazy=False):
                    value = entry[field]
                    if not isinstance(value, (int, float)):
                        raise PluginError("Entry field %s should be a number!" % field)

            # if imdb_id is included, build the url.
            if entry.get("imdb_id", eval_lazy=False) and not entry.get("imdb_url", eval_lazy=False):
                entry["imdb_url"] = make_url(entry["imdb_id"])

            # make sure imdb url is valid
            if entry.get("imdb_url", eval_lazy=False):
                imdb_id = extract_id(entry["imdb_url"])
                if imdb_id:
                    entry["imdb_url"] = make_url(imdb_id)
                else:
                    log.debug("imdb url %s is invalid, removing it" % entry["imdb_url"])
                    del (entry["imdb_url"])

            # no imdb_url, check if there is cached result for it or if the
            # search is known to fail
            if not entry.get("imdb_url", eval_lazy=False):
                result = session.query(SearchResult).filter(SearchResult.title == entry["title"]).first()
                if result:
                    if result.fails and not manager.options.retry:
                        # this movie cannot be found, not worth trying again ...
                        log.debug("%s will fail lookup" % entry["title"])
                        raise PluginError("Title `%s` lookup fails" % entry["title"])
                    else:
                        if result.url:
                            log.trace("Setting imdb url for %s from db" % entry["title"])
                            entry["imdb_url"] = result.url

            # no imdb url, but information required, try searching
            if not entry.get("imdb_url", eval_lazy=False) and search_allowed:
                log.verbose("Searching from imdb `%s`" % entry["title"])

                take_a_break = True
                search = ImdbSearch()
                search_result = search.smart_match(entry["title"])
                if search_result:
                    entry["imdb_url"] = search_result["url"]
                    # store url for this movie, so we don't have to search on
                    # every run
                    result = SearchResult(entry["title"], entry["imdb_url"])
                    session.add(result)
                    log.verbose("Found %s" % (entry["imdb_url"]))
                else:
                    log_once("Imdb lookup failed for %s" % entry["title"], log)
                    # store FAIL for this title
                    result = SearchResult(entry["title"])
                    result.fails = True
                    session.add(result)
                    raise PluginError("Title `%s` lookup failed" % entry["title"])

            # check if this imdb page has been parsed & cached
            movie = (
                session.query(Movie)
                .options(
                    joinedload_all(Movie.genres),
                    joinedload_all(Movie.languages),
                    joinedload_all(Movie.actors),
                    joinedload_all(Movie.directors),
                )
                .filter(Movie.url == entry["imdb_url"])
                .first()
            )

            # determine whether or not movie details needs to be parsed
            req_parse = False
            if not movie:
                req_parse = True
            elif movie.expired:
                req_parse = True

            if req_parse:
                if movie is not None:
                    if movie.expired:
                        log.verbose("Movie `%s` details expired, refreshing ..." % movie.title)
                    # Remove the old movie, we'll store another one later.
                    session.query(Movie).filter(Movie.url == entry["imdb_url"]).delete()

                # search and store to cache
                if "title" in entry:
                    log.verbose("Parsing imdb for `%s`" % entry["title"])
                else:
                    log.verbose("Parsing imdb for `%s`" % entry["imdb_id"])
                try:
                    take_a_break = True
                    movie = self._parse_new_movie(entry["imdb_url"], session)
                except UnicodeDecodeError:
                    log.error(
                        "Unable to determine encoding for %s. Installing chardet library may help." % entry["imdb_url"]
                    )
                    # store cache so this will not be tried again
                    movie = Movie()
                    movie.url = entry["imdb_url"]
                    session.add(movie)
                    raise PluginError("UnicodeDecodeError")
                except ValueError as e:
                    # TODO: might be a little too broad catch, what was this for anyway? ;P
                    if manager.options.debug:
                        log.exception(e)
                    raise PluginError("Invalid parameter: %s" % entry["imdb_url"], log)

            for att in ["title", "score", "votes", "year", "genres", "languages", "actors", "directors", "mpaa_rating"]:
                log.trace("movie.%s: %s" % (att, getattr(movie, att)))

            # store to entry
            entry.update_using_map(self.field_map, movie)

            # give imdb a little break between requests (see: http://flexget.com/ticket/129#comment:1)
            if take_a_break and not manager.options.debug and not manager.unit_test:
                import time

                time.sleep(3)
        finally:
            # Close the try statement (the original snippet left it without any handler)
            # and commit on every exit path, so rows added right before a PluginError is
            # raised (failed searches, undecodable pages) are still persisted.
            session.commit()
Example #10
0
        # Get the imdb list in csv format
        try:
            url = 'http://www.imdb.com/list/export'
            params = {'list_id': config['list'], 'author_id': config['user_id']}
            log.debug('Requesting %s' % url)
            opener = sess.get(url, params=params)
            mime_type = parse_header(opener.headers['content-type'])[0]
            log.debug('mime_type: %s' % mime_type)
            if mime_type != 'text/csv':
                raise PluginError('Didn\'t get CSV export as response. Probably specified list `%s` does not exists.'
                    % config['list'])
            csv_rows = csv.reader(opener.iter_lines())
        except requests.RequestException, e:
            raise PluginError('Unable to get imdb list: %s' % e.message)

        # Create an Entry for each movie in the list
        entries = []
        for row in csv_rows:
            if not row or row[0] == 'position':
                # Don't use blank rows or the headings row
                continue
            try:
                title = decode_html(row[5]).decode('utf-8')
                entries.append(Entry(title=title, url=make_url(row[1]), imdb_id=row[1], imdb_name=title))
            except IndexError:
                log.critical('IndexError! Unable to handle row: %s' % row)
        return entries


# Register the ImdbList class under the plugin name 'imdb_list' (api_ver=2).
register_plugin(ImdbList, 'imdb_list', api_ver=2)
Example #11
0
    def lookup(self, entry, search_allowed=True):
        """Perform imdb lookup for entry.

        Resolves an imdb url for the entry (built from its imdb_id, read from
        a stored SearchResult row, or found with a live ImdbSearch), then
        loads the matching Movie row from the database or re-parses the imdb
        page with ImdbParser, and finally copies the movie details onto the
        entry through self.field_map.

        :param entry: entry to look up; needs at least one of imdb_url,
            imdb_id or title
        :param search_allowed: when false, no ImdbSearch is attempted for an
            entry that still has no imdb_url
        :raises PluginError: with the failure reason
        """

        # Function-level import -- presumably to avoid a circular import with
        # flexget.manager; TODO confirm.
        from flexget.manager import manager

        # Log which field the lookup is keyed on; bail out if none is present.
        if entry.get('imdb_url', lazy=False):
            log.debug('No title passed. Lookup for %s' % entry['imdb_url'])
        elif entry.get('imdb_id', lazy=False):
            log.debug('No title passed. Lookup for %s' % entry['imdb_id'])
        elif entry.get('title', lazy=False):
            log.debug('lookup for %s' % entry['title'])
        else:
            raise PluginError('looking up IMDB for entry failed, no title, imdb_url or imdb_id passed.')

        # Set to True right before any request to imdb (search or page parse);
        # it gates the sleep at the end of the method.
        take_a_break = False
        # NOTE(review): this session is neither committed nor closed anywhere
        # in the visible code.
        session = Session()

        # NOTE(review): no except/finally clause for this try is visible in
        # this excerpt -- the block appears to be truncated.
        try:
            # entry sanity checks
            for field in ['imdb_votes', 'imdb_score']:
                if entry.get(field, lazy=False):
                    value = entry[field]
                    if not isinstance(value, (int, float)):
                        raise PluginError('Entry field %s should be a number!' % field)

            # if imdb_id is included, build the url.
            if entry.get('imdb_id', lazy=False) and not entry.get('imdb_url', lazy=False):
                entry['imdb_url'] = make_url(entry['imdb_id'])

            # make sure imdb url is valid
            # (round-trip through extract_id / make_url; drop the url when no
            # id can be extracted from it)
            if entry.get('imdb_url', lazy=False):
                imdb_id = extract_id(entry['imdb_url'])
                if imdb_id:
                    entry['imdb_url'] = make_url(imdb_id)
                else:
                    log.debug('imdb url %s is invalid, removing it' % entry['imdb_url'])
                    del(entry['imdb_url'])

            # no imdb_url, check if there is cached result for it or if the
            # search is known to fail
            # NOTE(review): entry['title'] is read unconditionally here, so an
            # entry that only carried an invalid imdb_url presumably fails on
            # the missing title -- verify against Entry's lookup behaviour.
            if not entry.get('imdb_url', lazy=False):
                result = session.query(SearchResult).\
                         filter(SearchResult.title == entry['title']).first()
                if result:
                    if result.fails and not manager.options.retry:
                        # this movie cannot be found, not worth trying again ...
                        log.debug('%s will fail lookup' % entry['title'])
                        raise PluginError('Title `%s` lookup fails' % entry['title'])
                    else:
                        if result.url:
                            log.trace('Setting imdb url for %s from db' % entry['title'])
                            entry['imdb_url'] = result.url

            # no imdb url, but information required, try searching
            if not entry.get('imdb_url', lazy=False) and search_allowed:
                log.verbose('Searching from imdb `%s`' % entry['title'])

                take_a_break = True
                search = ImdbSearch()
                search_result = search.smart_match(entry['title'])
                if search_result:
                    entry['imdb_url'] = search_result['url']
                    # store url for this movie, so we don't have to search on
                    # every run
                    result = SearchResult(entry['title'], entry['imdb_url'])
                    session.add(result)
                    log.verbose('Found %s' % (entry['imdb_url']))
                else:
                    log_once('Imdb lookup failed for %s' % entry['title'], log)
                    # store FAIL for this title
                    result = SearchResult(entry['title'])
                    result.fails = True
                    session.add(result)
                    raise PluginError('Title `%s` lookup failed' % entry['title'])


            # check if this imdb page has been parsed & cached
            # NOTE(review): when search_allowed is false and no url was
            # resolved above, entry['imdb_url'] is still read here.
            movie = session.query(Movie).\
                options(joinedload_all(Movie.genres, Movie.languages,
                Movie.actors, Movie.directors)).\
                filter(Movie.url == entry['imdb_url']).first()

            # Refresh interval in days: 2 as a base, plus 5 for every year
            # between the movie's year and the current year.
            refresh_interval = 2
            if movie:
                if movie.year:
                    age = (datetime.now().year - movie.year)
                    refresh_interval += age * 5
                    log.debug('cached movie `%s` age %i refresh interval %i days' % (movie.title, age, refresh_interval))

            # (Re)parse when there is no cached row, the row has no update
            # timestamp, or the timestamp is older than the refresh interval.
            if not movie or movie.updated is None or \
               movie.updated < datetime.now() - timedelta(days=refresh_interval):
                # Remove the old movie, we'll store another one later.
                session.query(Movie).filter(Movie.url == entry['imdb_url']).delete()
                # search and store to cache
                if 'title' in entry:
                    log.verbose('Parsing imdb for `%s`' % entry['title'])
                else:
                    log.verbose('Parsing imdb for `%s`' % entry['imdb_id'])
                try:
                    take_a_break = True
                    imdb = ImdbParser()
                    imdb.parse(entry['imdb_url'])
                    # store to database
                    movie = Movie()
                    movie.photo = imdb.photo
                    movie.title = imdb.name
                    movie.score = imdb.score
                    movie.votes = imdb.votes
                    movie.year = imdb.year
                    movie.mpaa_rating = imdb.mpaa_rating
                    movie.plot_outline = imdb.plot_outline
                    movie.url = entry['imdb_url']
                    # For each related item, reuse the existing row when one
                    # matches and create a new one otherwise.
                    for name in imdb.genres:
                        genre = session.query(Genre).\
                            filter(Genre.name == name).first()
                        if not genre:
                            genre = Genre(name)
                        movie.genres.append(genre) # pylint:disable=E1101
                    for name in imdb.languages:
                        language = session.query(Language).\
                            filter(Language.name == name).first()
                        if not language:
                            language = Language(name)
                        movie.languages.append(language) # pylint:disable=E1101
                    # NOTE: the loop variable imdb_id below reuses (shadows)
                    # the imdb_id local assigned during url validation above.
                    for imdb_id, name in imdb.actors.iteritems():
                        actor = session.query(Actor).\
                            filter(Actor.imdb_id == imdb_id).first()
                        if not actor:
                            actor = Actor(imdb_id, name)
                        movie.actors.append(actor) # pylint:disable=E1101
                    for imdb_id, name in imdb.directors.iteritems():
                        director = session.query(Director).\
                            filter(Director.imdb_id == imdb_id).first()
                        if not director:
                            director = Director(imdb_id, name)
                        movie.directors.append(director) # pylint:disable=E1101
                    # so that we can track how long since we've updated the info later
                    movie.updated = datetime.now()
                    session.add(movie)

                # UnicodeDecodeError is a subclass of ValueError, so this
                # clause has to come before the ValueError one.
                except UnicodeDecodeError:
                    log.error('Unable to determine encoding for %s. Installing chardet library may help.' % entry['imdb_url'])
                    # store cache so this will not be tried again
                    # (the stub row carries only the url)
                    movie = Movie()
                    movie.url = entry['imdb_url']
                    session.add(movie)
                    raise PluginError('UnicodeDecodeError')
                except ValueError, e:
                    # TODO: might be a little too broad catch, what was this for anyway? ;P
                    if manager.options.debug:
                        log.exception(e)
                    raise PluginError('Invalid parameter: %s' % entry['imdb_url'], log)

            # Trace-log the main attributes of the movie that will be used.
            for att in ['title', 'score', 'votes', 'year', 'genres', 'languages', 'actors', 'directors', 'mpaa_rating']:
                log.trace('movie.%s: %s' % (att, getattr(movie, att)))

            # store to entry
            entry.update_using_map(self.field_map, movie)

            # give imdb a little break between requests (see: http://flexget.com/ticket/129#comment:1)
            # The 3 second pause is skipped in debug mode and in unit tests.
            if (take_a_break and
                not manager.options.debug and
                not manager.unit_test):
                import time
                time.sleep(3)
Example #12
0
    def lookup(self, entry, search_allowed=True, session=None):
        """
        Look up IMDb details for an entry and copy them onto it.

        The IMDb url is resolved from the entry's ``imdb_id``, from a stored
        ``SearchResult`` row, or (when permitted) from a live search. The movie
        is then taken from the database cache, or parsed afresh when missing or
        expired, and mapped onto the entry via ``self.field_map``.

        :param entry: Entry instance carrying at least one of ``imdb_id``,
            ``imdb_url`` or ``title``
        :param search_allowed: Allow fallback to search
        :param session: Database session used for every query and commit here
        :raises PluginError: Failure reason
        """

        from flexget.manager import manager

        # Report which field the lookup is keyed on; the first populated one
        # wins, and having none of them is an error.
        lookup_keys = (
            ("imdb_id", "No title passed. Lookup for %s"),
            ("imdb_url", "No title passed. Lookup for %s"),
            ("title", "lookup for %s"),
        )
        for field, message in lookup_keys:
            if entry.get(field, eval_lazy=False):
                log.debug(message % entry[field])
                break
        else:
            raise plugin.PluginError("looking up IMDB for entry failed, no title, imdb_url or imdb_id passed.")

        # An id without a url: derive the url from the id.
        if entry.get("imdb_id", eval_lazy=False):
            if not entry.get("imdb_url", eval_lazy=False):
                entry["imdb_url"] = make_url(entry["imdb_id"])

        # Normalise the url, or drop it when no id can be extracted from it.
        if entry.get("imdb_url", eval_lazy=False):
            extracted_id = extract_id(entry["imdb_url"])
            if not extracted_id:
                log.debug("imdb url %s is invalid, removing it" % entry["imdb_url"])
                del entry["imdb_url"]
            else:
                entry["imdb_url"] = make_url(extracted_id)

        # Still no url: consult the stored search results for this title,
        # which record both earlier matches and earlier failures.
        if not entry.get("imdb_url", eval_lazy=False):
            title_query = session.query(SearchResult).filter(SearchResult.title == entry["title"])
            cached_search = title_query.first()
            if cached_search:
                # TODO: 1.2 this should really be checking task.options.retry
                if cached_search.fails and not manager.options.execute.retry:
                    # A previous search already failed; do not try again.
                    log.debug("%s will fail lookup" % entry["title"])
                    raise plugin.PluginError("IMDB lookup failed for %s" % entry["title"])
                if cached_search.url:
                    log.trace("Setting imdb url for %s from db" % entry["title"])
                    entry["imdb_id"] = cached_search.imdb_id
                    entry["imdb_url"] = cached_search.url

        # Nothing cached either: search IMDb, if the caller allows it.
        if not entry.get("imdb_url", eval_lazy=False) and search_allowed:
            log.verbose("Searching from imdb `%s`" % entry["title"])
            searcher = ImdbSearch()
            wanted_name = entry.get("movie_name", entry["title"], eval_lazy=False)
            best_match = searcher.smart_match(wanted_name)
            if not best_match:
                log_once("IMDB lookup failed for %s" % entry["title"], log, logging.WARN, session=session)
                # Remember the failure so later runs can skip this title.
                failed_search = SearchResult(entry["title"])
                failed_search.fails = True
                session.add(failed_search)
                session.commit()
                raise plugin.PluginError("Title `%s` lookup failed" % entry["title"])
            entry["imdb_url"] = best_match["url"]
            # Remember the url so the search is not repeated on every run.
            session.add(SearchResult(entry["title"], entry["imdb_url"]))
            session.commit()
            log.verbose("Found %s" % (entry["imdb_url"]))

        # Has this imdb page already been parsed and stored?
        movie = session.query(Movie).filter(Movie.url == entry["imdb_url"]).first()

        # A cached, unexpired movie is all that is needed.
        if movie and not movie.expired:
            entry.update_using_map(self.field_map, movie)
            return

        # A stale row exists: clear it (and its language links) before the
        # freshly parsed movie is stored.
        if movie is not None:
            if movie.expired:
                log.verbose("Movie `%s` details expired, refreshing ..." % movie.title)
            session.query(MovieLanguage).filter(MovieLanguage.movie_id == movie.id).delete()
            session.query(Movie).filter(Movie.url == entry["imdb_url"]).delete()
            session.commit()

        # Parse the page and store the result in the cache.
        label_field = "title" if "title" in entry else "imdb_id"
        log.verbose("Parsing imdb for `%s`" % entry[label_field])
        try:
            movie = self._parse_new_movie(entry["imdb_url"], session)
        except UnicodeDecodeError:
            # Must precede the ValueError clause: UnicodeDecodeError is a
            # subclass of ValueError.
            log.error("Unable to determine encoding for %s. Installing chardet library may help." % entry["imdb_url"])
            # Store a url-only row so this page is not tried again.
            unparsable = Movie()
            unparsable.url = entry["imdb_url"]
            session.add(unparsable)
            session.commit()
            raise plugin.PluginError("UnicodeDecodeError")
        except ValueError as err:
            # TODO: might be a little too broad catch, what was this for anyway? ;P
            if manager.options.debug:
                log.exception(err)
            raise plugin.PluginError("Invalid parameter: %s" % entry["imdb_url"], log)

        traced_attributes = (
            "title",
            "score",
            "votes",
            "year",
            "genres",
            "languages",
            "actors",
            "directors",
            "writers",
            "mpaa_rating",
        )
        for attribute in traced_attributes:
            log.trace("movie.%s: %s" % (attribute, getattr(movie, attribute)))

        # Update the entry fields
        entry.update_using_map(self.field_map, movie)
Example #13
0
            opener = sess.get(url, params=params)
            mime_type = parse_header(opener.headers['content-type'])[0]
            log.debug('mime_type: %s' % mime_type)
            if mime_type != 'text/csv':
                raise PluginError(
                    'Didn\'t get CSV export as response. Probably specified list `%s` does not exist.'
                    % config['list'])
            csv_rows = csv.reader(opener.iter_lines())
        except requests.RequestException, e:
            raise PluginError('Unable to get imdb list: %s' % e.message)

        # Create an Entry for each movie in the list
        entries = []
        for row in csv_rows:
            if not row or row[0] == 'position':
                # Don't use blank rows or the headings row
                continue
            try:
                title = decode_html(row[5]).decode('utf-8')
                entries.append(
                    Entry(title=title,
                          url=make_url(row[1]),
                          imdb_id=row[1],
                          imdb_name=title))
            except IndexError:
                log.critical('IndexError! Unable to handle row: %s' % row)
        return entries


# Register the ImdbList class under the plugin name 'imdb_list' (api_ver=2).
register_plugin(ImdbList, 'imdb_list', api_ver=2)