Ejemplo n.º 1
def _add_flexget_imdb_alias(movie, imdb_id):
    """Attach a ``flexget_imdb`` alternate id to *movie* unless a matching ``imdb`` id is already present."""
    if not imdb_id:
        return
    bare_id = imdb_id.lstrip("t")
    # any() instead of `not filter(...)`: on Python 3 a filter object is always truthy.
    if any(alt.name == "imdb" and alt.id == bare_id for alt in movie.alternate_ids):
        return
    log.warning("Adding flexget_imdb alternate id %s for movie %s" % (imdb_id, movie))
    movie.alternate_ids.append(RottenTomatoesAlternateId("flexget_imdb", bare_id))


def lookup_movie(
    title=None, year=None, rottentomatoes_id=None, imdb_id=None, smart_match=None, only_cached=False, session=None
):
    """Do a lookup from Rotten Tomatoes for the movie matching the passed arguments.

    Any combination of criteria can be passed, the most specific criteria specified will be used.
    The local cache is consulted first (by rottentomatoes_id, then imdb_id, then title/year, then
    the stored search results); the online API is only queried on a cache miss or when the cached
    entry has expired.

    :param rottentomatoes_id: rottentomatoes_id of desired movie
    :param imdb_id: imdb_id of desired movie
    :param title: title of desired movie
    :param year: release year of desired movie
    :param smart_match: attempt to clean and parse title and year from a string
    :param only_cached: if this is specified, an online lookup will not occur if the movie is not in the cache
    :param session: optionally specify a session to use, if specified, returned Movie will be live in that session
    :returns: The Movie object populated with data from Rotten Tomatoes
    :raises: PluginError if a match cannot be found or there are other problems with the lookup
    """

    if smart_match:
        # If smart_match was specified, and we don't have more specific criteria, parse it into a title and year
        title_parser = MovieParser()
        # NOTE(review): assumes MovieParser.parse() accepts the raw string -- confirm against the parser API.
        title_parser.parse(smart_match)
        title = title_parser.name
        year = title_parser.year
        if title == "" and not (rottentomatoes_id or imdb_id):
            raise PluginError("Failed to parse name from %s" % smart_match)

    if title:
        search_string = title.lower()
        if year:
            search_string = "%s %s" % (search_string, year)
    elif not (rottentomatoes_id or imdb_id):
        raise PluginError("No criteria specified for rotten tomatoes lookup")

    def id_str():
        return "<title=%s,year=%s,rottentomatoes_id=%s,imdb_id=%s>" % (title, year, rottentomatoes_id, imdb_id)

    if not session:
        session = Session()

    log.debug("Looking up rotten tomatoes information for %s" % id_str())

    movie = None

    # --- Cache lookup, most specific criterion first ---
    if rottentomatoes_id:
        movie = session.query(RottenTomatoesMovie).filter(RottenTomatoesMovie.id == rottentomatoes_id).first()
    if not movie and imdb_id:
        alt_id = (
            session.query(RottenTomatoesAlternateId)
            .filter(RottenTomatoesAlternateId.name.in_(["imdb", "flexget_imdb"]))
            .filter(RottenTomatoesAlternateId.id == imdb_id.lstrip("t"))
            .first()
        )
        if alt_id:
            movie = session.query(RottenTomatoesMovie).filter(RottenTomatoesMovie.id == alt_id.movie_id).first()
    if not movie and title:
        movie_filter = session.query(RottenTomatoesMovie).filter(func.lower(RottenTomatoesMovie.title) == title.lower())
        if year:
            movie_filter = movie_filter.filter(RottenTomatoesMovie.year == year)
        movie = movie_filter.first()
        if not movie:
            log.debug("No matches in movie cache found, checking search cache.")
            found = (
                session.query(RottenTomatoesSearchResult)
                .filter(func.lower(RottenTomatoesSearchResult.search) == search_string)
                .first()
            )
            if found and found.movie:
                log.debug("Movie found in search cache.")
                movie = found.movie

    if movie:
        # Movie found in cache, check if cache has expired.
        if movie.expired and not only_cached:
            log.debug("Cache has expired for %s, attempting to refresh from Rotten Tomatoes." % id_str())
            try:
                # next() with a default covers both "no alternate ids" and "no imdb-type id"
                # without indexing into a filter object.
                imdb_alt_id = next(
                    (alt.id for alt in movie.alternate_ids if alt.name in ("imdb", "flexget_imdb")),
                    None,
                )
                if imdb_alt_id:
                    result = movies_alias(imdb_alt_id, "imdb")
                else:
                    result = movies_info(movie.id)
                movie = set_movie_details(movie, session, result)
            except URLError:
                log.error("Error refreshing movie details from Rotten Tomatoes, cached info being used.")
        else:
            log.debug("Movie %s information restored from cache." % id_str())
    else:
        if only_cached:
            raise PluginError("Movie %s not found from cache" % id_str())
        # There was no movie found in the cache, do a lookup from Rotten Tomatoes
        log.debug("Movie %s not found in cache, looking up from rotten tomatoes." % id_str())
        try:
            # Lookups using imdb_id
            if imdb_id:
                log.debug("Using IMDB alias %s." % imdb_id)
                result = movies_alias(imdb_id, "imdb")
                if result:
                    # Collect every way the alias result disagrees with the requested title/year.
                    mismatch = []
                    if (
                        title
                        and difflib.SequenceMatcher(
                            lambda x: x == " ", re.sub(r"\s+\(.*\)$", "", result["title"].lower()), title.lower()
                        ).ratio()
                        < MIN_MATCH
                    ):
                        mismatch.append("the title (%s <-?-> %s)" % (title, result["title"]))
                    result["year"] = int(result["year"])
                    if year and fabs(result["year"] - year) > 1:
                        mismatch.append("the year (%s <-?-> %s)" % (year, result["year"]))
                        release_year = None
                        if result.get("release_dates", {}).get("theater"):
                            log.debug("Checking year against theater release date")
                            release_year = time.strptime(result["release_dates"].get("theater"), "%Y-%m-%d").tm_year
                            if fabs(release_year - year) > 1:
                                mismatch.append("the theater release (%s)" % release_year)
                        elif result.get("release_dates", {}).get("dvd"):
                            log.debug("Checking year against dvd release date")
                            release_year = time.strptime(result["release_dates"].get("dvd"), "%Y-%m-%d").tm_year
                            if fabs(release_year - year) > 1:
                                mismatch.append("the DVD release (%s)" % release_year)
                    if mismatch:
                        # NOTE(review): log level inferred; confirm it should be a warning.
                        log.warning(
                            "Rotten Tomatoes had an imdb alias for %s but it didn't match %s."
                            % (imdb_id, ", or ".join(mismatch))
                        )
                    else:
                        log.debug("imdb_id %s maps to rt_id %s, checking db for info." % (imdb_id, result["id"]))
                        movie = (
                            session.query(RottenTomatoesMovie)
                            .filter(RottenTomatoesMovie.id == result.get("id"))
                            .first()
                        )
                        if movie:
                            log.debug(
                                "Movie %s was in database, but did not have the imdb_id stored, "
                                "forcing an update" % movie
                            )
                            movie = set_movie_details(movie, session, result)
                        else:
                            log.debug("%s was not in database, setting info." % result["title"])
                            movie = RottenTomatoesMovie()
                            movie = set_movie_details(movie, session, result)
                            if not movie:
                                raise PluginError("set_movie_details returned %s" % movie)
                            # NOTE(review): persisting the new row is inferred -- confirm.
                            session.add(movie)
                else:
                    log.debug("IMDB alias %s returned no results." % imdb_id)
            if not movie and rottentomatoes_id:
                result = movies_info(rottentomatoes_id)
                if result:
                    movie = RottenTomatoesMovie()
                    movie = set_movie_details(movie, session, result)
                    # NOTE(review): persisting the new row is inferred -- confirm.
                    session.add(movie)
            if not movie and title:
                log.verbose("Searching from rt `%s`" % search_string)
                results = movies_search(search_string)
                if results:
                    results = results.get("movies")
                    if results:
                        for movie_res in results:
                            seq = difflib.SequenceMatcher(lambda x: x == " ", movie_res["title"].lower(), title.lower())
                            movie_res["match"] = seq.ratio()
                        results.sort(key=lambda x: x["match"], reverse=True)

                        # Remove all movies below MIN_MATCH, and different year
                        # (iterate over a copy because entries are removed from `results`).
                        for movie_res in results[:]:

                            if year and movie_res.get("year"):
                                movie_res["year"] = int(movie_res["year"])
                                if movie_res["year"] != year:
                                    release_year = False
                                    if movie_res.get("release_dates", {}).get("theater"):
                                        log.debug("Checking year against theater release date")
                                        release_year = time.strptime(
                                            movie_res["release_dates"].get("theater"), "%Y-%m-%d"
                                        ).tm_year
                                    elif movie_res.get("release_dates", {}).get("dvd"):
                                        log.debug("Checking year against dvd release date")
                                        release_year = time.strptime(
                                            movie_res["release_dates"].get("dvd"), "%Y-%m-%d"
                                        ).tm_year
                                    if not (release_year and release_year == year):
                                        log.debug(
                                            "removing %s - %s (wrong year: %s)"
                                            % (
                                                movie_res["title"],
                                                movie_res["id"],
                                                str(release_year or movie_res["year"]),
                                            )
                                        )
                                        results.remove(movie_res)
                                        # Already removed; skip the MIN_MATCH check for this entry.
                                        continue
                            if movie_res["match"] < MIN_MATCH:
                                log.debug("removing %s (min_match)" % movie_res["title"])
                                results.remove(movie_res)

                        if not results:
                            raise PluginError("no appropiate results")

                        if len(results) == 1:
                            log.debug("SUCCESS: only one movie remains")
                        else:
                            # Check min difference between best two hits
                            diff = results[0]["match"] - results[1]["match"]
                            if diff < MIN_DIFF:
                                log.debug(
                                    "unable to determine correct movie, min_diff too small"
                                    "(`%s (%d) - %s` <-?-> `%s (%d) - %s`)"
                                    % (
                                        results[0]["title"],
                                        results[0]["year"],
                                        results[0]["id"],
                                        results[1]["title"],
                                        results[1]["year"],
                                        results[1]["id"],
                                    )
                                )
                                for r in results:
                                    log.debug("remain: %s (match: %s) %s" % (r["title"], r["match"], r["id"]))
                                raise PluginError("min_diff")

                        imdb_alt_id = results[0].get("alternate_ids", {}).get("imdb")
                        if imdb_alt_id:
                            result = movies_alias(imdb_alt_id)
                        else:
                            result = movies_info(results[0].get("id"))

                        if not result:
                            result = results[0]

                        movie = RottenTomatoesMovie()
                        try:
                            movie = set_movie_details(movie, session, result)
                            _add_flexget_imdb_alias(movie, imdb_id)
                            # NOTE(review): add + commit inferred; the commit is what would surface a
                            # duplicate-key IntegrityError for the handler below -- confirm.
                            session.add(movie)
                            session.commit()
                        except IntegrityError:
                            log.warning(
                                "Found movie %s in database after search even though we "
                                "already looked, updating it with search result." % movie
                            )
                            # A failed flush leaves the session unusable until it is rolled back.
                            session.rollback()
                            movie = (
                                session.query(RottenTomatoesMovie)
                                .filter(RottenTomatoesMovie.id == result["id"])
                                .first()
                            )
                            movie = set_movie_details(movie, session, result)
                            _add_flexget_imdb_alias(movie, imdb_id)
                            session.commit()

                        if title.lower() != movie.title.lower():
                            log.debug("Saving search result for '%s'" % search_string)
                            session.add(RottenTomatoesSearchResult(search=search_string, movie=movie))
        except URLError:
            raise PluginError("Error looking up movie from RottenTomatoes")

    if not movie:
        raise PluginError("No results found from rotten tomatoes for %s" % id_str())

    # Access attributes to force the relationships to eager load before we detach from session
    for attr in ["alternate_ids", "cast", "directors", "genres", "links", "posters", "release_dates"]:
        getattr(movie, attr)
    # NOTE(review): final commit inferred so that rows added above are persisted -- confirm.
    session.commit()
    return movie
Ejemplo n.º 2
    def lookup(title=None, year=None, tmdb_id=None, imdb_id=None, smart_match=None, only_cached=False, session=None):
        """Do a lookup from tmdb for the movie matching the passed arguments.

        Any combination of criteria can be passed, the most specific criteria specified will be used.
        The local cache is checked first; TMDb is only queried on a cache miss or when the cached
        entry is older than its computed refresh interval.

        Returns:
            The Movie object populated with data from tmdb

        Raises:
            LookupError if a match cannot be found or there are other problems with the lookup

        Args:
            tmdb_id: tmdb_id of desired movie
            imdb_id: imdb_id of desired movie
            title: title of desired movie
            year: release year of desired movie
            smart_match: attempt to clean and parse title and year from a string
            only_cached: if this is specified, an online lookup will not occur if the movie is not in the cache
            session: optionally specify a session to use, if specified, returned Movie will be live in that session
        """

        if not (tmdb_id or imdb_id or title) and smart_match:
            # If smart_match was specified, and we don't have more specific criteria, parse it into a title and year
            title_parser = MovieParser()
            # NOTE(review): assumes MovieParser.parse() accepts the raw string -- confirm against the parser API.
            title_parser.parse(smart_match)
            title = title_parser.name
            year = title_parser.year

        if title:
            search_string = title.lower()
            if year:
                search_string = '%s %s' % (search_string, year)
        elif not (tmdb_id or imdb_id):
            raise LookupError('No criteria specified for tmdb lookup')
        log.debug('Looking up tmdb information for %r' % {'title': title, 'tmdb_id': tmdb_id, 'imdb_id': imdb_id})

        movie = None

        def id_str():
            return '<title=%s,tmdb_id=%s,imdb_id=%s>' % (title, tmdb_id, imdb_id)

        # --- Cache lookup, most specific criterion first ---
        if tmdb_id:
            movie = session.query(TMDBMovie).filter(TMDBMovie.id == tmdb_id).first()
        if not movie and imdb_id:
            movie = session.query(TMDBMovie).filter(TMDBMovie.imdb_id == imdb_id).first()
        if not movie and title:
            movie_filter = session.query(TMDBMovie).filter(func.lower(TMDBMovie.name) == title.lower())
            if year:
                movie_filter = movie_filter.filter(TMDBMovie.year == year)
            movie = movie_filter.first()
            if not movie:
                found = session.query(TMDBSearchResult). \
                    filter(func.lower(TMDBSearchResult.search) == search_string).first()
                if found and found.movie:
                    movie = found.movie

        if movie:
            # Movie found in cache, check if cache has expired.
            refresh_time = timedelta(days=2)
            if movie.released:
                if movie.released > datetime.now() - timedelta(days=7):
                    # Movie is less than a week old, expire after 1 day
                    refresh_time = timedelta(days=1)
                else:
                    # Older releases are refreshed less often: 5 extra days per year of age.
                    age_in_years = (datetime.now() - movie.released).days / 365
                    refresh_time += timedelta(days=age_in_years * 5)
            if movie.updated < datetime.now() - refresh_time and not only_cached:
                log.debug('Cache has expired for %s, attempting to refresh from TMDb.' % id_str())
                try:
                    ApiTmdb.get_movie_details(movie, session)
                except URLError:
                    log.error('Error refreshing movie details from TMDb, cached info being used.')
            else:
                log.debug('Movie %s information restored from cache.' % id_str())
        else:
            if only_cached:
                raise LookupError('Movie %s not found from cache' % id_str())
            # There was no movie found in the cache, do a lookup from tmdb
            log.debug('Movie %s not found in cache, looking up from tmdb.' % id_str())
            try:
                if imdb_id and not tmdb_id:
                    result = get_first_result('imdbLookup', imdb_id)
                    if result:
                        movie = session.query(TMDBMovie).filter(TMDBMovie.id == result['id']).first()
                        if movie:
                            # Movie was in database, but did not have the imdb_id stored, force an update
                            ApiTmdb.get_movie_details(movie, session)
                        else:
                            tmdb_id = result['id']
                if tmdb_id:
                    movie = TMDBMovie()
                    movie.id = tmdb_id
                    ApiTmdb.get_movie_details(movie, session)
                    if movie.name:
                        # NOTE(review): persisting via merge is inferred from the merge call used for
                        # search results below -- confirm.
                        movie = session.merge(movie)
                    else:
                        # Details lookup yielded no name: treat as "not found".
                        movie = None
                elif title:
                    result = get_first_result('search', search_string)
                    if result:
                        movie = session.query(TMDBMovie).filter(TMDBMovie.id == result['id']).first()
                        if not movie:
                            movie = TMDBMovie(result)
                            ApiTmdb.get_movie_details(movie, session)
                            # NOTE(review): persisting via merge is inferred -- confirm.
                            movie = session.merge(movie)
                        if title.lower() != movie.name.lower():
                            # Remember which search string led to this movie so later lookups hit the cache.
                            session.merge(TMDBSearchResult(search=search_string, movie=movie))
            except URLError:
                raise LookupError('Error looking up movie from TMDb')

        if not movie:
            raise LookupError('No results found from tmdb for %s' % id_str())

        # NOTE(review): the source carried a comment here about accessing relationship attributes to
        # force eager loading before detaching from the session, but no such accesses are present;
        # restore them if the returned object is used outside the session.
        return movie
Ejemplo n.º 3
    def lookup(title=None,
               year=None,
               tmdb_id=None,
               imdb_id=None,
               smart_match=None,
               only_cached=False,
               session=None):
        """Do a lookup from tmdb for the movie matching the passed arguments.

        Any combination of criteria can be passed, the most specific criteria specified will be used.
        The local cache is checked first; TMDb is only queried on a cache miss or when the cached
        entry is older than its computed refresh interval.

        Returns:
            The Movie object populated with data from tmdb

        Raises:
            LookupError if a match cannot be found or there are other problems with the lookup

        Args:
            tmdb_id: tmdb_id of desired movie
            imdb_id: imdb_id of desired movie
            title: title of desired movie
            year: release year of desired movie
            smart_match: attempt to clean and parse title and year from a string
            only_cached: if this is specified, an online lookup will not occur if the movie is not in the cache
            session: optionally specify a session to use, if specified, returned Movie will be live in that session
        """

        if not (tmdb_id or imdb_id or title) and smart_match:
            # If smart_match was specified, and we don't have more specific criteria, parse it into a title and year
            title_parser = MovieParser()
            # NOTE(review): assumes MovieParser.parse() accepts the raw string -- confirm against the parser API.
            title_parser.parse(smart_match)
            title = title_parser.name
            year = title_parser.year

        if title:
            search_string = title.lower()
            if year:
                search_string = '%s %s' % (search_string, year)
        elif not (tmdb_id or imdb_id):
            raise LookupError('No criteria specified for tmdb lookup')
        log.debug('Looking up tmdb information for %r' % {
            'title': title,
            'tmdb_id': tmdb_id,
            'imdb_id': imdb_id
        })

        movie = None

        def id_str():
            return '<title=%s,tmdb_id=%s,imdb_id=%s>' % (title, tmdb_id,
                                                         imdb_id)

        # --- Cache lookup, most specific criterion first ---
        if tmdb_id:
            movie = session.query(TMDBMovie).filter(
                TMDBMovie.id == tmdb_id).first()
        if not movie and imdb_id:
            movie = session.query(TMDBMovie).filter(
                TMDBMovie.imdb_id == imdb_id).first()
        if not movie and title:
            movie_filter = session.query(TMDBMovie).filter(
                func.lower(TMDBMovie.name) == title.lower())
            if year:
                movie_filter = movie_filter.filter(TMDBMovie.year == year)
            movie = movie_filter.first()
            if not movie:
                found = session.query(TMDBSearchResult). \
                    filter(func.lower(TMDBSearchResult.search) == search_string).first()
                if found and found.movie:
                    movie = found.movie

        if movie:
            # Movie found in cache, check if cache has expired.
            refresh_time = timedelta(days=2)
            if movie.released:
                if movie.released > datetime.now() - timedelta(days=7):
                    # Movie is less than a week old, expire after 1 day
                    refresh_time = timedelta(days=1)
                else:
                    # Older releases are refreshed less often: 5 extra days per year of age.
                    age_in_years = (datetime.now() - movie.released).days / 365
                    refresh_time += timedelta(days=age_in_years * 5)
            if movie.updated < datetime.now(
            ) - refresh_time and not only_cached:
                log.debug(
                    'Cache has expired for %s, attempting to refresh from TMDb.'
                    % id_str())
                try:
                    ApiTmdb.get_movie_details(movie, session)
                except URLError:
                    log.error(
                        'Error refreshing movie details from TMDb, cached info being used.'
                    )
            else:
                log.debug('Movie %s information restored from cache.' %
                          id_str())
        else:
            if only_cached:
                raise LookupError('Movie %s not found from cache' % id_str())
            # There was no movie found in the cache, do a lookup from tmdb
            log.debug('Movie %s not found in cache, looking up from tmdb.' %
                      id_str())
            try:
                if imdb_id and not tmdb_id:
                    result = get_first_result('imdbLookup', imdb_id)
                    if result:
                        movie = session.query(TMDBMovie).filter(
                            TMDBMovie.id == result['id']).first()
                        if movie:
                            # Movie was in database, but did not have the imdb_id stored, force an update
                            ApiTmdb.get_movie_details(movie, session)
                        else:
                            tmdb_id = result['id']
                if tmdb_id:
                    movie = TMDBMovie()
                    movie.id = tmdb_id
                    ApiTmdb.get_movie_details(movie, session)
                    if movie.name:
                        # NOTE(review): persisting via merge is inferred -- confirm.
                        movie = session.merge(movie)
                    else:
                        # Details lookup yielded no name: treat as "not found".
                        movie = None
                elif title:
                    result = get_first_result('search', search_string)
                    if result:
                        movie = session.query(TMDBMovie).filter(
                            TMDBMovie.id == result['id']).first()
                        if not movie:
                            movie = TMDBMovie(result)
                            ApiTmdb.get_movie_details(movie, session)
                            # NOTE(review): persisting via merge is inferred -- confirm.
                            movie = session.merge(movie)
                        if title.lower() != movie.name.lower():
                            # NOTE(review): this branch had no body in the source; recording the
                            # search string against the movie is presumed -- confirm.
                            session.merge(
                                TMDBSearchResult(
                                    search=search_string, movie=movie))
            except URLError:
                raise LookupError('Error looking up movie from TMDb')

        if not movie:
            raise LookupError('No results found from tmdb for %s' % id_str())

        # NOTE(review): the source carried a comment here about accessing relationship attributes to
        # force eager loading before detaching from the session, but no such accesses are present;
        # restore them if the returned object is used outside the session.
        return movie
Ejemplo n.º 4
def lookup_movie(title=None, year=None, rottentomatoes_id=None, smart_match=None,
                 only_cached=False, session=None):
    """
    Do a lookup from Rotten Tomatoes for the movie matching the passed arguments.
    Any combination of criteria can be passed, the most specific criteria specified will be used.

    The local cache (movie table, then stored search results) is consulted first; the online
    API is only queried on a cache miss or when the cached entry has expired.

    :param rottentomatoes_id: rottentomatoes_id of desired movie
    :param string title: title of desired movie
    :param year: release year of desired movie
    :param smart_match: attempt to clean and parse title and year from a string
    :param only_cached: if this is specified, an online lookup will not occur if the movie is not in the cache
    :param session: optionally specify a session to use, if specified, returned Movie will be live in that session
    :returns: The Movie object populated with data from Rotten Tomatoes
    :raises: PluginError if a match cannot be found or there are other problems with the lookup

    """

    if smart_match:
        # If smart_match was specified, and we don't have more specific criteria, parse it into a title and year
        title_parser = MovieParser()
        # NOTE(review): assumes MovieParser.parse() accepts the raw string -- confirm against the parser API.
        title_parser.parse(smart_match)
        title = title_parser.name
        year = title_parser.year
        if title == '' and not rottentomatoes_id:
            raise PluginError('Failed to parse name from %s' % smart_match)

    if title:
        search_string = title.lower()
        if year:
            search_string = '%s %s' % (search_string, year)
    elif not rottentomatoes_id:
        raise PluginError('No criteria specified for rotten tomatoes lookup')

    def id_str():
        return '<title=%s,year=%s,rottentomatoes_id=%s>' % (title, year, rottentomatoes_id)

    if not session:
        session = Session()

    log.debug('Looking up rotten tomatoes information for %s' % id_str())

    movie = None

    # Try to lookup from cache
    if rottentomatoes_id:
        movie = session.query(RottenTomatoesMovie).\
            filter(RottenTomatoesMovie.id == rottentomatoes_id).first()
    if not movie and title:
        movie_filter = session.query(RottenTomatoesMovie).filter(func.lower(RottenTomatoesMovie.title) == title.lower())
        if year:
            movie_filter = movie_filter.filter(RottenTomatoesMovie.year == year)
        movie = movie_filter.first()
        if not movie:
            log.debug('No matches in movie cache found, checking search cache.')
            found = session.query(RottenTomatoesSearchResult).\
                filter(func.lower(RottenTomatoesSearchResult.search) == search_string).first()
            if found and found.movie:
                log.debug('Movie found in search cache.')
                movie = found.movie

    if movie:
        # Movie found in cache, check if cache has expired.
        if movie.expired and not only_cached:
            log.debug('Cache has expired for %s, attempting to refresh from Rotten Tomatoes.' % id_str())
            try:
                result = movies_info(movie.id)
                movie = _set_movie_details(movie, session, result)
            except URLError:
                log.error('Error refreshing movie details from Rotten Tomatoes, cached info being used.')
        else:
            log.debug('Movie %s information restored from cache.' % id_str())
    else:
        if only_cached:
            raise PluginError('Movie %s not found from cache' % id_str())
        # There was no movie found in the cache, do a lookup from Rotten Tomatoes
        log.debug('Movie %s not found in cache, looking up from rotten tomatoes.' % id_str())
        try:
            if not movie and rottentomatoes_id:
                result = movies_info(rottentomatoes_id)
                if result:
                    movie = RottenTomatoesMovie()
                    movie = _set_movie_details(movie, session, result)
                    # NOTE(review): persisting the new row is inferred -- confirm.
                    session.add(movie)

            if not movie and title:
                # TODO: Extract to method
                log.verbose('Searching from rt `%s`' % search_string)
                results = movies_search(search_string)
                if results:
                    results = results.get('movies')
                    if results:
                        for movie_res in results:
                            seq = difflib.SequenceMatcher(
                                lambda x: x == ' ', movie_res['title'].lower(), title.lower())
                            movie_res['match'] = seq.ratio()
                        results.sort(key=lambda x: x['match'], reverse=True)

                        # Remove all movies below MIN_MATCH, and different year
                        # (iterate over a copy because entries are removed from `results`).
                        for movie_res in results[:]:

                            if year and movie_res.get('year'):
                                movie_res['year'] = int(movie_res['year'])
                                if movie_res['year'] != year:
                                    release_year = False
                                    if movie_res.get('release_dates', {}).get('theater'):
                                        log.debug('Checking year against theater release date')
                                        release_year = time.strptime(movie_res['release_dates'].get('theater'),
                                                                     '%Y-%m-%d').tm_year
                                    elif movie_res.get('release_dates', {}).get('dvd'):
                                        log.debug('Checking year against dvd release date')
                                        release_year = time.strptime(movie_res['release_dates'].get('dvd'),
                                                                     '%Y-%m-%d').tm_year
                                    if not (release_year and release_year == year):
                                        log.debug('removing %s - %s (wrong year: %s)' %
                                                  (movie_res['title'], movie_res['id'],
                                                   str(release_year or movie_res['year'])))
                                        results.remove(movie_res)
                                        # Already removed; skip the MIN_MATCH check for this entry.
                                        continue
                            if movie_res['match'] < MIN_MATCH:
                                log.debug('removing %s (min_match)' % movie_res['title'])
                                results.remove(movie_res)

                        if not results:
                            raise PluginError('no appropiate results')

                        if len(results) == 1:
                            log.debug('SUCCESS: only one movie remains')
                        else:
                            # Check min difference between best two hits
                            diff = results[0]['match'] - results[1]['match']
                            if diff < MIN_DIFF:
                                log.debug('unable to determine correct movie, min_diff too small'
                                          '(`%s (%d) - %s` <-?-> `%s (%d) - %s`)' %
                                          (results[0]['title'], results[0]['year'], results[0]['id'],
                                           results[1]['title'], results[1]['year'], results[1]['id']))
                                for r in results:
                                    log.debug('remain: %s (match: %s) %s' % (r['title'], r['match'], r['id']))
                                raise PluginError('min_diff')

                        result = movies_info(results[0].get('id'))

                        if not result:
                            # Fall back to the (less detailed) search hit itself.
                            result = results[0]

                        movie = session.query(RottenTomatoesMovie).filter(
                            RottenTomatoesMovie.id == result['id']).first()

                        if not movie:
                            movie = RottenTomatoesMovie()
                            movie = _set_movie_details(movie, session, result)
                            # NOTE(review): persisting the new row is inferred -- confirm.
                            session.add(movie)

                        if title.lower() != movie.title.lower():
                            log.debug('Saving search result for \'%s\'' % search_string)
                            session.add(RottenTomatoesSearchResult(search=search_string, movie=movie))
        except URLError:
            raise PluginError('Error looking up movie from RottenTomatoes')

    if not movie:
        raise PluginError('No results found from rotten tomatoes for %s' % id_str())

    # Access attributes to force the relationships to eager load before we detach from session
    for attr in ['alternate_ids', 'cast', 'directors', 'genres', 'links', 'posters', 'release_dates']:
        getattr(movie, attr)
    # NOTE(review): final commit inferred so that rows added above are persisted -- confirm.
    session.commit()
    return movie
Ejemplo n.º 5
 def parse(self, data):
     """Build a new ``MovieParser``, store *data* on its ``data`` attribute and return it.

     NOTE(review): nothing here runs the parser over ``data``; presumably the
     caller (or attribute access on the parser) triggers that -- confirm.
     """
     parser = MovieParser()
     parser.data = data
     return parser
Ejemplo n.º 6
def lookup_movie(title=None, year=None, rottentomatoes_id=None, imdb_id=None, smart_match=None, only_cached=False, session=None):
    """Do a lookup from Rotten Tomatoes for the movie matching the passed arguments.

    Any combination of criteria can be passed, the most specific criteria specified will be used.
    The local cache is consulted first (Rotten Tomatoes id, then IMDb id, then title and stored
    search strings); the online API is only queried when nothing is cached.

    Args:
        title: title of desired movie
        year: release year of desired movie
        rottentomatoes_id: rottentomatoes_id of desired movie
        imdb_id: imdb_id of desired movie
        smart_match: attempt to clean and parse title and year from a string
        only_cached: if this is specified, an online lookup will not occur if the movie is not in the cache
        session: optionally specify a session to use, if specified, returned Movie will be live in that session.
            NOTE(review): this function never creates a session itself, so ``None`` fails on the
            first query -- presumably a decorator injects one; confirm.

    Returns:
        The Movie object populated with data from Rotten Tomatoes

    Raises:
        LookupError: if a match cannot be found or there are other problems with the lookup
    """
    if not (rottentomatoes_id or imdb_id or title) and smart_match:
        # If smart_match was specified, and we don't have more specific criteria, parse it into a title and year
        title_parser = MovieParser()
        # Hand the raw string to the parser; without this smart_match was never used at all.
        # NOTE(review): assumes MovieParser exposes `data` and a no-argument `parse()` -- confirm.
        title_parser.data = smart_match
        title_parser.parse()
        title = title_parser.name
        year = title_parser.year

    if title:
        search_string = title.lower()
        if year:
            search_string = '%s %s' % (search_string, year)
    elif not (rottentomatoes_id or imdb_id):
        raise LookupError('No criteria specified for rotten tomatoes lookup')
    log.debug('Looking up rotten tomatoes information for %r' % {'title': title, 'rottentomatoes_id': rottentomatoes_id, 'imdb_id': imdb_id})

    movie = None

    def id_str():
        return '<title=%s,rottentomatoes_id=%s,imdb_id=%s>' % (title, rottentomatoes_id, imdb_id)

    # --- Cache lookup, most specific criterion first ---
    if rottentomatoes_id:
        movie = session.query(RottenTomatoesMovie).filter(RottenTomatoesMovie.id == rottentomatoes_id).first()
    if not movie and imdb_id:
        alt_id = session.query(RottenTomatoesAlternateId).filter(RottenTomatoesAlternateId.id == imdb_id).first()
        if alt_id:
            movie = session.query(RottenTomatoesMovie).filter(RottenTomatoesMovie.id == alt_id.movie_id).first()
    if not movie and title:
        movie_filter = session.query(RottenTomatoesMovie).filter(func.lower(RottenTomatoesMovie.title) == title.lower())
        if year:
            movie_filter = movie_filter.filter(RottenTomatoesMovie.year == year)
        movie = movie_filter.first()
        if not movie:
            # Fall back to a previously stored search string that resolved to a movie.
            found = session.query(RottenTomatoesSearchResult). \
                filter(func.lower(RottenTomatoesSearchResult.search) == search_string).first()
            if found and found.movie:
                movie = found.movie

    if movie:
        # Movie found in cache, check if cache has expired.
        if movie.expired() and not only_cached:
            log.debug('Cache has expired for %s, attempting to refresh from Rotten Tomatoes.' % id_str())
            try:
                get_movie_details(movie, session)
            except URLError:
                # Best effort: a failed refresh keeps the stale cached entry usable.
                log.error('Error refreshing movie details from Rotten Tomatoes, cached info being used.')
        else:
            log.debug('Movie %s information restored from cache.' % id_str())
    else:
        if only_cached:
            raise LookupError('Movie %s not found from cache' % id_str())
        # There was no movie found in the cache, do a lookup from Rotten Tomatoes
        log.debug('Movie %s not found in cache, looking up from rotten tomatoes.' % id_str())
        try:
            if imdb_id:
                result = movies_alias(imdb_id, 'imdb')
                if result:
                    movie = session.query(RottenTomatoesMovie).filter(RottenTomatoesMovie.id == result.get('id')).first()
                    if movie:
                        # Movie was in database, but did not have the imdb_id stored, force an update
                        get_movie_details(movie, session)
                    else:
                        movie = RottenTomatoesMovie()
                        get_movie_details(movie, session, result)
            elif rottentomatoes_id:
                movie = RottenTomatoesMovie()
                movie.id = rottentomatoes_id
                get_movie_details(movie, session)
                # A movie that came back without a title means the id lookup found nothing.
                if not movie.title:
                    movie = None
            elif title:
                results = movies_search(search_string)
                if results:
                    # Real lists (not lazy filter objects) so len()/indexing work on Python 2 and 3.
                    candidates = results.get('movies') or []
                    if candidates and year:
                        candidates = [res for res in candidates if res.get('year') == year]
                    if len(candidates) > 1 and candidates[0].get('title') != title:
                        exact_matches = [res for res in candidates if res.get('title') == title]
                        if exact_matches:
                            candidates = exact_matches
                    if candidates:
                        result = candidates[0]
                        movie = session.query(RottenTomatoesMovie).filter(RottenTomatoesMovie.id == result.get('id')).first()
                        if not movie:
                            movie = RottenTomatoesMovie(result)
                            get_movie_details(movie, session)
                        if title.lower() != movie.title.lower():
                            # Remember which search string led to this movie for future cache hits.
                            session.merge(RottenTomatoesSearchResult(search=search_string, movie=movie))
        except URLError:
            raise LookupError('Error looking up movie from RottenTomatoes')

    if not movie:
        raise LookupError('No results found from rotten tomatoes for %s' % id_str())
    # NOTE(review): no relationship attributes are touched before returning, so lazily loaded
    # relationships are not forced to load here -- confirm callers do not need that.
    return movie
Ejemplo n.º 7
def lookup_movie(title=None, year=None, rottentomatoes_id=None, imdb_id=None, smart_match=None, only_cached=False, session=None):
    """Do a lookup from Rotten Tomatoes for the movie matching the passed arguments.

    Any combination of criteria can be passed, the most specific criteria specified will be used.
    The local cache is consulted first; the online API is only queried when nothing is cached.

    :param rottentomatoes_id: rottentomatoes_id of desired movie
    :param imdb_id: imdb_id of desired movie
    :param title: title of desired movie
    :param year: release year of desired movie
    :param smart_match: attempt to clean and parse title and year from a string
    :param only_cached: if this is specified, an online lookup will not occur if the movie is not in the cache
    :param session: optionally specify a session to use, if specified, returned Movie will be live in that session
    :returns: The Movie object populated with data from Rotten Tomatoes
    :raises: PluginError if a match cannot be found or there are other problems with the lookup
    """

    if smart_match:
        # If smart_match was specified, parse it into a title and year
        title_parser = MovieParser()
        # Hand the raw string to the parser; without this smart_match was never parsed.
        # NOTE(review): assumes MovieParser exposes `data` and a no-argument `parse()` -- confirm.
        title_parser.data = smart_match
        title_parser.parse()
        title = title_parser.name
        year = title_parser.year
        if title == '' and not (rottentomatoes_id or imdb_id):
            raise PluginError('Failed to parse name from %s' % smart_match)

    if title:
        search_string = title.lower()
        if year:
            search_string = '%s %s' % (search_string, year)
    elif not (rottentomatoes_id or imdb_id):
        raise PluginError('No criteria specified for rotten tomatoes lookup')

    def id_str():
        return '<title=%s,year=%s,rottentomatoes_id=%s,imdb_id=%s>' % (title, year, rottentomatoes_id, imdb_id)

    if not session:
        session = Session()

    log.debug('Looking up rotten tomatoes information for %s' % id_str())

    movie = None
    imdb_names = ('imdb', 'flexget_imdb')

    # --- Cache lookup, most specific criterion first ---
    if rottentomatoes_id:
        movie = session.query(RottenTomatoesMovie).\
            filter(RottenTomatoesMovie.id == rottentomatoes_id).first()
    if not movie and imdb_id:
        alt_id = session.query(RottenTomatoesAlternateId).\
            filter(RottenTomatoesAlternateId.name.in_(['imdb', 'flexget_imdb'])).\
            filter(RottenTomatoesAlternateId.id == imdb_id.lstrip('t')).first()
        if alt_id:
            movie = session.query(RottenTomatoesMovie).filter(RottenTomatoesMovie.id == alt_id.movie_id).first()
    if not movie and title:
        movie_filter = session.query(RottenTomatoesMovie).filter(func.lower(RottenTomatoesMovie.title) == title.lower())
        if year:
            movie_filter = movie_filter.filter(RottenTomatoesMovie.year == year)
        movie = movie_filter.first()
        if not movie:
            found = session.query(RottenTomatoesSearchResult). \
                filter(func.lower(RottenTomatoesSearchResult.search) == search_string).first()
            if found and found.movie:
                movie = found.movie

    if movie:
        # Movie found in cache, check if cache has expired.
        if movie.expired and not only_cached:
            log.debug('Cache has expired for %s, attempting to refresh from Rotten Tomatoes.' % id_str())
            try:
                # First stored imdb-style alternate id, or None when there is none
                # (indexing a filter() result fails on Python 3 and on an empty match).
                imdb_alt_id = next(
                    (alt.id for alt in (movie.alternate_ids or []) if alt.name in imdb_names), None)
                if imdb_alt_id:
                    result = movies_alias(imdb_alt_id, 'imdb')
                else:
                    result = movies_info(movie.id)
                movie = set_movie_details(movie, session, result)
            except URLError:
                # Best effort: a failed refresh keeps the stale cached entry usable.
                log.error('Error refreshing movie details from Rotten Tomatoes, cached info being used.')
        else:
            log.debug('Movie %s information restored from cache.' % id_str())
    else:
        if only_cached:
            raise PluginError('Movie %s not found from cache' % id_str())
        # There was no movie found in the cache, do a lookup from Rotten Tomatoes
        log.debug('Movie %s not found in cache, looking up from rotten tomatoes.' % id_str())
        try:
            # Lookups using imdb_id
            if imdb_id:
                log.debug('Using IMDB alias %s.' % imdb_id)
                result = movies_alias(imdb_id, 'imdb')
                if result:
                    # Collect every way the alias result disagrees with the requested criteria.
                    mismatch = []
                    if title and difflib.SequenceMatcher(
                            lambda x: x == ' ', re.sub(r'\s+\(.*\)$', '', result['title'].lower()),
                            title.lower()).ratio() < MIN_MATCH:
                        mismatch.append('the title (%s <-?-> %s)' % (title, result['title']))
                    if year and abs(result['year'] - year) > 1:
                        mismatch.append('the year (%s <-?-> %s)' % (year, result['year']))
                        release_year = None
                        release_dates = result.get('release_dates', {})
                        if release_dates.get('theater'):
                            log.debug('Checking year against theater release date')
                            release_year = time.strptime(release_dates.get('theater'), '%Y-%m-%d').tm_year
                            if abs(release_year - year) > 1:
                                mismatch.append('the theater release (%s)' % release_year)
                        elif release_dates.get('dvd'):
                            log.debug('Checking year against dvd release date')
                            release_year = time.strptime(release_dates.get('dvd'), '%Y-%m-%d').tm_year
                            if abs(release_year - year) > 1:
                                mismatch.append('the DVD release (%s)' % release_year)
                    if mismatch:
                        # A mismatching alias is only reported; the lookup falls through to the other criteria.
                        log.warning('Rotten Tomatoes had an imdb alias for %s but it didn\'t match %s.' %
                                    (imdb_id, ', or '.join(mismatch)))
                    else:
                        log.debug('imdb_id %s maps to rt_id %s, checking db for info.' % (imdb_id, result['id']))
                        movie = session.query(RottenTomatoesMovie).filter(RottenTomatoesMovie.id == result.get('id')).first()
                        if movie:
                            log.debug('Movie %s was in database, but did not have the imdb_id stored, '
                                      'forcing an update' % movie)
                            movie = set_movie_details(movie, session, result)
                        else:
                            log.debug('%s was not in database, setting info.' % result['title'])
                            movie = RottenTomatoesMovie()
                            movie = set_movie_details(movie, session, result)
                            if not movie:
                                raise PluginError('set_movie_details returned %s' % movie)
                else:
                    log.debug('IMDB alias %s returned no results.' % imdb_id)
            if not movie and rottentomatoes_id:
                result = movies_info(rottentomatoes_id)
                if result:
                    movie = RottenTomatoesMovie()
                    movie = set_movie_details(movie, session, result)
            if not movie and title:
                log.verbose('Searching from rt `%s`' % search_string)
                results = movies_search(search_string)
                if results:
                    results = results.get('movies')
                    if results:
                        # Score every hit by title similarity and rank best first.
                        for movie_res in results:
                            seq = difflib.SequenceMatcher(lambda x: x == ' ',
                                                          movie_res['title'].lower(), title.lower())
                            movie_res['match'] = seq.ratio()
                        results.sort(key=lambda x: x['match'], reverse=True)

                        # Remove all movies below MIN_MATCH, and different year
                        # (iterate over a copy because entries are removed from `results`).
                        for movie_res in results[:]:
                            if year and movie_res.get('year') and movie_res['year'] != year:
                                release_year = False
                                release_dates = movie_res.get('release_dates', {})
                                if release_dates.get('theater'):
                                    log.debug('Checking year against theater release date')
                                    release_year = time.strptime(release_dates.get('theater'), '%Y-%m-%d').tm_year
                                elif release_dates.get('dvd'):
                                    log.debug('Checking year against dvd release date')
                                    release_year = time.strptime(release_dates.get('dvd'), '%Y-%m-%d').tm_year
                                if release_year != year:
                                    log.debug('removing %s - %s (wrong year: %s)' % (
                                        movie_res['title'], movie_res['id'], str(release_year or movie_res['year'])))
                                    results.remove(movie_res)
                                    continue
                            if movie_res['match'] < MIN_MATCH:
                                log.debug('removing %s (min_match)' % movie_res['title'])
                                results.remove(movie_res)

                        if not results:
                            raise PluginError('no appropiate results')

                        if len(results) == 1:
                            log.debug('SUCCESS: only one movie remains')
                        else:
                            # Check min difference between best two hits
                            diff = results[0]['match'] - results[1]['match']
                            if diff < MIN_DIFF:
                                log.debug('unable to determine correct movie, min_diff too small'
                                          '(`%s (%d) - %s` <-?-> `%s (%d) - %s`)' %
                                          (results[0]['title'], results[0]['year'], results[0]['id'],
                                           results[1]['title'], results[1]['year'], results[1]['id']))
                                for r in results:
                                    log.debug('remain: %s (match: %s) %s' % (r['title'], r['match'], r['id']))
                                raise PluginError('min_diff')

                        imdb_alt_id = results[0].get('alternate_ids', {}).get('imdb')
                        if imdb_alt_id:
                            result = movies_alias(imdb_alt_id)
                        else:
                            result = movies_info(results[0].get('id'))

                        if not result:
                            # Detail lookup failed; fall back to the search hit itself.
                            result = results[0]

                        def lacks_imdb_alias(candidate):
                            # True when an imdb_id was requested but the movie has no matching 'imdb' alternate id.
                            return bool(imdb_id) and not any(
                                alt.name == 'imdb' and alt.id == imdb_id.lstrip('t')
                                for alt in candidate.alternate_ids)

                        movie = RottenTomatoesMovie()
                        try:
                            movie = set_movie_details(movie, session, result)
                            if lacks_imdb_alias(movie):
                                # NOTE(review): only the warning is emitted; nothing here attaches the alternate
                                # id to the movie -- that statement appears to be missing, confirm upstream.
                                log.warning('Adding flexget_imdb alternate id %s for movie %s' %
                                            (imdb_id, movie))
                        except IntegrityError:
                            log.warning('Found movie %s in database after search even though we '
                                        'already looked, updating it with search result.' % movie)
                            # The failed flush leaves the transaction unusable until it is rolled back.
                            session.rollback()
                            movie = session.query(RottenTomatoesMovie).filter(RottenTomatoesMovie.id == result['id']).first()
                            movie = set_movie_details(movie, session, result)
                            if lacks_imdb_alias(movie):
                                log.warning('Adding flexget_imdb alternate id %s for movie %s' %
                                            (imdb_id, movie))

                        if title.lower() != movie.title.lower():
                            log.debug('Saving search result for \'%s\'' % search_string)
                            session.add(RottenTomatoesSearchResult(search=search_string, movie=movie))
        except URLError:
            raise PluginError('Error looking up movie from RottenTomatoes')

    if not movie:
        raise PluginError('No results found from rotten tomatoes for %s' % id_str())
    # Access attributes to force the relationships to eager load before we detach from session
    for attr in ['alternate_ids', 'cast', 'directors', 'genres', 'links', 'posters', 'release_dates']:
        getattr(movie, attr)
    return movie
Ejemplo n.º 8
def lookup_movie(title=None, year=None, rottentomatoes_id=None, imdb_id=None, smart_match=None,
                 only_cached=False, session=None):
    """
    Do a lookup from Rotten Tomatoes for the movie matching the passed arguments.
    Any combination of criteria can be passed, the most specific criteria specified will be used.
    The local cache is consulted first; the online API is only queried when nothing is cached.

    :param rottentomatoes_id: rottentomatoes_id of desired movie
    :param imdb_id: imdb_id of desired movie
    :param title: title of desired movie
    :param year: release year of desired movie
    :param smart_match: attempt to clean and parse title and year from a string
    :param only_cached: if this is specified, an online lookup will not occur if the movie is not in the cache
    :param session: optionally specify a session to use, if specified, returned Movie will be live in that session
    :returns: The Movie object populated with data from Rotten Tomatoes
    :raises: PluginError if a match cannot be found or there are other problems with the lookup
    """

    if smart_match:
        # If smart_match was specified, parse it into a title and year
        title_parser = MovieParser()
        # Hand the raw string to the parser; without this smart_match was never parsed.
        # NOTE(review): assumes MovieParser exposes `data` and a no-argument `parse()` -- confirm.
        title_parser.data = smart_match
        title_parser.parse()
        title = title_parser.name
        year = title_parser.year
        if title == '' and not (rottentomatoes_id or imdb_id):
            raise PluginError('Failed to parse name from %s' % smart_match)

    if title:
        search_string = title.lower()
        if year:
            search_string = '%s %s' % (search_string, year)
    elif not (rottentomatoes_id or imdb_id):
        raise PluginError('No criteria specified for rotten tomatoes lookup')

    def id_str():
        return '<title=%s,year=%s,rottentomatoes_id=%s,imdb_id=%s>' % (title, year, rottentomatoes_id, imdb_id)

    if not session:
        session = Session()

    log.debug('Looking up rotten tomatoes information for %s' % id_str())

    movie = None
    imdb_names = ('imdb', 'flexget_imdb')

    # Try to lookup from cache
    if rottentomatoes_id:
        movie = session.query(RottenTomatoesMovie).\
            filter(RottenTomatoesMovie.id == rottentomatoes_id).first()
    if not movie and imdb_id:
        alt_id = session.query(RottenTomatoesAlternateId).\
            filter(RottenTomatoesAlternateId.name.in_(['imdb', 'flexget_imdb'])).\
            filter(RottenTomatoesAlternateId.id == imdb_id.lstrip('t')).first()
        if alt_id:
            movie = session.query(RottenTomatoesMovie).filter(RottenTomatoesMovie.id == alt_id.movie_id).first()
    if not movie and title:
        movie_filter = session.query(RottenTomatoesMovie).filter(func.lower(RottenTomatoesMovie.title) == title.lower())
        if year:
            movie_filter = movie_filter.filter(RottenTomatoesMovie.year == year)
        movie = movie_filter.first()
        if not movie:
            log.debug('No matches in movie cache found, checking search cache.')
            found = session.query(RottenTomatoesSearchResult).\
                filter(func.lower(RottenTomatoesSearchResult.search) == search_string).first()
            if found and found.movie:
                log.debug('Movie found in search cache.')
                movie = found.movie

    if movie:
        # Movie found in cache, check if cache has expired.
        if movie.expired and not only_cached:
            log.debug('Cache has expired for %s, attempting to refresh from Rotten Tomatoes.' % id_str())
            try:
                # First stored imdb-style alternate id, or None when there is none
                # (indexing a filter() result fails on Python 3 and on an empty match).
                imdb_alt_id = next(
                    (alt.id for alt in (movie.alternate_ids or []) if alt.name in imdb_names), None)
                if imdb_alt_id:
                    result = movies_alias(imdb_alt_id, 'imdb')
                else:
                    result = movies_info(movie.id)
                movie = _set_movie_details(movie, session, result)
            except URLError:
                # Best effort: a failed refresh keeps the stale cached entry usable.
                log.error('Error refreshing movie details from Rotten Tomatoes, cached info being used.')
        else:
            log.debug('Movie %s information restored from cache.' % id_str())
    else:
        if only_cached:
            raise PluginError('Movie %s not found from cache' % id_str())
        # There was no movie found in the cache, do a lookup from Rotten Tomatoes
        log.debug('Movie %s not found in cache, looking up from rotten tomatoes.' % id_str())
        try:
            # Lookups using imdb_id
            # TODO: extract to method
            if imdb_id:
                log.debug('Using IMDB alias %s.' % imdb_id)
                result = movies_alias(imdb_id, 'imdb')
                if result:
                    # Collect every way the alias result disagrees with the requested criteria.
                    mismatch = []
                    # Only compare titles when one was requested; title may be None for id-only lookups.
                    if title:
                        title_ratio = difflib.SequenceMatcher(
                            lambda x: x == ' ',
                            re.sub(r'\s+\(.*\)$', '', result['title'].lower()),
                            title.lower()).ratio()
                        if title_ratio < MIN_MATCH:
                            mismatch.append('the title (%s <-?-> %s)' % (title, result['title']))
                    result['year'] = int(result['year'])
                    if year and abs(result['year'] - year) > 1:
                        mismatch.append('the year (%s <-?-> %s)' % (year, result['year']))
                        release_year = None
                        release_dates = result.get('release_dates', {})
                        if release_dates.get('theater'):
                            log.debug('Checking year against theater release date')
                            release_year = time.strptime(release_dates.get('theater'), '%Y-%m-%d').tm_year
                            if abs(release_year - year) > 1:
                                mismatch.append('the theater release (%s)' % release_year)
                        elif release_dates.get('dvd'):
                            log.debug('Checking year against dvd release date')
                            release_year = time.strptime(release_dates.get('dvd'), '%Y-%m-%d').tm_year
                            if abs(release_year - year) > 1:
                                mismatch.append('the DVD release (%s)' % release_year)
                    if mismatch:
                        # A mismatching alias is only reported; the lookup falls through to the other criteria.
                        log.warning('Rotten Tomatoes had an imdb alias for %s but it didn\'t match %s.' %
                                    (imdb_id, ', or '.join(mismatch)))
                    else:
                        log.debug('imdb_id %s maps to rt_id %s, checking db for info.' % (imdb_id, result['id']))
                        movie = session.query(RottenTomatoesMovie).\
                            filter(RottenTomatoesMovie.id == result.get('id')).first()
                        if movie:
                            log.debug('Movie %s was in database, but did not have the imdb_id stored, '
                                      'forcing an update' % movie)
                            movie = _set_movie_details(movie, session, result)
                        else:
                            log.debug('%s was not in database, setting info.' % result['title'])
                            movie = RottenTomatoesMovie()
                            movie = _set_movie_details(movie, session, result)
                            if not movie:
                                raise PluginError('set_movie_details returned %s' % movie)
                else:
                    log.debug('IMDB alias %s returned no results.' % imdb_id)

            if not movie and rottentomatoes_id:
                result = movies_info(rottentomatoes_id)
                if result:
                    movie = RottenTomatoesMovie()
                    movie = _set_movie_details(movie, session, result)

            if not movie and title:
                # TODO: Extract to method
                log.verbose('Searching from rt `%s`' % search_string)
                results = movies_search(search_string)
                if results:
                    results = results.get('movies')
                    if results:
                        # Score every hit by title similarity and rank best first.
                        for movie_res in results:
                            seq = difflib.SequenceMatcher(
                                lambda x: x == ' ', movie_res['title'].lower(), title.lower())
                            movie_res['match'] = seq.ratio()
                        results.sort(key=lambda x: x['match'], reverse=True)

                        # Remove all movies below MIN_MATCH, and different year
                        # (iterate over a copy because entries are removed from `results`).
                        for movie_res in results[:]:
                            if year and movie_res.get('year'):
                                movie_res['year'] = int(movie_res['year'])
                                if movie_res['year'] != year:
                                    release_year = False
                                    release_dates = movie_res.get('release_dates', {})
                                    if release_dates.get('theater'):
                                        log.debug('Checking year against theater release date')
                                        release_year = time.strptime(release_dates.get('theater'),
                                                                     '%Y-%m-%d').tm_year
                                    elif release_dates.get('dvd'):
                                        log.debug('Checking year against dvd release date')
                                        release_year = time.strptime(release_dates.get('dvd'),
                                                                     '%Y-%m-%d').tm_year
                                    if release_year != year:
                                        log.debug('removing %s - %s (wrong year: %s)' %
                                                  (movie_res['title'], movie_res['id'],
                                                   str(release_year or movie_res['year'])))
                                        results.remove(movie_res)
                                        continue
                            if movie_res['match'] < MIN_MATCH:
                                log.debug('removing %s (min_match)' % movie_res['title'])
                                results.remove(movie_res)

                        if not results:
                            raise PluginError('no appropiate results')

                        if len(results) == 1:
                            log.debug('SUCCESS: only one movie remains')
                        else:
                            # Check min difference between best two hits
                            diff = results[0]['match'] - results[1]['match']
                            if diff < MIN_DIFF:
                                log.debug('unable to determine correct movie, min_diff too small'
                                          '(`%s (%d) - %s` <-?-> `%s (%d) - %s`)' %
                                          (results[0]['title'], results[0]['year'], results[0]['id'],
                                           results[1]['title'], results[1]['year'], results[1]['id']))
                                for r in results:
                                    log.debug('remain: %s (match: %s) %s' % (r['title'], r['match'], r['id']))
                                raise PluginError('min_diff')

                        imdb_alt_id = results[0].get('alternate_ids', {}).get('imdb')
                        if imdb_alt_id:
                            result = movies_alias(imdb_alt_id)
                        else:
                            result = movies_info(results[0].get('id'))

                        if not result:
                            # Detail lookup failed; fall back to the search hit itself.
                            result = results[0]

                        movie = RottenTomatoesMovie()
                        movie = _set_movie_details(movie, session, result)
                        # any() rather than `not filter(...)`: a filter object is always truthy on Python 3.
                        if imdb_id and not any(
                                alt.name == 'imdb' and alt.id == imdb_id.lstrip('t')
                                for alt in movie.alternate_ids):
                            # NOTE(review): only the warning is emitted; nothing here attaches the alternate
                            # id to the movie -- that statement appears to be missing, confirm upstream.
                            log.warning('Adding flexget_imdb alternate id %s for movie %s' %
                                        (imdb_id, movie))

                        if title.lower() != movie.title.lower():
                            log.debug('Saving search result for \'%s\'' % search_string)
                            session.add(RottenTomatoesSearchResult(search=search_string, movie=movie))
        except URLError:
            raise PluginError('Error looking up movie from RottenTomatoes')

    if not movie:
        raise PluginError('No results found from rotten tomatoes for %s' % id_str())
    # Access attributes to force the relationships to eager load before we detach from session
    for attr in ['alternate_ids', 'cast', 'directors', 'genres', 'links', 'posters', 'release_dates']:
        getattr(movie, attr)
    return movie
Ejemplo n.º 9
 def parse(self, data):
     """Create a fresh ``MovieParser`` and attach *data* to it.

     :param data: value stored on the new parser's ``data`` attribute
     :returns: the newly created ``MovieParser`` instance
     """
     # NOTE(review): the parser is only populated, never run, in this method;
     # presumably the caller triggers the actual parsing -- confirm.
     parser = MovieParser()
     parser.data = data
     return parser