Example #1
0
    def add_failed(self, entry, reason=None, **kwargs):
        """Record `entry` in the internal failed list (the one displayed with --failed).

        A record is identified by the entry's title together with its original
        url.  The first time that pair is seen a new row is created; otherwise
        the existing row has its failure count incremented and its `tof`
        timestamp and reason refreshed.  Finally, every row beyond the 25 with
        the most recent `tof` is deleted.

        :param entry: object indexable by 'title' and 'original_url'
        :param reason: why the entry failed; 'Unknown' is stored when falsy
        :param kwargs: accepted but not used by this method
        """
        if not reason:
            reason = 'Unknown'
        session = Session()
        try:
            # Look for an existing record of this exact title/url pair
            lookup = session.query(FailedEntry)
            lookup = lookup.filter(FailedEntry.title == entry['title'])
            lookup = lookup.filter(FailedEntry.url == entry['original_url'])
            item = lookup.first()
            if item:
                item.count += 1
                item.tof = datetime.now()
                item.reason = reason
            else:
                item = FailedEntry(entry['title'], entry['original_url'], reason)
            session.merge(item)
            log.debug('Marking %s in failed list. Has failed %s times.' %
                      (item.title, item.count))

            # Trim the table: anything after the 25 newest rows is removed
            newest_first = session.query(FailedEntry).order_by(FailedEntry.tof.desc())
            for stale in newest_first[25:]:
                session.delete(stale)
            session.commit()
        finally:
            # Always release the session, even when the work above raised
            session.close()
Example #2
0
    def add_failed(self, entry):
        """Record `entry` in the internal failed list (the one displayed with --failed).

        Rows are matched on the entry's title and url.  An unseen pair gets a
        new row; a known pair has its failure count incremented and its `tof`
        timestamp reset to now.  Only the 25 rows with the most recent `tof`
        survive the call.

        :param entry: object indexable by 'title' and 'url'
        """
        session = Session()
        try:
            # Is this title/url pair already recorded?
            by_title = session.query(FailedEntry).filter(FailedEntry.title == entry['title'])
            item = by_title.filter(FailedEntry.url == entry['url']).first()
            if item:
                item.count += 1
                item.tof = datetime.now()
            else:
                item = FailedEntry(entry['title'], entry['url'])
            session.merge(item)

            # Keep the list bounded: drop everything past the 25 newest rows
            recent_first = session.query(FailedEntry).order_by(FailedEntry.tof.desc())
            for surplus in recent_first[25:]:
                session.delete(surplus)
            session.commit()
        finally:
            # Always release the session, even when the work above raised
            session.close()
Example #3
0
    def add_failed(self, entry, reason=None, **kwargs):
        """Store `entry` in the internal failed list (the one displayed with --failed).

        The title and original url of the entry together identify a record.
        Unknown pairs are inserted as new rows; known pairs get their failure
        count bumped and their `tof` timestamp and reason overwritten.  After
        that, rows beyond the 25 with the most recent `tof` are deleted.

        :param entry: object indexable by 'title' and 'original_url'
        :param reason: why the entry failed; 'Unknown' is stored when falsy
        :param kwargs: accepted but not used by this method
        """
        if not reason:
            reason = 'Unknown'
        db = Session()
        try:
            # Fetch the existing record for this title/url pair, if there is one
            existing = db.query(FailedEntry)
            existing = existing.filter(FailedEntry.title == entry['title'])
            existing = existing.filter(FailedEntry.url == entry['original_url'])
            item = existing.first()
            if item:
                item.count += 1
                item.tof = datetime.now()
                item.reason = reason
            else:
                item = FailedEntry(entry['title'], entry['original_url'], reason)
            db.merge(item)
            log.debug('Marking %s in failed list. Has failed %s times.' % (item.title, item.count))

            # Cap the list at 25 rows, removing the ones with the oldest `tof`
            ordered = db.query(FailedEntry).order_by(FailedEntry.tof.desc())
            for old_row in ordered[25:]:
                db.delete(old_row)
            db.commit()
        finally:
            # Always release the session, even when the work above raised
            db.close()
Example #4
0
def _cached_imdb_alt_id(movie):
    """Return the id of the first imdb/flexget_imdb alternate id on `movie`, or None if there is none."""
    # Plain iteration instead of `filter(...)[0]`: works whether or not filter() returns a list,
    # and does not raise when the movie has alternate ids but none of them is an imdb one.
    for alt_id in movie.alternate_ids or []:
        if alt_id.name in ("imdb", "flexget_imdb"):
            return alt_id.id
    return None


def _ensure_flexget_imdb_alt_id(movie, imdb_id):
    """Append `imdb_id` to `movie` as a flexget_imdb alternate id unless an imdb alternate id already matches."""
    if not imdb_id:
        return
    stripped_id = imdb_id.lstrip("t")
    # any() gives a real boolean; `not filter(...)` is always False when filter() returns an iterator.
    if any(alt_id.name == "imdb" and alt_id.id == stripped_id for alt_id in movie.alternate_ids):
        return
    log.warning("Adding flexget_imdb alternate id %s for movie %s" % (imdb_id, movie))
    movie.alternate_ids.append(RottenTomatoesAlternateId("flexget_imdb", stripped_id))


def lookup_movie(
    title=None, year=None, rottentomatoes_id=None, imdb_id=None, smart_match=None, only_cached=False, session=None
):
    """Do a lookup from Rotten Tomatoes for the movie matching the passed arguments.

    Any combination of criteria can be passed. The database cache is consulted first (by
    rottentomatoes_id, then imdb_id, then title/year, then the search cache). On a cache miss an
    online lookup is attempted in the order imdb_id, rottentomatoes_id, title search.

    :param rottentomatoes_id: rottentomatoes_id of desired movie
    :param imdb_id: imdb_id of desired movie
    :param title: title of desired movie
    :param year: release year of desired movie
    :param smart_match: string to parse a title and year from; when given, the parsed values
        replace any `title` and `year` passed in
    :param only_cached: if this is specified, an online lookup will not occur if the movie is not in the cache
        (an expired cache entry is returned without being refreshed)
    :param session: optionally specify a session to use, if specified, returned Movie will be live in that session.
        When omitted a new Session() is created; it is committed but not closed by this function.
    :returns: The Movie object populated with data from Rotten Tomatoes
    :raises: PluginError if a match cannot be found or there are other problems with the lookup

    """

    if smart_match:
        # Parse smart_match into a title and year; these overwrite the title/year arguments
        title_parser = MovieParser()
        title_parser.parse(smart_match)
        title = title_parser.name
        year = title_parser.year
        if title == "" and not (rottentomatoes_id or imdb_id):
            raise PluginError("Failed to parse name from %s" % smart_match)

    if title:
        # search_string is only defined (and only used below) when a title is available
        search_string = title.lower()
        if year:
            search_string = "%s %s" % (search_string, year)
    elif not (rottentomatoes_id or imdb_id):
        raise PluginError("No criteria specified for rotten tomatoes lookup")

    def id_str():
        return "<title=%s,year=%s,rottentomatoes_id=%s,imdb_id=%s>" % (title, year, rottentomatoes_id, imdb_id)

    if not session:
        session = Session()

    log.debug("Looking up rotten tomatoes information for %s" % id_str())

    movie = None

    # Try to lookup from cache, most specific criteria first
    if rottentomatoes_id:
        movie = session.query(RottenTomatoesMovie).filter(RottenTomatoesMovie.id == rottentomatoes_id).first()
    if not movie and imdb_id:
        alt_id = (
            session.query(RottenTomatoesAlternateId)
            .filter(RottenTomatoesAlternateId.name.in_(["imdb", "flexget_imdb"]))
            .filter(RottenTomatoesAlternateId.id == imdb_id.lstrip("t"))
            .first()
        )
        if alt_id:
            movie = session.query(RottenTomatoesMovie).filter(RottenTomatoesMovie.id == alt_id.movie_id).first()
    if not movie and title:
        movie_filter = session.query(RottenTomatoesMovie).filter(func.lower(RottenTomatoesMovie.title) == title.lower())
        if year:
            movie_filter = movie_filter.filter(RottenTomatoesMovie.year == year)
        movie = movie_filter.first()
        if not movie:
            log.debug("No matches in movie cache found, checking search cache.")
            found = (
                session.query(RottenTomatoesSearchResult)
                .filter(func.lower(RottenTomatoesSearchResult.search) == search_string)
                .first()
            )
            if found and found.movie:
                log.debug("Movie found in search cache.")
                movie = found.movie
    if movie:
        # Movie found in cache, check if cache has expired.
        if movie.expired and not only_cached:
            log.debug("Cache has expired for %s, attempting to refresh from Rotten Tomatoes." % id_str())
            try:
                imdb_alt_id = _cached_imdb_alt_id(movie)
                if imdb_alt_id:
                    result = movies_alias(imdb_alt_id, "imdb")
                else:
                    result = movies_info(movie.id)
                movie = set_movie_details(movie, session, result)
                session.merge(movie)
            except URLError:
                # Refresh is best-effort: keep serving the stale cached movie
                log.error("Error refreshing movie details from Rotten Tomatoes, cached info being used.")
        else:
            log.debug("Movie %s information restored from cache." % id_str())
    else:
        if only_cached:
            raise PluginError("Movie %s not found from cache" % id_str())
        # There was no movie found in the cache, do a lookup from Rotten Tomatoes
        log.debug("Movie %s not found in cache, looking up from rotten tomatoes." % id_str())
        try:
            # Lookups using imdb_id
            if imdb_id:
                log.debug("Using IMDB alias %s." % imdb_id)
                result = movies_alias(imdb_id, "imdb")
                if result:
                    # Sanity-check the alias result against any title/year the caller supplied
                    mismatch = []
                    if (
                        title
                        and difflib.SequenceMatcher(
                            lambda x: x == " ", re.sub(r"\s+\(.*\)$", "", result["title"].lower()), title.lower()
                        ).ratio()
                        < MIN_MATCH
                    ):
                        mismatch.append("the title (%s <-?-> %s)" % (title, result["title"]))
                    result["year"] = int(result["year"])
                    if year and fabs(result["year"] - year) > 1:
                        # NOTE(review): the year mismatch is recorded before the release dates are
                        # checked, so a matching release date cannot clear it — confirm this is intended.
                        mismatch.append("the year (%s <-?-> %s)" % (year, result["year"]))
                        release_year = None
                        if result.get("release_dates", {}).get("theater"):
                            log.debug("Checking year against theater release date")
                            release_year = time.strptime(result["release_dates"].get("theater"), "%Y-%m-%d").tm_year
                            if fabs(release_year - year) > 1:
                                mismatch.append("the theater release (%s)" % release_year)
                        elif result.get("release_dates", {}).get("dvd"):
                            log.debug("Checking year against dvd release date")
                            release_year = time.strptime(result["release_dates"].get("dvd"), "%Y-%m-%d").tm_year
                            if fabs(release_year - year) > 1:
                                mismatch.append("the DVD release (%s)" % release_year)
                    if mismatch:
                        log.warning(
                            "Rotten Tomatoes had an imdb alias for %s but it didn't match %s."
                            % (imdb_id, ", or ".join(mismatch))
                        )
                    else:
                        log.debug("imdb_id %s maps to rt_id %s, checking db for info." % (imdb_id, result["id"]))
                        movie = (
                            session.query(RottenTomatoesMovie)
                            .filter(RottenTomatoesMovie.id == result.get("id"))
                            .first()
                        )
                        if movie:
                            log.debug(
                                "Movie %s was in database, but did not have the imdb_id stored, "
                                "forcing an update" % movie
                            )
                            movie = set_movie_details(movie, session, result)
                            session.merge(movie)
                        else:
                            log.debug("%s was not in database, setting info." % result["title"])
                            movie = RottenTomatoesMovie()
                            movie = set_movie_details(movie, session, result)
                            if not movie:
                                raise PluginError("set_movie_details returned %s" % movie)
                            session.add(movie)
                else:
                    log.debug("IMDB alias %s returned no results." % imdb_id)
            if not movie and rottentomatoes_id:
                result = movies_info(rottentomatoes_id)
                if result:
                    movie = RottenTomatoesMovie()
                    movie = set_movie_details(movie, session, result)
                    session.add(movie)
            if not movie and title:
                log.verbose("Searching from rt `%s`" % search_string)
                results = movies_search(search_string)
                if results:
                    results = results.get("movies")
                    if results:
                        # Score every hit by title similarity and put the best match first
                        for movie_res in results:
                            seq = difflib.SequenceMatcher(lambda x: x == " ", movie_res["title"].lower(), title.lower())
                            movie_res["match"] = seq.ratio()
                        results.sort(key=lambda x: x["match"], reverse=True)

                        # Remove all movies below MIN_MATCH, and different year
                        # (iterate over a copy because entries are removed from `results` inside the loop)
                        for movie_res in results[:]:

                            if year and movie_res.get("year"):
                                movie_res["year"] = int(movie_res["year"])
                                if movie_res["year"] != year:
                                    release_year = False
                                    if movie_res.get("release_dates", {}).get("theater"):
                                        log.debug("Checking year against theater release date")
                                        release_year = time.strptime(
                                            movie_res["release_dates"].get("theater"), "%Y-%m-%d"
                                        ).tm_year
                                    elif movie_res.get("release_dates", {}).get("dvd"):
                                        log.debug("Checking year against dvd release date")
                                        release_year = time.strptime(
                                            movie_res["release_dates"].get("dvd"), "%Y-%m-%d"
                                        ).tm_year
                                    if not (release_year and release_year == year):
                                        log.debug(
                                            "removing %s - %s (wrong year: %s)"
                                            % (
                                                movie_res["title"],
                                                movie_res["id"],
                                                str(release_year or movie_res["year"]),
                                            )
                                        )
                                        results.remove(movie_res)
                                        continue
                            if movie_res["match"] < MIN_MATCH:
                                log.debug("removing %s (min_match)" % movie_res["title"])
                                results.remove(movie_res)
                                continue

                        if not results:
                            raise PluginError("no appropiate results")

                        if len(results) == 1:
                            log.debug("SUCCESS: only one movie remains")
                        else:
                            # Check min difference between best two hits
                            diff = results[0]["match"] - results[1]["match"]
                            if diff < MIN_DIFF:
                                # 'year' is optional on search hits and is only coerced to int when a
                                # year filter was given, so format it with %s and .get() to keep this
                                # debug message from raising instead of the intended PluginError.
                                log.debug(
                                    "unable to determine correct movie, min_diff too small"
                                    "(`%s (%s) - %s` <-?-> `%s (%s) - %s`)"
                                    % (
                                        results[0]["title"],
                                        results[0].get("year"),
                                        results[0]["id"],
                                        results[1]["title"],
                                        results[1].get("year"),
                                        results[1]["id"],
                                    )
                                )
                                for r in results:
                                    log.debug("remain: %s (match: %s) %s" % (r["title"], r["match"], r["id"]))
                                raise PluginError("min_diff")

                        # Fetch full details for the winning hit; fall back to the search hit itself
                        imdb_alt_id = results[0].get("alternate_ids", {}).get("imdb")
                        if imdb_alt_id:
                            result = movies_alias(imdb_alt_id)
                        else:
                            result = movies_info(results[0].get("id"))

                        if not result:
                            result = results[0]

                        movie = RottenTomatoesMovie()
                        try:
                            movie = set_movie_details(movie, session, result)
                            _ensure_flexget_imdb_alt_id(movie, imdb_id)
                            session.add(movie)
                            session.commit()
                        except IntegrityError:
                            log.warning(
                                "Found movie %s in database after search even though we "
                                "already looked, updating it with search result." % movie
                            )
                            # The insert collided with an existing row: update that row instead
                            session.rollback()
                            movie = (
                                session.query(RottenTomatoesMovie)
                                .filter(RottenTomatoesMovie.id == result["id"])
                                .first()
                            )
                            movie = set_movie_details(movie, session, result)
                            _ensure_flexget_imdb_alt_id(movie, imdb_id)
                            session.merge(movie)
                            session.commit()

                        # Remember which search string led to this movie when it differs from the title
                        if title.lower() != movie.title.lower():
                            log.debug("Saving search result for '%s'" % search_string)
                            session.add(RottenTomatoesSearchResult(search=search_string, movie=movie))
        except URLError:
            raise PluginError("Error looking up movie from RottenTomatoes")

    if not movie:
        raise PluginError("No results found from rotten tomatoes for %s" % id_str())
    else:
        # Access attributes to force the relationships to eager load before we detach from session
        for attr in ["alternate_ids", "cast", "directors", "genres", "links", "posters", "release_dates"]:
            getattr(movie, attr)
        session.commit()
        return movie
Example #5
0
def lookup_movie(title=None, year=None, rottentomatoes_id=None, smart_match=None,
                 only_cached=False, session=None):
    """
    Do a lookup from Rotten Tomatoes for the movie matching the passed arguments.
    Any combination of criteria can be passed. The database cache is consulted first (by
    rottentomatoes_id, then title/year, then the search cache); on a cache miss an online lookup
    is attempted by rottentomatoes_id and then by title search.

    :param rottentomatoes_id: rottentomatoes_id of desired movie
    :param string title: title of desired movie
    :param year: release year of desired movie
    :param smart_match: string to parse a title and year from; when given, the parsed values
        replace any `title` and `year` passed in
    :param only_cached: if this is specified, an online lookup will not occur if the movie is not in the cache
        (an expired cache entry is returned without being refreshed)
    :param session: optionally specify a session to use, if specified, returned Movie will be live in that session.
        When omitted a new Session() is created; it is committed but not closed by this function.
    :returns: The Movie object populated with data from Rotten Tomatoes
    :raises: PluginError if a match cannot be found or there are other problems with the lookup

    """

    if smart_match:
        # Parse smart_match into a title and year; these overwrite the title/year arguments
        title_parser = MovieParser()
        title_parser.parse(smart_match)
        title = title_parser.name
        year = title_parser.year
        if title == '' and not rottentomatoes_id:
            raise PluginError('Failed to parse name from %s' % smart_match)

    if title:
        # search_string is only defined (and only used below) when a title is available
        search_string = title.lower()
        if year:
            search_string = '%s %s' % (search_string, year)
    elif not rottentomatoes_id:
        raise PluginError('No criteria specified for rotten tomatoes lookup')

    def id_str():
        return '<title=%s,year=%s,rottentomatoes_id=%s>' % (title, year, rottentomatoes_id)

    if not session:
        session = Session()

    log.debug('Looking up rotten tomatoes information for %s' % id_str())

    movie = None

    # Try to lookup from cache
    if rottentomatoes_id:
        movie = session.query(RottenTomatoesMovie).\
            filter(RottenTomatoesMovie.id == rottentomatoes_id).first()
    if not movie and title:
        movie_filter = session.query(RottenTomatoesMovie).filter(func.lower(RottenTomatoesMovie.title) == title.lower())
        if year:
            movie_filter = movie_filter.filter(RottenTomatoesMovie.year == year)
        movie = movie_filter.first()
        if not movie:
            log.debug('No matches in movie cache found, checking search cache.')
            found = session.query(RottenTomatoesSearchResult).\
                filter(func.lower(RottenTomatoesSearchResult.search) == search_string).first()
            if found and found.movie:
                log.debug('Movie found in search cache.')
                movie = found.movie
    if movie:
        # Movie found in cache, check if cache has expired.
        if movie.expired and not only_cached:
            log.debug('Cache has expired for %s, attempting to refresh from Rotten Tomatoes.' % id_str())
            try:
                result = movies_info(movie.id)
                movie = _set_movie_details(movie, session, result)
                session.merge(movie)
            except URLError:
                # Refresh is best-effort: keep serving the stale cached movie
                log.error('Error refreshing movie details from Rotten Tomatoes, cached info being used.')
        else:
            log.debug('Movie %s information restored from cache.' % id_str())
    else:
        if only_cached:
            raise PluginError('Movie %s not found from cache' % id_str())
        # There was no movie found in the cache, do a lookup from Rotten Tomatoes
        log.debug('Movie %s not found in cache, looking up from rotten tomatoes.' % id_str())
        try:
            if not movie and rottentomatoes_id:
                result = movies_info(rottentomatoes_id)
                if result:
                    movie = RottenTomatoesMovie()
                    movie = _set_movie_details(movie, session, result)
                    session.add(movie)

            if not movie and title:
                # TODO: Extract to method
                log.verbose('Searching from rt `%s`' % search_string)
                results = movies_search(search_string)
                if results:
                    results = results.get('movies')
                    if results:
                        # Score every hit by title similarity and put the best match first
                        for movie_res in results:
                            seq = difflib.SequenceMatcher(
                                lambda x: x == ' ', movie_res['title'].lower(), title.lower())
                            movie_res['match'] = seq.ratio()
                        results.sort(key=lambda x: x['match'], reverse=True)

                        # Remove all movies below MIN_MATCH, and different year
                        # (iterate over a copy because entries are removed from `results` inside the loop)
                        for movie_res in results[:]:

                            if year and movie_res.get('year'):
                                movie_res['year'] = int(movie_res['year'])
                                if movie_res['year'] != year:
                                    release_year = False
                                    if movie_res.get('release_dates', {}).get('theater'):
                                        log.debug('Checking year against theater release date')
                                        release_year = time.strptime(movie_res['release_dates'].get('theater'),
                                                                     '%Y-%m-%d').tm_year
                                    elif movie_res.get('release_dates', {}).get('dvd'):
                                        log.debug('Checking year against dvd release date')
                                        release_year = time.strptime(movie_res['release_dates'].get('dvd'),
                                                                     '%Y-%m-%d').tm_year
                                    if not (release_year and release_year == year):
                                        log.debug('removing %s - %s (wrong year: %s)' %
                                                  (movie_res['title'], movie_res['id'],
                                                   str(release_year or movie_res['year'])))
                                        results.remove(movie_res)
                                        continue
                            if movie_res['match'] < MIN_MATCH:
                                log.debug('removing %s (min_match)' % movie_res['title'])
                                results.remove(movie_res)
                                continue

                        if not results:
                            raise PluginError('no appropiate results')

                        if len(results) == 1:
                            log.debug('SUCCESS: only one movie remains')
                        else:
                            # Check min difference between best two hits
                            diff = results[0]['match'] - results[1]['match']
                            if diff < MIN_DIFF:
                                # 'year' is optional on search hits and is only coerced to int when a
                                # year filter was given, so format it with %s and .get() to keep this
                                # debug message from raising instead of the intended PluginError.
                                log.debug('unable to determine correct movie, min_diff too small'
                                          '(`%s (%s) - %s` <-?-> `%s (%s) - %s`)' %
                                          (results[0]['title'], results[0].get('year'), results[0]['id'],
                                           results[1]['title'], results[1].get('year'), results[1]['id']))
                                for r in results:
                                    log.debug('remain: %s (match: %s) %s' % (r['title'], r['match'], r['id']))
                                raise PluginError('min_diff')

                        # Fetch full details for the winning hit; fall back to the search hit itself
                        result = movies_info(results[0].get('id'))

                        if not result:
                            result = results[0]

                        # The movie may already be stored under this id even though the title lookup missed it
                        movie = session.query(RottenTomatoesMovie).filter(
                            RottenTomatoesMovie.id == result['id']).first()

                        if not movie:
                            movie = RottenTomatoesMovie()
                            movie = _set_movie_details(movie, session, result)
                            session.add(movie)
                            session.commit()

                        # Remember which search string led to this movie when it differs from the title
                        if title.lower() != movie.title.lower():
                            log.debug('Saving search result for \'%s\'' % search_string)
                            session.add(RottenTomatoesSearchResult(search=search_string, movie=movie))
        except URLError:
            raise PluginError('Error looking up movie from RottenTomatoes')

    if not movie:
        raise PluginError('No results found from rotten tomatoes for %s' % id_str())
    else:
        # Access attributes to force the relationships to eager load before we detach from session
        for attr in ['alternate_ids', 'cast', 'directors', 'genres', 'links', 'posters', 'release_dates']:
            getattr(movie, attr)
        session.commit()
        return movie
Example #6
0
def lookup_movie(title=None, year=None, rottentomatoes_id=None, imdb_id=None, smart_match=None, only_cached=False, session=None):
    """Do a lookup from Rotten Tomatoes for the movie matching the passed arguments.

    Any combination of criteria can be passed, the most specific criteria specified will be used.
    The local database cache is queried first (by rottentomatoes_id, then imdb_id, then title/year,
    then previously stored search strings). The online API is only used when nothing is cached, or
    when the cached movie reports itself as expired.

    :param rottentomatoes_id: rottentomatoes_id of desired movie
    :param imdb_id: imdb_id of desired movie
    :param title: title of desired movie
    :param year: release year of desired movie
    :param smart_match: attempt to clean and parse title and year from a string
    :param only_cached: if this is specified, an online lookup will not occur if the movie is not in the cache
    :param session: optionally specify a session to use, if specified, returned Movie will be live in that session
    :returns: The Movie object populated with data from Rotten Tomatoes
    :raises: PluginError if a match cannot be found or there are other problems with the lookup

    """

    if smart_match:
        # Parse smart_match into a title and year; this replaces any title/year that were passed in
        title_parser = MovieParser()
        title_parser.parse(smart_match)
        title = title_parser.name
        year = title_parser.year
        if title == '' and not (rottentomatoes_id or imdb_id or title):
            # Report the string that could not be parsed
            raise PluginError('Failed to parse name from %s' % smart_match)

    if title:
        # search_string is only defined (and only used below) when a title is available
        search_string = title.lower()
        if year:
            search_string = '%s %s' % (search_string, year)
    elif not (rottentomatoes_id or imdb_id):
        raise PluginError('No criteria specified for rotten tomatoes lookup')

    def id_str():
        """Describe the current lookup criteria for log and error messages."""
        return '<title=%s,year=%s,rottentomatoes_id=%s,imdb_id=%s>' % (title, year, rottentomatoes_id, imdb_id)

    def has_imdb_alt_id(candidate):
        """Tell whether candidate already carries an 'imdb' alternate id equal to the requested imdb_id."""
        wanted = imdb_id.lstrip('t')
        return any(alt_id.name == 'imdb' and alt_id.id == wanted for alt_id in candidate.alternate_ids)

    if not session:
        session = Session()

    log.debug('Looking up rotten tomatoes information for %s' % id_str())

    movie = None

    # Try to lookup from cache, most specific criteria first
    if rottentomatoes_id:
        movie = session.query(RottenTomatoesMovie).\
            filter(RottenTomatoesMovie.id == rottentomatoes_id).first()
    if not movie and imdb_id:
        # Alternate ids are compared with the leading 't' characters of the imdb id stripped
        alt_id = session.query(RottenTomatoesAlternateId).\
            filter(RottenTomatoesAlternateId.name.in_(['imdb', 'flexget_imdb'])).\
            filter(RottenTomatoesAlternateId.id == imdb_id.lstrip('t')).first()
        if alt_id:
            movie = session.query(RottenTomatoesMovie).filter(RottenTomatoesMovie.id == alt_id.movie_id).first()
    if not movie and title:
        movie_filter = session.query(RottenTomatoesMovie).filter(func.lower(RottenTomatoesMovie.title) == title.lower())
        if year:
            movie_filter = movie_filter.filter(RottenTomatoesMovie.year == year)
        movie = movie_filter.first()
        if not movie:
            # Fall back to search strings saved by earlier online searches
            found = session.query(RottenTomatoesSearchResult).\
                filter(func.lower(RottenTomatoesSearchResult.search) == search_string).first()
            if found and found.movie:
                movie = found.movie
    if movie:
        # Movie found in cache, check if cache has expired.
        if movie.expired and not only_cached:
            log.debug('Cache has expired for %s, attempting to refresh from Rotten Tomatoes.' % id_str())
            try:
                # First imdb-style alternate id, or None when the movie has none. Indexing the
                # filtered list directly raised IndexError when no alternate id was an imdb one.
                imdb_alt_id = next((alt_id.id for alt_id in (movie.alternate_ids or [])
                                    if alt_id.name in ('imdb', 'flexget_imdb')), None)
                if imdb_alt_id:
                    result = movies_alias(imdb_alt_id, 'imdb')
                else:
                    result = movies_info(movie.id)
                movie = set_movie_details(movie, session, result)
                session.merge(movie)
            except URLError:
                # Best effort: keep serving the stale cached entry
                log.error('Error refreshing movie details from Rotten Tomatoes, cached info being used.')
        else:
            log.debug('Movie %s information restored from cache.' % id_str())
    else:
        if only_cached:
            raise PluginError('Movie %s not found from cache' % id_str())
        # There was no movie found in the cache, do a lookup from Rotten Tomatoes
        log.debug('Movie %s not found in cache, looking up from rotten tomatoes.' % id_str())
        try:
            # Lookups using imdb_id
            if imdb_id:
                log.debug('Using IMDB alias %s.' % imdb_id)
                result = movies_alias(imdb_id, 'imdb')
                if result:
                    # Sanity check the alias result against the other criteria we were given
                    mismatch = []
                    if title:
                        # Compare with any trailing parenthesised suffix removed from the RT title
                        rt_title = re.sub(r'\s+\(.*\)$', '', result['title'].lower())
                        ratio = difflib.SequenceMatcher(lambda x: x == ' ', rt_title, title.lower()).ratio()
                        if ratio < MIN_MATCH:
                            mismatch.append('the title (%s <-?-> %s)' % (title, result['title']))
                    if year and fabs(result['year'] - year) > 1:
                        mismatch.append('the year (%s <-?-> %s)' % (year, result['year']))
                        # NOTE(review): the release date checks below can only add further mismatches;
                        # a matching release year does not clear the year mismatch recorded above.
                        # Confirm this is intended.
                        release_year = None
                        if result.get('release_dates', {}).get('theater'):
                            log.debug('Checking year against theater release date')
                            release_year = time.strptime(result['release_dates'].get('theater'), '%Y-%m-%d').tm_year
                            if fabs(release_year - year) > 1:
                                mismatch.append('the theater release (%s)' % release_year)
                        elif result.get('release_dates', {}).get('dvd'):
                            log.debug('Checking year against dvd release date')
                            release_year = time.strptime(result['release_dates'].get('dvd'), '%Y-%m-%d').tm_year
                            if fabs(release_year - year) > 1:
                                mismatch.append('the DVD release (%s)' % release_year)
                    if mismatch:
                        log.warning('Rotten Tomatoes had an imdb alias for %s but it didn\'t match %s.' %
                                    (imdb_id, ', or '.join(mismatch)))
                    else:
                        log.debug('imdb_id %s maps to rt_id %s, checking db for info.' % (imdb_id, result['id']))
                        movie = session.query(RottenTomatoesMovie).filter(RottenTomatoesMovie.id == result.get('id')).first()
                        if movie:
                            log.debug('Movie %s was in database, but did not have the imdb_id stored, '
                                      'forcing an update' % movie)
                            movie = set_movie_details(movie, session, result)
                            session.merge(movie)
                        else:
                            log.debug('%s was not in database, setting info.' % result['title'])
                            movie = RottenTomatoesMovie()
                            movie = set_movie_details(movie, session, result)
                            if not movie:
                                raise PluginError('set_movie_details returned %s' % movie)
                            session.add(movie)
                else:
                    log.debug('IMDB alias %s returned no results.' % imdb_id)
            # Lookup using the rotten tomatoes id
            if not movie and rottentomatoes_id:
                result = movies_info(rottentomatoes_id)
                if result:
                    movie = RottenTomatoesMovie()
                    movie = set_movie_details(movie, session, result)
                    session.add(movie)
            # Lookup by searching for the title (and year)
            if not movie and title:
                log.verbose('Searching from rt `%s`' % search_string)
                results = movies_search(search_string)
                if results:
                    results = results.get('movies')
                    if results:
                        # Score every hit by title similarity, best match first
                        for movie_res in results:
                            seq = difflib.SequenceMatcher(lambda x: x == ' ',
                                                          movie_res['title'].lower(), title.lower())
                            movie_res['match'] = seq.ratio()
                        results.sort(key=lambda x: x['match'], reverse=True)

                        # Remove all movies below MIN_MATCH, and different year
                        # (iterate over a copy, since entries are removed from results inside the loop)
                        for movie_res in results[:]:

                            if year and movie_res.get('year') and movie_res['year'] != year:
                                # The listed year differs; keep the hit only if a release date year matches
                                release_year = False
                                if movie_res.get('release_dates', {}).get('theater'):
                                    log.debug('Checking year against theater release date')
                                    release_year = time.strptime(movie_res['release_dates'].get('theater'), '%Y-%m-%d').tm_year
                                elif movie_res.get('release_dates', {}).get('dvd'):
                                    log.debug('Checking year against dvd release date')
                                    release_year = time.strptime(movie_res['release_dates'].get('dvd'), '%Y-%m-%d').tm_year
                                if not (release_year and release_year == year):
                                    log.debug('removing %s - %s (wrong year: %s)' %
                                              (movie_res['title'], movie_res['id'],
                                               str(release_year or movie_res['year'])))
                                    results.remove(movie_res)
                                    continue
                            if movie_res['match'] < MIN_MATCH:
                                log.debug('removing %s (min_match)' % movie_res['title'])
                                results.remove(movie_res)
                                continue

                        if not results:
                            raise PluginError('no appropiate results')

                        if len(results) == 1:
                            log.debug('SUCCESS: only one movie remains')
                        else:
                            # Check min difference between best two hits
                            diff = results[0]['match'] - results[1]['match']
                            if diff < MIN_DIFF:
                                log.debug('unable to determine correct movie, min_diff too small'
                                          '(`%s (%d) - %s` <-?-> `%s (%d) - %s`)' %
                                          (results[0]['title'], results[0]['year'], results[0]['id'],
                                           results[1]['title'], results[1]['year'], results[1]['id']))
                                for r in results:
                                    log.debug('remain: %s (match: %s) %s' % (r['title'], r['match'], r['id']))
                                raise PluginError('min_diff')

                        # Fetch full details for the best hit, preferring its imdb alias when it has one
                        imdb_alt_id = results[0].get('alternate_ids', {}).get('imdb')
                        if imdb_alt_id:
                            result = movies_alias(imdb_alt_id)
                        else:
                            result = movies_info(results[0].get('id'))

                        if not result:
                            # Detail lookup returned nothing, fall back to the search hit itself
                            result = results[0]

                        movie = RottenTomatoesMovie()
                        try:
                            movie = set_movie_details(movie, session, result)
                            # any() replaces `not filter(...)`, which is always False on Python 3
                            if imdb_id and not has_imdb_alt_id(movie):
                                log.warning('Adding flexget_imdb alternate id %s for movie %s' %
                                            (imdb_id, movie))
                                movie.alternate_ids.append(RottenTomatoesAlternateId('flexget_imdb',
                                                                                     imdb_id.lstrip('t')))
                            session.add(movie)
                        except IntegrityError:
                            log.warning('Found movie %s in database after search even though we '
                                        'already looked, updating it with search result.' % movie)
                            session.rollback()
                            movie = session.query(RottenTomatoesMovie).filter(RottenTomatoesMovie.id == result['id']).first()
                            movie = set_movie_details(movie, session, result)
                            if imdb_id and not has_imdb_alt_id(movie):
                                log.warning('Adding flexget_imdb alternate id %s for movie %s' %
                                            (imdb_id, movie))
                                movie.alternate_ids.append(RottenTomatoesAlternateId('flexget_imdb',
                                                                                     imdb_id.lstrip('t')))
                            session.merge(movie)

                        # Remember which search string led to this movie when it differs from the title
                        if title.lower() != movie.title.lower():
                            log.debug('Saving search result for \'%s\'' % search_string)
                            session.add(RottenTomatoesSearchResult(search=search_string, movie=movie))
        except URLError:
            raise PluginError('Error looking up movie from RottenTomatoes')

    if not movie:
        raise PluginError('No results found from rotten tomatoes for %s' % id_str())
    else:
        # Access attributes to force the relationships to eager load before we detach from session
        for attr in ['alternate_ids', 'cast', 'directors', 'genres', 'links', 'posters', 'release_dates']:
            getattr(movie, attr)
        session.commit()
        return movie
def lookup_movie(title=None, year=None, rottentomatoes_id=None, imdb_id=None, smart_match=None,
                 only_cached=False, session=None):
    """
    Do a lookup from Rotten Tomatoes for the movie matching the passed arguments.
    Any combination of criteria can be passed, the most specific criteria specified will be used.

    The local database cache is queried first (by rottentomatoes_id, then imdb_id, then title/year,
    then previously stored search strings). The online API is only used when nothing is cached, or
    when the cached movie reports itself as expired.

    :param rottentomatoes_id: rottentomatoes_id of desired movie
    :param imdb_id: imdb_id of desired movie
    :param title: title of desired movie
    :param year: release year of desired movie
    :param smart_match: attempt to clean and parse title and year from a string
    :param only_cached: if this is specified, an online lookup will not occur if the movie is not in the cache
    :param session: optionally specify a session to use, if specified, returned Movie will be live in that session
    :returns: The Movie object populated with data from Rotten Tomatoes
    :raises: PluginError if a match cannot be found or there are other problems with the lookup

    """

    if smart_match:
        # Parse smart_match into a title and year; this replaces any title/year that were passed in
        title_parser = MovieParser()
        title_parser.parse(smart_match)
        title = title_parser.name
        year = title_parser.year
        if title == '' and not (rottentomatoes_id or imdb_id or title):
            raise PluginError('Failed to parse name from %s' % smart_match)

    if title:
        # search_string is only defined (and only used below) when a title is available
        search_string = title.lower()
        if year:
            search_string = '%s %s' % (search_string, year)
    elif not (rottentomatoes_id or imdb_id):
        raise PluginError('No criteria specified for rotten tomatoes lookup')

    def id_str():
        """Describe the current lookup criteria for log and error messages."""
        return '<title=%s,year=%s,rottentomatoes_id=%s,imdb_id=%s>' % (title, year, rottentomatoes_id, imdb_id)

    if not session:
        session = Session()

    log.debug('Looking up rotten tomatoes information for %s' % id_str())

    movie = None

    # Try to lookup from cache, most specific criteria first
    if rottentomatoes_id:
        movie = session.query(RottenTomatoesMovie).\
            filter(RottenTomatoesMovie.id == rottentomatoes_id).first()
    if not movie and imdb_id:
        # Alternate ids are compared with the leading 't' characters of the imdb id stripped
        alt_id = session.query(RottenTomatoesAlternateId).\
            filter(RottenTomatoesAlternateId.name.in_(['imdb', 'flexget_imdb'])).\
            filter(RottenTomatoesAlternateId.id == imdb_id.lstrip('t')).first()
        if alt_id:
            movie = session.query(RottenTomatoesMovie).filter(RottenTomatoesMovie.id == alt_id.movie_id).first()
    if not movie and title:
        movie_filter = session.query(RottenTomatoesMovie).filter(func.lower(RottenTomatoesMovie.title) == title.lower())
        if year:
            movie_filter = movie_filter.filter(RottenTomatoesMovie.year == year)
        movie = movie_filter.first()
        if not movie:
            log.debug('No matches in movie cache found, checking search cache.')
            found = session.query(RottenTomatoesSearchResult).\
                filter(func.lower(RottenTomatoesSearchResult.search) == search_string).first()
            if found and found.movie:
                log.debug('Movie found in search cache.')
                movie = found.movie
    if movie:
        # Movie found in cache, check if cache has expired.
        if movie.expired and not only_cached:
            log.debug('Cache has expired for %s, attempting to refresh from Rotten Tomatoes.' % id_str())
            try:
                # First imdb-style alternate id, or None when the movie has none. Indexing the
                # filtered list directly raised IndexError when no alternate id was an imdb one.
                imdb_alt_id = next((alt_id.id for alt_id in (movie.alternate_ids or [])
                                    if alt_id.name in ('imdb', 'flexget_imdb')), None)
                if imdb_alt_id:
                    result = movies_alias(imdb_alt_id, 'imdb')
                else:
                    result = movies_info(movie.id)
                movie = _set_movie_details(movie, session, result)
                session.merge(movie)
            except URLError:
                # Best effort: keep serving the stale cached entry
                log.error('Error refreshing movie details from Rotten Tomatoes, cached info being used.')
        else:
            log.debug('Movie %s information restored from cache.' % id_str())
    else:
        if only_cached:
            raise PluginError('Movie %s not found from cache' % id_str())
        # There was no movie found in the cache, do a lookup from Rotten Tomatoes
        log.debug('Movie %s not found in cache, looking up from rotten tomatoes.' % id_str())
        try:
            # Lookups using imdb_id
            # TODO: extract to method
            if imdb_id:
                log.debug('Using IMDB alias %s.' % imdb_id)
                result = movies_alias(imdb_id, 'imdb')
                if result:
                    # Sanity check the alias result against the other criteria we were given
                    mismatch = []
                    # Only compare titles when a title was given: an imdb_id-only lookup has
                    # title=None, and calling title.lower() unconditionally raised AttributeError.
                    if title:
                        # Compare with any trailing parenthesised suffix removed from the RT title
                        rt_title = re.sub(r'\s+\(.*\)$', '', result['title'].lower())
                        ratio = difflib.SequenceMatcher(lambda x: x == ' ', rt_title, title.lower()).ratio()
                        if ratio < MIN_MATCH:
                            mismatch.append('the title (%s <-?-> %s)' % (title, result['title']))
                    # Normalise the year to an int before the numeric comparison below
                    result['year'] = int(result['year'])
                    if year and fabs(result['year'] - year) > 1:
                        mismatch.append('the year (%s <-?-> %s)' % (year, result['year']))
                        # NOTE(review): the release date checks below can only add further mismatches;
                        # a matching release year does not clear the year mismatch recorded above.
                        # Confirm this is intended.
                        release_year = None
                        if result.get('release_dates', {}).get('theater'):
                            log.debug('Checking year against theater release date')
                            release_year = time.strptime(result['release_dates'].get('theater'), '%Y-%m-%d').tm_year
                            if fabs(release_year - year) > 1:
                                mismatch.append('the theater release (%s)' % release_year)
                        elif result.get('release_dates', {}).get('dvd'):
                            log.debug('Checking year against dvd release date')
                            release_year = time.strptime(result['release_dates'].get('dvd'), '%Y-%m-%d').tm_year
                            if fabs(release_year - year) > 1:
                                mismatch.append('the DVD release (%s)' % release_year)
                    if mismatch:
                        log.warning('Rotten Tomatoes had an imdb alias for %s but it didn\'t match %s.' %
                                    (imdb_id, ', or '.join(mismatch)))
                    else:
                        log.debug('imdb_id %s maps to rt_id %s, checking db for info.' % (imdb_id, result['id']))
                        movie = session.query(RottenTomatoesMovie).\
                            filter(RottenTomatoesMovie.id == result.get('id')).first()
                        if movie:
                            log.debug('Movie %s was in database, but did not have the imdb_id stored, '
                                      'forcing an update' % movie)
                            movie = _set_movie_details(movie, session, result)
                            session.merge(movie)
                        else:
                            log.debug('%s was not in database, setting info.' % result['title'])
                            movie = RottenTomatoesMovie()
                            movie = _set_movie_details(movie, session, result)
                            if not movie:
                                raise PluginError('set_movie_details returned %s' % movie)
                            session.add(movie)
                else:
                    log.debug('IMDB alias %s returned no results.' % imdb_id)

            # Lookup using the rotten tomatoes id
            if not movie and rottentomatoes_id:
                result = movies_info(rottentomatoes_id)
                if result:
                    movie = RottenTomatoesMovie()
                    movie = _set_movie_details(movie, session, result)
                    session.add(movie)

            # Lookup by searching for the title (and year)
            if not movie and title:
                # TODO: Extract to method
                log.verbose('Searching from rt `%s`' % search_string)
                results = movies_search(search_string)
                if results:
                    results = results.get('movies')
                    if results:
                        # Score every hit by title similarity, best match first
                        for movie_res in results:
                            seq = difflib.SequenceMatcher(
                                lambda x: x == ' ', movie_res['title'].lower(), title.lower())
                            movie_res['match'] = seq.ratio()
                        results.sort(key=lambda x: x['match'], reverse=True)

                        # Remove all movies below MIN_MATCH, and different year
                        # (iterate over a copy, since entries are removed from results inside the loop)
                        for movie_res in results[:]:

                            if year and movie_res.get('year'):
                                movie_res['year'] = int(movie_res['year'])
                                if movie_res['year'] != year:
                                    # The listed year differs; keep the hit only if a release date year matches
                                    release_year = False
                                    if movie_res.get('release_dates', {}).get('theater'):
                                        log.debug('Checking year against theater release date')
                                        release_year = time.strptime(movie_res['release_dates'].get('theater'),
                                                                     '%Y-%m-%d').tm_year
                                    elif movie_res.get('release_dates', {}).get('dvd'):
                                        log.debug('Checking year against dvd release date')
                                        release_year = time.strptime(movie_res['release_dates'].get('dvd'),
                                                                     '%Y-%m-%d').tm_year
                                    if not (release_year and release_year == year):
                                        log.debug('removing %s - %s (wrong year: %s)' %
                                                  (movie_res['title'], movie_res['id'],
                                                   str(release_year or movie_res['year'])))
                                        results.remove(movie_res)
                                        continue
                            if movie_res['match'] < MIN_MATCH:
                                log.debug('removing %s (min_match)' % movie_res['title'])
                                results.remove(movie_res)
                                continue

                        if not results:
                            raise PluginError('no appropiate results')

                        if len(results) == 1:
                            log.debug('SUCCESS: only one movie remains')
                        else:
                            # Check min difference between best two hits
                            diff = results[0]['match'] - results[1]['match']
                            if diff < MIN_DIFF:
                                log.debug('unable to determine correct movie, min_diff too small'
                                          '(`%s (%d) - %s` <-?-> `%s (%d) - %s`)' %
                                          (results[0]['title'], results[0]['year'], results[0]['id'],
                                           results[1]['title'], results[1]['year'], results[1]['id']))
                                for r in results:
                                    log.debug('remain: %s (match: %s) %s' % (r['title'], r['match'], r['id']))
                                raise PluginError('min_diff')

                        # Fetch full details for the best hit, preferring its imdb alias when it has one
                        imdb_alt_id = results[0].get('alternate_ids', {}).get('imdb')
                        if imdb_alt_id:
                            result = movies_alias(imdb_alt_id)
                        else:
                            result = movies_info(results[0].get('id'))

                        if not result:
                            # Detail lookup returned nothing, fall back to the search hit itself
                            result = results[0]

                        movie = RottenTomatoesMovie()
                        movie = _set_movie_details(movie, session, result)
                        # Record the caller's imdb id as a 'flexget_imdb' alternate id unless the movie
                        # already has it as an 'imdb' one. any() replaces `not filter(...)`, which is
                        # always False on Python 3.
                        if imdb_id and not any(
                                alt_id.name == 'imdb' and alt_id.id == imdb_id.lstrip('t')
                                for alt_id in movie.alternate_ids):
                            log.warning('Adding flexget_imdb alternate id %s for movie %s' %
                                        (imdb_id, movie))
                            movie.alternate_ids.append(RottenTomatoesAlternateId('flexget_imdb',
                                                                                 imdb_id.lstrip('t')))
                        session.add(movie)
                        session.commit()

                        # Remember which search string led to this movie when it differs from the title
                        if title.lower() != movie.title.lower():
                            log.debug('Saving search result for \'%s\'' % search_string)
                            session.add(RottenTomatoesSearchResult(search=search_string, movie=movie))
        except URLError:
            raise PluginError('Error looking up movie from RottenTomatoes')

    if not movie:
        raise PluginError('No results found from rotten tomatoes for %s' % id_str())
    else:
        # Access attributes to force the relationships to eager load before we detach from session
        for attr in ['alternate_ids', 'cast', 'directors', 'genres', 'links', 'posters', 'release_dates']:
            getattr(movie, attr)
        session.commit()
        return movie