Example #1
def test_withsession(self):
    session = Session()
    persist = SimplePersistence('testplugin', session=session)
    persist['aoeu'] = 'test'
    assert persist['aoeu'] == 'test'
    # Make sure it didn't commit or close our session
    session.rollback()
    assert 'aoeu' not in persist
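
The test above passes an external session to SimplePersistence and verifies that the store neither commits nor closes it. As a complementary illustration, the sketch below shows the plain dict-like usage with no session supplied; the import path and the self-managed-session behaviour are assumptions, not taken from the example itself.

# Hypothetical usage sketch: SimplePersistence as a dict-like store scoped to a plugin name.
# The import path is assumed; when no session is passed, the store is assumed to manage
# (and commit) its own session internally.
from flexget.utils.simple_persistence import SimplePersistence

persist = SimplePersistence('testplugin')
persist['last_run'] = '2014-01-01'            # value persisted under the plugin's namespace
assert persist['last_run'] == '2014-01-01'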
Example #2
File: archive.py  Project: DColl/Flexget
def consolidate():
    """
    Converts previous archive data model to new one.
    """

    session = Session()
    try:
        log.verbose('Checking archive size ...')
        count = session.query(ArchiveEntry).count()
        log.verbose('Found %i items to migrate, this can be aborted with CTRL-C safely.' % count)

        # consolidate old data
        from progressbar import ProgressBar, Percentage, Bar, ETA

        widgets = ['Process - ', ETA(), ' ', Percentage(), ' ', Bar(left='[', right=']')]
        bar = ProgressBar(widgets=widgets, maxval=count).start()

        # id's for duplicates
        duplicates = []

        for index, orig in enumerate(session.query(ArchiveEntry).yield_per(5)):
            bar.update(index)

            # item already processed
            if orig.id in duplicates:
                continue

            # item already migrated
            if orig.sources:
                log.info('Database looks like it has already been consolidated, '
                         'item %s already has sources ...' % orig.title)
                session.rollback()
                return

            # add legacy task to the sources list
            orig.sources.append(get_source(orig.task, session))
            # remove task, deprecated .. well, let's still keep it ..
            #orig.task = None

            for dupe in session.query(ArchiveEntry).\
                filter(ArchiveEntry.id != orig.id).\
                filter(ArchiveEntry.title == orig.title).\
                    filter(ArchiveEntry.url == orig.url).all():
                orig.sources.append(get_source(dupe.task, session))
                duplicates.append(dupe.id)

        if duplicates:
            log.info('Consolidated %i items, removing duplicates ...' % len(duplicates))
            for id in duplicates:
                session.query(ArchiveEntry).filter(ArchiveEntry.id == id).delete()
        session.commit()
        log.info('Completed! This does NOT need to be run again.')
    except KeyboardInterrupt:
        session.rollback()
        log.critical('Aborted, no changes saved')
    finally:
        session.close()
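
Stripped of the archive-specific models, consolidate() follows a commit-on-success, roll-back-on-interrupt, always-close session pattern. A minimal sketch of just that pattern, assuming the same Session factory from flexget.manager:

from flexget.manager import Session

def run_migration(work):
    # `work` is any callable that makes its changes against the supplied session
    session = Session()
    try:
        work(session)
        session.commit()        # persist only if everything succeeded
    except KeyboardInterrupt:
        session.rollback()      # abort cleanly, leaving the database untouched
        raise
    finally:
        session.close()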
Example #3
def consolidate():
    """
    Converts previous archive data model to new one.
    """

    session = Session()
    try:
        log.verbose('Checking archive size ...')
        count = session.query(ArchiveEntry).count()
        log.verbose('Found %i items to migrate, this can be aborted with CTRL-C safely.' % count)

        # consolidate old data
        from progressbar import ProgressBar, Percentage, Bar, ETA

        widgets = ['Process - ', ETA(), ' ', Percentage(), ' ', Bar(left='[', right=']')]
        bar = ProgressBar(widgets=widgets, maxval=count).start()

        # id's for duplicates
        duplicates = []

        for index, orig in enumerate(session.query(ArchiveEntry).yield_per(5)):
            bar.update(index)

            # item already processed
            if orig.id in duplicates:
                continue

            # item already migrated
            if orig.sources:
                log.info('Database looks like it has already been consolidated, '
                         'item %s already has sources ...' % orig.title)
                session.rollback()
                return

            # add legacy task to the sources list
            orig.sources.append(get_source(orig.task, session))
            # remove task, deprecated .. well, let's still keep it ..
            # orig.task = None

            for dupe in session.query(ArchiveEntry).\
                filter(ArchiveEntry.id != orig.id).\
                filter(ArchiveEntry.title == orig.title).\
                    filter(ArchiveEntry.url == orig.url).all():
                orig.sources.append(get_source(dupe.task, session))
                duplicates.append(dupe.id)

        if duplicates:
            log.info('Consolidated %i items, removing duplicates ...' % len(duplicates))
            for id in duplicates:
                session.query(ArchiveEntry).filter(ArchiveEntry.id == id).delete()
        session.commit()
        log.info('Completed! This does NOT need to be run again.')
    except KeyboardInterrupt:
        session.rollback()
        log.critical('Aborted, no changes saved')
    finally:
        session.close()
Example #4
class Task(object):
    """
    Represents one task in the configuration.

    **Fires events:**

    * task.execute.before_plugin

      Before a plugin is about to be executed. Note that since this will also include all
      builtin plugins, the number of calls can be quite high

      ``parameters: task, keyword``

    * task.execute.after_plugin

      After a plugin has been executed.

      ``parameters: task, keyword``

    * task.execute.completed

      After task execution has been completed

      ``parameters: task``

    """

    max_reruns = 5

    def __init__(self, manager, name, config=None, options=None):
        """
        :param Manager manager: Manager instance.
        :param string name: Name of the task.
        :param dict config: Task configuration.
        """
        self.name = unicode(name)
        self.manager = manager
        # raw_config should remain the untouched input config
        if config is None:
            config = manager.config['tasks'].get(name, {})
        self.config = copy.deepcopy(config)
        self.prepared_config = None
        if options is None:
            options = copy.copy(self.manager.options.execute)
        elif isinstance(options, dict):
            options_namespace = copy.copy(self.manager.options.execute)
            options_namespace.__dict__.update(options)
            options = options_namespace
        self.options = options

        # simple persistence
        self.simple_persistence = SimpleTaskPersistence(self)

        # not to be reset
        self._rerun_count = 0

        self.config_modified = None

        # use reset to init variables when creating
        self._reset()

    @property
    def undecided(self):
        """
        .. deprecated:: Use API v3
        """
        return self.all_entries.undecided

    @property
    def failed(self):
        """
        .. deprecated:: Use API v3
        """
        return self.all_entries.failed

    @property
    def rejected(self):
        """
        .. deprecated:: Use API v3
        """
        return self.all_entries.rejected

    @property
    def accepted(self):
        """
        .. deprecated:: Use API v3
        """
        return self.all_entries.accepted

    @property
    def entries(self):
        """
        .. deprecated:: Use API v3
        """
        return self.all_entries.entries

    @property
    def all_entries(self):
        """
        .. deprecated:: Use API v3
        """
        return self._all_entries

    @property
    def is_rerun(self):
        return self._rerun_count

    # TODO: can we get rid of this now that Tasks are instantiated on demand?
    def _reset(self):
        """Reset task state"""
        log.debug('resetting %s' % self.name)
        self.enabled = not self.name.startswith('_')
        self.session = None
        self.priority = 65535

        self.requests = requests.Session()

        # List of all entries in the task
        self._all_entries = EntryContainer()

        self.disabled_phases = []

        # These are just to query what happened in task. Call task.abort to set.
        self.aborted = False
        self.abort_reason = None
        self.silent_abort = False

        self._rerun = False

        # current state
        self.current_phase = None
        self.current_plugin = None

    def __cmp__(self, other):
        return cmp(self.priority, other.priority)

    def __str__(self):
        return '<Task(name=%s,aborted=%s)>' % (self.name, self.aborted)

    def disable_phase(self, phase):
        """Disable ``phase`` from execution.

        All disabled phases are re-enabled by :meth:`Task._reset()` after task
        execution has been completed.

        :param string phase: Name of ``phase``
        :raises ValueError: *phase* could not be found.
        """
        if phase not in task_phases:
            raise ValueError('%s is not a valid phase' % phase)
        if phase not in self.disabled_phases:
            log.debug('Disabling %s phase' % phase)
            self.disabled_phases.append(phase)

    def abort(self, reason='Unknown', silent=False):
        """Abort this task execution, no more plugins will be executed except the abort handling ones."""
        self.aborted = True
        self.abort_reason = reason
        self.silent_abort = silent
        if not self.silent_abort:
            log.warning('Aborting task (plugin: %s)' % self.current_plugin)
        else:
            log.debug('Aborting task (plugin: %s)' % self.current_plugin)
        raise TaskAbort(reason, silent=silent)

    def find_entry(self, category='entries', **values):
        """
        Find and return :class:`~flexget.entry.Entry` with given attributes from task or None

        :param string category: entries, accepted, rejected or failed. Defaults to entries.
        :param values: Key values of entries to be searched
        :return: Entry or None
        """
        cat = getattr(self, category)
        if not isinstance(cat, EntryIterator):
            raise TypeError('category must be an EntryIterator')
        for entry in cat:
            for k, v in values.iteritems():
                if not (k in entry and entry[k] == v):
                    break
            else:
                return entry
        return None

    def plugins(self, phase=None):
        """Get currently enabled plugins.

        :param string phase:
          Optional, limits to plugins currently configured on given phase, sorted in phase order.
        :return:
          An iterator over configured :class:`flexget.plugin.PluginInfo` instances enabled on this task.
        """
        if phase:
            plugins = sorted(get_plugins(phase=phase),
                             key=lambda p: p.phase_handlers[phase],
                             reverse=True)
        else:
            plugins = all_plugins.itervalues()
        return (p for p in plugins if p.name in self.config or p.builtin)

    def __run_task_phase(self, phase):
        """Executes task phase, ie. call all enabled plugins on the task.

        Fires events:

        * task.execute.before_plugin
        * task.execute.after_plugin

        :param string phase: Name of the phase
        """
        if phase not in phase_methods:
            raise Exception('%s is not a valid task phase' % phase)
        # warn if no inputs, filters or outputs in the task
        if phase in ['input', 'filter', 'output']:
            if not self.manager.unit_test:
                # Check that there is at least one manually configured plugin for these phases
                for p in self.plugins(phase):
                    if not p.builtin:
                        break
                else:
                    log.warning(
                        'Task doesn\'t have any %s plugins, you should add (at least) one!'
                        % phase)

        for plugin in self.plugins(phase):
            # Abort this phase if one of the plugins disables it
            if phase in self.disabled_phases:
                return
            # store execute info, except during entry events
            self.current_phase = phase
            self.current_plugin = plugin.name

            if plugin.api_ver == 1:
                # backwards compatibility
                # pass method only task (old behaviour)
                args = (self, )
            else:
                # pass method task, copy of config (so plugin cannot modify it)
                args = (self, copy.copy(self.config.get(plugin.name)))

            try:
                fire_event('task.execute.before_plugin', self, plugin.name)
                response = self.__run_plugin(plugin, phase, args)
                if phase == 'input' and response:
                    # add entries returned by input to self.all_entries
                    for e in response:
                        e.task = self
                    self.all_entries.extend(response)
            finally:
                fire_event('task.execute.after_plugin', self, plugin.name)

    def __run_plugin(self, plugin, phase, args=None, kwargs=None):
        """
        Execute the given plugin's phase method with the supplied args and kwargs.
        If plugin throws unexpected exceptions :meth:`abort` will be called.

        :param PluginInfo plugin: Plugin to be executed
        :param string phase: Name of the phase to be executed
        :param args: Passed to the plugin
        :param kwargs: Passed to the plugin
        """
        keyword = plugin.name
        method = plugin.phase_handlers[phase]
        if args is None:
            args = []
        if kwargs is None:
            kwargs = {}

        # log.trace('Running %s method %s' % (keyword, method))
        # call the plugin
        try:
            return method(*args, **kwargs)
        except TaskAbort:
            raise
        except PluginWarning as warn:
            # check if this warning should be logged only once (may keep repeating)
            if warn.kwargs.get('log_once', False):
                from flexget.utils.log import log_once
                log_once(warn.value, warn.log)
            else:
                warn.log.warning(warn)
        except EntryUnicodeError as eue:
            msg = (
                'Plugin %s tried to create non-unicode compatible entry (key: %s, value: %r)'
                % (keyword, eue.key, eue.value))
            log.critical(msg)
            self.abort(msg)
        except PluginError as err:
            err.log.critical(err.value)
            self.abort(err.value)
        except DependencyError as e:
            msg = (
                'Plugin `%s` cannot be used because dependency `%s` is missing.'
                % (keyword, e.missing))
            log.critical(msg)
            log.debug(e.message)
            self.abort(msg)
        except Warning as e:
            # If warnings have been elevated to errors
            msg = 'Warning during plugin %s: %s' % (keyword, e)
            log.exception(msg)
            self.abort(msg)
        except Exception as e:
            msg = 'BUG: Unhandled error in plugin %s: %s' % (keyword, e)
            log.exception(msg)
            self.abort(msg)

    def rerun(self):
        """Immediately re-run the task after execute has completed,
        task can be re-run up to :attr:`.max_reruns` times."""
        msg = 'Plugin %s has requested the task to be run again after execution has completed.' % self.current_plugin
        # Only print the first request for a rerun to the info log
        log.debug(msg) if self._rerun else log.info(msg)
        if self._rerun_count >= self.max_reruns:
            self._rerun = False
            log.info(
                'Task has been re-run %s times already, stopping for now' %
                self._rerun_count)
            return
        self._rerun = True

    def config_changed(self):
        """
        Sets config_modified flag to True for the remainder of this run.
        Used when the db changes, and all entries need to be reprocessed.
        """
        self.config_modified = True

    @useTaskLogging
    def execute(self):
        """
        Executes the task.

        If :attr:`.enabled` is False task is not executed. Certain :attr:`.options`
        affect how execution is handled.

        - :attr:`.options.disable_phases` is a list of phases that are not enabled
          for this execution.
        - :attr:`.options.inject` is a list of :class:`Entry` instances used instead
          of running input phase.
        """
        if not self.enabled:
            log.debug('Not running disabled task %s' % self.name)
        if self.options.cron:
            self.manager.db_cleanup()

        self._reset()
        log.debug('executing %s' % self.name)
        if not self.enabled:
            log.debug('task %s disabled during preparation, not running' %
                      self.name)
            return

        # Handle keyword args
        if self.options.learn:
            log.info('Disabling download and output phases because of --learn')
            self.disable_phase('download')
            self.disable_phase('output')
        if self.options.disable_phases:
            map(self.disable_phase, self.options.disable_phases)
        if self.options.inject:
            # If entries are passed for this execution (eg. rerun), disable the input phase
            self.disable_phase('input')
            self.all_entries.extend(self.options.inject)

        log.debug('starting session')
        self.session = Session()

        # Save current config hash and set config_modified flag
        config_hash = hashlib.md5(str(sorted(self.config.items()))).hexdigest()
        last_hash = self.session.query(TaskConfigHash).filter(
            TaskConfigHash.task == self.name).first()
        if self.is_rerun:
            # Restore the config to state right after start phase
            if self.prepared_config:
                self.config = copy.deepcopy(self.prepared_config)
            else:
                log.error('BUG: No prepared_config on rerun, please report.')
            self.config_modified = False
        elif not last_hash:
            self.config_modified = True
            last_hash = TaskConfigHash(task=self.name, hash=config_hash)
            self.session.add(last_hash)
        elif last_hash.hash != config_hash:
            self.config_modified = True
            last_hash.hash = config_hash
        else:
            self.config_modified = False

        # run phases
        try:
            for phase in task_phases:
                if phase in self.disabled_phases:
                    # log keywords not executed
                    for plugin in self.plugins(phase):
                        if plugin.name in self.config:
                            log.info(
                                'Plugin %s is not executed because %s phase is disabled (e.g. --test)'
                                % (plugin.name, phase))
                    continue
                if phase == 'start' and self.is_rerun:
                    log.debug('skipping task_start during rerun')
                elif phase == 'exit' and self._rerun:
                    log.debug(
                        'not running task_exit yet because task will rerun')
                else:
                    # run all plugins with this phase
                    self.__run_task_phase(phase)
                    if phase == 'start':
                        # Store a copy of the config state after start phase to restore for reruns
                        self.prepared_config = copy.deepcopy(self.config)
        except TaskAbort:
            # Roll back the session before calling abort handlers
            self.session.rollback()
            try:
                self.__run_task_phase('abort')
                # Commit just the abort handler changes if no exceptions are raised there
                self.session.commit()
            except TaskAbort as e:
                log.exception('abort handlers aborted: %s' % e)
            raise
        else:
            for entry in self.all_entries:
                entry.complete()
            log.debug('committing session')
            self.session.commit()
            fire_event('task.execute.completed', self)
        finally:
            # this will cause database rollback on exception
            self.session.close()

        # rerun task
        if self._rerun:
            log.info(
                'Rerunning the task in case better resolution can be achieved.'
            )
            self._rerun_count += 1
            # TODO: Potential optimization is to take snapshots (maybe make the ones backlog uses built in instead of
            # taking another one) after input and just inject the same entries for the rerun
            self.execute()

    def __eq__(self, other):
        if hasattr(other, 'name'):
            return self.name == other.name
        return NotImplemented

    def __copy__(self):
        new = type(self)(self.manager, self.name, self.config, self.options)
        # Update all the variables of new instance to match our own
        new.__dict__.update(self.__dict__)
        # Some mutable objects need to be copies
        new.options = copy.copy(self.options)
        new.config = copy.deepcopy(self.config)
        return new

    copy = __copy__
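
The fire_event() calls inside __run_task_phase() and execute() correspond to the hooks listed in the Task docstring. A hedged sketch of subscribing to them is shown below; it assumes flexget.event provides an `event` decorator that registers a handler for a named hook, which is not shown in the example itself.

# Assumed: flexget.event exposes an `event` decorator matching the fire_event() calls above.
from flexget.event import event

@event('task.execute.before_plugin')
def before_plugin(task, keyword):
    # fired for every plugin, builtins included, so keep this handler cheap
    pass

@event('task.execute.completed')
def task_completed(task):
    # fired once per task after all phases have run successfully
    print('task %s finished, aborted=%s' % (task.name, task.aborted))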
Example #5
def lookup_movie(
    title=None, year=None, rottentomatoes_id=None, imdb_id=None, smart_match=None, only_cached=False, session=None
):
    """Do a lookup from Rotten Tomatoes for the movie matching the passed arguments.

    Any combination of criteria can be passed; the most specific criterion specified will be used.

    :param rottentomatoes_id: rottentomatoes_id of desired movie
    :param imdb_id: imdb_id of desired movie
    :param title: title of desired movie
    :param year: release year of desired movie
    :param smart_match: attempt to clean and parse title and year from a string
    :param only_cached: if this is specified, an online lookup will not occur if the movie is not in the cache
    :param session: optionally specify a session to use, if specified, returned Movie will be live in that session
    :returns: The Movie object populated with data from Rotten Tomatoes
    :raises: PluginError if a match cannot be found or there are other problems with the lookup

    """

    if smart_match:
        # If smart_match was specified, and we don't have more specific criteria, parse it into a title and year
        title_parser = MovieParser()
        title_parser.parse(smart_match)
        title = title_parser.name
        year = title_parser.year
        if title == "" and not (rottentomatoes_id or imdb_id or title):
            raise PluginError("Failed to parse name from %s" % smart_match)

    if title:
        search_string = title.lower()
        if year:
            search_string = "%s %s" % (search_string, year)
    elif not (rottentomatoes_id or imdb_id):
        raise PluginError("No criteria specified for rotten tomatoes lookup")

    def id_str():
        return "<title=%s,year=%s,rottentomatoes_id=%s,imdb_id=%s>" % (title, year, rottentomatoes_id, imdb_id)

    if not session:
        session = Session()

    log.debug("Looking up rotten tomatoes information for %s" % id_str())

    movie = None

    if rottentomatoes_id:
        movie = session.query(RottenTomatoesMovie).filter(RottenTomatoesMovie.id == rottentomatoes_id).first()
    if not movie and imdb_id:
        alt_id = (
            session.query(RottenTomatoesAlternateId)
            .filter(RottenTomatoesAlternateId.name.in_(["imdb", "flexget_imdb"]))
            .filter(RottenTomatoesAlternateId.id == imdb_id.lstrip("t"))
            .first()
        )
        if alt_id:
            movie = session.query(RottenTomatoesMovie).filter(RottenTomatoesMovie.id == alt_id.movie_id).first()
    if not movie and title:
        movie_filter = session.query(RottenTomatoesMovie).filter(func.lower(RottenTomatoesMovie.title) == title.lower())
        if year:
            movie_filter = movie_filter.filter(RottenTomatoesMovie.year == year)
        movie = movie_filter.first()
        if not movie:
            log.debug("No matches in movie cache found, checking search cache.")
            found = (
                session.query(RottenTomatoesSearchResult)
                .filter(func.lower(RottenTomatoesSearchResult.search) == search_string)
                .first()
            )
            if found and found.movie:
                log.debug("Movie found in search cache.")
                movie = found.movie
    if movie:
        # Movie found in cache, check if cache has expired.
        if movie.expired and not only_cached:
            log.debug("Cache has expired for %s, attempting to refresh from Rotten Tomatoes." % id_str())
            try:
                imdb_alt_id = (
                    movie.alternate_ids
                    and filter(lambda alt_id: alt_id.name in ["imdb", "flexget_imdb"], movie.alternate_ids)[0].id
                )
                if imdb_alt_id:
                    result = movies_alias(imdb_alt_id, "imdb")
                else:
                    result = movies_info(movie.id)
                movie = set_movie_details(movie, session, result)
                session.merge(movie)
            except URLError:
                log.error("Error refreshing movie details from Rotten Tomatoes, cached info being used.")
        else:
            log.debug("Movie %s information restored from cache." % id_str())
    else:
        if only_cached:
            raise PluginError("Movie %s not found from cache" % id_str())
        # There was no movie found in the cache, do a lookup from Rotten Tomatoes
        log.debug("Movie %s not found in cache, looking up from rotten tomatoes." % id_str())
        try:
            # Lookups using imdb_id
            if imdb_id:
                log.debug("Using IMDB alias %s." % imdb_id)
                result = movies_alias(imdb_id, "imdb")
                if result:
                    mismatch = []
                    if (
                        title
                        and difflib.SequenceMatcher(
                            lambda x: x == " ", re.sub(r"\s+\(.*\)$", "", result["title"].lower()), title.lower()
                        ).ratio()
                        < MIN_MATCH
                    ):
                        mismatch.append("the title (%s <-?-> %s)" % (title, result["title"]))
                    result["year"] = int(result["year"])
                    if year and fabs(result["year"] - year) > 1:
                        mismatch.append("the year (%s <-?-> %s)" % (year, result["year"]))
                        release_year = None
                        if result.get("release_dates", {}).get("theater"):
                            log.debug("Checking year against theater release date")
                            release_year = time.strptime(result["release_dates"].get("theater"), "%Y-%m-%d").tm_year
                            if fabs(release_year - year) > 1:
                                mismatch.append("the theater release (%s)" % release_year)
                        elif result.get("release_dates", {}).get("dvd"):
                            log.debug("Checking year against dvd release date")
                            release_year = time.strptime(result["release_dates"].get("dvd"), "%Y-%m-%d").tm_year
                            if fabs(release_year - year) > 1:
                                mismatch.append("the DVD release (%s)" % release_year)
                    if mismatch:
                        log.warning(
                            "Rotten Tomatoes had an imdb alias for %s but it didn't match %s."
                            % (imdb_id, ", or ".join(mismatch))
                        )
                    else:
                        log.debug("imdb_id %s maps to rt_id %s, checking db for info." % (imdb_id, result["id"]))
                        movie = (
                            session.query(RottenTomatoesMovie)
                            .filter(RottenTomatoesMovie.id == result.get("id"))
                            .first()
                        )
                        if movie:
                            log.debug(
                                "Movie %s was in database, but did not have the imdb_id stored, "
                                "forcing an update" % movie
                            )
                            movie = set_movie_details(movie, session, result)
                            session.merge(movie)
                        else:
                            log.debug("%s was not in database, setting info." % result["title"])
                            movie = RottenTomatoesMovie()
                            movie = set_movie_details(movie, session, result)
                            if not movie:
                                raise PluginError("set_movie_details returned %s" % movie)
                            session.add(movie)
                else:
                    log.debug("IMDB alias %s returned no results." % imdb_id)
            if not movie and rottentomatoes_id:
                result = movies_info(rottentomatoes_id)
                if result:
                    movie = RottenTomatoesMovie()
                    movie = set_movie_details(movie, session, result)
                    session.add(movie)
            if not movie and title:
                log.verbose("Searching from rt `%s`" % search_string)
                results = movies_search(search_string)
                if results:
                    results = results.get("movies")
                    if results:
                        for movie_res in results:
                            seq = difflib.SequenceMatcher(lambda x: x == " ", movie_res["title"].lower(), title.lower())
                            movie_res["match"] = seq.ratio()
                        results.sort(key=lambda x: x["match"], reverse=True)

                        # Remove all movies below MIN_MATCH, and different year
                        for movie_res in results[:]:

                            if year and movie_res.get("year"):
                                movie_res["year"] = int(movie_res["year"])
                                if movie_res["year"] != year:
                                    release_year = False
                                    if movie_res.get("release_dates", {}).get("theater"):
                                        log.debug("Checking year against theater release date")
                                        release_year = time.strptime(
                                            movie_res["release_dates"].get("theater"), "%Y-%m-%d"
                                        ).tm_year
                                    elif movie_res.get("release_dates", {}).get("dvd"):
                                        log.debug("Checking year against dvd release date")
                                        release_year = time.strptime(
                                            movie_res["release_dates"].get("dvd"), "%Y-%m-%d"
                                        ).tm_year
                                    if not (release_year and release_year == year):
                                        log.debug(
                                            "removing %s - %s (wrong year: %s)"
                                            % (
                                                movie_res["title"],
                                                movie_res["id"],
                                                str(release_year or movie_res["year"]),
                                            )
                                        )
                                        results.remove(movie_res)
                                        continue
                            if movie_res["match"] < MIN_MATCH:
                                log.debug("removing %s (min_match)" % movie_res["title"])
                                results.remove(movie_res)
                                continue

                        if not results:
                            raise PluginError("no appropiate results")

                        if len(results) == 1:
                            log.debug("SUCCESS: only one movie remains")
                        else:
                            # Check min difference between best two hits
                            diff = results[0]["match"] - results[1]["match"]
                            if diff < MIN_DIFF:
                                log.debug(
                                    "unable to determine correct movie, min_diff too small"
                                    "(`%s (%d) - %s` <-?-> `%s (%d) - %s`)"
                                    % (
                                        results[0]["title"],
                                        results[0]["year"],
                                        results[0]["id"],
                                        results[1]["title"],
                                        results[1]["year"],
                                        results[1]["id"],
                                    )
                                )
                                for r in results:
                                    log.debug("remain: %s (match: %s) %s" % (r["title"], r["match"], r["id"]))
                                raise PluginError("min_diff")

                        imdb_alt_id = results[0].get("alternate_ids", {}).get("imdb")
                        if imdb_alt_id:
                            result = movies_alias(imdb_alt_id)
                        else:
                            result = movies_info(results[0].get("id"))

                        if not result:
                            result = results[0]

                        movie = RottenTomatoesMovie()
                        try:
                            movie = set_movie_details(movie, session, result)
                            if imdb_id and not filter(
                                lambda alt_id: alt_id.name == "imdb" and alt_id.id == imdb_id.lstrip("t"),
                                movie.alternate_ids,
                            ):
                                log.warning("Adding flexget_imdb alternate id %s for movie %s" % (imdb_id, movie))
                                movie.alternate_ids.append(
                                    RottenTomatoesAlternateId("flexget_imdb", imdb_id.lstrip("t"))
                                )
                            session.add(movie)
                            session.commit()
                        except IntegrityError:
                            log.warning(
                                "Found movie %s in database after search even though we "
                                "already looked, updating it with search result." % movie
                            )
                            session.rollback()
                            movie = (
                                session.query(RottenTomatoesMovie)
                                .filter(RottenTomatoesMovie.id == result["id"])
                                .first()
                            )
                            movie = set_movie_details(movie, session, result)
                            if imdb_id and not filter(
                                lambda alt_id: alt_id.name == "imdb" and alt_id.id == imdb_id.lstrip("t"),
                                movie.alternate_ids,
                            ):
                                log.warning("Adding flexget_imdb alternate id %s for movie %s" % (imdb_id, movie))
                                movie.alternate_ids.append(
                                    RottenTomatoesAlternateId("flexget_imdb", imdb_id.lstrip("t"))
                                )
                            session.merge(movie)
                            session.commit()

                        if title.lower() != movie.title.lower():
                            log.debug("Saving search result for '%s'" % search_string)
                            session.add(RottenTomatoesSearchResult(search=search_string, movie=movie))
        except URLError:
            raise PluginError("Error looking up movie from RottenTomatoes")

    if not movie:
        raise PluginError("No results found from rotten tomatoes for %s" % id_str())
    else:
        # Access attributes to force the relationships to eager load before we detach from session
        for attr in ["alternate_ids", "cast", "directors", "genres", "links", "posters", "release_dates"]:
            getattr(movie, attr)
        session.commit()
        return movie
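
For reference, here are hypothetical call sites for lookup_movie() based on its docstring; the argument values are illustrative only, while the title and year attributes on the returned RottenTomatoesMovie are taken from the code above.

# The most specific criterion provided wins; PluginError is raised when nothing matches.
movie = lookup_movie(imdb_id='tt0133093')
movie = lookup_movie(title='The Matrix', year=1999)
movie = lookup_movie(smart_match='The Matrix 1999')   # title and year parsed from one string
print('%s (%s)' % (movie.title, movie.year))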
Example #6
File: task.py  Project: BrainDamage/Flexget
class Task(object):

    """
    Represents one task in the configuration.

    **Fires events:**

    * task.execute.before_plugin

      Before a plugin is about to be executed. Note that since this will also include all
      builtin plugins, the number of calls can be quite high

      ``parameters: task, keyword``

    * task.execute.after_plugin

      After a plugin has been executed.

      ``parameters: task, keyword``

    * task.execute.completed

      After task execution has been completed

      ``parameters: task``

    """

    max_reruns = 5

    def __init__(self, manager, name, config=None, options=None):
        """
        :param Manager manager: Manager instance.
        :param string name: Name of the task.
        :param dict config: Task configuration.
        """
        self.name = unicode(name)
        self.manager = manager
        # raw_config should remain the untouched input config
        if config is None:
            config = manager.config['tasks'].get(name, {})
        self.config = copy.deepcopy(config)
        self.prepared_config = None
        if options is None:
            options = copy.copy(self.manager.options.execute)
        elif isinstance(options, dict):
            options_namespace = copy.copy(self.manager.options.execute)
            options_namespace.__dict__.update(options)
            options = options_namespace
        self.options = options

        # simple persistence
        self.simple_persistence = SimpleTaskPersistence(self)

        # not to be reset
        self._rerun_count = 0

        self.config_modified = None

        # use reset to init variables when creating
        self._reset()

    @property
    def undecided(self):
        """
        .. deprecated:: Use API v3
        """
        return self.all_entries.undecided

    @property
    def failed(self):
        """
        .. deprecated:: Use API v3
        """
        return self.all_entries.failed

    @property
    def rejected(self):
        """
        .. deprecated:: Use API v3
        """
        return self.all_entries.rejected

    @property
    def accepted(self):
        """
        .. deprecated:: Use API v3
        """
        return self.all_entries.accepted

    @property
    def entries(self):
        """
        .. deprecated:: Use API v3
        """
        return self.all_entries.entries

    @property
    def all_entries(self):
        """
        .. deprecated:: Use API v3
        """
        return self._all_entries

    @property
    def is_rerun(self):
        return self._rerun_count

    # TODO: can we get rid of this now that Tasks are instantiated on demand?
    def _reset(self):
        """Reset task state"""
        log.debug('resetting %s' % self.name)
        self.enabled = not self.name.startswith('_')
        self.session = None
        self.priority = 65535

        self.requests = requests.Session()

        # List of all entries in the task
        self._all_entries = EntryContainer()

        self.disabled_phases = []

        # These are just to query what happened in task. Call task.abort to set.
        self.aborted = False
        self.abort_reason = None
        self.silent_abort = False

        self._rerun = False

        # current state
        self.current_phase = None
        self.current_plugin = None

    def __cmp__(self, other):
        return cmp(self.priority, other.priority)

    def __str__(self):
        return '<Task(name=%s,aborted=%s)>' % (self.name, self.aborted)

    def disable_phase(self, phase):
        """Disable ``phase`` from execution.

        All disabled phases are re-enabled by :meth:`Task._reset()` after task
        execution has been completed.

        :param string phase: Name of ``phase``
        :raises ValueError: *phase* could not be found.
        """
        if phase not in task_phases:
            raise ValueError('%s is not a valid phase' % phase)
        if phase not in self.disabled_phases:
            log.debug('Disabling %s phase' % phase)
            self.disabled_phases.append(phase)

    def abort(self, reason='Unknown', silent=False):
        """Abort this task execution, no more plugins will be executed except the abort handling ones."""
        self.aborted = True
        self.abort_reason = reason
        self.silent_abort = silent
        if not self.silent_abort:
            log.warning('Aborting task (plugin: %s)' % self.current_plugin)
        else:
            log.debug('Aborting task (plugin: %s)' % self.current_plugin)
        raise TaskAbort(reason, silent=silent)

    def find_entry(self, category='entries', **values):
        """
        Find and return :class:`~flexget.entry.Entry` with given attributes from task or None

        :param string category: entries, accepted, rejected or failed. Defaults to entries.
        :param values: Key values of entries to be searched
        :return: Entry or None
        """
        cat = getattr(self, category)
        if not isinstance(cat, EntryIterator):
            raise TypeError('category must be an EntryIterator')
        for entry in cat:
            for k, v in values.iteritems():
                if not (k in entry and entry[k] == v):
                    break
            else:
                return entry
        return None

    def plugins(self, phase=None):
        """Get currently enabled plugins.

        :param string phase:
          Optional, limits to plugins currently configured on given phase, sorted in phase order.
        :return:
          An iterator over configured :class:`flexget.plugin.PluginInfo` instances enabled on this task.
        """
        if phase:
            plugins = sorted(get_plugins(phase=phase), key=lambda p: p.phase_handlers[phase], reverse=True)
        else:
            plugins = all_plugins.itervalues()
        return (p for p in plugins if p.name in self.config or p.builtin)

    def __run_task_phase(self, phase):
        """Executes task phase, ie. call all enabled plugins on the task.

        Fires events:

        * task.execute.before_plugin
        * task.execute.after_plugin

        :param string phase: Name of the phase
        """
        if phase not in phase_methods:
            raise Exception('%s is not a valid task phase' % phase)
        # warn if no inputs, filters or outputs in the task
        if phase in ['input', 'filter', 'output']:
            if not self.manager.unit_test:
                # Check that there is at least one manually configured plugin for these phases
                for p in self.plugins(phase):
                    if not p.builtin:
                        break
                else:
                    log.warning('Task doesn\'t have any %s plugins, you should add (at least) one!' % phase)

        for plugin in self.plugins(phase):
            # Abort this phase if one of the plugins disables it
            if phase in self.disabled_phases:
                return
            # store execute info, except during entry events
            self.current_phase = phase
            self.current_plugin = plugin.name

            if plugin.api_ver == 1:
                # backwards compatibility
                # pass method only task (old behaviour)
                args = (self,)
            else:
                # pass method task, copy of config (so plugin cannot modify it)
                args = (self, copy.copy(self.config.get(plugin.name)))

            try:
                fire_event('task.execute.before_plugin', self, plugin.name)
                response = self.__run_plugin(plugin, phase, args)
                if phase == 'input' and response:
                    # add entries returned by input to self.all_entries
                    for e in response:
                        e.task = self
                    self.all_entries.extend(response)
            finally:
                fire_event('task.execute.after_plugin', self, plugin.name)

    def __run_plugin(self, plugin, phase, args=None, kwargs=None):
        """
        Execute the given plugin's phase method with the supplied args and kwargs.
        If plugin throws unexpected exceptions :meth:`abort` will be called.

        :param PluginInfo plugin: Plugin to be executed
        :param string phase: Name of the phase to be executed
        :param args: Passed to the plugin
        :param kwargs: Passed to the plugin
        """
        keyword = plugin.name
        method = plugin.phase_handlers[phase]
        if args is None:
            args = []
        if kwargs is None:
            kwargs = {}

        # log.trace('Running %s method %s' % (keyword, method))
        # call the plugin
        try:
            return method(*args, **kwargs)
        except TaskAbort:
            raise
        except PluginWarning as warn:
            # check if this warning should be logged only once (may keep repeating)
            if warn.kwargs.get('log_once', False):
                from flexget.utils.log import log_once
                log_once(warn.value, warn.log)
            else:
                warn.log.warning(warn)
        except EntryUnicodeError as eue:
            msg = ('Plugin %s tried to create non-unicode compatible entry (key: %s, value: %r)' %
                   (keyword, eue.key, eue.value))
            log.critical(msg)
            self.abort(msg)
        except PluginError as err:
            err.log.critical(err.value)
            self.abort(err.value)
        except DependencyError as e:
            msg = ('Plugin `%s` cannot be used because dependency `%s` is missing.' %
                   (keyword, e.missing))
            log.critical(msg)
            log.debug(e.message)
            self.abort(msg)
        except Warning as e:
            # If warnings have been elevated to errors
            msg = 'Warning during plugin %s: %s' % (keyword, e)
            log.exception(msg)
            self.abort(msg)
        except Exception as e:
            msg = 'BUG: Unhandled error in plugin %s: %s' % (keyword, e)
            log.exception(msg)
            self.abort(msg)

    def rerun(self):
        """Immediately re-run the task after execute has completed,
        task can be re-run up to :attr:`.max_reruns` times."""
        msg = 'Plugin %s has requested the task to be run again after execution has completed.' % self.current_plugin
        # Only print the first request for a rerun to the info log
        log.debug(msg) if self._rerun else log.info(msg)
        if self._rerun_count >= self.max_reruns:
            self._rerun = False
            log.info('Task has been re-run %s times already, stopping for now' % self._rerun_count)
            return
        self._rerun = True

    def config_changed(self):
        """
        Sets config_modified flag to True for the remainder of this run.
        Used when the db changes, and all entries need to be reprocessed.
        """
        self.config_modified = True

    @useTaskLogging
    def execute(self):
        """
        Executes the task.

        If :attr:`.enabled` is False task is not executed. Certain :attr:`.options`
        affect how execution is handled.

        - :attr:`.options.disable_phases` is a list of phases that are not enabled
          for this execution.
        - :attr:`.options.inject` is a list of :class:`Entry` instances used instead
          of running input phase.
        """
        if not self.enabled:
            log.debug('Not running disabled task %s' % self.name)
        if self.options.cron:
            self.manager.db_cleanup()

        self._reset()
        log.debug('executing %s' % self.name)
        if not self.enabled:
            log.debug('task %s disabled during preparation, not running' % self.name)
            return

        # Handle keyword args
        if self.options.learn:
            log.info('Disabling download and output phases because of --learn')
            self.disable_phase('download')
            self.disable_phase('output')
        if self.options.disable_phases:
            map(self.disable_phase, self.options.disable_phases)
        if self.options.inject:
            # If entries are passed for this execution (eg. rerun), disable the input phase
            self.disable_phase('input')
            self.all_entries.extend(self.options.inject)

        log.debug('starting session')
        self.session = Session()

        # Save current config hash and set config_modified flag
        config_hash = hashlib.md5(str(sorted(self.config.items()))).hexdigest()
        last_hash = self.session.query(TaskConfigHash).filter(TaskConfigHash.task == self.name).first()
        if self.is_rerun:
            # Restore the config to state right after start phase
            if self.prepared_config:
                self.config = copy.deepcopy(self.prepared_config)
            else:
                log.error('BUG: No prepared_config on rerun, please report.')
            self.config_modified = False
        elif not last_hash:
            self.config_modified = True
            last_hash = TaskConfigHash(task=self.name, hash=config_hash)
            self.session.add(last_hash)
        elif last_hash.hash != config_hash:
            self.config_modified = True
            last_hash.hash = config_hash
        else:
            self.config_modified = False

        # run phases
        try:
            for phase in task_phases:
                if phase in self.disabled_phases:
                    # log keywords not executed
                    for plugin in self.plugins(phase):
                        if plugin.name in self.config:
                            log.info('Plugin %s is not executed because %s phase is disabled (e.g. --test)' %
                                     (plugin.name, phase))
                    continue
                if phase == 'start' and self.is_rerun:
                    log.debug('skipping task_start during rerun')
                elif phase == 'exit' and self._rerun:
                    log.debug('not running task_exit yet because task will rerun')
                else:
                    # run all plugins with this phase
                    self.__run_task_phase(phase)
                    if phase == 'start':
                        # Store a copy of the config state after start phase to restore for reruns
                        self.prepared_config = copy.deepcopy(self.config)
        except TaskAbort:
            # Roll back the session before calling abort handlers
            self.session.rollback()
            try:
                self.__run_task_phase('abort')
                # Commit just the abort handler changes if no exceptions are raised there
                self.session.commit()
            except TaskAbort as e:
                log.exception('abort handlers aborted: %s' % e)
            raise
        else:
            for entry in self.all_entries:
                entry.complete()
            log.debug('committing session')
            self.session.commit()
            fire_event('task.execute.completed', self)
        finally:
            # this will cause database rollback on exception
            self.session.close()

        # rerun task
        if self._rerun:
            log.info('Rerunning the task in case better resolution can be achieved.')
            self._rerun_count += 1
            # TODO: Potential optimization is to take snapshots (maybe make the ones backlog uses built in instead of
            # taking another one) after input and just inject the same entries for the rerun
            self.execute()

    def __eq__(self, other):
        if hasattr(other, 'name'):
            return self.name == other.name
        return NotImplemented

    def __copy__(self):
        new = type(self)(self.manager, self.name, self.config, self.options)
        # Update all the variables of new instance to match our own
        new.__dict__.update(self.__dict__)
        # Some mutable objects need to be copies
        new.options = copy.copy(self.options)
        new.config = copy.deepcopy(self.config)
        return new

    copy = __copy__
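
execute() detects configuration changes by hashing the sorted config items and comparing the digest with the one stored in TaskConfigHash from the previous run. A standalone sketch of that check, mirroring the Python 2 code above (the config values are illustrative only):

import hashlib

def config_hash(config):
    # str() of the sorted items gives a stable text representation of the dict (Python 2 str is bytes)
    return hashlib.md5(str(sorted(config.items()))).hexdigest()

previous = config_hash({'interval': '1 hour', 'series': ['Show A']})
current = config_hash({'interval': '2 hours', 'series': ['Show A']})
config_modified = previous != current   # True: the task configuration changed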
Example #7
def lookup_movie(title=None, year=None, rottentomatoes_id=None, imdb_id=None, smart_match=None, only_cached=False, session=None):
    """Do a lookup from Rotten Tomatoes for the movie matching the passed arguments.

    Any combination of criteria can be passed; the most specific criterion specified will be used.

    :param rottentomatoes_id: rottentomatoes_id of desired movie
    :param imdb_id: imdb_id of desired movie
    :param title: title of desired movie
    :param year: release year of desired movie
    :param smart_match: attempt to clean and parse title and year from a string
    :param only_cached: if this is specified, an online lookup will not occur if the movie is not in the cache
    :param session: optionally specify a session to use, if specified, returned Movie will be live in that session
    :returns: The Movie object populated with data from Rotten Tomatoes
    :raises: PluginError if a match cannot be found or there are other problems with the lookup

    """

    if smart_match:
        # If smart_match was specified, and we don't have more specific criteria, parse it into a title and year
        title_parser = MovieParser()
        title_parser.parse(smart_match)
        title = title_parser.name
        year = title_parser.year
        if title == '' and not (rottentomatoes_id or imdb_id or title):
            raise PluginError('Failed to parse name from %s' % smart_match)

    if title:
        search_string = title.lower()
        if year:
            search_string = '%s %s' % (search_string, year)
    elif not (rottentomatoes_id or imdb_id):
        raise PluginError('No criteria specified for rotten tomatoes lookup')

    def id_str():
        return '<title=%s,year=%s,rottentomatoes_id=%s,imdb_id=%s>' % (title, year, rottentomatoes_id, imdb_id)

    if not session:
        session = Session()

    log.debug('Looking up rotten tomatoes information for %s' % id_str())

    movie = None

    if rottentomatoes_id:
        movie = session.query(RottenTomatoesMovie).\
                filter(RottenTomatoesMovie.id == rottentomatoes_id).first()
    if not movie and imdb_id:
        alt_id = session.query(RottenTomatoesAlternateId).\
                filter(RottenTomatoesAlternateId.name.in_(['imdb', 'flexget_imdb'])).\
                filter(RottenTomatoesAlternateId.id == imdb_id.lstrip('t')).first()
        if alt_id:
            movie = session.query(RottenTomatoesMovie).filter(RottenTomatoesMovie.id == alt_id.movie_id).first()
    if not movie and title:
        movie_filter = session.query(RottenTomatoesMovie).filter(func.lower(RottenTomatoesMovie.title) == title.lower())
        if year:
            movie_filter = movie_filter.filter(RottenTomatoesMovie.year == year)
        movie = movie_filter.first()
        if not movie:
            found = session.query(RottenTomatoesSearchResult). \
                    filter(func.lower(RottenTomatoesSearchResult.search) == search_string).first()
            if found and found.movie:
                movie = found.movie
    if movie:
        # Movie found in cache, check if cache has expired.
        if movie.expired and not only_cached:
            log.debug('Cache has expired for %s, attempting to refresh from Rotten Tomatoes.' % id_str())
            try:
                imdb_alt_id = movie.alternate_ids and filter(lambda alt_id: alt_id.name in ['imdb', 'flexget_imdb'], movie.alternate_ids)[0].id
                if imdb_alt_id:
                    result = movies_alias(imdb_alt_id, 'imdb')
                else:
                    result = movies_info(movie.id)
                movie = set_movie_details(movie, session, result)
                session.merge(movie)
            except URLError:
                log.error('Error refreshing movie details from Rotten Tomatoes, cached info being used.')
        else:
            log.debug('Movie %s information restored from cache.' % id_str())
    else:
        if only_cached:
            raise PluginError('Movie %s not found from cache' % id_str())
        # There was no movie found in the cache, do a lookup from Rotten Tomatoes
        log.debug('Movie %s not found in cache, looking up from rotten tomatoes.' % id_str())
        try:
            # Lookups using imdb_id
            if imdb_id:
                log.debug('Using IMDB alias %s.' % imdb_id)
                result = movies_alias(imdb_id, 'imdb')
                if result:
                    mismatch = []
                    if title and difflib.SequenceMatcher(lambda x: x == ' ', re.sub(r'\s+\(.*\)$', '', result['title'].lower()),
                            title.lower()).ratio() < MIN_MATCH:
                        mismatch.append('the title (%s <-?-> %s)' % (title, result['title']))
                    if year and fabs(result['year'] - year) > 1:
                        mismatch.append('the year (%s <-?-> %s)' % (year, result['year']))
                        release_year = None
                        if result.get('release_dates', {}).get('theater'):
                            log.debug('Checking year against theater release date')
                            release_year = time.strptime(result['release_dates'].get('theater'), '%Y-%m-%d').tm_year
                            if fabs(release_year - year) > 1:
                                mismatch.append('the theater release (%s)' % release_year)
                        elif result.get('release_dates', {}).get('dvd'):
                            log.debug('Checking year against dvd release date')
                            release_year = time.strptime(result['release_dates'].get('dvd'), '%Y-%m-%d').tm_year
                            if fabs(release_year - year) > 1:
                                mismatch.append('the DVD release (%s)' % release_year)
                    if mismatch:
                        log.warning('Rotten Tomatoes had an imdb alias for %s but it didn\'t match %s.' % \
                            (imdb_id, ', or '.join(mismatch)))
                    else:
                        log.debug('imdb_id %s maps to rt_id %s, checking db for info.' % (imdb_id, result['id']))
                        movie = session.query(RottenTomatoesMovie).filter(RottenTomatoesMovie.id == result.get('id')).first()
                        if movie:
                            log.debug('Movie %s was in database, but did not have the imdb_id stored, '
                                    'forcing an update' % movie)
                            movie = set_movie_details(movie, session, result)
                            session.merge(movie)
                        else:
                            log.debug('%s was not in database, setting info.' % result['title'])
                            movie = RottenTomatoesMovie()
                            movie = set_movie_details(movie, session, result)
                            if not movie:
                                raise PluginError('set_movie_details returned %s' % movie)
                            session.add(movie)
                else:
                    log.debug('IMDB alias %s returned no results.' % imdb_id)
            if not movie and rottentomatoes_id:
                result = movies_info(rottentomatoes_id)
                if result:
                    movie = RottenTomatoesMovie()
                    movie = set_movie_details(movie, session, result)
                    session.add(movie)
            if not movie and title:
                log.verbose('Searching from rt `%s`' % search_string)
                results = movies_search(search_string)
                if results:
                    results = results.get('movies')
                    if results:
                        for movie_res in results:
                            seq = difflib.SequenceMatcher(lambda x: x == ' ',
                                    movie_res['title'].lower(), title.lower())
                            movie_res['match'] = seq.ratio()
                        results.sort(key=lambda x: x['match'], reverse=True)

                        # Remove all movies below MIN_MATCH, and different year
                        for movie_res in results[:]:

                            if year and movie_res.get('year') and movie_res['year'] != year:
                                release_year = False
                                if movie_res.get('release_dates', {}).get('theater'):
                                    log.debug('Checking year against theater release date')
                                    release_year = time.strptime(movie_res['release_dates'].get('theater'), '%Y-%m-%d').tm_year
                                elif movie_res.get('release_dates', {}).get('dvd'):
                                    log.debug('Checking year against dvd release date')
                                    release_year = time.strptime(movie_res['release_dates'].get('dvd'), '%Y-%m-%d').tm_year
                                if not (release_year and release_year == year):
                                    log.debug('removing %s - %s (wrong year: %s)' % (movie_res['title'],
                                        movie_res['id'], str(release_year or movie_res['year'])))
                                    results.remove(movie_res)
                                    continue
                            if movie_res['match'] < MIN_MATCH:
                                log.debug('removing %s (min_match)' % movie_res['title'])
                                results.remove(movie_res)
                                continue

                        if not results:
                            raise PluginError('no appropriate results')

                        if len(results) == 1:
                            log.debug('SUCCESS: only one movie remains')
                        else:
                            # Check min difference between best two hits
                            diff = results[0]['match'] - results[1]['match']
                            if diff < MIN_DIFF:
                                log.debug('unable to determine correct movie, min_diff too small '
                                        '(`%s (%d) - %s` <-?-> `%s (%d) - %s`)' %
                                        (results[0]['title'], results[0]['year'], results[0]['id'],
                                            results[1]['title'], results[1]['year'], results[1]['id']))
                                for r in results:
                                    log.debug('remain: %s (match: %s) %s' % (r['title'], r['match'],
                                        r['id']))
                                raise PluginError('min_diff')

                        imdb_alt_id = results[0].get('alternate_ids', {}).get('imdb')
                        if imdb_alt_id:
                            result = movies_alias(imdb_alt_id)
                        else:
                            result = movies_info(results[0].get('id'))

                        if not result:
                            result = results[0]

                        movie = RottenTomatoesMovie()
                        try:
                            movie = set_movie_details(movie, session, result)
                            if imdb_id and not filter(lambda alt_id: alt_id.name == 'imdb' and
                                    alt_id.id == imdb_id.lstrip('t'), movie.alternate_ids):
                                log.warning('Adding flexget_imdb alternate id %s for movie %s' %
                                        (imdb_id, movie))
                                movie.alternate_ids.append(RottenTomatoesAlternateId('flexget_imdb',\
                                        imdb_id.lstrip('t')))
                            session.add(movie)
                        except IntegrityError:
                            log.warning('Found movie %s in database after search even though we '
                                'already looked, updating it with search result.' % movie)
                            session.rollback()
                            movie = session.query(RottenTomatoesMovie).filter(RottenTomatoesMovie.id == result['id']).first()
                            movie = set_movie_details(movie, session, result)
                            if imdb_id and not filter(lambda alt_id: alt_id.name == 'imdb' and
                                    alt_id.id == imdb_id.lstrip('t'), movie.alternate_ids):
                                log.warning('Adding flexget_imdb alternate id %s for movie %s' %
                                        (imdb_id, movie))
                                movie.alternate_ids.append(RottenTomatoesAlternateId('flexget_imdb',\
                                        imdb_id.lstrip('t')))
                            session.merge(movie)

                        if title.lower() != movie.title.lower():
                            log.debug('Saving search result for \'%s\'' % search_string)
                            session.add(RottenTomatoesSearchResult(search=search_string, movie=movie))
        except URLError:
            raise PluginError('Error looking up movie from RottenTomatoes')

    if not movie:
        raise PluginError('No results found from rotten tomatoes for %s' % id_str())
    else:
        # Access attributes to force the relationships to eager load before we detach from session
        for attr in ['alternate_ids', 'cast', 'directors', 'genres', 'links', 'posters', 'release_dates']:
            getattr(movie, attr)
        session.commit()
        return movie
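
A minimal usage sketch for the lookup above (not part of the original example): fetch_rt_info is a hypothetical caller, and it assumes only the module-level log and PluginError already used in the snippet plus the documented keyword arguments.

def fetch_rt_info(raw_title, imdb_id=None):
    # Hypothetical caller: pass the most specific criteria available and let
    # lookup_movie fall back to a smart_match parse of the raw title string.
    try:
        movie = lookup_movie(imdb_id=imdb_id, smart_match=raw_title)
    except PluginError as err:
        log.debug('Rotten Tomatoes lookup failed: %s' % err)
        return None
    # Relationship attributes were eager-loaded before the lookup returned.
    return {'rt_id': movie.id, 'rt_title': movie.title, 'rt_year': movie.year}
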
Example #8
File: cli.py Project: ksurl/Flexget
def consolidate():
    """
    Converts previous archive data model to new one.
    """

    session = Session()
    try:
        logger.verbose('Checking archive size ...')
        count = session.query(
            flexget.components.archive.db.ArchiveEntry).count()
        logger.verbose(
            'Found {} items to migrate, this can be aborted with CTRL-C safely.',
            count)

        # consolidate old data
        # id's for duplicates
        duplicates = []

        for orig in track(
                session.query(
                    flexget.components.archive.db.ArchiveEntry).yield_per(5),
                total=count,
                description='Processing...',
        ):

            # item already processed
            if orig.id in duplicates:
                continue

            # item already migrated
            if orig.sources:
                logger.info(
                    'Database looks like it has already been consolidated, item {} has already sources ...',
                    orig.title,
                )
                session.rollback()
                return

            # add legacy task to the sources list
            orig.sources.append(
                flexget.components.archive.db.get_source(orig.task, session))
            # remove task, deprecated .. well, let's still keep it ..
            # orig.task = None

            for dupe in (session.query(
                    flexget.components.archive.db.ArchiveEntry).filter(
                        flexget.components.archive.db.ArchiveEntry.id !=
                        orig.id).filter(
                            flexget.components.archive.db.ArchiveEntry.title ==
                            orig.title).filter(
                                flexget.components.archive.db.ArchiveEntry.url
                                == orig.url).all()):
                orig.sources.append(
                    flexget.components.archive.db.get_source(
                        dupe.task, session))
                duplicates.append(dupe.id)

        if duplicates:
            logger.info('Consolidated {} items, removing duplicates ...',
                        len(duplicates))
            for id in duplicates:
                session.query(
                    flexget.components.archive.db.ArchiveEntry).filter(
                        flexget.components.archive.db.ArchiveEntry.id ==
                        id).delete()
        session.commit()
        logger.info('Completed! This does NOT need to be run again.')
    except KeyboardInterrupt:
        session.rollback()
        logger.critical('Aborted, no changes saved')
    finally:
        session.close()
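
Example #8 reports progress with rich's track() and logs with loguru's brace-style formatting; below is a small stand-alone sketch of just that reporting pattern (it assumes the rich and loguru packages are installed, and the item count is made up for illustration).

from loguru import logger
from rich.progress import track

def process_items(items):
    migrated = 0
    # track() wraps any iterable and renders a progress bar; pass total when
    # the iterable cannot report its own length (e.g. a yield_per() query).
    for item in track(items, total=len(items), description='Processing...'):
        migrated += 1
    # loguru defers {}-style formatting until the message is actually emitted.
    logger.info('Consolidated {} items', migrated)

process_items(list(range(250)))
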
Example #9
def lookup_movie(title=None,
                 year=None,
                 rottentomatoes_id=None,
                 imdb_id=None,
                 smart_match=None,
                 only_cached=False,
                 session=None):
    """Do a lookup from Rotten Tomatoes for the movie matching the passed arguments.

    Any combination of criteria can be passed, the most specific criteria specified will be used.

    :param rottentomatoes_id: rottentomatoes_id of desired movie
    :param imdb_id: imdb_id of desired movie
    :param title: title of desired movie
    :param year: release year of desired movie
    :param smart_match: attempt to clean and parse title and year from a string
    :param only_cached: if this is specified, an online lookup will not occur if the movie is not in the cache
    :param session: optionally specify a session to use, if specified, returned Movie will be live in that session
    :returns: The Movie object populated with data from Rotten Tomatoes
    :raises: PluginError if a match cannot be found or there are other problems with the lookup

    """

    if smart_match:
        # If smart_match was specified, and we don't have more specific criteria, parse it into a title and year
        title_parser = MovieParser()
        title_parser.parse(smart_match)
        title = title_parser.name
        year = title_parser.year
        if title == '' and not (rottentomatoes_id or imdb_id or title):
            raise PluginError('Failed to parse name from %s' % smart_match)

    if title:
        search_string = title.lower()
        if year:
            search_string = '%s %s' % (search_string, year)
    elif not (rottentomatoes_id or imdb_id):
        raise PluginError('No criteria specified for rotten tomatoes lookup')

    def id_str():
        return '<title=%s,year=%s,rottentomatoes_id=%s,imdb_id=%s>' % (
            title, year, rottentomatoes_id, imdb_id)

    if not session:
        session = Session()

    log.debug('Looking up rotten tomatoes information for %s' % id_str())

    movie = None

    if rottentomatoes_id:
        movie = session.query(RottenTomatoesMovie).\
                filter(RottenTomatoesMovie.id == rottentomatoes_id).first()
    if not movie and imdb_id:
        alt_id = session.query(RottenTomatoesAlternateId).\
                filter(RottenTomatoesAlternateId.name.in_(['imdb', 'flexget_imdb'])).\
                filter(RottenTomatoesAlternateId.id == imdb_id.lstrip('t')).first()
        if alt_id:
            movie = session.query(RottenTomatoesMovie).filter(
                RottenTomatoesMovie.id == alt_id.movie_id).first()
    if not movie and title:
        movie_filter = session.query(RottenTomatoesMovie).filter(
            func.lower(RottenTomatoesMovie.title) == title.lower())
        if year:
            movie_filter = movie_filter.filter(
                RottenTomatoesMovie.year == year)
        movie = movie_filter.first()
        if not movie:
            log.debug(
                'No matches in movie cache found, checking search cache.')
            found = session.query(RottenTomatoesSearchResult). \
                    filter(func.lower(RottenTomatoesSearchResult.search) == search_string).first()
            if found and found.movie:
                log.debug('Movie found in search cache.')
                movie = found.movie
    if movie:
        # Movie found in cache, check if cache has expired.
        if movie.expired and not only_cached:
            log.debug(
                'Cache has expired for %s, attempting to refresh from Rotten Tomatoes.'
                % id_str())
            try:
                imdb_alt_id = movie.alternate_ids and filter(
                    lambda alt_id: alt_id.name in ['imdb', 'flexget_imdb'],
                    movie.alternate_ids)[0].id
                if imdb_alt_id:
                    result = movies_alias(imdb_alt_id, 'imdb')
                else:
                    result = movies_info(movie.id)
                movie = set_movie_details(movie, session, result)
                session.merge(movie)
            except URLError:
                log.error(
                    'Error refreshing movie details from Rotten Tomatoes, cached info being used.'
                )
        else:
            log.debug('Movie %s information restored from cache.' % id_str())
    else:
        if only_cached:
            raise PluginError('Movie %s not found from cache' % id_str())
        # There was no movie found in the cache, do a lookup from Rotten Tomatoes
        log.debug(
            'Movie %s not found in cache, looking up from rotten tomatoes.' %
            id_str())
        try:
            # Lookups using imdb_id
            if imdb_id:
                log.debug('Using IMDB alias %s.' % imdb_id)
                result = movies_alias(imdb_id, 'imdb')
                if result:
                    mismatch = []
                    if title and difflib.SequenceMatcher(
                            lambda x: x == ' ',
                            re.sub(r'\s+\(.*\)$', '', result['title'].lower()),
                            title.lower()).ratio() < MIN_MATCH:
                        mismatch.append('the title (%s <-?-> %s)' %
                                        (title, result['title']))
                    result['year'] = int(result['year'])
                    if year and fabs(result['year'] - year) > 1:
                        mismatch.append('the year (%s <-?-> %s)' %
                                        (year, result['year']))
                        release_year = None
                        if result.get('release_dates', {}).get('theater'):
                            log.debug(
                                'Checking year against theater release date')
                            release_year = time.strptime(
                                result['release_dates'].get('theater'),
                                '%Y-%m-%d').tm_year
                            if fabs(release_year - year) > 1:
                                mismatch.append('the theater release (%s)' %
                                                release_year)
                        elif result.get('release_dates', {}).get('dvd'):
                            log.debug('Checking year against dvd release date')
                            release_year = time.strptime(
                                result['release_dates'].get('dvd'),
                                '%Y-%m-%d').tm_year
                            if fabs(release_year - year) > 1:
                                mismatch.append('the DVD release (%s)' %
                                                release_year)
                    if mismatch:
                        log.warning('Rotten Tomatoes had an imdb alias for %s but it didn\'t match %s.' % \
                            (imdb_id, ', or '.join(mismatch)))
                    else:
                        log.debug(
                            'imdb_id %s maps to rt_id %s, checking db for info.'
                            % (imdb_id, result['id']))
                        movie = session.query(RottenTomatoesMovie).filter(
                            RottenTomatoesMovie.id == result.get(
                                'id')).first()
                        if movie:
                            log.debug(
                                'Movie %s was in database, but did not have the imdb_id stored, '
                                'forcing an update' % movie)
                            movie = set_movie_details(movie, session, result)
                            session.merge(movie)
                        else:
                            log.debug('%s was not in database, setting info.' %
                                      result['title'])
                            movie = RottenTomatoesMovie()
                            movie = set_movie_details(movie, session, result)
                            if not movie:
                                raise PluginError(
                                    'set_movie_details returned %s' % movie)
                            session.add(movie)
                else:
                    log.debug('IMDB alias %s returned no results.' % imdb_id)
            if not movie and rottentomatoes_id:
                result = movies_info(rottentomatoes_id)
                if result:
                    movie = RottenTomatoesMovie()
                    movie = set_movie_details(movie, session, result)
                    session.add(movie)
            if not movie and title:
                log.verbose('Searching from rt `%s`' % search_string)
                results = movies_search(search_string)
                if results:
                    results = results.get('movies')
                    if results:
                        for movie_res in results:
                            seq = difflib.SequenceMatcher(
                                lambda x: x == ' ', movie_res['title'].lower(),
                                title.lower())
                            movie_res['match'] = seq.ratio()
                        results.sort(key=lambda x: x['match'], reverse=True)

                        # Remove all movies below MIN_MATCH, and different year
                        for movie_res in results[:]:

                            if year and movie_res.get('year'):
                                movie_res['year'] = int(movie_res['year'])
                                if movie_res['year'] != year:
                                    release_year = False
                                    if movie_res.get('release_dates',
                                                     {}).get('theater'):
                                        log.debug(
                                            'Checking year against theater release date'
                                        )
                                        release_year = time.strptime(
                                            movie_res['release_dates'].get(
                                                'theater'), '%Y-%m-%d').tm_year
                                    elif movie_res.get('release_dates',
                                                       {}).get('dvd'):
                                        log.debug(
                                            'Checking year against dvd release date'
                                        )
                                        release_year = time.strptime(
                                            movie_res['release_dates'].get(
                                                'dvd'), '%Y-%m-%d').tm_year
                                    if not (release_year
                                            and release_year == year):
                                        log.debug(
                                            'removing %s - %s (wrong year: %s)'
                                            % (movie_res['title'],
                                               movie_res['id'],
                                               str(release_year
                                                   or movie_res['year'])))
                                        results.remove(movie_res)
                                        continue
                            if movie_res['match'] < MIN_MATCH:
                                log.debug('removing %s (min_match)' %
                                          movie_res['title'])
                                results.remove(movie_res)
                                continue

                        if not results:
                            raise PluginError('no appropriate results')

                        if len(results) == 1:
                            log.debug('SUCCESS: only one movie remains')
                        else:
                            # Check min difference between best two hits
                            diff = results[0]['match'] - results[1]['match']
                            if diff < MIN_DIFF:
                                log.debug(
                                    'unable to determine correct movie, min_diff too small '
                                    '(`%s (%d) - %s` <-?-> `%s (%d) - %s`)' %
                                    (results[0]['title'], results[0]['year'],
                                     results[0]['id'], results[1]['title'],
                                     results[1]['year'], results[1]['id']))
                                for r in results:
                                    log.debug(
                                        'remain: %s (match: %s) %s' %
                                        (r['title'], r['match'], r['id']))
                                raise PluginError('min_diff')

                        imdb_alt_id = results[0].get('alternate_ids',
                                                     {}).get('imdb')
                        if imdb_alt_id:
                            result = movies_alias(imdb_alt_id)
                        else:
                            result = movies_info(results[0].get('id'))

                        if not result:
                            result = results[0]

                        movie = RottenTomatoesMovie()
                        try:
                            movie = set_movie_details(movie, session, result)
                            if imdb_id and not filter(
                                    lambda alt_id: alt_id.name == 'imdb' and
                                    alt_id.id == imdb_id.lstrip('t'),
                                    movie.alternate_ids):
                                log.warning(
                                    'Adding flexget_imdb alternate id %s for movie %s'
                                    % (imdb_id, movie))
                                movie.alternate_ids.append(RottenTomatoesAlternateId('flexget_imdb',\
                                        imdb_id.lstrip('t')))
                            session.add(movie)
                            session.commit()
                        except IntegrityError:
                            log.warning(
                                'Found movie %s in database after search even though we '
                                'already looked, updating it with search result.'
                                % movie)
                            session.rollback()
                            movie = session.query(RottenTomatoesMovie).filter(
                                RottenTomatoesMovie.id ==
                                result['id']).first()
                            movie = set_movie_details(movie, session, result)
                            if imdb_id and not filter(
                                    lambda alt_id: alt_id.name == 'imdb' and
                                    alt_id.id == imdb_id.lstrip('t'),
                                    movie.alternate_ids):
                                log.warning(
                                    'Adding flexget_imdb alternate id %s for movie %s'
                                    % (imdb_id, movie))
                                movie.alternate_ids.append(RottenTomatoesAlternateId('flexget_imdb',\
                                        imdb_id.lstrip('t')))
                            session.merge(movie)
                            session.commit()

                        if title.lower() != movie.title.lower():
                            log.debug('Saving search result for \'%s\'' %
                                      search_string)
                            session.add(
                                RottenTomatoesSearchResult(
                                    search=search_string, movie=movie))
        except URLError:
            raise PluginError('Error looking up movie from RottenTomatoes')

    if not movie:
        raise PluginError('No results found from rotten tomatoes for %s' %
                          id_str())
    else:
        # Access attributes to force the relationships to eager load before we detach from session
        for attr in [
                'alternate_ids', 'cast', 'directors', 'genres', 'links',
                'posters', 'release_dates'
        ]:
            getattr(movie, attr)
        session.commit()
        return movie
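
Both lookup_movie examples above rank Rotten Tomatoes search hits with a difflib ratio, drop anything under MIN_MATCH, and refuse to choose when the two best scores are closer than MIN_DIFF. The stand-alone sketch below illustrates that disambiguation idea; the threshold values and sample titles are illustrative, not the plugin's actual constants.

import difflib

# Illustrative thresholds only; the real MIN_MATCH / MIN_DIFF are defined in the plugin module.
MIN_MATCH = 0.5
MIN_DIFF = 0.01

def pick_best(title, candidate_titles):
    # Score every candidate against the wanted title, treating spaces as junk,
    # the same way the SequenceMatcher calls in the lookups above do.
    scored = []
    for cand in candidate_titles:
        ratio = difflib.SequenceMatcher(lambda x: x == ' ', cand.lower(), title.lower()).ratio()
        if ratio >= MIN_MATCH:
            scored.append((ratio, cand))
    scored.sort(reverse=True)
    if not scored:
        return None  # nothing passed MIN_MATCH
    if len(scored) > 1 and scored[0][0] - scored[1][0] < MIN_DIFF:
        return None  # two hits too close to call; the lookups raise 'min_diff' here
    return scored[0][1]

print(pick_best('toy story', ['Toy Story', 'Toy Story 2', "A Bug's Life"]))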