def test_withsession(self):
    """SimplePersistence given an explicit session must not commit or close it."""
    db = Session()
    store = SimplePersistence('testplugin', session=db)

    store['aoeu'] = 'test'
    stored = store['aoeu']
    assert stored == 'test'

    # Rolling back our own session must discard the value again; that only
    # works if the persistence layer neither committed nor closed the session.
    db.rollback()
    assert not ('aoeu' in store)
def consolidate():
    """
    Converts previous archive data model to new one.

    Every :class:`ArchiveEntry` gets its legacy ``task`` added to its
    ``sources`` list. Entries that share both title and url with an already
    handled entry are merged into it (their task becomes an additional source)
    and are deleted afterwards.

    If any entry already has sources, the database is treated as already
    consolidated: the session is rolled back and the function returns early.
    A ``KeyboardInterrupt`` rolls back all pending changes. The session is
    always closed.
    """
    session = Session()
    try:
        log.verbose('Checking archive size ...')
        count = session.query(ArchiveEntry).count()
        log.verbose('Found %i items to migrate, this can be aborted with CTRL-C safely.' % count)

        # consolidate old data
        from progressbar import ProgressBar, Percentage, Bar, ETA

        widgets = ['Process - ', ETA(), ' ', Percentage(), ' ', Bar(left='[', right=']')]
        bar = ProgressBar(widgets=widgets, maxval=count).start()

        # ids of entries that were merged into another entry; a set keeps the
        # "already processed" check below constant-time per row
        duplicates = set()
        for index, orig in enumerate(session.query(ArchiveEntry).yield_per(5)):
            bar.update(index)

            # item already processed
            if orig.id in duplicates:
                continue

            # item already migrated
            if orig.sources:
                log.info('Database looks like it has already been consolidated, '
                         'item %s has already sources ...' % orig.title)
                session.rollback()
                return

            # add legacy task to the sources list
            # (the deprecated task attribute itself is intentionally left in place)
            orig.sources.append(get_source(orig.task, session))

            for dupe in session.query(ArchiveEntry).\
                    filter(ArchiveEntry.id != orig.id).\
                    filter(ArchiveEntry.title == orig.title).\
                    filter(ArchiveEntry.url == orig.url).all():
                orig.sources.append(get_source(dupe.task, session))
                duplicates.add(dupe.id)

        if duplicates:
            log.info('Consolidated %i items, removing duplicates ...' % len(duplicates))
            for dupe_id in duplicates:
                session.query(ArchiveEntry).filter(ArchiveEntry.id == dupe_id).delete()
        session.commit()
        log.info('Completed! This does NOT need to be ran again.')
    except KeyboardInterrupt:
        session.rollback()
        log.critical('Aborted, no changes saved')
    finally:
        session.close()
def consolidate():
    """
    Converts previous archive data model to new one.

    For each :class:`ArchiveEntry` the legacy ``task`` is appended to the
    entry's ``sources``. Other entries with the same title and url are folded
    into it as extra sources and removed at the end.

    Returns early (after a rollback) as soon as an entry that already has
    sources is seen, because that indicates the migration has run before.
    ``KeyboardInterrupt`` discards all pending changes; the session is closed
    in every case.
    """
    session = Session()
    try:
        log.verbose('Checking archive size ...')
        count = session.query(ArchiveEntry).count()
        log.verbose('Found %i items to migrate, this can be aborted with CTRL-C safely.' % count)

        # consolidate old data
        from progressbar import ProgressBar, Percentage, Bar, ETA

        widgets = ['Process - ', ETA(), ' ', Percentage(), ' ', Bar(left='[', right=']')]
        bar = ProgressBar(widgets=widgets, maxval=count).start()

        # id's for duplicates (set: membership is tested once per iterated row)
        duplicates = set()
        for index, orig in enumerate(session.query(ArchiveEntry).yield_per(5)):
            bar.update(index)

            # item already processed
            if orig.id in duplicates:
                continue

            # item already migrated
            if orig.sources:
                log.info('Database looks like it has already been consolidated, '
                         'item %s has already sources ...' % orig.title)
                session.rollback()
                return

            # add legacy task to the sources list; the deprecated task value
            # is deliberately kept on the entry
            orig.sources.append(get_source(orig.task, session))

            same_item = session.query(ArchiveEntry).\
                filter(ArchiveEntry.id != orig.id).\
                filter(ArchiveEntry.title == orig.title).\
                filter(ArchiveEntry.url == orig.url)
            for dupe in same_item.all():
                orig.sources.append(get_source(dupe.task, session))
                duplicates.add(dupe.id)

        if duplicates:
            log.info('Consolidated %i items, removing duplicates ...' % len(duplicates))
            for entry_id in duplicates:
                session.query(ArchiveEntry).filter(ArchiveEntry.id == entry_id).delete()
        session.commit()
        log.info('Completed! This does NOT need to be ran again.')
    except KeyboardInterrupt:
        session.rollback()
        log.critical('Aborted, no changes saved')
    finally:
        session.close()
class Task(object):
    """
    Represents one task in the configuration.

    **Fires events:**

    * task.execute.before_plugin

      Before a plugin is about to be executed. Note that since this will also include all
      builtin plugins the amount of calls can be quite high

      ``parameters: task, keyword``

    * task.execute.after_plugin

      After a plugin has been executed.

      ``parameters: task, keyword``

    * task.execute.completed

      After task execution has been completed

      ``parameters: task``

    """

    # Upper bound on how many times rerun() may schedule another execution
    max_reruns = 5

    def __init__(self, manager, name, config=None, options=None):
        """
        :param Manager manager: Manager instance.
        :param string name: Name of the task.
        :param dict config: Task configuration. When None, it is read from
            ``manager.config['tasks']``. A deep copy is stored either way.
        :param options: Execution options. When None, a copy of
            ``manager.options.execute`` is used; when a dict, its items are
            applied on top of such a copy. Anything else is stored as given.
        """
        self.name = unicode(name)
        self.manager = manager
        # raw_config should remain the untouched input config
        if config is None:
            config = manager.config['tasks'].get(name, {})
        self.config = copy.deepcopy(config)
        self.prepared_config = None
        if options is None:
            options = copy.copy(self.manager.options.execute)
        elif isinstance(options, dict):
            options_namespace = copy.copy(self.manager.options.execute)
            options_namespace.__dict__.update(options)
            options = options_namespace
        self.options = options

        # simple persistence
        self.simple_persistence = SimpleTaskPersistence(self)

        # not to be reset
        self._rerun_count = 0

        self.config_modified = None

        # use reset to init variables when creating
        self._reset()

    @property
    def undecided(self):
        """
        .. deprecated:: Use API v3
        """
        return self.all_entries.undecided

    @property
    def failed(self):
        """
        .. deprecated:: Use API v3
        """
        return self.all_entries.failed

    @property
    def rejected(self):
        """
        .. deprecated:: Use API v3
        """
        return self.all_entries.rejected

    @property
    def accepted(self):
        """
        .. deprecated:: Use API v3
        """
        return self.all_entries.accepted

    @property
    def entries(self):
        """
        .. deprecated:: Use API v3
        """
        return self.all_entries.entries

    @property
    def all_entries(self):
        """
        .. deprecated:: Use API v3
        """
        return self._all_entries

    @property
    def is_rerun(self):
        """Number of reruns performed so far; truthy when this execution is a rerun."""
        return self._rerun_count

    # TODO: can we get rid of this now that Tasks are instantiated on demand?
    def _reset(self):
        """Reset task state"""
        log.debug('resetting %s' % self.name)
        # tasks whose name starts with an underscore are disabled
        self.enabled = not self.name.startswith('_')
        self.session = None
        self.priority = 65535

        self.requests = requests.Session()

        # List of all entries in the task
        self._all_entries = EntryContainer()

        self.disabled_phases = []

        # These are just to query what happened in task. Call task.abort to set.
        self.aborted = False
        self.abort_reason = None
        self.silent_abort = False

        self._rerun = False

        # current state
        self.current_phase = None
        self.current_plugin = None

    def __cmp__(self, other):
        """Order tasks by their priority."""
        return cmp(self.priority, other.priority)

    def __str__(self):
        return '<Task(name=%s,aborted=%s)>' % (self.name, self.aborted)

    def disable_phase(self, phase):
        """Disable ``phase`` from execution.

        The list of disabled phases is cleared by :meth:`Task._reset()`, which
        :meth:`execute` calls at the start of every execution.

        :param string phase: Name of ``phase``
        :raises ValueError: *phase* could not be found.
        """
        if phase not in task_phases:
            raise ValueError('%s is not a valid phase' % phase)
        if phase not in self.disabled_phases:
            log.debug('Disabling %s phase' % phase)
            self.disabled_phases.append(phase)

    def abort(self, reason='Unknown', silent=False):
        """Abort this task execution, no more plugins will be executed except the abort handling ones.

        :param string reason: Stored in :attr:`abort_reason` and passed to the raised exception.
        :param bool silent: When True the abort is logged at debug level instead of warning.
        :raises TaskAbort: always
        """
        self.aborted = True
        self.abort_reason = reason
        self.silent_abort = silent
        if not self.silent_abort:
            log.warning('Aborting task (plugin: %s)' % self.current_plugin)
        else:
            log.debug('Aborting task (plugin: %s)' % self.current_plugin)
        raise TaskAbort(reason, silent=silent)

    def find_entry(self, category='entries', **values):
        """
        Find and return :class:`~flexget.entry.Entry` with given attributes from task or None

        :param string category: entries, accepted, rejected or failed. Defaults to entries.
        :param values: Key values of entries to be searched
        :return: Entry or None
        :raises TypeError: if the attribute named by ``category`` is not an EntryIterator
        """
        cat = getattr(self, category)
        if not isinstance(cat, EntryIterator):
            raise TypeError('category must be a EntryIterator')
        for entry in cat:
            for k, v in values.iteritems():
                if not (k in entry and entry[k] == v):
                    break
            else:
                # inner loop finished without break: every requested key matched
                return entry
        return None

    def plugins(self, phase=None):
        """Get currently enabled plugins.

        :param string phase:
          Optional, limits to plugins currently configured on given phase, sorted in phase order.
        :return:
          An iterator over configured :class:`flexget.plugin.PluginInfo` instances enabled on this task.
        """
        if phase:
            plugins = sorted(get_plugins(phase=phase), key=lambda p: p.phase_handlers[phase], reverse=True)
        else:
            plugins = all_plugins.itervalues()
        return (p for p in plugins if p.name in self.config or p.builtin)

    def __run_task_phase(self, phase):
        """Executes task phase, ie. call all enabled plugins on the task.

        Fires events:

        * task.execute.before_plugin
        * task.execute.after_plugin

        :param string phase: Name of the phase
        :raises Exception: if ``phase`` is not a known task phase
        """
        if phase not in phase_methods:
            raise Exception('%s is not a valid task phase' % phase)
        # warn if no inputs, filters or outputs in the task
        if phase in ['input', 'filter', 'output']:
            if not self.manager.unit_test:
                # Check that there is at least one manually configured plugin for these phases
                for p in self.plugins(phase):
                    if not p.builtin:
                        break
                else:
                    log.warning('Task doesn\'t have any %s plugins, you should add (at least) one!' % phase)

        for plugin in self.plugins(phase):
            # Abort this phase if one of the plugins disables it
            if phase in self.disabled_phases:
                return
            # store execute info, except during entry events
            self.current_phase = phase
            self.current_plugin = plugin.name

            if plugin.api_ver == 1:
                # backwards compatibility
                # pass method only task (old behaviour)
                args = (self,)
            else:
                # pass method task, copy of config (so plugin cannot modify it)
                args = (self, copy.copy(self.config.get(plugin.name)))

            try:
                fire_event('task.execute.before_plugin', self, plugin.name)
                response = self.__run_plugin(plugin, phase, args)
                if phase == 'input' and response:
                    # add entries returned by input to self.all_entries
                    for e in response:
                        e.task = self
                    self.all_entries.extend(response)
            finally:
                # fired even when the plugin aborted the task
                fire_event('task.execute.after_plugin', self, plugin.name)

    def __run_plugin(self, plugin, phase, args=None, kwargs=None):
        """
        Execute given plugins phase method, with supplied args and kwargs.
        If plugin throws unexpected exceptions :meth:`abort` will be called.

        :param PluginInfo plugin: Plugin to be executed
        :param string phase: Name of the phase to be executed
        :param args: Passed to the plugin
        :param kwargs: Passed to the plugin
        :return: Whatever the plugin's phase handler returns; None when a
            :class:`PluginWarning` was raised and logged.
        :raises TaskAbort: re-raised from the plugin, or raised via :meth:`abort`
            for every other exception type handled here.
        """
        keyword = plugin.name
        method = plugin.phase_handlers[phase]
        if args is None:
            args = []
        if kwargs is None:
            kwargs = {}

        # log.trace('Running %s method %s' % (keyword, method))
        # call the plugin
        try:
            return method(*args, **kwargs)
        except TaskAbort:
            raise
        except PluginWarning as warn:
            # check if this warning should be logged only once (may keep repeating)
            if warn.kwargs.get('log_once', False):
                from flexget.utils.log import log_once
                log_once(warn.value, warn.log)
            else:
                warn.log.warning(warn)
        except EntryUnicodeError as eue:
            msg = ('Plugin %s tried to create non-unicode compatible entry (key: %s, value: %r)' %
                   (keyword, eue.key, eue.value))
            log.critical(msg)
            self.abort(msg)
        except PluginError as err:
            err.log.critical(err.value)
            self.abort(err.value)
        except DependencyError as e:
            msg = ('Plugin `%s` cannot be used because dependency `%s` is missing.' %
                   (keyword, e.missing))
            log.critical(msg)
            log.debug(e.message)
            self.abort(msg)
        except Warning as e:
            # If warnings have been elevated to errors
            msg = 'Warning during plugin %s: %s' % (keyword, e)
            log.exception(msg)
            self.abort(msg)
        except Exception as e:
            msg = 'BUG: Unhandled error in plugin %s: %s' % (keyword, e)
            log.exception(msg)
            self.abort(msg)

    def rerun(self):
        """Immediately re-run the task after execute has completed,
        task can be re-run up to :attr:`.max_reruns` times."""
        msg = 'Plugin %s has requested task to be ran again after execution has completed.' % self.current_plugin
        # Only print the first request for a rerun to the info log
        if self._rerun:
            log.debug(msg)
        else:
            log.info(msg)
        if self._rerun_count >= self.max_reruns:
            self._rerun = False
            log.info('Task has been re-run %s times already, stopping for now' % self._rerun_count)
            return
        self._rerun = True

    def config_changed(self):
        """
        Sets config_modified flag to True for the remainder of this run.
        Used when the db changes, and all entries need to be reprocessed.
        """
        self.config_modified = True

    @useTaskLogging
    def execute(self):
        """
        Executes the the task.

        If :attr:`.enabled` is False task is not executed. Certain :attr:`.options`
        affect how execution is handled.

        - :attr:`.options.disable_phases` is a list of phases that are not enabled
          for this execution.
        - :attr:`.options.inject` is a list of :class:`Entry` instances used instead
          of running input phase.

        :raises TaskAbort: re-raised after the abort phase handlers have run.
        """
        # NOTE(review): this check only logs; execution continues and _reset()
        # below recomputes `enabled` from the task name. Confirm whether an
        # early return was intended here.
        if not self.enabled:
            log.debug('Not running disabled task %s' % self.name)
        if self.options.cron:
            self.manager.db_cleanup()

        self._reset()
        log.debug('executing %s' % self.name)
        if not self.enabled:
            log.debug('task %s disabled during preparation, not running' % self.name)
            return

        # Handle keyword args
        if self.options.learn:
            log.info('Disabling download and output phases because of --learn')
            self.disable_phase('download')
            self.disable_phase('output')
        if self.options.disable_phases:
            # explicit loop: the calls are made purely for their side effect
            for disabled in self.options.disable_phases:
                self.disable_phase(disabled)
        if self.options.inject:
            # If entries are passed for this execution (eg. rerun), disable the input phase
            self.disable_phase('input')
            self.all_entries.extend(self.options.inject)

        log.debug('starting session')
        self.session = Session()

        # Save current config hash and set config_modified flag
        config_hash = hashlib.md5(str(sorted(self.config.items()))).hexdigest()
        last_hash = self.session.query(TaskConfigHash).filter(TaskConfigHash.task == self.name).first()
        if self.is_rerun:
            # Restore the config to state right after start phase
            if self.prepared_config:
                self.config = copy.deepcopy(self.prepared_config)
            else:
                log.error('BUG: No prepared_config on rerun, please report.')
            self.config_modified = False
        elif not last_hash:
            self.config_modified = True
            last_hash = TaskConfigHash(task=self.name, hash=config_hash)
            self.session.add(last_hash)
        elif last_hash.hash != config_hash:
            self.config_modified = True
            last_hash.hash = config_hash
        else:
            self.config_modified = False

        # run phases
        try:
            for phase in task_phases:
                if phase in self.disabled_phases:
                    # log keywords not executed
                    for plugin in self.plugins(phase):
                        if plugin.name in self.config:
                            log.info('Plugin %s is not executed because %s phase is disabled (e.g. --test)' %
                                     (plugin.name, phase))
                    continue
                if phase == 'start' and self.is_rerun:
                    log.debug('skipping task_start during rerun')
                elif phase == 'exit' and self._rerun:
                    log.debug('not running task_exit yet because task will rerun')
                else:
                    # run all plugins with this phase
                    self.__run_task_phase(phase)
                    if phase == 'start':
                        # Store a copy of the config state after start phase to restore for reruns
                        self.prepared_config = copy.deepcopy(self.config)
        except TaskAbort:
            # Roll back the session before calling abort handlers
            self.session.rollback()
            try:
                self.__run_task_phase('abort')
                # Commit just the abort handler changes if no exceptions are raised there
                self.session.commit()
            except TaskAbort as e:
                log.exception('abort handlers aborted: %s' % e)
            raise
        else:
            for entry in self.all_entries:
                entry.complete()
            log.debug('committing session')
            self.session.commit()
            fire_event('task.execute.completed', self)
        finally:
            # this will cause database rollback on exception
            self.session.close()

        # rerun task
        if self._rerun:
            log.info('Rerunning the task in case better resolution can be achieved.')
            self._rerun_count += 1
            # TODO: Potential optimization is to take snapshots (maybe make the ones backlog uses built in instead of
            # taking another one) after input and just inject the same entries for the rerun
            self.execute()

    def __eq__(self, other):
        """Tasks compare equal to any object carrying the same ``name``."""
        if hasattr(other, 'name'):
            return self.name == other.name
        return NotImplemented

    def __copy__(self):
        """Return a new task sharing this task's state, with its own options and config copies."""
        new = type(self)(self.manager, self.name, self.config, self.options)
        # Update all the variables of new instance to match our own
        new.__dict__.update(self.__dict__)
        # Some mutable objects need to be copies
        new.options = copy.copy(self.options)
        new.config = copy.deepcopy(self.config)
        return new

    copy = __copy__
def lookup_movie(
    title=None, year=None, rottentomatoes_id=None, imdb_id=None, smart_match=None, only_cached=False, session=None
):
    """Do a lookup from Rotten Tomatoes for the movie matching the passed arguments.

    Any combination of criteria can be passed, the most specific criteria specified will be used.
    The local cache is consulted first (by rottentomatoes_id, then imdb_id, then title/year,
    then the search-result cache). An expired cache hit is refreshed online unless
    ``only_cached`` is set. On a cache miss the online lookup is attempted by imdb alias,
    then by rottentomatoes_id, then by title search.

    :param rottentomatoes_id: rottentomatoes_id of desired movie
    :param imdb_id: imdb_id of desired movie
    :param title: title of desired movie
    :param year: release year of desired movie
    :param smart_match: attempt to clean and parse title and year from a string;
        when given, the parsed values replace ``title`` and ``year``
    :param only_cached: if this is specified, an online lookup will not occur if the movie is not in the cache
    :param session: optionally specify a session to use, if specified, returned Movie will be live in that session
    :returns: The Movie object populated with data from Rotten Tomatoes
    :raises: PluginError if a match cannot be found or there are other problems with the lookup
    """
    # alternate id names that carry an imdb id
    imdb_alt_names = ("imdb", "flexget_imdb")

    def id_str():
        return "<title=%s,year=%s,rottentomatoes_id=%s,imdb_id=%s>" % (title, year, rottentomatoes_id, imdb_id)

    def add_flexget_imdb_id(target):
        """Record the caller-supplied imdb_id on *target* unless it already has it as an "imdb" alternate id."""
        if not imdb_id:
            return
        stripped = imdb_id.lstrip("t")
        if any(alt.name == "imdb" and alt.id == stripped for alt in target.alternate_ids):
            return
        log.warning("Adding flexget_imdb alternate id %s for movie %s" % (imdb_id, target))
        target.alternate_ids.append(RottenTomatoesAlternateId("flexget_imdb", stripped))

    if smart_match:
        # If smart_match was specified, parse it into a title and year
        title_parser = MovieParser()
        title_parser.parse(smart_match)
        title = title_parser.name
        year = title_parser.year
        if title == "" and not (rottentomatoes_id or imdb_id):
            raise PluginError("Failed to parse name from %s" % smart_match)

    if title:
        search_string = title.lower()
        if year:
            search_string = "%s %s" % (search_string, year)
    elif not (rottentomatoes_id or imdb_id):
        raise PluginError("No criteria specified for rotten tomatoes lookup")

    # NOTE(review): a session created here is never closed in this function;
    # the comment before the final commit suggests the caller receives a
    # detached object - confirm the intended session lifetime.
    if not session:
        session = Session()

    log.debug("Looking up rotten tomatoes information for %s" % id_str())

    movie = None

    # --- cache lookups, most specific criteria first ---
    if rottentomatoes_id:
        movie = session.query(RottenTomatoesMovie).filter(RottenTomatoesMovie.id == rottentomatoes_id).first()
    if not movie and imdb_id:
        alt_id = (
            session.query(RottenTomatoesAlternateId)
            .filter(RottenTomatoesAlternateId.name.in_(["imdb", "flexget_imdb"]))
            .filter(RottenTomatoesAlternateId.id == imdb_id.lstrip("t"))
            .first()
        )
        if alt_id:
            movie = session.query(RottenTomatoesMovie).filter(RottenTomatoesMovie.id == alt_id.movie_id).first()
    if not movie and title:
        movie_filter = session.query(RottenTomatoesMovie).filter(func.lower(RottenTomatoesMovie.title) == title.lower())
        if year:
            movie_filter = movie_filter.filter(RottenTomatoesMovie.year == year)
        movie = movie_filter.first()
        if not movie:
            log.debug("No matches in movie cache found, checking search cache.")
            found = (
                session.query(RottenTomatoesSearchResult)
                .filter(func.lower(RottenTomatoesSearchResult.search) == search_string)
                .first()
            )
            if found and found.movie:
                log.debug("Movie found in search cache.")
                movie = found.movie

    if movie:
        # Movie found in cache, check if cache has expired.
        if movie.expired and not only_cached:
            log.debug("Cache has expired for %s, attempting to refresh from Rotten Tomatoes." % id_str())
            try:
                # First imdb-style alternate id, or None when the movie has none
                # (indexing a filtered list here would raise IndexError instead).
                imdb_alt_id = next(
                    (alt.id for alt in (movie.alternate_ids or []) if alt.name in imdb_alt_names), None
                )
                if imdb_alt_id:
                    result = movies_alias(imdb_alt_id, "imdb")
                else:
                    result = movies_info(movie.id)
                movie = set_movie_details(movie, session, result)
                session.merge(movie)
            except URLError:
                log.error("Error refreshing movie details from Rotten Tomatoes, cached info being used.")
        else:
            log.debug("Movie %s information restored from cache." % id_str())
    else:
        if only_cached:
            raise PluginError("Movie %s not found from cache" % id_str())
        # There was no movie found in the cache, do a lookup from Rotten Tomatoes
        log.debug("Movie %s not found in cache, looking up from rotten tomatoes." % id_str())
        try:
            # Lookups using imdb_id
            if imdb_id:
                log.debug("Using IMDB alias %s." % imdb_id)
                result = movies_alias(imdb_id, "imdb")
                if result:
                    # Sanity-check the alias result against the other criteria given
                    mismatch = []
                    if (
                        title
                        and difflib.SequenceMatcher(
                            lambda x: x == " ", re.sub(r"\s+\(.*\)$", "", result["title"].lower()), title.lower()
                        ).ratio()
                        < MIN_MATCH
                    ):
                        mismatch.append("the title (%s <-?-> %s)" % (title, result["title"]))
                    result["year"] = int(result["year"])
                    if year and fabs(result["year"] - year) > 1:
                        mismatch.append("the year (%s <-?-> %s)" % (year, result["year"]))
                        release_year = None
                        if result.get("release_dates", {}).get("theater"):
                            log.debug("Checking year against theater release date")
                            release_year = time.strptime(result["release_dates"].get("theater"), "%Y-%m-%d").tm_year
                            if fabs(release_year - year) > 1:
                                mismatch.append("the theater release (%s)" % release_year)
                        elif result.get("release_dates", {}).get("dvd"):
                            log.debug("Checking year against dvd release date")
                            release_year = time.strptime(result["release_dates"].get("dvd"), "%Y-%m-%d").tm_year
                            if fabs(release_year - year) > 1:
                                mismatch.append("the DVD release (%s)" % release_year)
                    if mismatch:
                        log.warning(
                            "Rotten Tomatoes had an imdb alias for %s but it didn't match %s."
                            % (imdb_id, ", or ".join(mismatch))
                        )
                    else:
                        log.debug("imdb_id %s maps to rt_id %s, checking db for info." % (imdb_id, result["id"]))
                        movie = (
                            session.query(RottenTomatoesMovie)
                            .filter(RottenTomatoesMovie.id == result.get("id"))
                            .first()
                        )
                        if movie:
                            log.debug(
                                "Movie %s was in database, but did not have the imdb_id stored, "
                                "forcing an update" % movie
                            )
                            movie = set_movie_details(movie, session, result)
                            session.merge(movie)
                        else:
                            log.debug("%s was not in database, setting info." % result["title"])
                            movie = RottenTomatoesMovie()
                            movie = set_movie_details(movie, session, result)
                            if not movie:
                                raise PluginError("set_movie_details returned %s" % movie)
                            session.add(movie)
                else:
                    log.debug("IMDB alias %s returned no results." % imdb_id)

            if not movie and rottentomatoes_id:
                result = movies_info(rottentomatoes_id)
                if result:
                    movie = RottenTomatoesMovie()
                    movie = set_movie_details(movie, session, result)
                    session.add(movie)

            if not movie and title:
                log.verbose("Searching from rt `%s`" % search_string)
                results = movies_search(search_string)
                if results:
                    results = results.get("movies")
                    if results:
                        # Score every hit by title similarity, best first
                        for movie_res in results:
                            seq = difflib.SequenceMatcher(lambda x: x == " ", movie_res["title"].lower(), title.lower())
                            movie_res["match"] = seq.ratio()
                        results.sort(key=lambda x: x["match"], reverse=True)

                        # Remove all movies below MIN_MATCH, and different year
                        # (iterate over a copy because entries are removed in the loop)
                        for movie_res in results[:]:
                            if year and movie_res.get("year"):
                                movie_res["year"] = int(movie_res["year"])
                                if movie_res["year"] != year:
                                    release_year = False
                                    if movie_res.get("release_dates", {}).get("theater"):
                                        log.debug("Checking year against theater release date")
                                        release_year = time.strptime(
                                            movie_res["release_dates"].get("theater"), "%Y-%m-%d"
                                        ).tm_year
                                    elif movie_res.get("release_dates", {}).get("dvd"):
                                        log.debug("Checking year against dvd release date")
                                        release_year = time.strptime(
                                            movie_res["release_dates"].get("dvd"), "%Y-%m-%d"
                                        ).tm_year
                                    if not (release_year and release_year == year):
                                        log.debug(
                                            "removing %s - %s (wrong year: %s)"
                                            % (
                                                movie_res["title"],
                                                movie_res["id"],
                                                str(release_year or movie_res["year"]),
                                            )
                                        )
                                        results.remove(movie_res)
                                        continue
                            if movie_res["match"] < MIN_MATCH:
                                log.debug("removing %s (min_match)" % movie_res["title"])
                                results.remove(movie_res)
                                continue

                        if not results:
                            raise PluginError("no appropiate results")

                        if len(results) == 1:
                            log.debug("SUCCESS: only one movie remains")
                        else:
                            # Check min difference between best two hits
                            diff = results[0]["match"] - results[1]["match"]
                            if diff < MIN_DIFF:
                                log.debug(
                                    "unable to determine correct movie, min_diff too small"
                                    "(`%s (%d) - %s` <-?-> `%s (%d) - %s`)"
                                    % (
                                        results[0]["title"],
                                        results[0]["year"],
                                        results[0]["id"],
                                        results[1]["title"],
                                        results[1]["year"],
                                        results[1]["id"],
                                    )
                                )
                                for r in results:
                                    log.debug("remain: %s (match: %s) %s" % (r["title"], r["match"], r["id"]))
                                raise PluginError("min_diff")

                        # Fetch full details for the best hit, falling back to the search result itself
                        imdb_alt_id = results[0].get("alternate_ids", {}).get("imdb")
                        if imdb_alt_id:
                            result = movies_alias(imdb_alt_id)
                        else:
                            result = movies_info(results[0].get("id"))

                        if not result:
                            result = results[0]

                        movie = RottenTomatoesMovie()
                        try:
                            movie = set_movie_details(movie, session, result)
                            add_flexget_imdb_id(movie)
                            session.add(movie)
                            session.commit()
                        except IntegrityError:
                            log.warning(
                                "Found movie %s in database after search even though we "
                                "already looked, updating it with search result." % movie
                            )
                            session.rollback()
                            movie = (
                                session.query(RottenTomatoesMovie)
                                .filter(RottenTomatoesMovie.id == result["id"])
                                .first()
                            )
                            movie = set_movie_details(movie, session, result)
                            add_flexget_imdb_id(movie)
                            session.merge(movie)
                            session.commit()

                        if title.lower() != movie.title.lower():
                            log.debug("Saving search result for '%s'" % search_string)
                            session.add(RottenTomatoesSearchResult(search=search_string, movie=movie))
        except URLError:
            raise PluginError("Error looking up movie from RottenTomatoes")

    if not movie:
        raise PluginError("No results found from rotten tomatoes for %s" % id_str())
    else:
        # Access attributes to force the relationships to eager load before we detach from session
        for attr in ["alternate_ids", "cast", "directors", "genres", "links", "posters", "release_dates"]:
            getattr(movie, attr)
        session.commit()
        return movie
class Task(object): """ Represents one task in the configuration. **Fires events:** * task.execute.before_plugin Before a plugin is about to be executed. Note that since this will also include all builtin plugins the amount of calls can be quite high ``parameters: task, keyword`` * task.execute.after_plugin After a plugin has been executed. ``parameters: task, keyword`` * task.execute.completed After task execution has been completed ``parameters: task`` """ max_reruns = 5 def __init__(self, manager, name, config=None, options=None): """ :param Manager manager: Manager instance. :param string name: Name of the task. :param dict config: Task configuration. """ self.name = unicode(name) self.manager = manager # raw_config should remain the untouched input config if config is None: config = manager.config['tasks'].get(name, {}) self.config = copy.deepcopy(config) self.prepared_config = None if options is None: options = copy.copy(self.manager.options.execute) elif isinstance(options, dict): options_namespace = copy.copy(self.manager.options.execute) options_namespace.__dict__.update(options) options = options_namespace self.options = options # simple persistence self.simple_persistence = SimpleTaskPersistence(self) # not to be reset self._rerun_count = 0 self.config_modified = None # use reset to init variables when creating self._reset() @property def undecided(self): """ .. deprecated:: Use API v3 """ return self.all_entries.undecided @property def failed(self): """ .. deprecated:: Use API v3 """ return self.all_entries.failed @property def rejected(self): """ .. deprecated:: Use API v3 """ return self.all_entries.rejected @property def accepted(self): """ .. deprecated:: Use API v3 """ return self.all_entries.accepted @property def entries(self): """ .. deprecated:: Use API v3 """ return self.all_entries.entries @property def all_entries(self): """ .. 
deprecated:: Use API v3 """ return self._all_entries @property def is_rerun(self): return self._rerun_count # TODO: can we get rid of this now that Tasks are instantiated on demand? def _reset(self): """Reset task state""" log.debug('resetting %s' % self.name) self.enabled = not self.name.startswith('_') self.session = None self.priority = 65535 self.requests = requests.Session() # List of all entries in the task self._all_entries = EntryContainer() self.disabled_phases = [] # These are just to query what happened in task. Call task.abort to set. self.aborted = False self.abort_reason = None self.silent_abort = False self._rerun = False # current state self.current_phase = None self.current_plugin = None def __cmp__(self, other): return cmp(self.priority, other.priority) def __str__(self): return '<Task(name=%s,aborted=%s)>' % (self.name, self.aborted) def disable_phase(self, phase): """Disable ``phase`` from execution. All disabled phases are re-enabled by :meth:`Task._reset()` after task execution has been completed. :param string phase: Name of ``phase`` :raises ValueError: *phase* could not be found. """ if phase not in task_phases: raise ValueError('%s is not a valid phase' % phase) if phase not in self.disabled_phases: log.debug('Disabling %s phase' % phase) self.disabled_phases.append(phase) def abort(self, reason='Unknown', silent=False): """Abort this task execution, no more plugins will be executed except the abort handling ones.""" self.aborted = True self.abort_reason = reason self.silent_abort = silent if not self.silent_abort: log.warning('Aborting task (plugin: %s)' % self.current_plugin) else: log.debug('Aborting task (plugin: %s)' % self.current_plugin) raise TaskAbort(reason, silent=silent) def find_entry(self, category='entries', **values): """ Find and return :class:`~flexget.entry.Entry` with given attributes from task or None :param string category: entries, accepted, rejected or failed. Defaults to entries. 
:param values: Key values of entries to be searched :return: Entry or None """ cat = getattr(self, category) if not isinstance(cat, EntryIterator): raise TypeError('category must be a EntryIterator') for entry in cat: for k, v in values.iteritems(): if not (k in entry and entry[k] == v): break else: return entry return None def plugins(self, phase=None): """Get currently enabled plugins. :param string phase: Optional, limits to plugins currently configured on given phase, sorted in phase order. :return: An iterator over configured :class:`flexget.plugin.PluginInfo` instances enabled on this task. """ if phase: plugins = sorted(get_plugins(phase=phase), key=lambda p: p.phase_handlers[phase], reverse=True) else: plugins = all_plugins.itervalues() return (p for p in plugins if p.name in self.config or p.builtin) def __run_task_phase(self, phase): """Executes task phase, ie. call all enabled plugins on the task. Fires events: * task.execute.before_plugin * task.execute.after_plugin :param string phase: Name of the phase """ if phase not in phase_methods: raise Exception('%s is not a valid task phase' % phase) # warn if no inputs, filters or outputs in the task if phase in ['input', 'filter', 'output']: if not self.manager.unit_test: # Check that there is at least one manually configured plugin for these phases for p in self.plugins(phase): if not p.builtin: break else: log.warning('Task doesn\'t have any %s plugins, you should add (at least) one!' 
% phase)
        for plugin in self.plugins(phase):
            # Abort this phase if one of the plugins disables it
            if phase in self.disabled_phases:
                return
            # store execute info, except during entry events
            self.current_phase = phase
            self.current_plugin = plugin.name

            if plugin.api_ver == 1:
                # backwards compatibility
                # pass method only task (old behaviour)
                args = (self,)
            else:
                # pass method task, copy of config (so plugin cannot modify it)
                args = (self, copy.copy(self.config.get(plugin.name)))

            try:
                fire_event('task.execute.before_plugin', self, plugin.name)
                response = self.__run_plugin(plugin, phase, args)
                if phase == 'input' and response:
                    # add entries returned by input to self.all_entries
                    for e in response:
                        e.task = self
                    self.all_entries.extend(response)
            finally:
                fire_event('task.execute.after_plugin', self, plugin.name)

    def __run_plugin(self, plugin, phase, args=None, kwargs=None):
        """
        Execute given plugins phase method, with supplied args and kwargs.
        If plugin throws unexpected exceptions :meth:`abort` will be called.

        :class:`TaskAbort` is re-raised untouched. A :class:`PluginWarning` is only logged
        (once, when it carries ``log_once``). Every other exception handled here is logged
        and its message handed to :meth:`abort`.

        :param PluginInfo plugin: Plugin to be executed
        :param string phase: Name of the phase to be executed
        :param args: Passed to the plugin
        :param kwargs: Passed to the plugin
        :returns: Whatever the phase handler returned; nothing is returned explicitly on the
            exception paths.
        """
        keyword = plugin.name
        method = plugin.phase_handlers[phase]
        # Default to "no arguments" without sharing mutable defaults between calls
        if args is None:
            args = []
        if kwargs is None:
            kwargs = {}

        # log.trace('Running %s method %s' % (keyword, method))
        # call the plugin
        try:
            return method(*args, **kwargs)
        except TaskAbort:
            # An abort is already in progress, let the caller deal with it
            raise
        except PluginWarning as warn:
            # check if this warning should be logged only once (may keep repeating)
            if warn.kwargs.get('log_once', False):
                from flexget.utils.log import log_once
                log_once(warn.value, warn.log)
            else:
                warn.log.warning(warn)
        except EntryUnicodeError as eue:
            msg = ('Plugin %s tried to create non-unicode compatible entry (key: %s, value: %r)' %
                   (keyword, eue.key, eue.value))
            log.critical(msg)
            self.abort(msg)
        except PluginError as err:
            # Log through the logger carried by the error itself
            err.log.critical(err.value)
            self.abort(err.value)
        except DependencyError as e:
            msg = ('Plugin `%s` cannot be used because dependency `%s` is missing.' % (keyword, e.missing))
            log.critical(msg)
            log.debug(e.message)
            self.abort(msg)
        except Warning as e:
            # If warnings have been elevated to errors
            msg = 'Warning during plugin %s: %s' % (keyword, e)
            log.exception(msg)
            self.abort(msg)
        except Exception as e:
            # Anything else is treated as a plugin bug; the traceback is logged
            msg = 'BUG: Unhandled error in plugin %s: %s' % (keyword, e)
            log.exception(msg)
            self.abort(msg)

    def rerun(self):
        """
        Flag the task to be run again after the current execution has completed.

        The request is refused (and the flag cleared) once the task has already been re-run
        :attr:`.max_reruns` times.
        """
        msg = 'Plugin %s has requested task to be ran again after execution has completed.' % self.current_plugin
        # Only print the first request for a rerun to the info log
        log.debug(msg) if self._rerun else log.info(msg)
        if self._rerun_count >= self.max_reruns:
            self._rerun = False
            log.info('Task has been re-run %s times already, stopping for now' % self._rerun_count)
            return
        self._rerun = True

    def config_changed(self):
        """
        Sets config_modified flag to True for the remainder of this run.
        Used when the db changes, and all entries need to be reprocessed.
        """
        self.config_modified = True

    @useTaskLogging
    def execute(self):
        """
        Executes the task.

        If :attr:`.enabled` is False task is not executed. Certain :attr:`.options`
        affect how execution is handled.

        - :attr:`.options.disable_phases` is a list of phases that are not enabled
          for this execution.
        - :attr:`.options.inject` is a list of :class:`Entry` instances used instead
          of running input phase.

        When a plugin has requested a rerun, this method calls itself again after the
        session has been closed.
        """
        # NOTE(review): this first check only logs; there is no return, so a disabled task
        # still reaches _reset() and is skipped by the second check below - confirm intended.
        if not self.enabled:
            log.debug('Not running disabled task %s' % self.name)
        if self.options.cron:
            self.manager.db_cleanup()

        self._reset()
        log.debug('executing %s' % self.name)
        if not self.enabled:
            log.debug('task %s disabled during preparation, not running' % self.name)
            return

        # Handle keyword args
        if self.options.learn:
            log.info('Disabling download and output phases because of --learn')
            self.disable_phase('download')
            self.disable_phase('output')
        if self.options.disable_phases:
            # NOTE(review): relies on map() being eager (Python 2); a lazy map() would never
            # call disable_phase - confirm the targeted Python version.
            map(self.disable_phase, self.options.disable_phases)
        if self.options.inject:
            # If entries are passed for this execution (eg. rerun), disable the input phase
            self.disable_phase('input')
            self.all_entries.extend(self.options.inject)

        log.debug('starting session')
        self.session = Session()

        # Save current config hash and set config_modified flag
        # NOTE(review): md5() is fed a str here, which presumably assumes Python 2 - confirm.
        config_hash = hashlib.md5(str(sorted(self.config.items()))).hexdigest()
        last_hash = self.session.query(TaskConfigHash).filter(TaskConfigHash.task == self.name).first()
        if self.is_rerun:
            # Restore the config to state right after start phase
            if self.prepared_config:
                self.config = copy.deepcopy(self.prepared_config)
            else:
                log.error('BUG: No prepared_config on rerun, please report.')
            self.config_modified = False
        elif not last_hash:
            # First run of this task: remember the hash and treat the config as modified
            self.config_modified = True
            last_hash = TaskConfigHash(task=self.name, hash=config_hash)
            self.session.add(last_hash)
        elif last_hash.hash != config_hash:
            self.config_modified = True
            last_hash.hash = config_hash
        else:
            self.config_modified = False

        # run phases
        try:
            for phase in task_phases:
                if phase in self.disabled_phases:
                    # log keywords not executed
                    for plugin in self.plugins(phase):
                        if plugin.name in self.config:
                            log.info('Plugin %s is not executed because %s phase is disabled (e.g. --test)' %
                                     (plugin.name, phase))
                    continue
                if phase == 'start' and self.is_rerun:
                    log.debug('skipping task_start during rerun')
                elif phase == 'exit' and self._rerun:
                    log.debug('not running task_exit yet because task will rerun')
                else:
                    # run all plugins with this phase
                    self.__run_task_phase(phase)
                    if phase == 'start':
                        # Store a copy of the config state after start phase to restore for reruns
                        self.prepared_config = copy.deepcopy(self.config)
        except TaskAbort:
            # Roll back the session before calling abort handlers
            self.session.rollback()
            try:
                self.__run_task_phase('abort')
                # Commit just the abort handler changes if no exceptions are raised there
                self.session.commit()
            except TaskAbort as e:
                log.exception('abort handlers aborted: %s' % e)
            # Re-raise the abort that interrupted the phases
            raise
        else:
            # No abort: mark every entry complete and persist the run
            for entry in self.all_entries:
                entry.complete()
            log.debug('committing session')
            self.session.commit()
            fire_event('task.execute.completed', self)
        finally:
            # this will cause database rollback on exception
            self.session.close()

        # rerun task
        if self._rerun:
            log.info('Rerunning the task in case better resolution can be achieved.')
            self._rerun_count += 1
            # TODO: Potential optimization is to take snapshots (maybe make the ones backlog uses built in instead of
            # taking another one) after input and just inject the same entries for the rerun
            self.execute()

    def __eq__(self, other):
        """Compare by ``name``; objects without a ``name`` attribute yield NotImplemented."""
        if hasattr(other, 'name'):
            return self.name == other.name
        return NotImplemented

    def __copy__(self):
        """Return a new instance mirroring this one, with its own ``options`` and ``config``."""
        new = type(self)(self.manager, self.name, self.config, self.options)
        # Update all the variables of new instance to match our own
        new.__dict__.update(self.__dict__)
        # Some mutable objects need to be copies
        new.options = copy.copy(self.options)
        new.config = copy.deepcopy(self.config)
        return new

    # Expose the copy protocol as a regular method as well
    copy = __copy__
def lookup_movie(title=None, year=None, rottentomatoes_id=None, imdb_id=None, smart_match=None,
                 only_cached=False, session=None):
    """Do a lookup from Rotten Tomatoes for the movie matching the passed arguments.

    Any combination of criteria can be passed, the most specific criteria specified will be used.
    The local cache is consulted first (Rotten Tomatoes id, then imdb alternate id, then title/year,
    then saved search results). The online API is only queried to refresh an expired cache entry or
    when nothing was found in the cache.

    :param rottentomatoes_id: rottentomatoes_id of desired movie
    :param imdb_id: imdb_id of desired movie
    :param title: title of desired movie
    :param year: release year of desired movie
    :param smart_match: attempt to clean and parse title and year from a string; when given, the
        parsed values replace ``title`` and ``year``
    :param only_cached: if this is specified, an online lookup will not occur if the movie is not in the cache
    :param session: optionally specify a session to use, if specified, returned Movie will be live in that session
    :returns: The Movie object populated with data from Rotten Tomatoes
    :raises: PluginError if a match cannot be found or there are other problems with the lookup
    """

    if smart_match:
        # If smart_match was specified, parse it into a title and year
        title_parser = MovieParser()
        title_parser.parse(smart_match)
        title = title_parser.name
        year = title_parser.year
        if title == '' and not (rottentomatoes_id or imdb_id or title):
            # Report the string that failed to parse (the old code referenced an undefined name here)
            raise PluginError('Failed to parse name from %s' % smart_match)

    if title:
        search_string = title.lower()
        if year:
            search_string = '%s %s' % (search_string, year)
    elif not (rottentomatoes_id or imdb_id):
        raise PluginError('No criteria specified for rotten tomatoes lookup')

    def id_str():
        return '<title=%s,year=%s,rottentomatoes_id=%s,imdb_id=%s>' % (title, year, rottentomatoes_id, imdb_id)

    if not session:
        session = Session()

    log.debug('Looking up rotten tomatoes information for %s' % id_str())

    # --- Cache lookup, most specific criteria first ---
    movie = None
    if rottentomatoes_id:
        movie = session.query(RottenTomatoesMovie).\
            filter(RottenTomatoesMovie.id == rottentomatoes_id).first()
    if not movie and imdb_id:
        alt_id = session.query(RottenTomatoesAlternateId).\
            filter(RottenTomatoesAlternateId.name.in_(['imdb', 'flexget_imdb'])).\
            filter(RottenTomatoesAlternateId.id == imdb_id.lstrip('t')).first()
        if alt_id:
            movie = session.query(RottenTomatoesMovie).filter(RottenTomatoesMovie.id == alt_id.movie_id).first()
    if not movie and title:
        movie_filter = session.query(RottenTomatoesMovie).filter(
            func.lower(RottenTomatoesMovie.title) == title.lower())
        if year:
            movie_filter = movie_filter.filter(RottenTomatoesMovie.year == year)
        movie = movie_filter.first()
        if not movie:
            # Fall back to results remembered from earlier online searches
            found = session.query(RottenTomatoesSearchResult).\
                filter(func.lower(RottenTomatoesSearchResult.search) == search_string).first()
            if found and found.movie:
                movie = found.movie

    if movie:
        # Movie found in cache, check if cache has expired.
        if movie.expired and not only_cached:
            log.debug('Cache has expired for %s, attempting to refresh from Rotten Tomatoes.' % id_str())
            try:
                # First imdb-style alternate id, or None when the movie has none
                # (indexing the filtered list raised IndexError in that case)
                imdb_alt_id = next((alt_id.id for alt_id in (movie.alternate_ids or [])
                                    if alt_id.name in ['imdb', 'flexget_imdb']), None)
                if imdb_alt_id:
                    result = movies_alias(imdb_alt_id, 'imdb')
                else:
                    result = movies_info(movie.id)
                movie = set_movie_details(movie, session, result)
                session.merge(movie)
            except URLError:
                log.error('Error refreshing movie details from Rotten Tomatoes, cached info being used.')
        else:
            log.debug('Movie %s information restored from cache.' % id_str())
    else:
        if only_cached:
            raise PluginError('Movie %s not found from cache' % id_str())
        # There was no movie found in the cache, do a lookup from Rotten Tomatoes
        log.debug('Movie %s not found in cache, looking up from rotten tomatoes.' % id_str())
        try:
            # Lookups using imdb_id
            if imdb_id:
                log.debug('Using IMDB alias %s.' % imdb_id)
                result = movies_alias(imdb_id, 'imdb')
                if result:
                    # Collect every reason the alias result disagrees with the requested criteria
                    mismatch = []
                    if title and difflib.SequenceMatcher(lambda x: x == ' ',
                                                         re.sub(r'\s+\(.*\)$', '', result['title'].lower()),
                                                         title.lower()).ratio() < MIN_MATCH:
                        mismatch.append('the title (%s <-?-> %s)' % (title, result['title']))
                    if year and fabs(result['year'] - year) > 1:
                        mismatch.append('the year (%s <-?-> %s)' % (year, result['year']))
                        release_year = None
                        if result.get('release_dates', {}).get('theater'):
                            log.debug('Checking year against theater release date')
                            release_year = time.strptime(result['release_dates'].get('theater'), '%Y-%m-%d').tm_year
                            if fabs(release_year - year) > 1:
                                mismatch.append('the theater release (%s)' % release_year)
                        elif result.get('release_dates', {}).get('dvd'):
                            log.debug('Checking year against dvd release date')
                            release_year = time.strptime(result['release_dates'].get('dvd'), '%Y-%m-%d').tm_year
                            if fabs(release_year - year) > 1:
                                mismatch.append('the DVD release (%s)' % release_year)
                    if mismatch:
                        log.warning('Rotten Tomatoes had an imdb alias for %s but it didn\'t match %s.' %
                                    (imdb_id, ', or '.join(mismatch)))
                    else:
                        log.debug('imdb_id %s maps to rt_id %s, checking db for info.' % (imdb_id, result['id']))
                        movie = session.query(RottenTomatoesMovie).filter(
                            RottenTomatoesMovie.id == result.get('id')).first()
                        if movie:
                            log.debug('Movie %s was in database, but did not have the imdb_id stored, '
                                      'forcing an update' % movie)
                            movie = set_movie_details(movie, session, result)
                            session.merge(movie)
                        else:
                            log.debug('%s was not in database, setting info.' % result['title'])
                            movie = RottenTomatoesMovie()
                            movie = set_movie_details(movie, session, result)
                            if not movie:
                                raise PluginError('set_movie_details returned %s' % movie)
                            session.add(movie)
                else:
                    log.debug('IMDB alias %s returned no results.' % imdb_id)
            if not movie and rottentomatoes_id:
                result = movies_info(rottentomatoes_id)
                if result:
                    movie = RottenTomatoesMovie()
                    movie = set_movie_details(movie, session, result)
                    session.add(movie)
            if not movie and title:
                log.verbose('Searching from rt `%s`' % search_string)
                results = movies_search(search_string)
                if results:
                    results = results.get('movies')
                    if results:
                        # Score every hit by title similarity, best match first
                        for movie_res in results:
                            seq = difflib.SequenceMatcher(lambda x: x == ' ', movie_res['title'].lower(),
                                                          title.lower())
                            movie_res['match'] = seq.ratio()
                        results.sort(key=lambda x: x['match'], reverse=True)

                        # Remove all movies below MIN_MATCH, and different year
                        # (iterate over a copy because hits are removed from `results`)
                        for movie_res in results[:]:
                            if year and movie_res.get('year') and movie_res['year'] != year:
                                release_year = False
                                if movie_res.get('release_dates', {}).get('theater'):
                                    log.debug('Checking year against theater release date')
                                    release_year = time.strptime(movie_res['release_dates'].get('theater'),
                                                                 '%Y-%m-%d').tm_year
                                elif movie_res.get('release_dates', {}).get('dvd'):
                                    log.debug('Checking year against dvd release date')
                                    release_year = time.strptime(movie_res['release_dates'].get('dvd'),
                                                                 '%Y-%m-%d').tm_year
                                if not (release_year and release_year == year):
                                    log.debug('removing %s - %s (wrong year: %s)' %
                                              (movie_res['title'], movie_res['id'],
                                               str(release_year or movie_res['year'])))
                                    results.remove(movie_res)
                                    continue
                            if movie_res['match'] < MIN_MATCH:
                                log.debug('removing %s (min_match)' % movie_res['title'])
                                results.remove(movie_res)
                                continue

                        if not results:
                            raise PluginError('no appropiate results')

                        if len(results) == 1:
                            log.debug('SUCCESS: only one movie remains')
                        else:
                            # Check min difference between best two hits
                            diff = results[0]['match'] - results[1]['match']
                            if diff < MIN_DIFF:
                                log.debug('unable to determine correct movie, min_diff too small'
                                          '(`%s (%d) - %s` <-?-> `%s (%d) - %s`)' %
                                          (results[0]['title'], results[0]['year'], results[0]['id'],
                                           results[1]['title'], results[1]['year'], results[1]['id']))
                                for r in results:
                                    log.debug('remain: %s (match: %s) %s' % (r['title'], r['match'], r['id']))
                                raise PluginError('min_diff')

                        # Fetch full details for the best hit, falling back to the search result itself
                        imdb_alt_id = results[0].get('alternate_ids', {}).get('imdb')
                        if imdb_alt_id:
                            result = movies_alias(imdb_alt_id)
                        else:
                            result = movies_info(results[0].get('id'))

                        if not result:
                            result = results[0]

                        movie = RottenTomatoesMovie()
                        try:
                            movie = set_movie_details(movie, session, result)
                            if imdb_id and not any(alt_id.name == 'imdb' and alt_id.id == imdb_id.lstrip('t')
                                                   for alt_id in movie.alternate_ids):
                                log.warning('Adding flexget_imdb alternate id %s for movie %s' % (imdb_id, movie))
                                movie.alternate_ids.append(RottenTomatoesAlternateId('flexget_imdb',
                                                                                     imdb_id.lstrip('t')))
                            session.add(movie)
                            # Write now so a duplicate row surfaces as IntegrityError inside this try
                            session.commit()
                        except IntegrityError:
                            log.warning('Found movie %s in database after search even though we '
                                        'already looked, updating it with search result.' % movie)
                            session.rollback()
                            movie = session.query(RottenTomatoesMovie).filter(
                                RottenTomatoesMovie.id == result['id']).first()
                            movie = set_movie_details(movie, session, result)
                            if imdb_id and not any(alt_id.name == 'imdb' and alt_id.id == imdb_id.lstrip('t')
                                                   for alt_id in movie.alternate_ids):
                                log.warning('Adding flexget_imdb alternate id %s for movie %s' % (imdb_id, movie))
                                movie.alternate_ids.append(RottenTomatoesAlternateId('flexget_imdb',
                                                                                     imdb_id.lstrip('t')))
                            session.merge(movie)
                            session.commit()

                        # Remember which search string led to this movie when the titles differ
                        if title.lower() != movie.title.lower():
                            log.debug('Saving search result for \'%s\'' % search_string)
                            session.add(RottenTomatoesSearchResult(search=search_string, movie=movie))
        except URLError:
            raise PluginError('Error looking up movie from RottenTomatoes')

    if not movie:
        raise PluginError('No results found from rotten tomatoes for %s' % id_str())
    else:
        # Access attributes to force the relationships to eager load before we detach from session
        for attr in ['alternate_ids', 'cast', 'directors', 'genres', 'links', 'posters', 'release_dates']:
            getattr(movie, attr)
        session.commit()
        return movie
def consolidate():
    """
    Converts previous archive data model to new one.

    Each archive entry gets its legacy ``task`` registered as a source. Entries sharing the same
    title and url are folded into the first one encountered, and the redundant rows are deleted.
    Nothing is committed when an already migrated entry is found or the run is interrupted
    with CTRL-C.
    """
    # Resolve the long attribute chain once instead of on every use
    archive_db = flexget.components.archive.db
    entry_cls = archive_db.ArchiveEntry

    session = Session()
    try:
        logger.verbose('Checking archive size ...')
        count = session.query(entry_cls).count()
        logger.verbose('Found {} items to migrate, this can be aborted with CTRL-C safely.', count)

        # consolidate old data

        # ids of rows already merged into another entry; a set keeps the per-row
        # membership test constant-time instead of scanning a growing list
        duplicates = set()

        for orig in track(
            session.query(entry_cls).yield_per(5),
            total=count,
            description='Processing...',
        ):
            # item already processed
            if orig.id in duplicates:
                continue

            # item already migrated
            if orig.sources:
                logger.info(
                    'Database looks like it has already been consolidated, item {} has already sources ...',
                    orig.title,
                )
                session.rollback()
                return

            # add legacy task to the sources list
            orig.sources.append(archive_db.get_source(orig.task, session))
            # remove task, deprecated .. well, let's still keep it ..
            # orig.task = None

            # Every other row with the same title and url is a duplicate of this one
            same_item = (
                session.query(entry_cls)
                .filter(entry_cls.id != orig.id)
                .filter(entry_cls.title == orig.title)
                .filter(entry_cls.url == orig.url)
            )
            for dupe in same_item.all():
                orig.sources.append(archive_db.get_source(dupe.task, session))
                duplicates.add(dupe.id)

        if duplicates:
            logger.info('Consolidated {} items, removing duplicates ...', len(duplicates))
            # sorted() only makes the deletion order deterministic
            for duplicate_id in sorted(duplicates):
                session.query(entry_cls).filter(entry_cls.id == duplicate_id).delete()
        session.commit()
        logger.info('Completed! This does NOT need to be ran again.')
    except KeyboardInterrupt:
        session.rollback()
        logger.critical('Aborted, no changes saved')
    finally:
        session.close()
def lookup_movie(title=None, year=None, rottentomatoes_id=None, imdb_id=None, smart_match=None,
                 only_cached=False, session=None):
    """Do a lookup from Rotten Tomatoes for the movie matching the passed arguments.

    Any combination of criteria can be passed, the most specific criteria specified will be used.
    The local cache is consulted first (Rotten Tomatoes id, then imdb alternate id, then title/year,
    then saved search results). The online API is only queried to refresh an expired cache entry or
    when nothing was found in the cache.

    :param rottentomatoes_id: rottentomatoes_id of desired movie
    :param imdb_id: imdb_id of desired movie
    :param title: title of desired movie
    :param year: release year of desired movie
    :param smart_match: attempt to clean and parse title and year from a string; when given, the
        parsed values replace ``title`` and ``year``
    :param only_cached: if this is specified, an online lookup will not occur if the movie is not in the cache
    :param session: optionally specify a session to use, if specified, returned Movie will be live in that session
    :returns: The Movie object populated with data from Rotten Tomatoes
    :raises: PluginError if a match cannot be found or there are other problems with the lookup
    """

    if smart_match:
        # If smart_match was specified, parse it into a title and year
        title_parser = MovieParser()
        title_parser.parse(smart_match)
        title = title_parser.name
        year = title_parser.year
        if title == '' and not (rottentomatoes_id or imdb_id or title):
            raise PluginError('Failed to parse name from %s' % smart_match)

    if title:
        search_string = title.lower()
        if year:
            search_string = '%s %s' % (search_string, year)
    elif not (rottentomatoes_id or imdb_id):
        raise PluginError('No criteria specified for rotten tomatoes lookup')

    def id_str():
        return '<title=%s,year=%s,rottentomatoes_id=%s,imdb_id=%s>' % (title, year, rottentomatoes_id, imdb_id)

    if not session:
        session = Session()

    log.debug('Looking up rotten tomatoes information for %s' % id_str())

    # --- Cache lookup, most specific criteria first ---
    movie = None
    if rottentomatoes_id:
        movie = session.query(RottenTomatoesMovie).\
            filter(RottenTomatoesMovie.id == rottentomatoes_id).first()
    if not movie and imdb_id:
        alt_id = session.query(RottenTomatoesAlternateId).\
            filter(RottenTomatoesAlternateId.name.in_(['imdb', 'flexget_imdb'])).\
            filter(RottenTomatoesAlternateId.id == imdb_id.lstrip('t')).first()
        if alt_id:
            movie = session.query(RottenTomatoesMovie).filter(RottenTomatoesMovie.id == alt_id.movie_id).first()
    if not movie and title:
        movie_filter = session.query(RottenTomatoesMovie).filter(
            func.lower(RottenTomatoesMovie.title) == title.lower())
        if year:
            movie_filter = movie_filter.filter(RottenTomatoesMovie.year == year)
        movie = movie_filter.first()
        if not movie:
            # Fall back to results remembered from earlier online searches
            log.debug('No matches in movie cache found, checking search cache.')
            found = session.query(RottenTomatoesSearchResult).\
                filter(func.lower(RottenTomatoesSearchResult.search) == search_string).first()
            if found and found.movie:
                log.debug('Movie found in search cache.')
                movie = found.movie

    if movie:
        # Movie found in cache, check if cache has expired.
        if movie.expired and not only_cached:
            log.debug('Cache has expired for %s, attempting to refresh from Rotten Tomatoes.' % id_str())
            try:
                # First imdb-style alternate id, or None when the movie has none
                # (indexing the filtered list raised IndexError in that case)
                imdb_alt_id = next((alt_id.id for alt_id in (movie.alternate_ids or [])
                                    if alt_id.name in ['imdb', 'flexget_imdb']), None)
                if imdb_alt_id:
                    result = movies_alias(imdb_alt_id, 'imdb')
                else:
                    result = movies_info(movie.id)
                movie = set_movie_details(movie, session, result)
                session.merge(movie)
            except URLError:
                log.error('Error refreshing movie details from Rotten Tomatoes, cached info being used.')
        else:
            log.debug('Movie %s information restored from cache.' % id_str())
    else:
        if only_cached:
            raise PluginError('Movie %s not found from cache' % id_str())
        # There was no movie found in the cache, do a lookup from Rotten Tomatoes
        log.debug('Movie %s not found in cache, looking up from rotten tomatoes.' % id_str())
        try:
            # Lookups using imdb_id
            if imdb_id:
                log.debug('Using IMDB alias %s.' % imdb_id)
                result = movies_alias(imdb_id, 'imdb')
                if result:
                    # Collect every reason the alias result disagrees with the requested criteria
                    mismatch = []
                    if title and difflib.SequenceMatcher(lambda x: x == ' ',
                                                         re.sub(r'\s+\(.*\)$', '', result['title'].lower()),
                                                         title.lower()).ratio() < MIN_MATCH:
                        mismatch.append('the title (%s <-?-> %s)' % (title, result['title']))
                    # Normalise the year so it can be compared numerically
                    result['year'] = int(result['year'])
                    if year and fabs(result['year'] - year) > 1:
                        mismatch.append('the year (%s <-?-> %s)' % (year, result['year']))
                        release_year = None
                        if result.get('release_dates', {}).get('theater'):
                            log.debug('Checking year against theater release date')
                            release_year = time.strptime(result['release_dates'].get('theater'), '%Y-%m-%d').tm_year
                            if fabs(release_year - year) > 1:
                                mismatch.append('the theater release (%s)' % release_year)
                        elif result.get('release_dates', {}).get('dvd'):
                            log.debug('Checking year against dvd release date')
                            release_year = time.strptime(result['release_dates'].get('dvd'), '%Y-%m-%d').tm_year
                            if fabs(release_year - year) > 1:
                                mismatch.append('the DVD release (%s)' % release_year)
                    if mismatch:
                        log.warning('Rotten Tomatoes had an imdb alias for %s but it didn\'t match %s.' %
                                    (imdb_id, ', or '.join(mismatch)))
                    else:
                        log.debug('imdb_id %s maps to rt_id %s, checking db for info.' % (imdb_id, result['id']))
                        movie = session.query(RottenTomatoesMovie).filter(
                            RottenTomatoesMovie.id == result.get('id')).first()
                        if movie:
                            log.debug('Movie %s was in database, but did not have the imdb_id stored, '
                                      'forcing an update' % movie)
                            movie = set_movie_details(movie, session, result)
                            session.merge(movie)
                        else:
                            log.debug('%s was not in database, setting info.' % result['title'])
                            movie = RottenTomatoesMovie()
                            movie = set_movie_details(movie, session, result)
                            if not movie:
                                raise PluginError('set_movie_details returned %s' % movie)
                            session.add(movie)
                else:
                    log.debug('IMDB alias %s returned no results.' % imdb_id)
            if not movie and rottentomatoes_id:
                result = movies_info(rottentomatoes_id)
                if result:
                    movie = RottenTomatoesMovie()
                    movie = set_movie_details(movie, session, result)
                    session.add(movie)
            if not movie and title:
                log.verbose('Searching from rt `%s`' % search_string)
                results = movies_search(search_string)
                if results:
                    results = results.get('movies')
                    if results:
                        # Score every hit by title similarity, best match first
                        for movie_res in results:
                            seq = difflib.SequenceMatcher(lambda x: x == ' ', movie_res['title'].lower(),
                                                          title.lower())
                            movie_res['match'] = seq.ratio()
                        results.sort(key=lambda x: x['match'], reverse=True)

                        # Remove all movies below MIN_MATCH, and different year
                        # (iterate over a copy because hits are removed from `results`)
                        for movie_res in results[:]:
                            if year and movie_res.get('year'):
                                movie_res['year'] = int(movie_res['year'])
                                if movie_res['year'] != year:
                                    release_year = False
                                    if movie_res.get('release_dates', {}).get('theater'):
                                        log.debug('Checking year against theater release date')
                                        release_year = time.strptime(movie_res['release_dates'].get('theater'),
                                                                     '%Y-%m-%d').tm_year
                                    elif movie_res.get('release_dates', {}).get('dvd'):
                                        log.debug('Checking year against dvd release date')
                                        release_year = time.strptime(movie_res['release_dates'].get('dvd'),
                                                                     '%Y-%m-%d').tm_year
                                    if not (release_year and release_year == year):
                                        log.debug('removing %s - %s (wrong year: %s)' %
                                                  (movie_res['title'], movie_res['id'],
                                                   str(release_year or movie_res['year'])))
                                        results.remove(movie_res)
                                        continue
                            if movie_res['match'] < MIN_MATCH:
                                log.debug('removing %s (min_match)' % movie_res['title'])
                                results.remove(movie_res)
                                continue

                        if not results:
                            raise PluginError('no appropiate results')

                        if len(results) == 1:
                            log.debug('SUCCESS: only one movie remains')
                        else:
                            # Check min difference between best two hits
                            diff = results[0]['match'] - results[1]['match']
                            if diff < MIN_DIFF:
                                log.debug('unable to determine correct movie, min_diff too small'
                                          '(`%s (%d) - %s` <-?-> `%s (%d) - %s`)' %
                                          (results[0]['title'], results[0]['year'], results[0]['id'],
                                           results[1]['title'], results[1]['year'], results[1]['id']))
                                for r in results:
                                    log.debug('remain: %s (match: %s) %s' % (r['title'], r['match'], r['id']))
                                raise PluginError('min_diff')

                        # Fetch full details for the best hit, falling back to the search result itself
                        imdb_alt_id = results[0].get('alternate_ids', {}).get('imdb')
                        if imdb_alt_id:
                            result = movies_alias(imdb_alt_id)
                        else:
                            result = movies_info(results[0].get('id'))

                        if not result:
                            result = results[0]

                        movie = RottenTomatoesMovie()
                        try:
                            movie = set_movie_details(movie, session, result)
                            if imdb_id and not any(alt_id.name == 'imdb' and alt_id.id == imdb_id.lstrip('t')
                                                   for alt_id in movie.alternate_ids):
                                log.warning('Adding flexget_imdb alternate id %s for movie %s' % (imdb_id, movie))
                                movie.alternate_ids.append(RottenTomatoesAlternateId('flexget_imdb',
                                                                                     imdb_id.lstrip('t')))
                            session.add(movie)
                            # Write now so a duplicate row surfaces as IntegrityError inside this try
                            session.commit()
                        except IntegrityError:
                            log.warning('Found movie %s in database after search even though we '
                                        'already looked, updating it with search result.' % movie)
                            session.rollback()
                            movie = session.query(RottenTomatoesMovie).filter(
                                RottenTomatoesMovie.id == result['id']).first()
                            movie = set_movie_details(movie, session, result)
                            if imdb_id and not any(alt_id.name == 'imdb' and alt_id.id == imdb_id.lstrip('t')
                                                   for alt_id in movie.alternate_ids):
                                log.warning('Adding flexget_imdb alternate id %s for movie %s' % (imdb_id, movie))
                                movie.alternate_ids.append(RottenTomatoesAlternateId('flexget_imdb',
                                                                                     imdb_id.lstrip('t')))
                            session.merge(movie)
                            session.commit()

                        # Remember which search string led to this movie when the titles differ
                        if title.lower() != movie.title.lower():
                            log.debug('Saving search result for \'%s\'' % search_string)
                            session.add(RottenTomatoesSearchResult(search=search_string, movie=movie))
        except URLError:
            raise PluginError('Error looking up movie from RottenTomatoes')

    if not movie:
        raise PluginError('No results found from rotten tomatoes for %s' % id_str())
    else:
        # Access attributes to force the relationships to eager load before we detach from session
        for attr in ['alternate_ids', 'cast', 'directors', 'genres', 'links', 'posters', 'release_dates']:
            getattr(movie, attr)
        session.commit()
        return movie