def log_once(message, logger=logging.getLogger('log_once'), once_level=logging.INFO,
             suppressed_level=f_logger.VERBOSE):
    """
    Log message only once using given logger.
    Returns False if logging was suppressed.
    When suppressed, `suppressed_level` level is still logged.
    """
    # If there is no active manager, don't access the db
    from flexget.manager import manager
    if not manager:
        log.warning('DB not initialized. log_once will not work properly.')
        logger.log(once_level, message)
        return

    digest = hashlib.md5()
    digest.update(message.encode('latin1', 'replace'))  # ticket:250
    md5sum = digest.hexdigest()

    session = Session()
    try:
        # abort if this has already been logged
        if session.query(LogMessage).filter_by(md5sum=md5sum).first():
            logger.log(suppressed_level, message)
            return False

        row = LogMessage(md5sum)
        session.add(row)
        session.commit()
    finally:
        session.close()

    logger.log(once_level, message)
    return True
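# Hedged usage sketch for log_once(): the first occurrence of a message is logged
# at `once_level`, identical repeats only at `suppressed_level` until the
# LogMessage table is cleaned. The plugin hook and `log` below are illustrative,
# not taken from the function above.
log = logging.getLogger('myplugin')

def on_task_filter(task, config):
    for entry in task.entries:
        if not entry.get('imdb_id'):
            log_once('Entry `%s` is missing imdb_id' % entry['title'], log)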
def add_backlog(self, task, entry, amount=''):
    """Add single entry to task backlog

    If :amount: is not specified, entry will only be injected on next execution."""
    snapshot = entry.snapshots.get('after_input')
    if not snapshot:
        if task.current_phase != 'input':
            # Not having a snapshot is normal during input phase, don't display a warning
            log.warning('No input snapshot available for `%s`, using current state' % entry['title'])
        snapshot = entry
    session = Session()
    expire_time = datetime.now() + parse_timedelta(amount)
    backlog_entry = session.query(BacklogEntry).filter(BacklogEntry.title == entry['title']).\
        filter(BacklogEntry.task == task.name).first()
    if backlog_entry:
        # If there is already a backlog entry for this, update the expiry time if necessary.
        if backlog_entry.expire < expire_time:
            log.debug('Updating expiry time for %s' % entry['title'])
            backlog_entry.expire = expire_time
    else:
        log.debug('Saving %s' % entry['title'])
        backlog_entry = BacklogEntry()
        backlog_entry.title = entry['title']
        backlog_entry.entry = snapshot
        backlog_entry.task = task.name
        backlog_entry.expire = expire_time
        session.add(backlog_entry)
    session.commit()
def log_once(message, logger=logging.getLogger('log_once')):
    """
    Log message only once using given logger.
    Returns False if logging was suppressed.
    When suppressed, verbose level is still logged.
    """
    digest = hashlib.md5()
    digest.update(message.encode('latin1', 'replace'))  # ticket:250
    md5sum = digest.hexdigest()

    session = Session()
    try:
        # abort if this has already been logged
        if session.query(LogMessage).filter_by(md5sum=md5sum).first():
            logger.verbose(message)
            return False

        row = LogMessage(md5sum)
        session.add(row)
        session.commit()
    finally:
        session.close()

    logger.info(message)
    return True
def emit(self, record):
    session = Session()
    try:
        session.add(LogEntry(record))
        session.commit()
    finally:
        session.close()
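# Hedged sketch: an emit() like the one above belongs on a logging.Handler
# subclass that is then attached to a logger. `DBLogHandler` is a hypothetical
# name for illustration; the real class and its registration are not shown here.
class DBLogHandler(logging.Handler):
    def emit(self, record):
        session = Session()
        try:
            session.add(LogEntry(record))
            session.commit()
        finally:
            session.close()

handler = DBLogHandler()
handler.setLevel(logging.INFO)  # avoid flooding the table with DEBUG records
logging.getLogger().addHandler(handler)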
def set_version(plugin, version):
    if plugin not in plugin_schemas:
        raise ValueError('Tried to set schema version for %s plugin with no versioned_base.' % plugin)
    base_version = plugin_schemas[plugin]['version']
    if version != base_version:
        raise ValueError('Tried to set %s plugin schema version to %d when '
                         'it should be %d as defined in versioned_base.' % (plugin, version, base_version))
    session = Session()
    try:
        schema = session.query(PluginSchema).filter(PluginSchema.plugin == plugin).first()
        if not schema:
            log.debug('Initializing plugin %s schema version to %i' % (plugin, version))
            schema = PluginSchema(plugin, version)
            session.add(schema)
        else:
            if version < schema.version:
                raise ValueError('Tried to set plugin %s schema version to lower value' % plugin)
            if version != schema.version:
                log.debug('Updating plugin %s schema version to %i' % (plugin, version))
                schema.version = version
        session.commit()
    finally:
        session.close()
def queue_add(self, title=None, imdb_id=None, quality='ANY', force=True):
    """Add an item to the queue with the specified quality"""
    if not title or not imdb_id:
        # We don't have all the info we need to add movie, do a lookup for more info
        result = self.parse_what(imdb_id or title)
        title = result['title']
        imdb_id = result['imdb_id']
    quality = self.validate_quality(quality)

    session = Session()
    # check if the item is already queued
    item = session.query(QueuedMovie).filter(QueuedMovie.imdb_id == imdb_id).first()
    if not item:
        # TODO: fix
        item = QueuedMovie(imdb_id=imdb_id, quality=quality, immortal=force, title=title)
        session.add(item)
        session.commit()
        session.close()
        return {'title': title, 'imdb_id': imdb_id, 'quality': quality, 'force': force}
    else:
        raise QueueError('ERROR: %s is already in the queue' % title)
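# Hedged usage sketch for queue_add(): a duplicate raises QueueError instead of
# returning a status, so callers are expected to catch it. The `queue` object
# here is illustrative.
try:
    info = queue.queue_add(imdb_id='tt0088763', quality='720p')
    console('Added %s to queue with quality %s' % (info['title'], info['quality']))
except QueueError as e:
    console('ERROR: %s' % e)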
def get_login_cookies(self, username, password):
    url_auth = 'http://www.t411.me/users/login'
    db_session = Session()
    account = db_session.query(torrent411Account).filter(
        torrent411Account.username == username).first()
    if account:
        if account.expiry_time < datetime.now():
            db_session.delete(account)
            db_session.commit()
        log.debug("Cookies found in db!")
        return account.auth
    else:
        log.debug("Getting login cookies from : %s " % url_auth)
        params = urllib.urlencode({'login': username,
                                   'password': password,
                                   'remember': '1'})
        cj = cookielib.CookieJar()
        # WE NEED A COOKIE HOOK HERE TO AVOID REDIRECT COOKIES
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
        # NEED TO BE SAME USER_AGENT THAN DOWNLOAD LINK
        opener.addheaders = [('User-agent', self.USER_AGENT)]
        try:
            opener.open(url_auth, params)
        except Exception as e:
            raise UrlRewritingError("Connection Error for %s : %s" % (url_auth, e))

        authKey = None
        uid = None
        password = None
        for cookie in cj:
            if cookie.name == "authKey":
                authKey = cookie.value
            if cookie.name == "uid":
                uid = cookie.value
            if cookie.name == "pass":
                password = cookie.value

        if authKey is not None and \
           uid is not None and \
           password is not None:
            authCookie = {'uid': uid,
                          'password': password,
                          'authKey': authKey}
            db_session.add(torrent411Account(username=username,
                                             auth=authCookie,
                                             expiry_time=datetime.now() + timedelta(days=1)))
            db_session.commit()
            return authCookie

        return {"uid": "", "password": "", "authKey": ""}
def get_login_cookies(self, username, password):
    url_auth = 'http://www.t411.li/users/login'
    db_session = Session()
    account = db_session.query(torrent411Account).filter(
        torrent411Account.username == username).first()
    if account:
        if account.expiry_time < datetime.now():
            db_session.delete(account)
            db_session.commit()
        log.debug("Cookies found in db!")
        return account.auth
    else:
        log.debug("Getting login cookies from : %s " % url_auth)
        params = {'login': username, 'password': password, 'remember': '1'}
        cj = http.cookiejar.CookieJar()
        # WE NEED A COOKIE HOOK HERE TO AVOID REDIRECT COOKIES
        opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
        # NEED TO BE SAME USER_AGENT THAN DOWNLOAD LINK
        opener.addheaders = [('User-agent', self.USER_AGENT)]
        login_output = None
        try:
            # POST data must be bytes on Python 3
            login_output = opener.open(url_auth, urllib.parse.urlencode(params).encode('utf-8')).read()
        except Exception as e:
            raise UrlRewritingError("Connection Error for %s : %s" % (url_auth, e))

        if b'confirmer le captcha' in login_output:
            log.warning("Captcha requested for login.")
            login_output = self._solveCaptcha(login_output, url_auth, params, opener)

        if b'logout' in login_output:
            authKey = None
            uid = None
            password = None
            for cookie in cj:
                if cookie.name == "authKey":
                    authKey = cookie.value
                if cookie.name == "uid":
                    uid = cookie.value
                if cookie.name == "pass":
                    password = cookie.value

            if authKey is not None and \
               uid is not None and \
               password is not None:
                authCookie = {'uid': uid,
                              'password': password,
                              'authKey': authKey}
                db_session.add(torrent411Account(username=username,
                                                 auth=authCookie,
                                                 expiry_time=datetime.now() + timedelta(days=1)))
                db_session.commit()
                return authCookie
        else:
            log.error("Login failed (Torrent411). Check your login and password.")
            return {}
def _set_db_last_run(self):
    session = Session()
    try:
        db_trigger = session.query(DBTrigger).get(self.uid)
        if not db_trigger:
            db_trigger = DBTrigger(self.uid)
            session.add(db_trigger)
        db_trigger.last_run = self.last_run
        session.commit()
    finally:
        session.close()
    log.debug('recorded last_run to the database')
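# Hedged sketch of the read side that a _set_db_last_run() like the one above
# implies: restore last_run from the DBTrigger row on startup. The method name
# is an assumption for illustration, not taken from the snippet.
def _load_db_last_run(self):
    session = Session()
    try:
        db_trigger = session.query(DBTrigger).get(self.uid)
        if db_trigger:
            self.last_run = db_trigger.last_run
    finally:
        session.close()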
def on_process_start(self, feed):
    if not feed.manager.options.seen:
        return

    feed.manager.disable_feeds()

    session = Session()
    se = SeenEntry(u'--seen', unicode(feed.name))
    sf = SeenField(u'--seen', unicode(feed.manager.options.seen))
    se.fields.append(sf)
    session.add(se)
    session.commit()

    log.info('Added %s as seen. This will affect all feeds.' % feed.manager.options.seen)
def seen_add(options):
    seen_name = options.add_value
    if is_imdb_url(seen_name):
        imdb_id = extract_id(seen_name)
        if imdb_id:
            seen_name = imdb_id

    session = Session()
    se = SeenEntry(seen_name, 'cli_seen')
    sf = SeenField('cli_seen', seen_name)
    se.fields.append(sf)
    session.add(se)
    session.commit()

    console('Added %s as seen. This will affect all tasks.' % seen_name)
def on_task_start(self, task, config):
    if not config:
        return
    config = self.prepare_config(config)

    current_version = get_current_flexget_version()

    if config.get("check_for_dev_version") is False and current_version.endswith("dev"):
        log.debug("dev version detected, skipping check")
        return

    if config.get("lookup") == "always":
        always_check = True
    else:
        always_check = False
    interval = config.get("interval")

    session = Session()
    last_check = session.query(LastVersionCheck).first()
    if not always_check:
        if last_check:
            time_dif = datetime.now() - last_check.last_check_time
            should_poll = time_dif.days > interval
        else:
            should_poll = True

        if not should_poll:
            log.debug("version check interval not met, skipping check")
            return

    latest_version = get_latest_flexget_version_number()
    if not latest_version:
        log.warning("Could not get latest version of flexget")
        return
    elif latest_version != current_version:
        log.warning(
            "You are not running latest Flexget Version. Current is %s and latest is %s",
            current_version,
            latest_version,
        )
    if last_check:
        log.debug("updating last check time")
        last_check.update()
    else:
        last_check = LastVersionCheck()
        log.debug("creating instance of last version check in DB")
        session.add(last_check)
def on_task_start(self, task, config):
    if not config:
        return
    config = self.prepare_config(config)

    current_version = get_current_flexget_version()

    if config.get('check_for_dev_version') is False and current_version.endswith('dev'):
        log.debug('dev version detected, skipping check')
        return

    always_check = bool(config.get('lookup') == 'always')
    interval = config.get('interval')

    session = Session()
    last_check = session.query(LastVersionCheck).first()
    if not always_check:
        if last_check:
            time_dif = datetime.now() - last_check.last_check_time
            should_poll = time_dif.days > interval
        else:
            should_poll = True

        if not should_poll:
            log.debug('version check interval not met, skipping check')
            return

    latest_version = get_latest_flexget_version_number()
    if not latest_version:
        log.warning('Could not get latest version of flexget')
        return
    elif latest_version != current_version:
        log.warning(
            'You are not running latest Flexget Version. Current is %s and latest is %s',
            current_version,
            latest_version,
        )
    if last_check:
        log.debug('updating last check time')
        last_check.update()
    else:
        last_check = LastVersionCheck()
        log.debug('creating instance of last version check in DB')
        session.add(last_check)
def on_process_start(self, task):
    if not task.manager.options.seen:
        return

    task.manager.disable_tasks()

    seen_name = task.manager.options.seen
    if is_imdb_url(seen_name):
        imdb_id = extract_id(seen_name)
        if imdb_id:
            seen_name = imdb_id

    session = Session()
    se = SeenEntry(u'--seen', unicode(task.name))
    sf = SeenField(u'--seen', seen_name)
    se.fields.append(sf)
    session.add(se)
    session.commit()

    log.info('Added %s as seen. This will affect all tasks.' % seen_name)
def begin(self, manager, options):
    series_name = options.series_name
    ep_id = options.episode_id
    session = Session()
    series = session.query(Series).filter(Series.name == series_name).first()
    if not series:
        console('Series not yet in database, adding `%s`' % series_name)
        series = Series()
        series.name = series_name
        session.add(series)
    try:
        set_series_begin(series, ep_id)
    except ValueError as e:
        console(e)
    else:
        console('Episodes for `%s` will be accepted starting with `%s`' % (series.name, ep_id))
        session.commit()
    finally:
        session.close()
    manager.config_changed()
def set_version(plugin, version):
    if plugin not in plugin_schemas:
        raise ValueError('Tried to set schema version for %s plugin with no versioned_base.' % plugin)
    if version != plugin_schemas[plugin]['version']:
        raise ValueError('Tried to set %s plugin schema version not equal to that defined in versioned_base.' % plugin)
    session = Session()
    try:
        schema = session.query(PluginSchema).filter(PluginSchema.plugin == plugin).first()
        if not schema:
            log.debug('Initializing plugin %s schema version to %i' % (plugin, version))
            schema = PluginSchema(plugin, version)
            session.add(schema)
        else:
            if version < schema.version:
                raise ValueError('Tried to set plugin %s schema version to lower value' % plugin)
            if version != schema.version:
                log.debug('Updating plugin %s schema version to %i' % (plugin, version))
                schema.version = version
        session.commit()
    finally:
        session.close()
def begin(manager, options):
    series_name = options.series_name
    ep_id = options.episode_id
    session = Session()
    try:
        series = session.query(Series).filter(Series.name == series_name).first()
        if not series:
            console('Series not yet in database, adding `%s`' % series_name)
            series = Series()
            series.name = series_name
            session.add(series)
        try:
            set_series_begin(series, ep_id)
        except ValueError as e:
            console(e)
        else:
            console('Episodes for `%s` will be accepted starting with `%s`' % (series.name, ep_id))
            session.commit()
    finally:
        session.close()
    manager.config_changed()
def on_process_start(self, feed, config):
    """Purge remembered entries if the config has changed and write new hash"""
    # No session on process start, make our own
    session = Session()
    # Delete expired items
    session.query(RememberEntry).filter(RememberEntry.expires < datetime.now()).delete()
    # Generate hash for current config
    config_hash = hashlib.md5(str(feed.config.items())).hexdigest()
    # See if the feed has the same hash as last run
    old_feed = session.query(RememberFeed).filter(RememberFeed.name == feed.name).first()
    if old_feed and (old_feed.hash != config_hash or feed.manager.options.forget_rejected):
        if feed.manager.options.forget_rejected:
            log.info('Forgetting previous rejections.')
        else:
            log.verbose('Config has changed since last run, purging remembered entries.')
        session.delete(old_feed)
        old_feed = None
    if not old_feed:
        # Create this feed in the db if not present
        session.add(RememberFeed(name=feed.name, hash=config_hash))
    session.commit()
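# Hedged illustration of the change-detection idea above: the config is hashed,
# and a differing hash on the next run triggers a purge. Note str(dict.items())
# is order-sensitive, so a semantically identical config can still produce a new
# hash; the Task.execute version later in this file sorts the items first, as
# this sketch does.
import hashlib

def config_digest(config):
    return hashlib.md5(str(sorted(config.items()))).hexdigest()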
def series_begin(manager):
    if not manager.options.series_begin:
        return
    manager.disable_tasks()
    series_name, ep_id = manager.options.series_begin
    session = Session()
    series = session.query(Series).filter(Series.name == series_name).first()
    if not series:
        console('Series not yet in database, adding `%s`' % series_name)
        series = Series()
        series.name = series_name
        session.add(series)
    try:
        set_series_begin(series, ep_id)
    except ValueError as e:
        console(e)
    else:
        console('Episodes for `%s` will be accepted starting with `%s`' % (series.name, ep_id))
        session.commit()
    finally:
        session.close()
def notify(self, title, message, config):
    session = Session()
    access_token = self._real_init(session, config)
    failure_message = self._get_failure_message(session, config)
    all_messages = failure_message + message
    if access_token:
        try:
            self._send_msgs(all_messages, access_token)
        except Exception as e:
            entry = MessageEntry(content=all_messages, failure_time=datetime.now())
            session.add(entry)
            session.commit()
            raise PluginError(str(e))
        if self.image:
            self._send_images(access_token)
def migrate(self, feed):
    """Migrates 0.9 session data into new database"""
    session = Session()
    try:
        shelve = feed.manager.shelve_session
        count = 0
        log.info('If this crashes, you can\'t migrate 0.9 data to 1.0 ... sorry')
        for name, data in shelve.iteritems():
            if 'seen' not in data:
                continue
            seen = data['seen']
            for k, v in seen.iteritems():
                se = SeenEntry(u'N/A', seen.feed, u'migrated')
                se.fields.append(SeenField(u'unknown', k))
                session.add(se)
                count += 1
        session.commit()
        log.info('It worked! Migrated %s seen items' % count)
    except Exception:
        log.critical('It crashed :(')
    finally:
        session.close()
def lookup(self, entry, search_allowed=True):
    """
    Perform imdb lookup for entry.

    :param entry: Entry instance
    :param search_allowed: Allow fallback to search
    :raises PluginError: Failure reason
    """
    from flexget.manager import manager

    if entry.get("imdb_url", eval_lazy=False):
        log.debug("No title passed. Lookup for %s" % entry["imdb_url"])
    elif entry.get("imdb_id", eval_lazy=False):
        log.debug("No title passed. Lookup for %s" % entry["imdb_id"])
    elif entry.get("title", eval_lazy=False):
        log.debug("lookup for %s" % entry["title"])
    else:
        raise PluginError("looking up IMDB for entry failed, no title, imdb_url or imdb_id passed.")

    take_a_break = False
    session = Session()

    try:
        # entry sanity checks
        for field in ["imdb_votes", "imdb_score"]:
            if entry.get(field, eval_lazy=False):
                value = entry[field]
                if not isinstance(value, (int, float)):
                    raise PluginError("Entry field %s should be a number!" % field)

        # if imdb_id is included, build the url.
        if entry.get("imdb_id", eval_lazy=False) and not entry.get("imdb_url", eval_lazy=False):
            entry["imdb_url"] = make_url(entry["imdb_id"])

        # make sure imdb url is valid
        if entry.get("imdb_url", eval_lazy=False):
            imdb_id = extract_id(entry["imdb_url"])
            if imdb_id:
                entry["imdb_url"] = make_url(imdb_id)
            else:
                log.debug("imdb url %s is invalid, removing it" % entry["imdb_url"])
                del entry["imdb_url"]

        # no imdb_url, check if there is cached result for it or if the
        # search is known to fail
        if not entry.get("imdb_url", eval_lazy=False):
            result = session.query(SearchResult).filter(SearchResult.title == entry["title"]).first()
            if result:
                if result.fails and not manager.options.retry:
                    # this movie cannot be found, not worth trying again ...
                    log.debug("%s will fail lookup" % entry["title"])
                    raise PluginError("Title `%s` lookup fails" % entry["title"])
                else:
                    if result.url:
                        log.trace("Setting imdb url for %s from db" % entry["title"])
                        entry["imdb_url"] = result.url

        # no imdb url, but information required, try searching
        if not entry.get("imdb_url", eval_lazy=False) and search_allowed:
            log.verbose("Searching from imdb `%s`" % entry["title"])
            take_a_break = True
            search = ImdbSearch()
            search_result = search.smart_match(entry["title"])
            if search_result:
                entry["imdb_url"] = search_result["url"]
                # store url for this movie, so we don't have to search on
                # every run
                result = SearchResult(entry["title"], entry["imdb_url"])
                session.add(result)
                log.verbose("Found %s" % (entry["imdb_url"]))
            else:
                log_once("Imdb lookup failed for %s" % entry["title"], log)
                # store FAIL for this title
                result = SearchResult(entry["title"])
                result.fails = True
                session.add(result)
                raise PluginError("Title `%s` lookup failed" % entry["title"])

        # check if this imdb page has been parsed & cached
        movie = (
            session.query(Movie)
            .options(
                joinedload_all(Movie.genres),
                joinedload_all(Movie.languages),
                joinedload_all(Movie.actors),
                joinedload_all(Movie.directors),
            )
            .filter(Movie.url == entry["imdb_url"])
            .first()
        )

        # determine whether or not movie details needs to be parsed
        req_parse = False
        if not movie:
            req_parse = True
        elif movie.expired:
            req_parse = True

        if req_parse:
            if movie is not None:
                if movie.expired:
                    log.verbose("Movie `%s` details expired, refreshing ..." % movie.title)
                # Remove the old movie, we'll store another one later.
                session.query(Movie).filter(Movie.url == entry["imdb_url"]).delete()

            # search and store to cache
            if "title" in entry:
                log.verbose("Parsing imdb for `%s`" % entry["title"])
            else:
                log.verbose("Parsing imdb for `%s`" % entry["imdb_id"])
            try:
                take_a_break = True
                movie = self._parse_new_movie(entry["imdb_url"], session)
            except UnicodeDecodeError:
                log.error(
                    "Unable to determine encoding for %s. Installing chardet library may help."
                    % entry["imdb_url"]
                )
                # store cache so this will not be tried again
                movie = Movie()
                movie.url = entry["imdb_url"]
                session.add(movie)
                raise PluginError("UnicodeDecodeError")
            except ValueError as e:
                # TODO: might be a little too broad catch, what was this for anyway? ;P
                if manager.options.debug:
                    log.exception(e)
                raise PluginError("Invalid parameter: %s" % entry["imdb_url"], log)

        for att in ["title", "score", "votes", "year", "genres", "languages", "actors", "directors", "mpaa_rating"]:
            log.trace("movie.%s: %s" % (att, getattr(movie, att)))

        # store to entry
        entry.update_using_map(self.field_map, movie)
    finally:
        log.trace("committing session")
        session.commit()

    # give imdb a little break between requests (see: http://flexget.com/ticket/129#comment:1)
    if take_a_break and not manager.options.debug and not manager.unit_test:
        import time
        time.sleep(3)
def lookup(self, entry, search_allowed=True):
    """
    Perform imdb lookup for entry.

    :param entry: Entry instance
    :param search_allowed: Allow fallback to search
    :raises PluginError: Failure reason
    """
    from flexget.manager import manager

    if entry.get('imdb_id', eval_lazy=False):
        log.debug('No title passed. Lookup for %s' % entry['imdb_id'])
    elif entry.get('imdb_url', eval_lazy=False):
        log.debug('No title passed. Lookup for %s' % entry['imdb_url'])
    elif entry.get('title', eval_lazy=False):
        log.debug('lookup for %s' % entry['title'])
    else:
        raise plugin.PluginError('looking up IMDB for entry failed, no title, imdb_url or imdb_id passed.')

    session = Session()

    try:
        # entry sanity checks
        for field in ['imdb_votes', 'imdb_score']:
            if entry.get(field, eval_lazy=False):
                value = entry[field]
                if not isinstance(value, (int, float)):
                    raise plugin.PluginError('Entry field %s should be a number!' % field)

        # if imdb_id is included, build the url.
        if entry.get('imdb_id', eval_lazy=False) and not entry.get('imdb_url', eval_lazy=False):
            entry['imdb_url'] = make_url(entry['imdb_id'])

        # make sure imdb url is valid
        if entry.get('imdb_url', eval_lazy=False):
            imdb_id = extract_id(entry['imdb_url'])
            if imdb_id:
                entry['imdb_url'] = make_url(imdb_id)
            else:
                log.debug('imdb url %s is invalid, removing it' % entry['imdb_url'])
                del entry['imdb_url']

        # no imdb_url, check if there is cached result for it or if the
        # search is known to fail
        if not entry.get('imdb_url', eval_lazy=False):
            result = session.query(SearchResult).\
                filter(SearchResult.title == entry['title']).first()
            if result:
                # TODO: 1.2 this should really be checking task.options.retry
                if result.fails and not manager.options.execute.retry:
                    # this movie cannot be found, not worth trying again ...
                    log.debug('%s will fail lookup' % entry['title'])
                    raise plugin.PluginError('IMDB lookup failed for %s' % entry['title'])
                else:
                    if result.url:
                        log.trace('Setting imdb url for %s from db' % entry['title'])
                        entry['imdb_url'] = result.url

        # no imdb url, but information required, try searching
        if not entry.get('imdb_url', eval_lazy=False) and search_allowed:
            log.verbose('Searching from imdb `%s`' % entry['title'])
            search = ImdbSearch()
            search_name = entry.get('movie_name', entry['title'], eval_lazy=False)
            search_result = search.smart_match(search_name)
            if search_result:
                entry['imdb_url'] = search_result['url']
                # store url for this movie, so we don't have to search on
                # every run
                result = SearchResult(entry['title'], entry['imdb_url'])
                session.add(result)
                log.verbose('Found %s' % (entry['imdb_url']))
            else:
                log_once('IMDB lookup failed for %s' % entry['title'], log, logging.WARN)
                # store FAIL for this title
                result = SearchResult(entry['title'])
                result.fails = True
                session.add(result)
                raise plugin.PluginError('Title `%s` lookup failed' % entry['title'])

        # check if this imdb page has been parsed & cached
        movie = session.query(Movie).filter(Movie.url == entry['imdb_url']).first()

        # determine whether or not movie details needs to be parsed
        req_parse = False
        if not movie:
            req_parse = True
        elif movie.expired:
            req_parse = True

        if req_parse:
            if movie is not None:
                if movie.expired:
                    log.verbose('Movie `%s` details expired, refreshing ...' % movie.title)
                # Remove the old movie, we'll store another one later.
                session.query(MovieLanguage).filter(MovieLanguage.movie_id == movie.id).delete()
                session.query(Movie).filter(Movie.url == entry['imdb_url']).delete()

            # search and store to cache
            if 'title' in entry:
                log.verbose('Parsing imdb for `%s`' % entry['title'])
            else:
                log.verbose('Parsing imdb for `%s`' % entry['imdb_id'])
            try:
                movie = self._parse_new_movie(entry['imdb_url'], session)
            except UnicodeDecodeError:
                log.error('Unable to determine encoding for %s. Installing chardet library may help.'
                          % entry['imdb_url'])
                # store cache so this will not be tried again
                movie = Movie()
                movie.url = entry['imdb_url']
                session.add(movie)
                raise plugin.PluginError('UnicodeDecodeError')
            except ValueError as e:
                # TODO: might be a little too broad catch, what was this for anyway? ;P
                if manager.options.debug:
                    log.exception(e)
                raise plugin.PluginError('Invalid parameter: %s' % entry['imdb_url'], log)

        for att in ['title', 'score', 'votes', 'year', 'genres', 'languages', 'actors', 'directors', 'mpaa_rating']:
            log.trace('movie.%s: %s' % (att, getattr(movie, att)))

        # store to entry
        entry.update_using_map(self.field_map, movie)
    finally:
        log.trace('committing session')
        session.commit()
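# Hedged usage sketch: lookup() is typically driven from a task phase so that
# entries gain their imdb_* fields. The hook name and scaffolding below are
# illustrative rather than taken from the code above.
def on_task_metainfo(self, task, config):
    for entry in task.entries:
        try:
            self.lookup(entry)
        except plugin.PluginError as e:
            log_once(str(e), log)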
def get_login_cookies(self, username, password):
    url_auth = 'http://www.t411.in/users/login'
    db_session = Session()
    account = db_session.query(torrent411Account).filter(
        torrent411Account.username == username).first()
    if account:
        if account.expiry_time < datetime.now():
            db_session.delete(account)
            db_session.commit()
        log.debug("Cookies found in db!")
        return account.auth
    else:
        log.debug("Getting login cookies from : %s " % url_auth)
        params = {'login': username, 'password': password, 'remember': '1'}
        cj = cookielib.CookieJar()
        # WE NEED A COOKIE HOOK HERE TO AVOID REDIRECT COOKIES
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
        # NEED TO BE SAME USER_AGENT THAN DOWNLOAD LINK
        opener.addheaders = [('User-agent', self.USER_AGENT)]
        login_output = None
        try:
            login_output = opener.open(url_auth, urllib.urlencode(params)).read()
        except Exception as e:
            raise UrlRewritingError("Connection Error for %s : %s" % (url_auth, e))

        if b'confirmer le captcha' in login_output:
            log.warn("Captcha requested for login.")
            login_output = self._solveCaptcha(login_output, url_auth, params, opener)

        if b'logout' in login_output:
            authKey = None
            uid = None
            password = None
            for cookie in cj:
                if cookie.name == "authKey":
                    authKey = cookie.value
                if cookie.name == "uid":
                    uid = cookie.value
                if cookie.name == "pass":
                    password = cookie.value

            if authKey is not None and \
               uid is not None and \
               password is not None:
                authCookie = {'uid': uid,
                              'password': password,
                              'authKey': authKey}
                db_session.add(torrent411Account(username=username,
                                                 auth=authCookie,
                                                 expiry_time=datetime.now() + timedelta(days=1)))
                db_session.commit()
                return authCookie
        else:
            log.error("Login failed (Torrent411). Check your login and password.")
            return {}
def lookup_movie(
    title=None, year=None, rottentomatoes_id=None, imdb_id=None, smart_match=None, only_cached=False, session=None
):
    """Do a lookup from Rotten Tomatoes for the movie matching the passed arguments.

    Any combination of criteria can be passed, the most specific criteria specified will be used.

    :param rottentomatoes_id: rottentomatoes_id of desired movie
    :param imdb_id: imdb_id of desired movie
    :param title: title of desired movie
    :param year: release year of desired movie
    :param smart_match: attempt to clean and parse title and year from a string
    :param only_cached: if this is specified, an online lookup will not occur if the movie is not in the cache
    :param session: optionally specify a session to use, if specified, returned Movie will be live in that session
    :returns: The Movie object populated with data from Rotten Tomatoes
    :raises: PluginError if a match cannot be found or there are other problems with the lookup
    """
    if smart_match:
        # If smart_match was specified, and we don't have more specific criteria, parse it into a title and year
        title_parser = MovieParser()
        title_parser.parse(smart_match)
        title = title_parser.name
        year = title_parser.year
        if title == "" and not (rottentomatoes_id or imdb_id or title):
            raise PluginError("Failed to parse name from %s" % smart_match)

    if title:
        search_string = title.lower()
        if year:
            search_string = "%s %s" % (search_string, year)
    elif not (rottentomatoes_id or imdb_id):
        raise PluginError("No criteria specified for rotten tomatoes lookup")

    def id_str():
        return "<title=%s,year=%s,rottentomatoes_id=%s,imdb_id=%s>" % (title, year, rottentomatoes_id, imdb_id)

    if not session:
        session = Session()

    log.debug("Looking up rotten tomatoes information for %s" % id_str())

    movie = None

    if rottentomatoes_id:
        movie = session.query(RottenTomatoesMovie).filter(RottenTomatoesMovie.id == rottentomatoes_id).first()
    if not movie and imdb_id:
        alt_id = (
            session.query(RottenTomatoesAlternateId)
            .filter(RottenTomatoesAlternateId.name.in_(["imdb", "flexget_imdb"]))
            .filter(RottenTomatoesAlternateId.id == imdb_id.lstrip("t"))
            .first()
        )
        if alt_id:
            movie = session.query(RottenTomatoesMovie).filter(RottenTomatoesMovie.id == alt_id.movie_id).first()
    if not movie and title:
        movie_filter = session.query(RottenTomatoesMovie).filter(
            func.lower(RottenTomatoesMovie.title) == title.lower()
        )
        if year:
            movie_filter = movie_filter.filter(RottenTomatoesMovie.year == year)
        movie = movie_filter.first()
        if not movie:
            log.debug("No matches in movie cache found, checking search cache.")
            found = (
                session.query(RottenTomatoesSearchResult)
                .filter(func.lower(RottenTomatoesSearchResult.search) == search_string)
                .first()
            )
            if found and found.movie:
                log.debug("Movie found in search cache.")
                movie = found.movie

    if movie:
        # Movie found in cache, check if cache has expired.
        if movie.expired and not only_cached:
            log.debug("Cache has expired for %s, attempting to refresh from Rotten Tomatoes." % id_str())
            try:
                imdb_alt_id = (
                    movie.alternate_ids
                    and filter(lambda alt_id: alt_id.name in ["imdb", "flexget_imdb"], movie.alternate_ids)[0].id
                )
                if imdb_alt_id:
                    result = movies_alias(imdb_alt_id, "imdb")
                else:
                    result = movies_info(movie.id)
                movie = set_movie_details(movie, session, result)
                session.merge(movie)
            except URLError:
                log.error("Error refreshing movie details from Rotten Tomatoes, cached info being used.")
        else:
            log.debug("Movie %s information restored from cache." % id_str())
    else:
        if only_cached:
            raise PluginError("Movie %s not found from cache" % id_str())
        # There was no movie found in the cache, do a lookup from Rotten Tomatoes
        log.debug("Movie %s not found in cache, looking up from rotten tomatoes." % id_str())
        try:
            # Lookups using imdb_id
            if imdb_id:
                log.debug("Using IMDB alias %s." % imdb_id)
                result = movies_alias(imdb_id, "imdb")
                if result:
                    mismatch = []
                    if (
                        title
                        and difflib.SequenceMatcher(
                            lambda x: x == " ", re.sub("\s+\(.*\)$", "", result["title"].lower()), title.lower()
                        ).ratio()
                        < MIN_MATCH
                    ):
                        mismatch.append("the title (%s <-?-> %s)" % (title, result["title"]))
                    result["year"] = int(result["year"])
                    if year and fabs(result["year"] - year) > 1:
                        mismatch.append("the year (%s <-?-> %s)" % (year, result["year"]))
                        release_year = None
                        if result.get("release_dates", {}).get("theater"):
                            log.debug("Checking year against theater release date")
                            release_year = time.strptime(result["release_dates"].get("theater"), "%Y-%m-%d").tm_year
                            if fabs(release_year - year) > 1:
                                mismatch.append("the theater release (%s)" % release_year)
                        elif result.get("release_dates", {}).get("dvd"):
                            log.debug("Checking year against dvd release date")
                            release_year = time.strptime(result["release_dates"].get("dvd"), "%Y-%m-%d").tm_year
                            if fabs(release_year - year) > 1:
                                mismatch.append("the DVD release (%s)" % release_year)
                    if mismatch:
                        log.warning(
                            "Rotten Tomatoes had an imdb alias for %s but it didn't match %s."
                            % (imdb_id, ", or ".join(mismatch))
                        )
                    else:
                        log.debug("imdb_id %s maps to rt_id %s, checking db for info." % (imdb_id, result["id"]))
                        movie = (
                            session.query(RottenTomatoesMovie)
                            .filter(RottenTomatoesMovie.id == result.get("id"))
                            .first()
                        )
                        if movie:
                            log.debug(
                                "Movie %s was in database, but did not have the imdb_id stored, "
                                "forcing an update" % movie
                            )
                            movie = set_movie_details(movie, session, result)
                            session.merge(movie)
                        else:
                            log.debug("%s was not in database, setting info." % result["title"])
                            movie = RottenTomatoesMovie()
                            movie = set_movie_details(movie, session, result)
                            if not movie:
                                raise PluginError("set_movie_details returned %s" % movie)
                            session.add(movie)
                else:
                    log.debug("IMDB alias %s returned no results." % imdb_id)

            if not movie and rottentomatoes_id:
                result = movies_info(rottentomatoes_id)
                if result:
                    movie = RottenTomatoesMovie()
                    movie = set_movie_details(movie, session, result)
                    session.add(movie)

            if not movie and title:
                log.verbose("Searching from rt `%s`" % search_string)
                results = movies_search(search_string)
                if results:
                    results = results.get("movies")
                    if results:
                        for movie_res in results:
                            seq = difflib.SequenceMatcher(
                                lambda x: x == " ", movie_res["title"].lower(), title.lower()
                            )
                            movie_res["match"] = seq.ratio()
                        results.sort(key=lambda x: x["match"], reverse=True)

                        # Remove all movies below MIN_MATCH, and different year
                        for movie_res in results[:]:
                            if year and movie_res.get("year"):
                                movie_res["year"] = int(movie_res["year"])
                                if movie_res["year"] != year:
                                    release_year = False
                                    if movie_res.get("release_dates", {}).get("theater"):
                                        log.debug("Checking year against theater release date")
                                        release_year = time.strptime(
                                            movie_res["release_dates"].get("theater"), "%Y-%m-%d"
                                        ).tm_year
                                    elif movie_res.get("release_dates", {}).get("dvd"):
                                        log.debug("Checking year against dvd release date")
                                        release_year = time.strptime(
                                            movie_res["release_dates"].get("dvd"), "%Y-%m-%d"
                                        ).tm_year
                                    if not (release_year and release_year == year):
                                        log.debug(
                                            "removing %s - %s (wrong year: %s)"
                                            % (
                                                movie_res["title"],
                                                movie_res["id"],
                                                str(release_year or movie_res["year"]),
                                            )
                                        )
                                        results.remove(movie_res)
                                        continue
                            if movie_res["match"] < MIN_MATCH:
                                log.debug("removing %s (min_match)" % movie_res["title"])
                                results.remove(movie_res)
                                continue

                        if not results:
                            raise PluginError("no appropriate results")

                        if len(results) == 1:
                            log.debug("SUCCESS: only one movie remains")
                        else:
                            # Check min difference between best two hits
                            diff = results[0]["match"] - results[1]["match"]
                            if diff < MIN_DIFF:
                                log.debug(
                                    "unable to determine correct movie, min_diff too small"
                                    "(`%s (%d) - %s` <-?-> `%s (%d) - %s`)"
                                    % (
                                        results[0]["title"],
                                        results[0]["year"],
                                        results[0]["id"],
                                        results[1]["title"],
                                        results[1]["year"],
                                        results[1]["id"],
                                    )
                                )
                                for r in results:
                                    log.debug("remain: %s (match: %s) %s" % (r["title"], r["match"], r["id"]))
                                raise PluginError("min_diff")

                        imdb_alt_id = results[0].get("alternate_ids", {}).get("imdb")
                        if imdb_alt_id:
                            result = movies_alias(imdb_alt_id)
                        else:
                            result = movies_info(results[0].get("id"))

                        if not result:
                            result = results[0]

                        movie = RottenTomatoesMovie()
                        try:
                            movie = set_movie_details(movie, session, result)
                            if imdb_id and not filter(
                                lambda alt_id: alt_id.name == "imdb" and alt_id.id == imdb_id.lstrip("t"),
                                movie.alternate_ids,
                            ):
                                log.warning("Adding flexget_imdb alternate id %s for movie %s" % (imdb_id, movie))
                                movie.alternate_ids.append(
                                    RottenTomatoesAlternateId("flexget_imdb", imdb_id.lstrip("t"))
                                )
                            session.add(movie)
                            session.commit()
                        except IntegrityError:
                            log.warning(
                                "Found movie %s in database after search even though we "
                                "already looked, updating it with search result." % movie
                            )
                            session.rollback()
                            movie = (
                                session.query(RottenTomatoesMovie)
                                .filter(RottenTomatoesMovie.id == result["id"])
                                .first()
                            )
                            movie = set_movie_details(movie, session, result)
                            if imdb_id and not filter(
                                lambda alt_id: alt_id.name == "imdb" and alt_id.id == imdb_id.lstrip("t"),
                                movie.alternate_ids,
                            ):
                                log.warning("Adding flexget_imdb alternate id %s for movie %s" % (imdb_id, movie))
                                movie.alternate_ids.append(
                                    RottenTomatoesAlternateId("flexget_imdb", imdb_id.lstrip("t"))
                                )
                            session.merge(movie)
                            session.commit()

                        if title.lower() != movie.title.lower():
                            log.debug("Saving search result for '%s'" % search_string)
                            session.add(RottenTomatoesSearchResult(search=search_string, movie=movie))
        except URLError:
            raise PluginError("Error looking up movie from RottenTomatoes")

    if not movie:
        raise PluginError("No results found from rotten tomatoes for %s" % id_str())
    else:
        # Access attributes to force the relationships to eager load before we detach from session
        for attr in ["alternate_ids", "cast", "directors", "genres", "links", "posters", "release_dates"]:
            getattr(movie, attr)
        session.commit()
        return movie
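# Hedged usage sketch for lookup_movie(): the most specific criteria given are
# used, and PluginError covers both "no match" and lookup failures, so callers
# should catch it.
try:
    movie = lookup_movie(title='Inception', year=2010)
    log.info('Matched RT movie: %s (%s)' % (movie.title, movie.year))
except PluginError as e:
    log.debug('Rotten Tomatoes lookup failed: %s' % e)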
class Task(object):
    """
    Represents one task in the configuration.

    **Fires events:**

    * task.execute.before_plugin

      Before a plugin is about to be executed. Note that since this will also include all
      builtin plugins the amount of calls can be quite high

      ``parameters: task, keyword``

    * task.execute.after_plugin

      After a plugin has been executed.

      ``parameters: task, keyword``

    * task.execute.completed

      After task execution has been completed

      ``parameters: task``

    """

    max_reruns = 5

    def __init__(self, manager, name, config=None, options=None):
        """
        :param Manager manager: Manager instance.
        :param string name: Name of the task.
        :param dict config: Task configuration.
        """
        self.name = unicode(name)
        self.manager = manager
        # raw_config should remain the untouched input config
        if config is None:
            config = manager.config['tasks'].get(name, {})
        self.config = copy.deepcopy(config)
        self.prepared_config = None
        if options is None:
            options = copy.copy(self.manager.options.execute)
        elif isinstance(options, dict):
            options_namespace = copy.copy(self.manager.options.execute)
            options_namespace.__dict__.update(options)
            options = options_namespace
        self.options = options

        # simple persistence
        self.simple_persistence = SimpleTaskPersistence(self)

        # not to be reset
        self._rerun_count = 0

        self.config_modified = None

        # use reset to init variables when creating
        self._reset()

    @property
    def undecided(self):
        """
        .. deprecated:: Use API v3
        """
        return self.all_entries.undecided

    @property
    def failed(self):
        """
        .. deprecated:: Use API v3
        """
        return self.all_entries.failed

    @property
    def rejected(self):
        """
        .. deprecated:: Use API v3
        """
        return self.all_entries.rejected

    @property
    def accepted(self):
        """
        .. deprecated:: Use API v3
        """
        return self.all_entries.accepted

    @property
    def entries(self):
        """
        .. deprecated:: Use API v3
        """
        return self.all_entries.entries

    @property
    def all_entries(self):
        """
        .. deprecated:: Use API v3
        """
        return self._all_entries

    @property
    def is_rerun(self):
        return self._rerun_count

    # TODO: can we get rid of this now that Tasks are instantiated on demand?
    def _reset(self):
        """Reset task state"""
        log.debug('resetting %s' % self.name)
        self.enabled = not self.name.startswith('_')
        self.session = None
        self.priority = 65535

        self.requests = requests.Session()

        # List of all entries in the task
        self._all_entries = EntryContainer()

        self.disabled_phases = []

        # These are just to query what happened in task. Call task.abort to set.
        self.aborted = False
        self.abort_reason = None
        self.silent_abort = False

        self._rerun = False

        # current state
        self.current_phase = None
        self.current_plugin = None

    def __cmp__(self, other):
        return cmp(self.priority, other.priority)

    def __str__(self):
        return '<Task(name=%s,aborted=%s)>' % (self.name, self.aborted)

    def disable_phase(self, phase):
        """Disable ``phase`` from execution.

        All disabled phases are re-enabled by :meth:`Task._reset()` after task
        execution has been completed.

        :param string phase: Name of ``phase``
        :raises ValueError: *phase* could not be found.
        """
        if phase not in task_phases:
            raise ValueError('%s is not a valid phase' % phase)
        if phase not in self.disabled_phases:
            log.debug('Disabling %s phase' % phase)
            self.disabled_phases.append(phase)

    def abort(self, reason='Unknown', silent=False):
        """Abort this task execution, no more plugins will be executed except the abort handling ones."""
        self.aborted = True
        self.abort_reason = reason
        self.silent_abort = silent
        if not self.silent_abort:
            log.warning('Aborting task (plugin: %s)' % self.current_plugin)
        else:
            log.debug('Aborting task (plugin: %s)' % self.current_plugin)
        raise TaskAbort(reason, silent=silent)

    def find_entry(self, category='entries', **values):
        """
        Find and return :class:`~flexget.entry.Entry` with given attributes from task or None

        :param string category: entries, accepted, rejected or failed. Defaults to entries.
        :param values: Key values of entries to be searched
        :return: Entry or None
        """
        cat = getattr(self, category)
        if not isinstance(cat, EntryIterator):
            raise TypeError('category must be a EntryIterator')
        for entry in cat:
            for k, v in values.iteritems():
                if not (k in entry and entry[k] == v):
                    break
            else:
                return entry
        return None

    def plugins(self, phase=None):
        """Get currently enabled plugins.

        :param string phase:
          Optional, limits to plugins currently configured on given phase, sorted in phase order.
        :return:
          An iterator over configured :class:`flexget.plugin.PluginInfo` instances enabled on this task.
        """
        if phase:
            plugins = sorted(get_plugins(phase=phase), key=lambda p: p.phase_handlers[phase], reverse=True)
        else:
            plugins = all_plugins.itervalues()
        return (p for p in plugins if p.name in self.config or p.builtin)

    def __run_task_phase(self, phase):
        """Executes task phase, ie. call all enabled plugins on the task.

        Fires events:

        * task.execute.before_plugin
        * task.execute.after_plugin

        :param string phase: Name of the phase
        """
        if phase not in phase_methods:
            raise Exception('%s is not a valid task phase' % phase)
        # warn if no inputs, filters or outputs in the task
        if phase in ['input', 'filter', 'output']:
            if not self.manager.unit_test:
                # Check that there is at least one manually configured plugin for these phases
                for p in self.plugins(phase):
                    if not p.builtin:
                        break
                else:
                    log.warning('Task doesn\'t have any %s plugins, you should add (at least) one!' % phase)

        for plugin in self.plugins(phase):
            # Abort this phase if one of the plugins disables it
            if phase in self.disabled_phases:
                return
            # store execute info, except during entry events
            self.current_phase = phase
            self.current_plugin = plugin.name

            if plugin.api_ver == 1:
                # backwards compatibility
                # pass method only task (old behaviour)
                args = (self,)
            else:
                # pass method task, copy of config (so plugin cannot modify it)
                args = (self, copy.copy(self.config.get(plugin.name)))

            try:
                fire_event('task.execute.before_plugin', self, plugin.name)
                response = self.__run_plugin(plugin, phase, args)
                if phase == 'input' and response:
                    # add entries returned by input to self.all_entries
                    for e in response:
                        e.task = self
                    self.all_entries.extend(response)
            finally:
                fire_event('task.execute.after_plugin', self, plugin.name)

    def __run_plugin(self, plugin, phase, args=None, kwargs=None):
        """
        Execute given plugins phase method, with supplied args and kwargs.
        If plugin throws unexpected exceptions :meth:`abort` will be called.

        :param PluginInfo plugin: Plugin to be executed
        :param string phase: Name of the phase to be executed
        :param args: Passed to the plugin
        :param kwargs: Passed to the plugin
        """
        keyword = plugin.name
        method = plugin.phase_handlers[phase]
        if args is None:
            args = []
        if kwargs is None:
            kwargs = {}

        # log.trace('Running %s method %s' % (keyword, method))
        # call the plugin
        try:
            return method(*args, **kwargs)
        except TaskAbort:
            raise
        except PluginWarning as warn:
            # check if this warning should be logged only once (may keep repeating)
            if warn.kwargs.get('log_once', False):
                from flexget.utils.log import log_once
                log_once(warn.value, warn.log)
            else:
                warn.log.warning(warn)
        except EntryUnicodeError as eue:
            msg = ('Plugin %s tried to create non-unicode compatible entry (key: %s, value: %r)' %
                   (keyword, eue.key, eue.value))
            log.critical(msg)
            self.abort(msg)
        except PluginError as err:
            err.log.critical(err.value)
            self.abort(err.value)
        except DependencyError as e:
            msg = ('Plugin `%s` cannot be used because dependency `%s` is missing.' %
                   (keyword, e.missing))
            log.critical(msg)
            log.debug(e.message)
            self.abort(msg)
        except Warning as e:
            # If warnings have been elevated to errors
            msg = 'Warning during plugin %s: %s' % (keyword, e)
            log.exception(msg)
            self.abort(msg)
        except Exception as e:
            msg = 'BUG: Unhandled error in plugin %s: %s' % (keyword, e)
            log.exception(msg)
            self.abort(msg)

    def rerun(self):
        """Immediately re-run the task after execute has completed, task can be re-run up to
        :attr:`.max_reruns` times."""
        msg = 'Plugin %s has requested task to be ran again after execution has completed.' % self.current_plugin
        # Only print the first request for a rerun to the info log
        log.debug(msg) if self._rerun else log.info(msg)
        if self._rerun_count >= self.max_reruns:
            self._rerun = False
            log.info('Task has been re-run %s times already, stopping for now' % self._rerun_count)
            return
        self._rerun = True

    def config_changed(self):
        """
        Sets config_modified flag to True for the remainder of this run.
        Used when the db changes, and all entries need to be reprocessed.
        """
        self.config_modified = True

    @useTaskLogging
    def execute(self):
        """
        Executes the task.

        If :attr:`.enabled` is False task is not executed. Certain :attr:`.options`
        affect how execution is handled.

        - :attr:`.options.disable_phases` is a list of phases that are not enabled
          for this execution.
        - :attr:`.options.inject` is a list of :class:`Entry` instances used instead
          of running input phase.
        """
        if not self.enabled:
            log.debug('Not running disabled task %s' % self.name)

        if self.options.cron:
            self.manager.db_cleanup()

        self._reset()
        log.debug('executing %s' % self.name)
        if not self.enabled:
            log.debug('task %s disabled during preparation, not running' % self.name)
            return

        # Handle keyword args
        if self.options.learn:
            log.info('Disabling download and output phases because of --learn')
            self.disable_phase('download')
            self.disable_phase('output')
        if self.options.disable_phases:
            map(self.disable_phase, self.options.disable_phases)
        if self.options.inject:
            # If entries are passed for this execution (eg. rerun), disable the input phase
            self.disable_phase('input')
            self.all_entries.extend(self.options.inject)

        log.debug('starting session')
        self.session = Session()

        # Save current config hash and set config_modified flag
        config_hash = hashlib.md5(str(sorted(self.config.items()))).hexdigest()
        last_hash = self.session.query(TaskConfigHash).filter(TaskConfigHash.task == self.name).first()
        if self.is_rerun:
            # Restore the config to state right after start phase
            if self.prepared_config:
                self.config = copy.deepcopy(self.prepared_config)
            else:
                log.error('BUG: No prepared_config on rerun, please report.')
            self.config_modified = False
        elif not last_hash:
            self.config_modified = True
            last_hash = TaskConfigHash(task=self.name, hash=config_hash)
            self.session.add(last_hash)
        elif last_hash.hash != config_hash:
            self.config_modified = True
            last_hash.hash = config_hash
        else:
            self.config_modified = False

        # run phases
        try:
            for phase in task_phases:
                if phase in self.disabled_phases:
                    # log keywords not executed
                    for plugin in self.plugins(phase):
                        if plugin.name in self.config:
                            log.info('Plugin %s is not executed because %s phase is disabled (e.g. --test)' %
                                     (plugin.name, phase))
                    continue
                if phase == 'start' and self.is_rerun:
                    log.debug('skipping task_start during rerun')
                elif phase == 'exit' and self._rerun:
                    log.debug('not running task_exit yet because task will rerun')
                else:
                    # run all plugins with this phase
                    self.__run_task_phase(phase)
                    if phase == 'start':
                        # Store a copy of the config state after start phase to restore for reruns
                        self.prepared_config = copy.deepcopy(self.config)
        except TaskAbort:
            # Roll back the session before calling abort handlers
            self.session.rollback()
            try:
                self.__run_task_phase('abort')
                # Commit just the abort handler changes if no exceptions are raised there
                self.session.commit()
            except TaskAbort as e:
                log.exception('abort handlers aborted: %s' % e)
            raise
        else:
            for entry in self.all_entries:
                entry.complete()
            log.debug('committing session')
            self.session.commit()
            fire_event('task.execute.completed', self)
        finally:
            # this will cause database rollback on exception
            self.session.close()

        # rerun task
        if self._rerun:
            log.info('Rerunning the task in case better resolution can be achieved.')
            self._rerun_count += 1
            # TODO: Potential optimization is to take snapshots (maybe make the ones backlog uses
            # built in instead of taking another one) after input and just inject the same entries
            # for the rerun
            self.execute()

    def __eq__(self, other):
        if hasattr(other, 'name'):
            return self.name == other.name
        return NotImplemented

    def __copy__(self):
        new = type(self)(self.manager, self.name, self.config, self.options)
        # Update all the variables of new instance to match our own
        new.__dict__.update(self.__dict__)
        # Some mutable objects need to be copies
        new.options = copy.copy(self.options)
        new.config = copy.deepcopy(self.config)
        return new

    copy = __copy__
class Task(object):
    """
    Represents one task in the configuration.

    **Fires events:**

    * task.execute.before_plugin

      Before a plugin is about to be executed. Note that since this will also include all
      builtin plugins the amount of calls can be quite high

      ``parameters: task, keyword``

    * task.execute.after_plugin

      After a plugin has been executed.

      ``parameters: task, keyword``

    * task.execute.completed

      After task execution has been completed

      ``parameters: task``

    """

    max_reruns = 5

    def __init__(self, manager, name, config):
        """
        :param Manager manager: Manager instance.
        :param string name: Name of the task.
        :param dict config: Task configuration.
        """
        self.name = unicode(name)
        self.config = config
        self.manager = manager

        # simple persistence
        self.simple_persistence = SimpleTaskPersistence(self)

        # not to be reset
        self._rerun_count = 0

        # This should not be used until after process_start, when it is evaluated
        self.config_modified = None

        # use reset to init variables when creating
        self._reset()

    # Make these read-only properties
    all_entries = property(lambda self: self._all_entries)
    entries = property(lambda self: self.all_entries.entries)
    accepted = property(lambda self: self.all_entries.accepted)
    rejected = property(lambda self: self.all_entries.rejected)
    failed = property(lambda self: self.all_entries.failed)

    @property
    def is_rerun(self):
        return self._rerun_count

    def _reset(self):
        """Reset task state"""
        log.debug('resetting %s' % self.name)
        self.enabled = True
        self.session = None
        self.priority = 65535

        self.requests = requests.Session()

        # List of all entries in the task
        self._all_entries = EntryContainer(task=self)

        self.disabled_phases = []

        # TODO: task.abort() should be done by using exception? not a flag that has to be checked everywhere
        self._abort = False
        self._abort_reason = None
        self._silent_abort = False

        self._rerun = False

        # current state
        self.current_phase = None
        self.current_plugin = None

    def __cmp__(self, other):
        return cmp(self.priority, other.priority)

    def __str__(self):
        return '<Task(name=%s,aborted=%s)>' % (self.name, str(self.aborted))

    @property
    def aborted(self):
        return self._abort and not self._silent_abort

    @property
    def abort_reason(self):
        return self._abort_reason

    @property
    def undecided(self):
        """Iterate over undecided entries"""
        return (entry for entry in self.entries
                if not entry in self.accepted and entry not in self.rejected)

    def disable_phase(self, phase):
        """Disable ``phase`` from execution.

        All disabled phases are re-enabled by :meth:`Task._reset()` after task
        execution has been completed.

        :param string phase: Name of ``phase``
        :raises ValueError: *phase* could not be found.
        """
        if phase not in task_phases:
            raise ValueError('%s is not a valid phase' % phase)
        if phase not in self.disabled_phases:
            log.debug('Disabling %s phase' % phase)
            self.disabled_phases.append(phase)

    def abort(self, reason='Unknown', **kwargs):
        """Abort this task execution, no more plugins will be executed after the current one exits."""
        if self._abort:
            return
        self._abort_reason = reason
        if not kwargs.get('silent', False):
            log.info('Aborting task (plugin: %s)' % self.current_plugin)
            self._silent_abort = False
        else:
            log.debug('Aborting task (plugin: %s)' % self.current_plugin)
            self._silent_abort = True
        # Run the abort phase before we set the _abort flag
        self._abort = True
        self.__run_task_phase('abort')

    def find_entry(self, category='entries', **values):
        """
        Find and return :class:`~flexget.entry.Entry` with given attributes from task or None

        :param string category: entries, accepted, rejected or failed. Defaults to entries.
        :param values: Key values of entries to be searched
        :return: Entry or None
        """
        cat = getattr(self, category)
        if not isinstance(cat, EntryIterator):
            raise TypeError('category must be a EntryIterator')
        for entry in cat:
            for k, v in values.iteritems():
                if not (k in entry and entry[k] == v):
                    break
            else:
                return entry
        return None

    def plugins(self, phase=None):
        """Get currently enabled plugins.

        :param string phase:
          Optional, limits to plugins currently configured on given phase, sorted in phase order.
        :return:
          An iterator over configured :class:`flexget.plugin.PluginInfo` instances enabled on this task.
        """
        if phase:
            plugins = sorted(get_plugins_by_phase(phase), key=lambda p: p.phase_handlers[phase], reverse=True)
        else:
            plugins = all_plugins.itervalues()
        return (p for p in plugins if p.name in self.config or p.builtin)

    def __run_task_phase(self, phase):
        """Executes task phase, ie. call all enabled plugins on the task.

        Fires events:

        * task.execute.before_plugin
        * task.execute.after_plugin

        :param string phase: Name of the phase
        """
        if phase not in task_phases + ['abort', 'process_start', 'process_end']:
            raise Exception('%s is not a valid task phase' % phase)
        # warn if no inputs, filters or outputs in the task
        if phase in ['input', 'filter', 'output']:
            if not self.manager.unit_test:
                # Check that there is at least one manually configured plugin for these phases
                for p in self.plugins(phase):
                    if not p.builtin:
                        break
                else:
                    log.warning('Task doesn\'t have any %s plugins, you should add (at least) one!' % phase)

        for plugin in self.plugins(phase):
            # Abort this phase if one of the plugins disables it
            if phase in self.disabled_phases:
                return
            # store execute info, except during entry events
            self.current_phase = phase
            self.current_plugin = plugin.name

            if plugin.api_ver == 1:
                # backwards compatibility
                # pass method only task (old behaviour)
                args = (self,)
            else:
                # pass method task, copy of config (so plugin cannot modify it)
                args = (self, copy.copy(self.config.get(plugin.name)))

            try:
                fire_event('task.execute.before_plugin', self, plugin.name)
                response = self.__run_plugin(plugin, phase, args)
                if phase == 'input' and response:
                    # add entries returned by input to self.entries
                    self.all_entries.extend(response)
            finally:
                fire_event('task.execute.after_plugin', self, plugin.name)

            # Make sure we abort if any plugin sets our abort flag
            if self._abort and phase != 'abort':
                return

    def _run_entry_phase(self, phase, entry, **kwargs):
        # TODO: entry events are not very elegant, refactor into real (new) events or something ...
        if phase not in ['accept', 'reject', 'fail']:
            raise Exception('Not a valid entry phase')
        phase_plugins = self.plugins(phase)
        for plugin in phase_plugins:
            self.__run_plugin(plugin, phase, (self, entry), kwargs)

    def __run_plugin(self, plugin, phase, args=None, kwargs=None):
        """
        Execute given plugins phase method, with supplied args and kwargs.
        If plugin throws unexpected exceptions :meth:`abort` will be called.

        :param PluginInfo plugin: Plugin to be executed
        :param string phase: Name of the phase to be executed
        :param args: Passed to the plugin
        :param kwargs: Passed to the plugin
        """
        keyword = plugin.name
        method = plugin.phase_handlers[phase]
        if args is None:
            args = []
        if kwargs is None:
            kwargs = {}

        # log.trace('Running %s method %s' % (keyword, method))
        # call the plugin
        try:
            return method(*args, **kwargs)
        except PluginWarning as warn:
            # check if this warning should be logged only once (may keep repeating)
            if warn.kwargs.get('log_once', False):
                from flexget.utils.log import log_once
                log_once(warn.value, warn.log)
            else:
                warn.log.warning(warn)
        except EntryUnicodeError as eue:
            msg = ('Plugin %s tried to create non-unicode compatible entry (key: %s, value: %r)' %
                   (keyword, eue.key, eue.value))
            log.critical(msg)
            self.abort(msg)
        except PluginError as err:
            err.log.critical(err.value)
            self.abort(err.value)
        except DependencyError as e:
            msg = ('Plugin `%s` cannot be used because dependency `%s` is missing.' %
                   (keyword, e.missing))
            log.critical(msg)
            log.debug(e.message)
            self.abort(msg)
        except Exception as e:
            msg = 'BUG: Unhandled error in plugin %s: %s' % (keyword, e)
            log.exception(msg)
            self.abort(msg)
            # don't handle plugin errors gracefully with unit test
            if self.manager.unit_test:
                raise

    def rerun(self):
        """Immediately re-run the task after execute has completed, task can be re-run up to
        :attr:`.max_reruns` times."""
        self._rerun = True
        log.info('Plugin %s has requested task to be ran again after execution has completed.' %
                 self.current_plugin)

    def config_changed(self):
        """Forces config_modified flag to come out true on next run. Used when the db changes,
        and all entries need to be reprocessed."""
        log.debug('Marking config as changed.')
        session = self.session or Session()
        task_hash = session.query(TaskConfigHash).filter(TaskConfigHash.task == self.name).first()
        if task_hash:
            task_hash.hash = ''
        self.config_modified = True
        # If we created our own session, commit and close it.
        if not self.session:
            session.commit()
            session.close()

    @useTaskLogging
    def execute(self, disable_phases=None, entries=None):
        """Executes the task.

        :param list disable_phases: Disable given phases names during execution
        :param list entries:
          Entries to be used in execution instead of using the input. Disables input phase.
        """
        log.debug('executing %s' % self.name)

        # Store original config state to be restored if a rerun is needed
        config_backup = copy.deepcopy(self.config)

        self._reset()

        # Handle keyword args
        if disable_phases:
            map(self.disable_phase, disable_phases)
        if entries:
            # If entries are passed for this execution (eg. rerun), disable the input phase
            self.disable_phase('input')
            self.all_entries.extend(entries)

        # validate configuration
        errors = self.validate()
        if self._abort:  # todo: bad practice
            return
        if errors and self.manager.unit_test:  # todo: bad practice
            raise Exception('configuration errors')
        if self.manager.options.validate:
            if not errors:
                log.info('Task \'%s\' passed' % self.name)
            self.enabled = False
            return

        log.debug('starting session')
        self.session = Session()

        # Save current config hash and set config_modified flag
        config_hash = hashlib.md5(str(self.config.items())).hexdigest()
        last_hash = self.session.query(TaskConfigHash).filter(TaskConfigHash.task == self.name).first()
        if self.is_rerun:
            # Make sure on rerun config is not marked as modified
            self.config_modified = False
        elif not last_hash:
            self.config_modified = True
            last_hash = TaskConfigHash(task=self.name, hash=config_hash)
            self.session.add(last_hash)
        elif last_hash.hash != config_hash:
            self.config_modified = True
            last_hash.hash = config_hash
        else:
            self.config_modified = False

        try:
            # run phases
            for phase in task_phases:
                if phase in self.disabled_phases:
                    # log keywords not executed
                    for plugin in self.plugins(phase):
                        if plugin.name in self.config:
                            log.info('Plugin %s is not executed because %s phase is disabled (e.g. --test)' %
                                     (plugin.name, phase))
                    continue

                # run all plugins with this phase
                self.__run_task_phase(phase)

                # if abort flag has been set task should be aborted now
                # since this calls return rerun will not be done
                if self._abort:
                    return

            log.debug('committing session, abort=%s' % self._abort)
            self.session.commit()
            fire_event('task.execute.completed', self)
        finally:
            # this will cause database rollback on exception and task.abort
            self.session.close()

        # rerun task
        if self._rerun:
            if self._rerun_count >= self.max_reruns:
                log.info('Task has been rerunning already %s times, stopping for now' % self._rerun_count)
                # reset the counter for future runs (necessary only with webui)
                self._rerun_count = 0
            else:
                log.info('Rerunning the task in case better resolution can be achieved.')
                self._rerun_count += 1
                # Restore config to original state before running again
                self.config = config_backup
                self.execute(disable_phases=disable_phases, entries=entries)

        # Clean up entries after the task has executed to reduce ram usage, #1652
        # TODO: This doesn't work with unified entries, not sure best replacement
        """if not self.manager.unit_test:
            log.debug('Clearing all entries from task.')
            self.entries = []
            self.rejected = []
            self.failed = []"""

    def _process_start(self):
        """Execute process_start phase"""
        self.__run_task_phase('process_start')

    def _process_end(self):
        """Execute terminate phase for this task"""
        if self.manager.options.validate:
            log.debug('No process_end phase with --check')
            return
        self.__run_task_phase('process_end')

    def validate(self):
        """Called during task execution. Validates config, prints errors and aborts task if invalid."""
        errors = self.validate_config(self.config)
        # log errors and abort
        if errors:
            log.critical('Task \'%s\' has configuration errors:' % self.name)
            for error in errors:
                log.error(error)
            # task has errors, abort it
            self.abort('\n'.join(errors))
        return errors

    @staticmethod
    def validate_config(config):
        """Plugin configuration validation.
Return list of error messages that were detected."""
validate_errors = []
# validate that config is a dictionary
if not isinstance(config, dict):
    validate_errors.append('Config is not a dictionary.')
    return validate_errors
# validate all plugins
for keyword in config:
    if keyword.startswith('_'):
        continue
    try:
        plugin = get_plugin_by_name(keyword)
    except Exception:
        # catch Exception instead of a bare except, which would also swallow KeyboardInterrupt
        validate_errors.append('Unknown plugin \'%s\'' % keyword)
        continue
    if hasattr(plugin.instance, 'validator'):
        try:
            validator = plugin.instance.validator()
        except TypeError as e:
            log.critical('Invalid validator method in plugin %s' % keyword)
            log.exception(e)
            continue
        if not validator.name == 'root':
            # if validator is not root type, add a root validator as its parent
            validator = validator.add_root_parent()
        if not validator.validate(config[keyword]):
            for msg in validator.errors.messages:
                validate_errors.append('%s %s' % (keyword, msg))
    else:
        log.warning('Used plugin %s does not support validating. Please notify author!' % keyword)
return validate_errors
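# The config-modified check in execute() above reduces to hashing the stringified
# config and comparing against the stored value. A minimal standalone sketch of
# that idea, using a plain dict instead of the TaskConfigHash table; note that
# the newer execute() hashes sorted(self.config.items()) so dict ordering cannot
# fake a change:
import hashlib

_stored_hashes = {}  # task name -> last seen config hash (stand-in for the db table)

def config_modified(task_name, config):
    """Return True when `config` differs from the one seen on the previous run."""
    config_hash = hashlib.md5(str(sorted(config.items())).encode('utf-8')).hexdigest()
    changed = _stored_hashes.get(task_name) != config_hash
    _stored_hashes[task_name] = config_hash
    return changed

print(config_modified('demo', {'accept_all': True}))   # True, first run
print(config_modified('demo', {'accept_all': True}))   # False, unchanged
print(config_modified('demo', {'accept_all': False}))  # True, config edited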
class Task(object): """ Represents one task in the configuration. **Fires events:** * task.execute.before_plugin Before a plugin is about to be executed. Note that since this will also include all builtin plugins the amount of calls can be quite high ``parameters: task, keyword`` * task.execute.after_plugin After a plugin has been executed. ``parameters: task, keyword`` * task.execute.completed After task execution has been completed ``parameters: task`` """ max_reruns = 5 def __init__(self, manager, name, config=None, options=None): """ :param Manager manager: Manager instance. :param string name: Name of the task. :param dict config: Task configuration. """ self.name = unicode(name) self.manager = manager # raw_config should remain the untouched input config if config is None: config = manager.config['tasks'].get(name, {}) self.config = copy.deepcopy(config) self.prepared_config = None if options is None: options = copy.copy(self.manager.options.execute) elif isinstance(options, dict): options_namespace = copy.copy(self.manager.options.execute) options_namespace.__dict__.update(options) options = options_namespace self.options = options # simple persistence self.simple_persistence = SimpleTaskPersistence(self) # not to be reset self._rerun_count = 0 self.config_modified = None # use reset to init variables when creating self._reset() @property def undecided(self): """ .. deprecated:: Use API v3 """ return self.all_entries.undecided @property def failed(self): """ .. deprecated:: Use API v3 """ return self.all_entries.failed @property def rejected(self): """ .. deprecated:: Use API v3 """ return self.all_entries.rejected @property def accepted(self): """ .. deprecated:: Use API v3 """ return self.all_entries.accepted @property def entries(self): """ .. deprecated:: Use API v3 """ return self.all_entries.entries @property def all_entries(self): """ .. deprecated:: Use API v3 """ return self._all_entries @property def is_rerun(self): return self._rerun_count # TODO: can we get rid of this now that Tasks are instantiated on demand? def _reset(self): """Reset task state""" log.debug('resetting %s' % self.name) self.enabled = not self.name.startswith('_') self.session = None self.priority = 65535 self.requests = requests.Session() # List of all entries in the task self._all_entries = EntryContainer() self.disabled_phases = [] # These are just to query what happened in task. Call task.abort to set. self.aborted = False self.abort_reason = None self.silent_abort = False self._rerun = False # current state self.current_phase = None self.current_plugin = None def __cmp__(self, other): return cmp(self.priority, other.priority) def __str__(self): return '<Task(name=%s,aborted=%s)>' % (self.name, self.aborted) def disable_phase(self, phase): """Disable ``phase`` from execution. All disabled phases are re-enabled by :meth:`Task._reset()` after task execution has been completed. :param string phase: Name of ``phase`` :raises ValueError: *phase* could not be found. 
""" if phase not in task_phases: raise ValueError('%s is not a valid phase' % phase) if phase not in self.disabled_phases: log.debug('Disabling %s phase' % phase) self.disabled_phases.append(phase) def abort(self, reason='Unknown', silent=False): """Abort this task execution, no more plugins will be executed except the abort handling ones.""" self.aborted = True self.abort_reason = reason self.silent_abort = silent if not self.silent_abort: log.warning('Aborting task (plugin: %s)' % self.current_plugin) else: log.debug('Aborting task (plugin: %s)' % self.current_plugin) raise TaskAbort(reason, silent=silent) def find_entry(self, category='entries', **values): """ Find and return :class:`~flexget.entry.Entry` with given attributes from task or None :param string category: entries, accepted, rejected or failed. Defaults to entries. :param values: Key values of entries to be searched :return: Entry or None """ cat = getattr(self, category) if not isinstance(cat, EntryIterator): raise TypeError('category must be a EntryIterator') for entry in cat: for k, v in values.iteritems(): if not (k in entry and entry[k] == v): break else: return entry return None def plugins(self, phase=None): """Get currently enabled plugins. :param string phase: Optional, limits to plugins currently configured on given phase, sorted in phase order. :return: An iterator over configured :class:`flexget.plugin.PluginInfo` instances enabled on this task. """ if phase: plugins = sorted(get_plugins(phase=phase), key=lambda p: p.phase_handlers[phase], reverse=True) else: plugins = all_plugins.itervalues() return (p for p in plugins if p.name in self.config or p.builtin) def __run_task_phase(self, phase): """Executes task phase, ie. call all enabled plugins on the task. Fires events: * task.execute.before_plugin * task.execute.after_plugin :param string phase: Name of the phase """ if phase not in phase_methods: raise Exception('%s is not a valid task phase' % phase) # warn if no inputs, filters or outputs in the task if phase in ['input', 'filter', 'output']: if not self.manager.unit_test: # Check that there is at least one manually configured plugin for these phases for p in self.plugins(phase): if not p.builtin: break else: log.warning('Task doesn\'t have any %s plugins, you should add (at least) one!' % phase) for plugin in self.plugins(phase): # Abort this phase if one of the plugins disables it if phase in self.disabled_phases: return # store execute info, except during entry events self.current_phase = phase self.current_plugin = plugin.name if plugin.api_ver == 1: # backwards compatibility # pass method only task (old behaviour) args = (self,) else: # pass method task, copy of config (so plugin cannot modify it) args = (self, copy.copy(self.config.get(plugin.name))) try: fire_event('task.execute.before_plugin', self, plugin.name) response = self.__run_plugin(plugin, phase, args) if phase == 'input' and response: # add entries returned by input to self.all_entries for e in response: e.task = self self.all_entries.extend(response) finally: fire_event('task.execute.after_plugin', self, plugin.name) def __run_plugin(self, plugin, phase, args=None, kwargs=None): """ Execute given plugins phase method, with supplied args and kwargs. If plugin throws unexpected exceptions :meth:`abort` will be called. 
:param PluginInfo plugin: Plugin to be executed :param string phase: Name of the phase to be executed :param args: Passed to the plugin :param kwargs: Passed to the plugin """ keyword = plugin.name method = plugin.phase_handlers[phase] if args is None: args = [] if kwargs is None: kwargs = {} # log.trace('Running %s method %s' % (keyword, method)) # call the plugin try: return method(*args, **kwargs) except TaskAbort: raise except PluginWarning as warn: # check if this warning should be logged only once (may keep repeating) if warn.kwargs.get('log_once', False): from flexget.utils.log import log_once log_once(warn.value, warn.log) else: warn.log.warning(warn) except EntryUnicodeError as eue: msg = ('Plugin %s tried to create non-unicode compatible entry (key: %s, value: %r)' % (keyword, eue.key, eue.value)) log.critical(msg) self.abort(msg) except PluginError as err: err.log.critical(err.value) self.abort(err.value) except DependencyError as e: msg = ('Plugin `%s` cannot be used because dependency `%s` is missing.' % (keyword, e.missing)) log.critical(msg) log.debug(e.message) self.abort(msg) except Warning as e: # If warnings have been elevated to errors msg = 'Warning during plugin %s: %s' % (keyword, e) log.exception(msg) self.abort(msg) except Exception as e: msg = 'BUG: Unhandled error in plugin %s: %s' % (keyword, e) log.exception(msg) self.abort(msg) def rerun(self): """Immediately re-run the task after execute has completed, task can be re-run up to :attr:`.max_reruns` times.""" msg = 'Plugin %s has requested task to be ran again after execution has completed.' % self.current_plugin # Only print the first request for a rerun to the info log log.debug(msg) if self._rerun else log.info(msg) if self._rerun_count >= self.max_reruns: self._rerun = False log.info('Task has been re-run %s times already, stopping for now' % self._rerun_count) return self._rerun = True def config_changed(self): """ Sets config_modified flag to True for the remainder of this run. Used when the db changes, and all entries need to be reprocessed. """ self.config_modified = True @useTaskLogging def execute(self): """ Executes the the task. If :attr:`.enabled` is False task is not executed. Certain :attr:`.options` affect how execution is handled. - :attr:`.options.disable_phases` is a list of phases that are not enabled for this execution. - :attr:`.options.inject` is a list of :class:`Entry` instances used instead of running input phase. """ if not self.enabled: log.debug('Not running disabled task %s' % self.name) if self.options.cron: self.manager.db_cleanup() self._reset() log.debug('executing %s' % self.name) if not self.enabled: log.debug('task %s disabled during preparation, not running' % self.name) return # Handle keyword args if self.options.learn: log.info('Disabling download and output phases because of --learn') self.disable_phase('download') self.disable_phase('output') if self.options.disable_phases: map(self.disable_phase, self.options.disable_phases) if self.options.inject: # If entries are passed for this execution (eg. 
rerun), disable the input phase self.disable_phase('input') self.all_entries.extend(self.options.inject) log.debug('starting session') self.session = Session() # Save current config hash and set config_modidied flag config_hash = hashlib.md5(str(sorted(self.config.items()))).hexdigest() last_hash = self.session.query(TaskConfigHash).filter(TaskConfigHash.task == self.name).first() if self.is_rerun: # Restore the config to state right after start phase if self.prepared_config: self.config = copy.deepcopy(self.prepared_config) else: log.error('BUG: No prepared_config on rerun, please report.') self.config_modified = False elif not last_hash: self.config_modified = True last_hash = TaskConfigHash(task=self.name, hash=config_hash) self.session.add(last_hash) elif last_hash.hash != config_hash: self.config_modified = True last_hash.hash = config_hash else: self.config_modified = False # run phases try: for phase in task_phases: if phase in self.disabled_phases: # log keywords not executed for plugin in self.plugins(phase): if plugin.name in self.config: log.info('Plugin %s is not executed because %s phase is disabled (e.g. --test)' % (plugin.name, phase)) continue if phase == 'start' and self.is_rerun: log.debug('skipping task_start during rerun') elif phase == 'exit' and self._rerun: log.debug('not running task_exit yet because task will rerun') else: # run all plugins with this phase self.__run_task_phase(phase) if phase == 'start': # Store a copy of the config state after start phase to restore for reruns self.prepared_config = copy.deepcopy(self.config) except TaskAbort: # Roll back the session before calling abort handlers self.session.rollback() try: self.__run_task_phase('abort') # Commit just the abort handler changes if no exceptions are raised there self.session.commit() except TaskAbort as e: log.exception('abort handlers aborted: %s' % e) raise else: for entry in self.all_entries: entry.complete() log.debug('committing session') self.session.commit() fire_event('task.execute.completed', self) finally: # this will cause database rollback on exception self.session.close() # rerun task if self._rerun: log.info('Rerunning the task in case better resolution can be achieved.') self._rerun_count += 1 # TODO: Potential optimization is to take snapshots (maybe make the ones backlog uses built in instead of # taking another one) after input and just inject the same entries for the rerun self.execute() def __eq__(self, other): if hasattr(other, 'name'): return self.name == other.name return NotImplemented def __copy__(self): new = type(self)(self.manager, self.name, self.config, self.options) # Update all the variables of new instance to match our own new.__dict__.update(self.__dict__) # Some mutable objects need to be copies new.options = copy.copy(self.options) new.config = copy.deepcopy(self.config) return new copy = __copy__
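# In this revision abort() raises TaskAbort instead of setting a flag that every
# caller must poll; execute() catches it, rolls back the session, and runs the
# abort handlers. A minimal sketch of that control flow (the TaskAbort class
# here is a stand-in with the same (reason, silent) signature used above):
from __future__ import print_function

class TaskAbort(Exception):
    def __init__(self, reason, silent=False):
        Exception.__init__(self, reason)
        self.reason = reason
        self.silent = silent

def run_phases(phases):
    try:
        for phase in phases:
            phase()
    except TaskAbort as abort:
        # unwinding via the exception replaces the old "check _abort after
        # every plugin" pattern; rollback and abort handlers would run here
        print('aborted: %s' % abort.reason)

def failing_phase():
    raise TaskAbort('demo failure')

run_phases([lambda: print('input ok'), failing_phase, lambda: print('never runs')])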
def migrate2(self):
    session = Session()
    try:
        from progressbar import ProgressBar, Percentage, Bar, ETA
    except ImportError:
        # only ImportError is expected here; a bare except would also hide real bugs
        print 'Critical: progressbar library not found, try running `bin/easy_install progressbar` ?'
        return

    class Seen(Base):
        __tablename__ = 'seen'

        id = Column(Integer, primary_key=True)
        field = Column(String)
        value = Column(String, index=True)
        task = Column('feed', String)
        added = Column(DateTime)

        def __init__(self, field, value, task):
            self.field = field
            self.value = value
            self.task = task
            self.added = datetime.now()

        def __str__(self):
            return '<Seen(%s=%s)>' % (self.field, self.value)

    print ''

    # REPAIR / REMOVE DUPLICATES
    index = 0
    removed = 0
    total = session.query(Seen).count() + 1
    widgets = ['Repairing - ', ETA(), ' ', Percentage(), ' ', Bar(left='[', right=']')]
    bar = ProgressBar(widgets=widgets, maxval=total).start()
    for seen in session.query(Seen).all():
        index += 1
        if index % 10 == 0:
            bar.update(index)
        amount = 0
        for dupe in session.query(Seen).filter(Seen.value == seen.value):
            amount += 1
            if amount > 1:
                removed += 1
                session.delete(dupe)
    bar.finish()

    # MIGRATE
    total = session.query(Seen).count() + 1
    widgets = ['Upgrading - ', ETA(), ' ', Percentage(), ' ', Bar(left='[', right=']')]
    bar = ProgressBar(widgets=widgets, maxval=total).start()
    index = 0
    for seen in session.query(Seen).all():
        index += 1
        if not index % 10:
            bar.update(index)
        se = SeenEntry(u'N/A', seen.task, u'migrated')
        se.added = seen.added
        se.fields.append(SeenField(seen.field, seen.value))
        session.add(se)
    bar.finish()

    session.execute('drop table seen;')
    session.commit()
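# The repair pass above re-queries the table once per row, which is quadratic in
# table size. Under the same schema the duplicates can be collapsed with one
# grouped DELETE; a sketch in plain sqlite3 (column names follow the legacy
# `seen` table above):
import sqlite3

conn = sqlite3.connect(':memory:')
conn.execute('create table seen (id integer primary key, field text, value text, feed text)')
conn.executemany('insert into seen (field, value, feed) values (?, ?, ?)',
                 [('url', 'a', 'task1'), ('url', 'a', 'task2'), ('url', 'b', 'task1')])
# keep the lowest id for every duplicated value, drop the rest in one statement
conn.execute('delete from seen where id not in (select min(id) from seen group by value)')
print(conn.execute('select count(*) from seen').fetchone()[0])  # -> 2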
def lookup_movie(title=None, year=None, rottentomatoes_id=None, imdb_id=None, smart_match=None, only_cached=False, session=None): """ Do a lookup from Rotten Tomatoes for the movie matching the passed arguments. Any combination of criteria can be passed, the most specific criteria specified will be used. :param rottentomatoes_id: rottentomatoes_id of desired movie :param imdb_id: imdb_id of desired movie :param title: title of desired movie :param year: release year of desired movie :param smart_match: attempt to clean and parse title and year from a string :param only_cached: if this is specified, an online lookup will not occur if the movie is not in the cache :param session: optionally specify a session to use, if specified, returned Movie will be live in that session :returns: The Movie object populated with data from Rotten Tomatoes :raises: PluginError if a match cannot be found or there are other problems with the lookup """ if smart_match: # If smart_match was specified, and we don't have more specific criteria, parse it into a title and year title_parser = MovieParser() title_parser.parse(smart_match) title = title_parser.name year = title_parser.year if title == '' and not (rottentomatoes_id or imdb_id or title): raise PluginError('Failed to parse name from %s' % smart_match) if title: search_string = title.lower() if year: search_string = '%s %s' % (search_string, year) elif not (rottentomatoes_id or imdb_id): raise PluginError('No criteria specified for rotten tomatoes lookup') def id_str(): return '<title=%s,year=%s,rottentomatoes_id=%s,imdb_id=%s>' % (title, year, rottentomatoes_id, imdb_id) if not session: session = Session() log.debug('Looking up rotten tomatoes information for %s' % id_str()) movie = None # Try to lookup from cache if rottentomatoes_id: movie = session.query(RottenTomatoesMovie).\ filter(RottenTomatoesMovie.id == rottentomatoes_id).first() if not movie and imdb_id: alt_id = session.query(RottenTomatoesAlternateId).\ filter(RottenTomatoesAlternateId.name.in_(['imdb', 'flexget_imdb'])).\ filter(RottenTomatoesAlternateId.id == imdb_id.lstrip('t')).first() if alt_id: movie = session.query(RottenTomatoesMovie).filter(RottenTomatoesMovie.id == alt_id.movie_id).first() if not movie and title: movie_filter = session.query(RottenTomatoesMovie).filter(func.lower(RottenTomatoesMovie.title) == title.lower()) if year: movie_filter = movie_filter.filter(RottenTomatoesMovie.year == year) movie = movie_filter.first() if not movie: log.debug('No matches in movie cache found, checking search cache.') found = session.query(RottenTomatoesSearchResult).\ filter(func.lower(RottenTomatoesSearchResult.search) == search_string).first() if found and found.movie: log.debug('Movie found in search cache.') movie = found.movie if movie: # Movie found in cache, check if cache has expired. if movie.expired and not only_cached: log.debug('Cache has expired for %s, attempting to refresh from Rotten Tomatoes.' % id_str()) try: imdb_alt_id = movie.alternate_ids and filter( lambda alt_id: alt_id.name in ['imdb', 'flexget_imdb'], movie.alternate_ids)[0].id if imdb_alt_id: result = movies_alias(imdb_alt_id, 'imdb') else: result = movies_info(movie.id) movie = _set_movie_details(movie, session, result) session.merge(movie) except URLError: log.error('Error refreshing movie details from Rotten Tomatoes, cached info being used.') else: log.debug('Movie %s information restored from cache.' 
% id_str()) else: if only_cached: raise PluginError('Movie %s not found from cache' % id_str()) # There was no movie found in the cache, do a lookup from Rotten Tomatoes log.debug('Movie %s not found in cache, looking up from rotten tomatoes.' % id_str()) try: # Lookups using imdb_id # TODO: extract to method if imdb_id: log.debug('Using IMDB alias %s.' % imdb_id) result = movies_alias(imdb_id, 'imdb') if result: mismatch = [] min_match = difflib.SequenceMatcher(lambda x: x == ' ', re.sub('\s+\(.*\)$', '', result['title'].lower()), title.lower()).ratio() < MIN_MATCH if title and min_match: mismatch.append('the title (%s <-?-> %s)' % (title, result['title'])) result['year'] = int(result['year']) if year and fabs(result['year'] - year) > 1: mismatch.append('the year (%s <-?-> %s)' % (year, result['year'])) release_year = None if result.get('release_dates', {}).get('theater'): log.debug('Checking year against theater release date') release_year = time.strptime(result['release_dates'].get('theater'), '%Y-%m-%d').tm_year if fabs(release_year - year) > 1: mismatch.append('the theater release (%s)' % release_year) elif result.get('release_dates', {}).get('dvd'): log.debug('Checking year against dvd release date') release_year = time.strptime(result['release_dates'].get('dvd'), '%Y-%m-%d').tm_year if fabs(release_year - year) > 1: mismatch.append('the DVD release (%s)' % release_year) if mismatch: log.warning('Rotten Tomatoes had an imdb alias for %s but it didn\'t match %s.' % (imdb_id, ', or '.join(mismatch))) else: log.debug('imdb_id %s maps to rt_id %s, checking db for info.' % (imdb_id, result['id'])) movie = session.query(RottenTomatoesMovie).\ filter(RottenTomatoesMovie.id == result.get('id')).first() if movie: log.debug('Movie %s was in database, but did not have the imdb_id stored, ' 'forcing an update' % movie) movie = _set_movie_details(movie, session, result) session.merge(movie) else: log.debug('%s was not in database, setting info.' % result['title']) movie = RottenTomatoesMovie() movie = _set_movie_details(movie, session, result) if not movie: raise PluginError('set_movie_details returned %s' % movie) session.add(movie) else: log.debug('IMDB alias %s returned no results.' 
% imdb_id) if not movie and rottentomatoes_id: result = movies_info(rottentomatoes_id) if result: movie = RottenTomatoesMovie() movie = _set_movie_details(movie, session, result) session.add(movie) if not movie and title: # TODO: Extract to method log.verbose('Searching from rt `%s`' % search_string) results = movies_search(search_string) if results: results = results.get('movies') if results: for movie_res in results: seq = difflib.SequenceMatcher( lambda x: x == ' ', movie_res['title'].lower(), title.lower()) movie_res['match'] = seq.ratio() results.sort(key=lambda x: x['match'], reverse=True) # Remove all movies below MIN_MATCH, and different year for movie_res in results[:]: if year and movie_res.get('year'): movie_res['year'] = int(movie_res['year']) if movie_res['year'] != year: release_year = False if movie_res.get('release_dates', {}).get('theater'): log.debug('Checking year against theater release date') release_year = time.strptime(movie_res['release_dates'].get('theater'), '%Y-%m-%d').tm_year elif movie_res.get('release_dates', {}).get('dvd'): log.debug('Checking year against dvd release date') release_year = time.strptime(movie_res['release_dates'].get('dvd'), '%Y-%m-%d').tm_year if not (release_year and release_year == year): log.debug('removing %s - %s (wrong year: %s)' % (movie_res['title'], movie_res['id'], str(release_year or movie_res['year']))) results.remove(movie_res) continue if movie_res['match'] < MIN_MATCH: log.debug('removing %s (min_match)' % movie_res['title']) results.remove(movie_res) continue if not results: raise PluginError('no appropiate results') if len(results) == 1: log.debug('SUCCESS: only one movie remains') else: # Check min difference between best two hits diff = results[0]['match'] - results[1]['match'] if diff < MIN_DIFF: log.debug('unable to determine correct movie, min_diff too small' '(`%s (%d) - %s` <-?-> `%s (%d) - %s`)' % (results[0]['title'], results[0]['year'], results[0]['id'], results[1]['title'], results[1]['year'], results[1]['id'])) for r in results: log.debug('remain: %s (match: %s) %s' % (r['title'], r['match'], r['id'])) raise PluginError('min_diff') imdb_alt_id = results[0].get('alternate_ids', {}).get('imdb') if imdb_alt_id: result = movies_alias(imdb_alt_id) else: result = movies_info(results[0].get('id')) if not result: result = results[0] movie = RottenTomatoesMovie() movie = _set_movie_details(movie, session, result) if imdb_id and not filter( lambda alt_id: alt_id.name == 'imdb' and alt_id.id == imdb_id.lstrip('t'), movie.alternate_ids): # TODO: get rid of these confusing lambdas log.warning('Adding flexget_imdb alternate id %s for movie %s' % (imdb_id, movie)) movie.alternate_ids.append(RottenTomatoesAlternateId('flexget_imdb', imdb_id.lstrip('t'))) session.add(movie) session.commit() if title.lower() != movie.title.lower(): log.debug('Saving search result for \'%s\'' % search_string) session.add(RottenTomatoesSearchResult(search=search_string, movie=movie)) except URLError: raise PluginError('Error looking up movie from RottenTomatoes') if not movie: raise PluginError('No results found from rotten tomatoes for %s' % id_str()) else: # Access attributes to force the relationships to eager load before we detach from session for attr in ['alternate_ids', 'cast', 'directors', 'genres', 'links', 'posters', 'release_dates']: getattr(movie, attr) session.commit() return movie
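# Both lookup paths above score candidate titles with difflib.SequenceMatcher and
# then apply two gates: MIN_MATCH rejects weak matches and MIN_DIFF demands a
# clear winner between the top two hits. The ranking logic in isolation
# (threshold values here are illustrative, not the plugin's actual constants):
import difflib

MIN_MATCH = 0.5
MIN_DIFF = 0.01

def best_title(title, candidates):
    scored = []
    for candidate in candidates:
        ratio = difflib.SequenceMatcher(lambda x: x == ' ',
                                        candidate.lower(), title.lower()).ratio()
        scored.append((ratio, candidate))
    scored.sort(reverse=True)
    scored = [(ratio, c) for ratio, c in scored if ratio >= MIN_MATCH]
    if not scored:
        raise LookupError('no appropriate results')
    if len(scored) > 1 and scored[0][0] - scored[1][0] < MIN_DIFF:
        raise LookupError('unable to determine correct movie, min_diff too small')
    return scored[0][1]

print(best_title('the matrix', ['The Matrix', 'The Matrix Reloaded', 'Titanic']))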
class Task(object): """ Represents one task in the configuration. **Fires events:** * task.execute.before_plugin Before a plugin is about to be executed. Note that since this will also include all builtin plugins the amount of calls can be quite high ``parameters: task, keyword`` * task.execute.after_plugin After a plugin has been executed. ``parameters: task, keyword`` * task.execute.completed After task execution has been completed ``parameters: task`` """ max_reruns = 5 def __init__(self, manager, name, config): """ :param Manager manager: Manager instance. :param string name: Name of the task. :param dict config: Task configuration. """ self.name = unicode(name) self.config = config self.manager = manager # simple persistence self.simple_persistence = SimpleTaskPersistence(self) # not to be reset self._rerun_count = 0 # This should not be used until after process_start, when it is evaluated self.config_modified = None # use reset to init variables when creating self._reset() # Make these read-only properties all_entries = property(lambda self: self._all_entries) entries = property(lambda self: self.all_entries.entries) accepted = property(lambda self: self.all_entries.accepted) rejected = property(lambda self: self.all_entries.rejected) failed = property(lambda self: self.all_entries.failed) @property def is_rerun(self): return self._rerun_count def _reset(self): """Reset task state""" log.debug('resetting %s' % self.name) self.enabled = True self.session = None self.priority = 65535 self.requests = requests.Session() # List of all entries in the task self._all_entries = EntryContainer(task=self) self.disabled_phases = [] # TODO: task.abort() should be done by using exception? not a flag that has to be checked everywhere self._abort = False self._abort_reason = None self._silent_abort = False self._rerun = False # current state self.current_phase = None self.current_plugin = None def __cmp__(self, other): return cmp(self.priority, other.priority) def __str__(self): return '<Task(name=%s,aborted=%s)>' % (self.name, str(self.aborted)) @property def aborted(self): return self._abort and not self._silent_abort @property def abort_reason(self): return self._abort_reason @property def undecided(self): """Iterate over undecided entries""" return (entry for entry in self.entries if not entry in self.accepted and entry not in self.rejected) def disable_phase(self, phase): """Disable ``phase`` from execution. All disabled phases are re-enabled by :meth:`Task._reset()` after task execution has been completed. :param string phase: Name of ``phase`` :raises ValueError: *phase* could not be found. """ if phase not in task_phases: raise ValueError('%s is not a valid phase' % phase) if phase not in self.disabled_phases: log.debug('Disabling %s phase' % phase) self.disabled_phases.append(phase) def abort(self, reason='Unknown', **kwargs): """Abort this task execution, no more plugins will be executed after the current one exists.""" if self._abort: return self._abort_reason = reason if not kwargs.get('silent', False): log.info('Aborting task (plugin: %s)' % self.current_plugin) self._silent_abort = False else: log.debug('Aborting task (plugin: %s)' % self.current_plugin) self._silent_abort = True # Run the abort phase before we set the _abort flag self._abort = True self.__run_task_phase('abort') def find_entry(self, category='entries', **values): """ Find and return :class:`~flexget.entry.Entry` with given attributes from task or None :param string category: entries, accepted, rejected or failed. 
Defaults to entries. :param values: Key values of entries to be searched :return: Entry or None """ cat = getattr(self, category) if not isinstance(cat, EntryIterator): raise TypeError('category must be a EntryIterator') for entry in cat: for k, v in values.iteritems(): if not (k in entry and entry[k] == v): break else: return entry return None def plugins(self, phase=None): """Get currently enabled plugins. :param string phase: Optional, limits to plugins currently configured on given phase, sorted in phase order. :return: An iterator over configured :class:`flexget.plugin.PluginInfo` instances enabled on this task. """ if phase: plugins = sorted(get_plugins_by_phase(phase), key=lambda p: p.phase_handlers[phase], reverse=True) else: plugins = all_plugins.itervalues() return (p for p in plugins if p.name in self.config or p.builtin) def __run_task_phase(self, phase): """Executes task phase, ie. call all enabled plugins on the task. Fires events: * task.execute.before_plugin * task.execute.after_plugin :param string phase: Name of the phase """ if phase not in task_phases + ['abort', 'process_start', 'process_end']: raise Exception('%s is not a valid task phase' % phase) # warn if no inputs, filters or outputs in the task if phase in ['input', 'filter', 'output']: if not self.manager.unit_test: # Check that there is at least one manually configured plugin for these phases for p in self.plugins(phase): if not p.builtin: break else: log.warning('Task doesn\'t have any %s plugins, you should add (at least) one!' % phase) for plugin in self.plugins(phase): # Abort this phase if one of the plugins disables it if phase in self.disabled_phases: return # store execute info, except during entry events self.current_phase = phase self.current_plugin = plugin.name if plugin.api_ver == 1: # backwards compatibility # pass method only task (old behaviour) args = (self,) else: # pass method task, copy of config (so plugin cannot modify it) args = (self, copy.copy(self.config.get(plugin.name))) try: fire_event('task.execute.before_plugin', self, plugin.name) response = self.__run_plugin(plugin, phase, args) if phase == 'input' and response: # add entries returned by input to self.entries self.all_entries.extend(response) finally: fire_event('task.execute.after_plugin', self, plugin.name) # Make sure we abort if any plugin sets our abort flag if self._abort and phase != 'abort': return def _run_entry_phase(self, phase, entry, **kwargs): # TODO: entry events are not very elegant, refactor into real (new) events or something ... if phase not in ['accept', 'reject', 'fail']: raise Exception('Not a valid entry phase') phase_plugins = self.plugins(phase) for plugin in phase_plugins: self.__run_plugin(plugin, phase, (self, entry), kwargs) def __run_plugin(self, plugin, phase, args=None, kwargs=None): """ Execute given plugins phase method, with supplied args and kwargs. If plugin throws unexpected exceptions :meth:`abort` will be called. 
:param PluginInfo plugin: Plugin to be executed :param string phase: Name of the phase to be executed :param args: Passed to the plugin :param kwargs: Passed to the plugin """ keyword = plugin.name method = plugin.phase_handlers[phase] if args is None: args = [] if kwargs is None: kwargs = {} # log.trace('Running %s method %s' % (keyword, method)) # call the plugin try: return method(*args, **kwargs) except PluginWarning as warn: # check if this warning should be logged only once (may keep repeating) if warn.kwargs.get('log_once', False): from flexget.utils.log import log_once log_once(warn.value, warn.log) else: warn.log.warning(warn) except EntryUnicodeError as eue: msg = ('Plugin %s tried to create non-unicode compatible entry (key: %s, value: %r)' % (keyword, eue.key, eue.value)) log.critical(msg) self.abort(msg) except PluginError as err: err.log.critical(err.value) self.abort(err.value) except DependencyError as e: msg = ('Plugin `%s` cannot be used because dependency `%s` is missing.' % (keyword, e.missing)) log.critical(msg) log.debug(e.message) self.abort(msg) except Exception as e: msg = 'BUG: Unhandled error in plugin %s: %s' % (keyword, e) log.exception(msg) self.abort(msg) # don't handle plugin errors gracefully with unit test if self.manager.unit_test: raise def rerun(self): """Immediately re-run the task after execute has completed, task can be re-run up to :attr:`.max_reruns` times.""" self._rerun = True log.info('Plugin %s has requested task to be ran again after execution has completed.' % self.current_plugin) def config_changed(self): """Forces config_modified flag to come out true on next run. Used when the db changes, and all entries need to be reprocessed.""" log.debug('Marking config as changed.') session = self.session or Session() task_hash = session.query(TaskConfigHash).filter(TaskConfigHash.task == self.name).first() if task_hash: task_hash.hash = '' self.config_modified = True # If we created our own session, commit and close it. if not self.session: session.commit() session.close() @useTaskLogging def execute(self, disable_phases=None, entries=None): """Executes the task. :param list disable_phases: Disable given phases names during execution :param list entries: Entries to be used in execution instead of using the input. Disables input phase. """ log.debug('executing %s' % self.name) # Store original config state to be restored if a rerun is needed config_backup = copy.deepcopy(self.config) self._reset() # Handle keyword args if disable_phases: map(self.disable_phase, disable_phases) if entries: # If entries are passed for this execution (eg. 
rerun), disable the input phase self.disable_phase('input') self.all_entries.extend(entries) # validate configuration errors = self.validate() if self._abort: # todo: bad practice return if errors and self.manager.unit_test: # todo: bad practice raise Exception('configuration errors') if self.manager.options.validate: if not errors: log.info('Task \'%s\' passed' % self.name) self.enabled = False return log.debug('starting session') self.session = Session() # Save current config hash and set config_modidied flag config_hash = hashlib.md5(str(sorted(self.config.items()))).hexdigest() last_hash = self.session.query(TaskConfigHash).filter(TaskConfigHash.task == self.name).first() if self.is_rerun: # Make sure on rerun config is not marked as modified self.config_modified = False elif not last_hash: self.config_modified = True last_hash = TaskConfigHash(task=self.name, hash=config_hash) self.session.add(last_hash) elif last_hash.hash != config_hash: self.config_modified = True last_hash.hash = config_hash else: self.config_modified = False try: # run phases for phase in task_phases: if phase in self.disabled_phases: # log keywords not executed for plugin in self.plugins(phase): if plugin.name in self.config: log.info('Plugin %s is not executed because %s phase is disabled (e.g. --test)' % (plugin.name, phase)) continue # run all plugins with this phase self.__run_task_phase(phase) # if abort flag has been set task should be aborted now # since this calls return rerun will not be done if self._abort: return log.debug('committing session, abort=%s' % self._abort) self.session.commit() fire_event('task.execute.completed', self) finally: # this will cause database rollback on exception and task.abort self.session.close() # rerun task if self._rerun: if self._rerun_count >= self.max_reruns: log.info('Task has been rerunning already %s times, stopping for now' % self._rerun_count) # reset the counter for future runs (necessary only with webui) self._rerun_count = 0 else: log.info('Rerunning the task in case better resolution can be achieved.') self._rerun_count += 1 # Restore config to original state before running again self.config = config_backup self.execute(disable_phases=disable_phases, entries=entries) # Clean up entries after the task has executed to reduce ram usage, #1652 # TODO: This doesn't work with unified entries, not sure best replacement """if not self.manager.unit_test: log.debug('Clearing all entries from task.') self.entries = [] self.rejected = [] self.failed = []""" def _process_start(self): """Execute process_start phase""" self.__run_task_phase('process_start') def _process_end(self): """Execute terminate phase for this task""" if self.manager.options.validate: log.debug('No process_end phase with --check') return self.__run_task_phase('process_end') def validate(self): """Called during task execution. Validates config, prints errors and aborts task if invalid.""" errors = self.validate_config(self.config) # log errors and abort if errors: log.critical('Task \'%s\' has configuration errors:' % self.name) for error in errors: log.error(error.error_with_path) # task has errors, abort it self.abort('\n'.join(e.error_with_path for e in errors)) return errors @staticmethod def validate_config(config): schema = plugin_schemas(context='task') # Don't validate commented out plugins schema['patternProperties'] = {'^_': {}} validator = config_schema.SchemaValidator(schema) return validator.process_config(config)
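# The validate_config above now delegates to a JSON-schema validator built from
# plugin_schemas (both FlexGet internals not shown here). The general shape of
# such a check, sketched with the stock jsonschema package; the schema below is
# hypothetical:
import jsonschema

schema = {
    'type': 'object',
    'properties': {'interval': {'type': 'integer', 'minimum': 1}},
    'patternProperties': {'^_': {}},  # commented-out plugins are not validated
    'additionalProperties': False,
}

validator = jsonschema.Draft4Validator(schema)
for error in validator.iter_errors({'interval': 0, '_disabled': {'anything': 1}}):
    print('%s: %s' % (list(error.path), error.message))  # only `interval` fails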
def lookup(self, entry, search_allowed=True): """ Perform imdb lookup for entry. :param entry: Entry instance :param search_allowed: Allow fallback to search :raises PluginError: Failure reason """ from flexget.manager import manager if entry.get('imdb_url', eval_lazy=False): log.debug('No title passed. Lookup for %s' % entry['imdb_url']) elif entry.get('imdb_id', eval_lazy=False): log.debug('No title passed. Lookup for %s' % entry['imdb_id']) elif entry.get('title', eval_lazy=False): log.debug('lookup for %s' % entry['title']) else: raise PluginError('looking up IMDB for entry failed, no title, imdb_url or imdb_id passed.') session = Session() try: # entry sanity checks for field in ['imdb_votes', 'imdb_score']: if entry.get(field, eval_lazy=False): value = entry[field] if not isinstance(value, (int, float)): raise PluginError('Entry field %s should be a number!' % field) # if imdb_id is included, build the url. if entry.get('imdb_id', eval_lazy=False) and not entry.get('imdb_url', eval_lazy=False): entry['imdb_url'] = make_url(entry['imdb_id']) # make sure imdb url is valid if entry.get('imdb_url', eval_lazy=False): imdb_id = extract_id(entry['imdb_url']) if imdb_id: entry['imdb_url'] = make_url(imdb_id) else: log.debug('imdb url %s is invalid, removing it' % entry['imdb_url']) del(entry['imdb_url']) # no imdb_url, check if there is cached result for it or if the # search is known to fail if not entry.get('imdb_url', eval_lazy=False): result = session.query(SearchResult).\ filter(SearchResult.title == entry['title']).first() if result: if result.fails and not manager.options.retry: # this movie cannot be found, not worth trying again ... log.debug('%s will fail lookup' % entry['title']) raise PluginError('Title `%s` lookup fails' % entry['title']) else: if result.url: log.trace('Setting imdb url for %s from db' % entry['title']) entry['imdb_url'] = result.url # no imdb url, but information required, try searching if not entry.get('imdb_url', eval_lazy=False) and search_allowed: log.verbose('Searching from imdb `%s`' % entry['title']) search = ImdbSearch() search_name = entry.get('movie_name', entry['title'], eval_lazy=False) search_result = search.smart_match(search_name) if search_result: entry['imdb_url'] = search_result['url'] # store url for this movie, so we don't have to search on # every run result = SearchResult(entry['title'], entry['imdb_url']) session.add(result) log.verbose('Found %s' % (entry['imdb_url'])) else: log_once('Imdb lookup failed for %s' % entry['title'], log) # store FAIL for this title result = SearchResult(entry['title']) result.fails = True session.add(result) raise PluginError('Title `%s` lookup failed' % entry['title']) # check if this imdb page has been parsed & cached movie = session.query(Movie).\ options(joinedload_all(Movie.genres), joinedload_all(Movie.languages), joinedload_all(Movie.actors), joinedload_all(Movie.directors)).\ filter(Movie.url == entry['imdb_url']).first() # determine whether or not movie details needs to be parsed req_parse = False if not movie: req_parse = True elif movie.expired: req_parse = True if req_parse: if movie is not None: if movie.expired: log.verbose('Movie `%s` details expired, refreshing ...' % movie.title) # Remove the old movie, we'll store another one later. 
session.query(MovieLanguage).filter(MovieLanguage.movie_id == movie.id).delete() session.query(Movie).filter(Movie.url == entry['imdb_url']).delete() # search and store to cache if 'title' in entry: log.verbose('Parsing imdb for `%s`' % entry['title']) else: log.verbose('Parsing imdb for `%s`' % entry['imdb_id']) try: movie = self._parse_new_movie(entry['imdb_url'], session) except UnicodeDecodeError: log.error('Unable to determine encoding for %s. Installing chardet library may help.' % entry['imdb_url']) # store cache so this will not be tried again movie = Movie() movie.url = entry['imdb_url'] session.add(movie) raise PluginError('UnicodeDecodeError') except ValueError as e: # TODO: might be a little too broad catch, what was this for anyway? ;P if manager.options.debug: log.exception(e) raise PluginError('Invalid parameter: %s' % entry['imdb_url'], log) for att in ['title', 'score', 'votes', 'year', 'genres', 'languages', 'actors', 'directors', 'mpaa_rating']: log.trace('movie.%s: %s' % (att, getattr(movie, att))) # store to entry entry.update_using_map(self.field_map, movie) finally: log.trace('committing session') session.commit()
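# A pattern worth noting in lookup(): failures are cached too. A SearchResult row
# with fails=True lets the next run refuse the title immediately instead of
# searching IMDB again. Stripped of the ORM, it is a three-state cache
# (unknown / known url / known failure); the names here are illustrative:
class LookupCache(object):

    def __init__(self):
        self._urls = {}  # title -> url, or None to record a failed search

    def lookup(self, title):
        if title not in self._urls:
            raise KeyError(title)  # never tried, caller should do a live search
        url = self._urls[title]
        if url is None:
            raise LookupError('Title `%s` lookup fails' % title)  # negative hit
        return url

    def store(self, title, url=None):
        self._urls[title] = url

cache = LookupCache()
cache.store('Some Obscure Movie')  # remember the failure
cache.store('The Matrix', 'http://www.imdb.com/title/tt0133093/')
print(cache.lookup('The Matrix'))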
def build(self, task): if not (plt and pd): logger.warning('Dependency does not exist: [matplotlib, pandas]') return if not task.accepted and not task.failed: return session = Session() columns = [ 'site', 'downloaded', 'uploaded', 'share_ratio', 'points', 'seeding', 'leeching', 'hr', ] data = {'sort_column': [], 'default_order': []} total_details = {} total_changed = {} for column in columns: data[column] = [] total_details[column] = 0 total_changed[column] = 0 order = len(task.all_entries) for entry in task.all_entries: data['default_order'].append(order) order = order - 1 user_details_db = self._get_user_details(session, entry['site_name']) if user_details_db is None: user_details_db = UserDetailsEntry(site=entry['site_name'], downloaded=0, uploaded=0, share_ratio=0, points=0, seeding=0, leeching=0, hr=0) session.add(user_details_db) session.commit() user_details_db = self._get_user_details( session, entry['site_name']) # failed if not entry.get('details'): for column in columns: value = getattr(user_details_db, column) if entry.failed: data[column].append( self.buid_data_text(column, value) + '*') else: data[column].append(self.buid_data_text(column, value)) if not entry.get('do_not_count') and column not in [ 'site' ]: self.count(total_details, column, value) data['sort_column'].append(0) continue # now details_now = {} for key, value in entry['details'].items(): details_now[key] = self.transfer_data(key, value) # changed details_changed = {} for key, value_now in details_now.items(): if value_now != '*': details_changed[key] = value_now - getattr( user_details_db, key) else: details_changed[key] = '*' if details_changed['uploaded'] == '*': data['sort_column'].append(0) else: data['sort_column'].append(details_changed['uploaded']) # append to data data['site'].append(entry['site_name']) for column in columns: if column == 'site': continue data[column].append('{}{}'.format( self.buid_data_text(column, getattr(user_details_db, column)), self.buid_data_text(column, details_changed[column], append=True))) if total_details.get(column) is None: total_details[column] = 0 if total_changed.get(column) is None: total_changed[column] = 0 if not entry.get('do_not_count') and column not in [ 'share_ratio', 'points' ]: total_details[column] = total_details[column] + getattr( user_details_db, column) if details_changed[column] != '*': total_changed[column] = total_changed[ column] + details_changed[column] # update db for key, value in details_now.items(): if value != '*': setattr(user_details_db, key, value) session.commit() data['site'].append('total') for column in columns: if column == 'site': continue data[column].append('{}{}'.format( self.buid_data_text(column, total_details[column]), self.buid_data_text(column, total_changed[column], append=True))) data['sort_column'].append(float('inf')) data['default_order'].append(float('inf')) df = pd.DataFrame(data) df.sort_values(by=['sort_column', 'default_order'], ascending=False, inplace=True) df.drop(columns=['sort_column', 'default_order'], inplace=True) line_count = len(data['site']) fig = plt.figure(figsize=(8, line_count / 1.8)) plt.axis('off') colors = [] for x in df.values: cc = [] for y in x: if '-' in y and 'm-team' not in y: cc.append('#f38181') elif '+' in y: cc.append('#95e1d3') elif '*' in y: cc.append('#eff48e') else: cc.append('white') colors.append(cc) col_widths = [0.14, 0.16, 0.16, 0.14, 0.14, 0.1, 0.1, 0.06] table = plt.table(cellText=df.values, cellColours=colors, bbox=[0, 0, 1, 1], colLabels=df.columns, colWidths=col_widths, 
loc='best')
table.auto_set_font_size(False)
table.set_fontsize(10)
fig.tight_layout()
plt.title(datetime.now().replace(microsecond=0))
plt.savefig('details_report.png', bbox_inches='tight', dpi=300)
plt.close(fig)  # release the figure so repeated task runs don't leak memory
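# build() above renders a pandas DataFrame as a matplotlib table and saves it to
# an image file. The bare pipeline, with the same guarded imports (selecting the
# Agg backend is an assumption that makes this work on a headless box):
try:
    import matplotlib
    matplotlib.use('Agg')  # must be selected before pyplot is imported
    import matplotlib.pyplot as plt
    import pandas as pd
except ImportError:
    plt = pd = None

if plt and pd:
    df = pd.DataFrame({'site': ['alpha', 'beta'], 'uploaded': ['1.0 GiB', '2.5 GiB']})
    fig = plt.figure(figsize=(4, 1.5))
    plt.axis('off')
    plt.table(cellText=df.values, colLabels=df.columns, bbox=[0, 0, 1, 1])
    plt.savefig('report.png', bbox_inches='tight', dpi=150)
    plt.close(fig)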
def migrate2(self):
    session = Session()
    try:
        from progressbar import ProgressBar, Percentage, Bar, ETA
    except ImportError:
        # only ImportError is expected here; a bare except would also hide real bugs
        print 'Critical: progressbar library not found, try running `bin/easy_install progressbar` ?'
        return

    class Seen(Base):
        __tablename__ = 'seen'

        id = Column(Integer, primary_key=True)
        field = Column(String)
        value = Column(String, index=True)
        task = Column('feed', String)
        added = Column(DateTime)

        def __init__(self, field, value, task):
            self.field = field
            self.value = value
            self.task = task
            self.added = datetime.now()

        def __str__(self):
            return '<Seen(%s=%s)>' % (self.field, self.value)

    print ''

    # REPAIR / REMOVE DUPLICATES
    index = 0
    removed = 0
    total = session.query(Seen).count() + 1
    widgets = ['Repairing - ', ETA(), ' ', Percentage(), ' ', Bar(left='[', right=']')]
    bar = ProgressBar(widgets=widgets, maxval=total).start()
    for seen in session.query(Seen).all():
        index += 1
        if index % 10 == 0:
            bar.update(index)
        amount = 0
        for dupe in session.query(Seen).filter(Seen.value == seen.value):
            amount += 1
            if amount > 1:
                removed += 1
                session.delete(dupe)
    bar.finish()

    # MIGRATE
    total = session.query(Seen).count() + 1
    widgets = ['Upgrading - ', ETA(), ' ', Percentage(), ' ', Bar(left='[', right=']')]
    bar = ProgressBar(widgets=widgets, maxval=total).start()
    index = 0
    for seen in session.query(Seen).all():
        index += 1
        if not index % 10:
            bar.update(index)
        se = SeenEntry(u'N/A', seen.task, u'migrated')
        se.added = seen.added
        se.fields.append(SeenField(seen.field, seen.value))
        session.add(se)
    bar.finish()

    session.execute('drop table seen;')
    session.commit()
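# For reference, the progressbar API used by both copies of migrate2, in
# isolation (classic `progressbar` package: widgets, maxval, start/update/finish):
from progressbar import ProgressBar, Percentage, Bar, ETA

total = 50
widgets = ['Working - ', ETA(), ' ', Percentage(), ' ', Bar(left='[', right=']')]
bar = ProgressBar(widgets=widgets, maxval=total).start()
for index in range(1, total + 1):
    # ... per-row work would happen here ...
    if index % 10 == 0:  # update sparingly, exactly as the migration loops do
        bar.update(index)
bar.finish()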
def lookup(self, entry, search_allowed=True): """Perform imdb lookup for entry. Raises PluginError with failure reason.""" from flexget.manager import manager if entry.get('imdb_url', lazy=False): log.debug('No title passed. Lookup for %s' % entry['imdb_url']) elif entry.get('imdb_id', lazy=False): log.debug('No title passed. Lookup for %s' % entry['imdb_id']) elif entry.get('title', lazy=False): log.debug('lookup for %s' % entry['title']) else: raise PluginError('looking up IMDB for entry failed, no title, imdb_url or imdb_id passed.') take_a_break = False session = Session() try: # entry sanity checks for field in ['imdb_votes', 'imdb_score']: if entry.get(field, lazy=False): value = entry[field] if not isinstance(value, (int, float)): raise PluginError('Entry field %s should be a number!' % field) # if imdb_id is included, build the url. if entry.get('imdb_id', lazy=False) and not entry.get('imdb_url', lazy=False): entry['imdb_url'] = make_url(entry['imdb_id']) # make sure imdb url is valid if entry.get('imdb_url', lazy=False): imdb_id = extract_id(entry['imdb_url']) if imdb_id: entry['imdb_url'] = make_url(imdb_id) else: log.debug('imdb url %s is invalid, removing it' % entry['imdb_url']) del(entry['imdb_url']) # no imdb_url, check if there is cached result for it or if the # search is known to fail if not entry.get('imdb_url', lazy=False): result = session.query(SearchResult).\ filter(SearchResult.title == entry['title']).first() if result: if result.fails and not manager.options.retry: # this movie cannot be found, not worth trying again ... log.debug('%s will fail lookup' % entry['title']) raise PluginError('Title `%s` lookup fails' % entry['title']) else: if result.url: log.trace('Setting imdb url for %s from db' % entry['title']) entry['imdb_url'] = result.url # no imdb url, but information required, try searching if not entry.get('imdb_url', lazy=False) and search_allowed: log.verbose('Searching from imdb `%s`' % entry['title']) take_a_break = True search = ImdbSearch() search_result = search.smart_match(entry['title']) if search_result: entry['imdb_url'] = search_result['url'] # store url for this movie, so we don't have to search on # every run result = SearchResult(entry['title'], entry['imdb_url']) session.add(result) log.verbose('Found %s' % (entry['imdb_url'])) else: log_once('Imdb lookup failed for %s' % entry['title'], log) # store FAIL for this title result = SearchResult(entry['title']) result.fails = True session.add(result) raise PluginError('Title `%s` lookup failed' % entry['title']) # check if this imdb page has been parsed & cached movie = session.query(Movie).\ options(joinedload_all(Movie.genres, Movie.languages, Movie.actors, Movie.directors)).\ filter(Movie.url == entry['imdb_url']).first() refresh_interval = 2 if movie: if movie.year: age = (datetime.now().year - movie.year) refresh_interval += age * 5 log.debug('cached movie `%s` age %i refresh interval %i days' % (movie.title, age, refresh_interval)) if not movie or movie.updated is None or \ movie.updated < datetime.now() - timedelta(days=refresh_interval): # Remove the old movie, we'll store another one later. 
session.query(Movie).filter(Movie.url == entry['imdb_url']).delete() # search and store to cache if 'title' in entry: log.verbose('Parsing imdb for `%s`' % entry['title']) else: log.verbose('Parsing imdb for `%s`' % entry['imdb_id']) try: take_a_break = True imdb = ImdbParser() imdb.parse(entry['imdb_url']) # store to database movie = Movie() movie.photo = imdb.photo movie.title = imdb.name movie.score = imdb.score movie.votes = imdb.votes movie.year = imdb.year movie.mpaa_rating = imdb.mpaa_rating movie.plot_outline = imdb.plot_outline movie.url = entry['imdb_url'] for name in imdb.genres: genre = session.query(Genre).\ filter(Genre.name == name).first() if not genre: genre = Genre(name) movie.genres.append(genre) # pylint:disable=E1101 for name in imdb.languages: language = session.query(Language).\ filter(Language.name == name).first() if not language: language = Language(name) movie.languages.append(language) # pylint:disable=E1101 for imdb_id, name in imdb.actors.iteritems(): actor = session.query(Actor).\ filter(Actor.imdb_id == imdb_id).first() if not actor: actor = Actor(imdb_id, name) movie.actors.append(actor) # pylint:disable=E1101 for imdb_id, name in imdb.directors.iteritems(): director = session.query(Director).\ filter(Director.imdb_id == imdb_id).first() if not director: director = Director(imdb_id, name) movie.directors.append(director) # pylint:disable=E1101 # so that we can track how long since we've updated the info later movie.updated = datetime.now() session.add(movie) except UnicodeDecodeError: log.error('Unable to determine encoding for %s. Installing chardet library may help.' % entry['imdb_url']) # store cache so this will not be tried again movie = Movie() movie.url = entry['imdb_url'] session.add(movie) raise PluginError('UnicodeDecodeError') except ValueError, e: # TODO: might be a little too broad catch, what was this for anyway? ;P if manager.options.debug: log.exception(e) raise PluginError('Invalid parameter: %s' % entry['imdb_url'], log) for att in ['title', 'score', 'votes', 'year', 'genres', 'languages', 'actors', 'directors', 'mpaa_rating']: log.trace('movie.%s: %s' % (att, getattr(movie, att))) # store to entry entry.update_using_map(self.field_map, movie) # give imdb a little break between requests (see: http://flexget.com/ticket/129#comment:1) if (take_a_break and not manager.options.debug and not manager.unit_test): import time time.sleep(3)
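# The cache-expiry rule in this older lookup() grows with the movie's age, so
# old titles are re-parsed rarely: 2 days base plus 5 days per year of age. The
# arithmetic isolated, with the constants from the code above:
from datetime import datetime, timedelta

def needs_refresh(updated, movie_year, now):
    refresh_interval = 2
    if movie_year:
        age = now.year - movie_year
        refresh_interval += age * 5
    return updated is None or updated < now - timedelta(days=refresh_interval)

now = datetime(2013, 1, 1)
print(needs_refresh(datetime(2012, 12, 28), 2012, now))  # False: within the 7-day window
print(needs_refresh(datetime(2012, 6, 1), 2012, now))    # True: older than 7 days
print(needs_refresh(datetime(2012, 10, 1), 1990, now))   # False: 117-day window for an old film
print(needs_refresh(None, 2012, now))                    # True: never parsed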
def lookup_movie(title=None, year=None, rottentomatoes_id=None, smart_match=None,
                 only_cached=False, session=None):
    """
    Do a lookup from Rotten Tomatoes for the movie matching the passed arguments.

    Any combination of criteria can be passed, the most specific criteria specified will be used.

    :param rottentomatoes_id: rottentomatoes_id of desired movie
    :param string title: title of desired movie
    :param year: release year of desired movie
    :param smart_match: attempt to clean and parse title and year from a string
    :param only_cached: if this is specified, an online lookup will not occur if the movie is not in the cache
    :param session: optionally specify a session to use, if specified, returned Movie will be live in that session
    :returns: The Movie object populated with data from Rotten Tomatoes
    :raises: PluginError if a match cannot be found or there are other problems with the lookup
    """

    if smart_match:
        # If smart_match was specified, and we don't have more specific criteria, parse it into a title and year
        title_parser = MovieParser()
        title_parser.parse(smart_match)
        title = title_parser.name
        year = title_parser.year
        if not title and not rottentomatoes_id:
            raise PluginError('Failed to parse name from %s' % smart_match)

    if title:
        search_string = title.lower()
        if year:
            search_string = '%s %s' % (search_string, year)
    elif not rottentomatoes_id:
        raise PluginError('No criteria specified for rotten tomatoes lookup')

    def id_str():
        return '<title=%s,year=%s,rottentomatoes_id=%s>' % (title, year, rottentomatoes_id)

    if not session:
        session = Session()

    log.debug('Looking up rotten tomatoes information for %s' % id_str())

    movie = None

    # Try to lookup from cache
    if rottentomatoes_id:
        movie = session.query(RottenTomatoesMovie).\
            filter(RottenTomatoesMovie.id == rottentomatoes_id).first()
    if not movie and title:
        movie_filter = session.query(RottenTomatoesMovie).\
            filter(func.lower(RottenTomatoesMovie.title) == title.lower())
        if year:
            movie_filter = movie_filter.filter(RottenTomatoesMovie.year == year)
        movie = movie_filter.first()
        if not movie:
            log.debug('No matches in movie cache found, checking search cache.')
            found = session.query(RottenTomatoesSearchResult).\
                filter(func.lower(RottenTomatoesSearchResult.search) == search_string).first()
            if found and found.movie:
                log.debug('Movie found in search cache.')
                movie = found.movie

    if movie:
        # Movie found in cache, check if cache has expired.
        if movie.expired and not only_cached:
            log.debug('Cache has expired for %s, attempting to refresh from Rotten Tomatoes.' % id_str())
            try:
                result = movies_info(movie.id)
                movie = _set_movie_details(movie, session, result)
                session.merge(movie)
            except URLError:
                log.error('Error refreshing movie details from Rotten Tomatoes, cached info being used.')
        else:
            log.debug('Movie %s information restored from cache.' % id_str())
    else:
        if only_cached:
            raise PluginError('Movie %s not found from cache' % id_str())
        # There was no movie found in the cache, do a lookup from Rotten Tomatoes
        log.debug('Movie %s not found in cache, looking up from rotten tomatoes.' % id_str())
        try:
            if not movie and rottentomatoes_id:
                result = movies_info(rottentomatoes_id)
                if result:
                    movie = RottenTomatoesMovie()
                    movie = _set_movie_details(movie, session, result)
                    session.add(movie)
            if not movie and title:
                # TODO: Extract to method
                log.verbose('Searching from rt `%s`' % search_string)
                results = movies_search(search_string)
                if results:
                    results = results.get('movies')
                    if results:
                        # score each result by title similarity, best first
                        for movie_res in results:
                            seq = difflib.SequenceMatcher(
                                lambda x: x == ' ', movie_res['title'].lower(), title.lower())
                            movie_res['match'] = seq.ratio()
                        results.sort(key=lambda x: x['match'], reverse=True)

                        # Remove all movies below MIN_MATCH, and different year
                        for movie_res in results[:]:
                            if year and movie_res.get('year'):
                                movie_res['year'] = int(movie_res['year'])
                                if movie_res['year'] != year:
                                    release_year = False
                                    if movie_res.get('release_dates', {}).get('theater'):
                                        log.debug('Checking year against theater release date')
                                        release_year = time.strptime(
                                            movie_res['release_dates'].get('theater'), '%Y-%m-%d').tm_year
                                    elif movie_res.get('release_dates', {}).get('dvd'):
                                        log.debug('Checking year against dvd release date')
                                        release_year = time.strptime(
                                            movie_res['release_dates'].get('dvd'), '%Y-%m-%d').tm_year
                                    if not (release_year and release_year == year):
                                        log.debug('removing %s - %s (wrong year: %s)' %
                                                  (movie_res['title'], movie_res['id'],
                                                   str(release_year or movie_res['year'])))
                                        results.remove(movie_res)
                                        continue
                            if movie_res['match'] < MIN_MATCH:
                                log.debug('removing %s (min_match)' % movie_res['title'])
                                results.remove(movie_res)
                                continue

                        if not results:
                            raise PluginError('no appropriate results')

                        if len(results) == 1:
                            log.debug('SUCCESS: only one movie remains')
                        else:
                            # Check min difference between best two hits
                            diff = results[0]['match'] - results[1]['match']
                            if diff < MIN_DIFF:
                                log.debug('unable to determine correct movie, min_diff too small '
                                          '(`%s (%d) - %s` <-?-> `%s (%d) - %s`)' %
                                          (results[0]['title'], results[0]['year'], results[0]['id'],
                                           results[1]['title'], results[1]['year'], results[1]['id']))
                                for r in results:
                                    log.debug('remain: %s (match: %s) %s' % (r['title'], r['match'], r['id']))
                                raise PluginError('min_diff')

                        result = movies_info(results[0].get('id'))
                        if not result:
                            result = results[0]

                        movie = session.query(RottenTomatoesMovie).\
                            filter(RottenTomatoesMovie.id == result['id']).first()
                        if not movie:
                            movie = RottenTomatoesMovie()
                            movie = _set_movie_details(movie, session, result)
                            session.add(movie)
                            session.commit()

                        if title.lower() != movie.title.lower():
                            log.debug('Saving search result for \'%s\'' % search_string)
                            session.add(RottenTomatoesSearchResult(search=search_string, movie=movie))
        except URLError:
            raise PluginError('Error looking up movie from RottenTomatoes')

    if not movie:
        raise PluginError('No results found from rotten tomatoes for %s' % id_str())
    else:
        # Access attributes to force the relationships to eager load before we detach from session
        for attr in ['alternate_ids', 'cast', 'directors', 'genres', 'links', 'posters', 'release_dates']:
            getattr(movie, attr)
        session.commit()
        return movie
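# --- Illustrative usage sketch (not part of the original module) ---
# Shows the two common entry points to lookup_movie() above: a smart_match
# string, from which title and year are parsed, and an explicit title/year
# pair. The titles below are placeholders; PluginError covers both the
# "no appropriate results" and the ambiguous-match (min_diff) outcomes.
def _example_rt_lookup():
    try:
        movie = lookup_movie(smart_match='Some.Movie.1999.720p.x264')
        log.info('matched %s (%s) rt_id=%s' % (movie.title, movie.year, movie.id))
        # explicit criteria skip smart_match parsing; only_cached avoids
        # any online lookup and raises if the movie is not cached
        movie = lookup_movie(title='Some Movie', year=1999, only_cached=True)
    except PluginError, e:
        log.warning('rotten tomatoes lookup failed: %s' % e)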
def lookup_movie(title=None, year=None, rottentomatoes_id=None, imdb_id=None, smart_match=None,
                 only_cached=False, session=None):
    """
    Do a lookup from Rotten Tomatoes for the movie matching the passed arguments.

    Any combination of criteria can be passed, the most specific criteria specified will be used.

    :param rottentomatoes_id: rottentomatoes_id of desired movie
    :param imdb_id: imdb_id of desired movie
    :param title: title of desired movie
    :param year: release year of desired movie
    :param smart_match: attempt to clean and parse title and year from a string
    :param only_cached: if this is specified, an online lookup will not occur if the movie is not in the cache
    :param session: optionally specify a session to use, if specified, returned Movie will be live in that session
    :returns: The Movie object populated with data from Rotten Tomatoes
    :raises: PluginError if a match cannot be found or there are other problems with the lookup
    """

    if smart_match:
        # If smart_match was specified, and we don't have more specific criteria, parse it into a title and year
        title_parser = MovieParser()
        title_parser.parse(smart_match)
        title = title_parser.name
        year = title_parser.year
        if not title and not (rottentomatoes_id or imdb_id):
            raise PluginError('Failed to parse name from %s' % smart_match)

    if title:
        search_string = title.lower()
        if year:
            search_string = '%s %s' % (search_string, year)
    elif not (rottentomatoes_id or imdb_id):
        raise PluginError('No criteria specified for rotten tomatoes lookup')

    def id_str():
        return '<title=%s,year=%s,rottentomatoes_id=%s,imdb_id=%s>' % (title, year, rottentomatoes_id, imdb_id)

    if not session:
        session = Session()

    log.debug('Looking up rotten tomatoes information for %s' % id_str())

    movie = None

    # Try to lookup from cache
    if rottentomatoes_id:
        movie = session.query(RottenTomatoesMovie).\
            filter(RottenTomatoesMovie.id == rottentomatoes_id).first()
    if not movie and imdb_id:
        alt_id = session.query(RottenTomatoesAlternateId).\
            filter(RottenTomatoesAlternateId.name.in_(['imdb', 'flexget_imdb'])).\
            filter(RottenTomatoesAlternateId.id == imdb_id.lstrip('t')).first()
        if alt_id:
            movie = session.query(RottenTomatoesMovie).\
                filter(RottenTomatoesMovie.id == alt_id.movie_id).first()
    if not movie and title:
        movie_filter = session.query(RottenTomatoesMovie).\
            filter(func.lower(RottenTomatoesMovie.title) == title.lower())
        if year:
            movie_filter = movie_filter.filter(RottenTomatoesMovie.year == year)
        movie = movie_filter.first()
        if not movie:
            found = session.query(RottenTomatoesSearchResult).\
                filter(func.lower(RottenTomatoesSearchResult.search) == search_string).first()
            if found and found.movie:
                movie = found.movie

    if movie:
        # Movie found in cache, check if cache has expired.
        if movie.expired and not only_cached:
            log.debug('Cache has expired for %s, attempting to refresh from Rotten Tomatoes.' % id_str())
            try:
                imdb_alt_id = movie.alternate_ids and \
                    filter(lambda alt_id: alt_id.name in ['imdb', 'flexget_imdb'], movie.alternate_ids)[0].id
                if imdb_alt_id:
                    result = movies_alias(imdb_alt_id, 'imdb')
                else:
                    result = movies_info(movie.id)
                movie = set_movie_details(movie, session, result)
                session.merge(movie)
            except URLError:
                log.error('Error refreshing movie details from Rotten Tomatoes, cached info being used.')
        else:
            log.debug('Movie %s information restored from cache.' % id_str())
    else:
        if only_cached:
            raise PluginError('Movie %s not found from cache' % id_str())
        # There was no movie found in the cache, do a lookup from Rotten Tomatoes
        log.debug('Movie %s not found in cache, looking up from rotten tomatoes.' % id_str())
        try:
            # Lookups using imdb_id
            if imdb_id:
                log.debug('Using IMDB alias %s.' % imdb_id)
                result = movies_alias(imdb_id, 'imdb')
                if result:
                    mismatch = []
                    if title and difflib.SequenceMatcher(lambda x: x == ' ',
                                                         re.sub('\s+\(.*\)$', '', result['title'].lower()),
                                                         title.lower()).ratio() < MIN_MATCH:
                        mismatch.append('the title (%s <-?-> %s)' % (title, result['title']))
                    if year and fabs(result['year'] - year) > 1:
                        mismatch.append('the year (%s <-?-> %s)' % (year, result['year']))
                        release_year = None
                        if result.get('release_dates', {}).get('theater'):
                            log.debug('Checking year against theater release date')
                            release_year = time.strptime(result['release_dates'].get('theater'),
                                                         '%Y-%m-%d').tm_year
                            if fabs(release_year - year) > 1:
                                mismatch.append('the theater release (%s)' % release_year)
                        elif result.get('release_dates', {}).get('dvd'):
                            log.debug('Checking year against dvd release date')
                            release_year = time.strptime(result['release_dates'].get('dvd'),
                                                         '%Y-%m-%d').tm_year
                            if fabs(release_year - year) > 1:
                                mismatch.append('the DVD release (%s)' % release_year)
                    if mismatch:
                        log.warning('Rotten Tomatoes had an imdb alias for %s but it didn\'t match %s.' %
                                    (imdb_id, ', or '.join(mismatch)))
                    else:
                        log.debug('imdb_id %s maps to rt_id %s, checking db for info.' % (imdb_id, result['id']))
                        movie = session.query(RottenTomatoesMovie).\
                            filter(RottenTomatoesMovie.id == result.get('id')).first()
                        if movie:
                            log.debug('Movie %s was in database, but did not have the imdb_id stored, '
                                      'forcing an update' % movie)
                            movie = set_movie_details(movie, session, result)
                            session.merge(movie)
                        else:
                            log.debug('%s was not in database, setting info.' % result['title'])
                            movie = RottenTomatoesMovie()
                            movie = set_movie_details(movie, session, result)
                            if not movie:
                                raise PluginError('set_movie_details returned %s' % movie)
                            session.add(movie)
                else:
                    log.debug('IMDB alias %s returned no results.' % imdb_id)
            if not movie and rottentomatoes_id:
                result = movies_info(rottentomatoes_id)
                if result:
                    movie = RottenTomatoesMovie()
                    movie = set_movie_details(movie, session, result)
                    session.add(movie)
            if not movie and title:
                log.verbose('Searching from rt `%s`' % search_string)
                results = movies_search(search_string)
                if results:
                    results = results.get('movies')
                    if results:
                        # score each result by title similarity, best first
                        for movie_res in results:
                            seq = difflib.SequenceMatcher(lambda x: x == ' ',
                                                          movie_res['title'].lower(), title.lower())
                            movie_res['match'] = seq.ratio()
                        results.sort(key=lambda x: x['match'], reverse=True)

                        # Remove all movies below MIN_MATCH, and different year
                        for movie_res in results[:]:
                            if year and movie_res.get('year') and movie_res['year'] != year:
                                release_year = False
                                if movie_res.get('release_dates', {}).get('theater'):
                                    log.debug('Checking year against theater release date')
                                    release_year = time.strptime(
                                        movie_res['release_dates'].get('theater'), '%Y-%m-%d').tm_year
                                elif movie_res.get('release_dates', {}).get('dvd'):
                                    log.debug('Checking year against dvd release date')
                                    release_year = time.strptime(
                                        movie_res['release_dates'].get('dvd'), '%Y-%m-%d').tm_year
                                if not (release_year and release_year == year):
                                    log.debug('removing %s - %s (wrong year: %s)' %
                                              (movie_res['title'], movie_res['id'],
                                               str(release_year or movie_res['year'])))
                                    results.remove(movie_res)
                                    continue
                            if movie_res['match'] < MIN_MATCH:
                                log.debug('removing %s (min_match)' % movie_res['title'])
                                results.remove(movie_res)
                                continue

                        if not results:
                            raise PluginError('no appropriate results')

                        if len(results) == 1:
                            log.debug('SUCCESS: only one movie remains')
                        else:
                            # Check min difference between best two hits
                            diff = results[0]['match'] - results[1]['match']
                            if diff < MIN_DIFF:
                                log.debug('unable to determine correct movie, min_diff too small '
                                          '(`%s (%d) - %s` <-?-> `%s (%d) - %s`)' %
                                          (results[0]['title'], results[0]['year'], results[0]['id'],
                                           results[1]['title'], results[1]['year'], results[1]['id']))
                                for r in results:
                                    log.debug('remain: %s (match: %s) %s' % (r['title'], r['match'], r['id']))
                                raise PluginError('min_diff')

                        imdb_alt_id = results[0].get('alternate_ids', {}).get('imdb')
                        if imdb_alt_id:
                            result = movies_alias(imdb_alt_id)
                        else:
                            result = movies_info(results[0].get('id'))
                        if not result:
                            result = results[0]

                        movie = RottenTomatoesMovie()
                        try:
                            movie = set_movie_details(movie, session, result)
                            if imdb_id and not filter(lambda alt_id: alt_id.name == 'imdb' and
                                                      alt_id.id == imdb_id.lstrip('t'), movie.alternate_ids):
                                log.warning('Adding flexget_imdb alternate id %s for movie %s' % (imdb_id, movie))
                                movie.alternate_ids.append(
                                    RottenTomatoesAlternateId('flexget_imdb', imdb_id.lstrip('t')))
                            session.add(movie)
                        except IntegrityError:
                            log.warning('Found movie %s in database after search even though we '
                                        'already looked, updating it with search result.' % movie)
                            session.rollback()
                            movie = session.query(RottenTomatoesMovie).\
                                filter(RottenTomatoesMovie.id == result['id']).first()
                            movie = set_movie_details(movie, session, result)
                            if imdb_id and not filter(lambda alt_id: alt_id.name == 'imdb' and
                                                      alt_id.id == imdb_id.lstrip('t'), movie.alternate_ids):
                                log.warning('Adding flexget_imdb alternate id %s for movie %s' % (imdb_id, movie))
                                movie.alternate_ids.append(
                                    RottenTomatoesAlternateId('flexget_imdb', imdb_id.lstrip('t')))
                            session.merge(movie)

                        if title.lower() != movie.title.lower():
                            log.debug('Saving search result for \'%s\'' % search_string)
                            session.add(RottenTomatoesSearchResult(search=search_string, movie=movie))
        except URLError:
            raise PluginError('Error looking up movie from RottenTomatoes')

    if not movie:
        raise PluginError('No results found from rotten tomatoes for %s' % id_str())
    else:
        # Access attributes to force the relationships to eager load before we detach from session
        for attr in ['alternate_ids', 'cast', 'directors', 'genres', 'links', 'posters', 'release_dates']:
            getattr(movie, attr)
        session.commit()
        return movie
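# --- Illustrative usage sketch (not part of the original module) ---
# The extended lookup_movie() above can also resolve IMDB ids through the
# Rotten Tomatoes alias endpoint (movies_alias), caching the mapping as a
# RottenTomatoesAlternateId row for later cache hits. The imdb id below is
# a placeholder.
def _example_rt_imdb_alias_lookup():
    try:
        # pass the full imdb id; the leading 't' characters are stripped
        # internally when the alternate-id cache is consulted
        movie = lookup_movie(imdb_id='tt0000000', title='Some Movie')
        log.info('resolved via imdb alias: %s rt_id=%s' % (movie.title, movie.id))
    except PluginError, e:
        log.warning('imdb alias lookup failed: %s' % e)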