Example #1
0
def log_once(message,
             logger=logging.getLogger('log_once'),
             once_level=logging.INFO,
             suppressed_level=f_logger.VERBOSE):
    """
    Log message only once using given logger. Returns False if suppressed logging.
    When suppressed, `suppressed_level` level is still logged.

    :param message: message text; also the deduplication key
    :param logger: logger to use (NOTE: default is bound once at import time)
    :param once_level: level used the first time the message is seen
    :param suppressed_level: level used for repeats of the same message
    :returns: True if logged at `once_level`, False if suppressed,
        None when the DB manager is unavailable (message still logged once)
    """
    # If there is no active manager, don't access the db
    from flexget.manager import manager
    if not manager:
        log.warning('DB not initialized. log_once will not work properly.')
        logger.log(once_level, message)
        return

    # md5 of the message text is the dedup key stored in the LogMessage table
    digest = hashlib.md5()
    digest.update(message.encode('latin1', 'replace'))  # ticket:250
    md5sum = digest.hexdigest()

    session = Session()
    try:
        # abort if this has already been logged
        if session.query(LogMessage).filter_by(md5sum=md5sum).first():
            logger.log(suppressed_level, message)
            return False

        row = LogMessage(md5sum)
        session.add(row)
        session.commit()
    finally:
        session.close()

    logger.log(once_level, message)
    return True
Example #2
0
    def add_backlog(self, task, entry, amount=''):
        """Add single entry to task backlog

        If :amount: is not specified, entry will only be injected on next execution.

        :param task: current task (task.name keys the backlog row)
        :param entry: entry to store; its 'after_input' snapshot is preferred
        :param amount: interval string accepted by parse_timedelta; '' means
            "next execution only"
        """
        snapshot = entry.snapshots.get('after_input')
        if not snapshot:
            if task.current_phase != 'input':
                # Not having a snapshot is normal during input phase, don't display a warning
                log.warning(
                    'No input snapshot available for `%s`, using current state'
                    % entry['title'])
            snapshot = entry
        expire_time = datetime.now() + parse_timedelta(amount)
        session = Session()
        try:
            backlog_entry = session.query(BacklogEntry).\
                filter(BacklogEntry.title == entry['title']).\
                filter(BacklogEntry.task == task.name).first()
            if backlog_entry:
                # If there is already a backlog entry for this, update the expiry time if necessary.
                if backlog_entry.expire < expire_time:
                    log.debug('Updating expiry time for %s' % entry['title'])
                    backlog_entry.expire = expire_time
            else:
                log.debug('Saving %s' % entry['title'])
                backlog_entry = BacklogEntry()
                backlog_entry.title = entry['title']
                backlog_entry.entry = snapshot
                backlog_entry.task = task.name
                backlog_entry.expire = expire_time
                session.add(backlog_entry)
            session.commit()
        finally:
            # Bug fix: the session was previously never closed, leaking a DB
            # connection on every call (and on any exception above).
            session.close()
Example #3
0
def log_once(message, logger=logging.getLogger('log_once')):
    """
    Log message only once using given logger. Returns False if suppressed logging.
    When suppressed verbose level is still logged.
    """

    # Deduplicate on an md5 of the message text (latin1 w/ replace, ticket:250)
    md5sum = hashlib.md5(message.encode('latin1', 'replace')).hexdigest()

    session = Session()
    try:
        seen_before = session.query(LogMessage).filter_by(md5sum=md5sum).first()
        if seen_before:
            # Repeat message: demote to verbose and signal suppression.
            logger.verbose(message)
            return False
        session.add(LogMessage(md5sum))
        session.commit()
    finally:
        session.close()

    logger.info(message)
    return True
Example #4
0
def log_once(message, logger=logging.getLogger('log_once'), once_level=logging.INFO, suppressed_level=f_logger.VERBOSE):
    """
    Log message only once using given logger. Returns False if suppressed logging.
    When suppressed, `suppressed_level` level is still logged.

    :param message: message text; also the deduplication key
    :param logger: logger to use (NOTE: default is bound once at import time)
    :param once_level: level used the first time the message is seen
    :param suppressed_level: level used for repeats of the same message
    :returns: True if logged at `once_level`, False if suppressed,
        None when the DB manager is unavailable (message still logged once)
    """
    # If there is no active manager, don't access the db
    from flexget.manager import manager
    if not manager:
        log.warning('DB not initialized. log_once will not work properly.')
        logger.log(once_level, message)
        return

    # md5 of the message text is the dedup key stored in the LogMessage table
    digest = hashlib.md5()
    digest.update(message.encode('latin1', 'replace')) # ticket:250
    md5sum = digest.hexdigest()

    session = Session()
    try:
        # abort if this has already been logged
        if session.query(LogMessage).filter_by(md5sum=md5sum).first():
            logger.log(suppressed_level, message)
            return False

        row = LogMessage(md5sum)
        session.add(row)
        session.commit()
    finally:
        session.close()

    logger.log(once_level, message)
    return True
Example #5
0
    def add_backlog(self, task, entry, amount=''):
        """Add single entry to task backlog

        If :amount: is not specified, entry will only be injected on next execution.

        :param task: current task (task.name keys the backlog row)
        :param entry: entry to store; its 'after_input' snapshot is preferred
        :param amount: interval string accepted by parse_timedelta; '' means
            "next execution only"
        """
        snapshot = entry.snapshots.get('after_input')
        if not snapshot:
            if task.current_phase != 'input':
                # Not having a snapshot is normal during input phase, don't display a warning
                log.warning('No input snapshot available for `%s`, using current state' % entry['title'])
            snapshot = entry
        expire_time = datetime.now() + parse_timedelta(amount)
        session = Session()
        try:
            backlog_entry = session.query(BacklogEntry).filter(BacklogEntry.title == entry['title']).\
                                                    filter(BacklogEntry.task == task.name).first()
            if backlog_entry:
                # If there is already a backlog entry for this, update the expiry time if necessary.
                if backlog_entry.expire < expire_time:
                    log.debug('Updating expiry time for %s' % entry['title'])
                    backlog_entry.expire = expire_time
            else:
                log.debug('Saving %s' % entry['title'])
                backlog_entry = BacklogEntry()
                backlog_entry.title = entry['title']
                backlog_entry.entry = snapshot
                backlog_entry.task = task.name
                backlog_entry.expire = expire_time
                session.add(backlog_entry)
            session.commit()
        finally:
            # Bug fix: the session was previously never closed, leaking a DB
            # connection on every call (and on any exception above).
            session.close()
Example #6
0
 def emit(self, record):
     """Logging-handler hook: persist *record* as a LogEntry row.

     Uses a short-lived session so the connection is always returned,
     even if the insert or commit fails.
     """
     session = Session()
     try:
         session.add(LogEntry(record))
         session.commit()
     finally:
         session.close()
Example #7
0
File: log.py Project: s-m-b/Flexget
def log_once(message, logger=logging.getLogger('log_once')):
    """
    Log message only once using given logger. Returns False if suppressed logging.
    When suppressed verbose level is still logged.
    """

    # Hash of the message text is the dedup key (ticket:250)
    checksum = hashlib.md5()
    checksum.update(message.encode('latin1', 'replace'))
    key = checksum.hexdigest()

    session = Session()
    try:
        already_logged = session.query(LogMessage).filter_by(md5sum=key).first()
        if already_logged:
            # Suppressed: still emit at verbose level.
            logger.verbose(message)
            return False
        session.add(LogMessage(key))
        session.commit()
    finally:
        session.close()

    logger.info(message)
    return True
Example #8
0
def set_version(plugin, version):
    """Record *version* as the schema version for *plugin* in the DB.

    The version must match the one declared via versioned_base and may
    never move backwards relative to the stored row.

    :raises ValueError: on unknown plugin, mismatched or lowered version
    """
    if plugin not in plugin_schemas:
        raise ValueError(
            'Tried to set schema version for %s plugin with no versioned_base.'
            % plugin)
    base_version = plugin_schemas[plugin]['version']
    if version != base_version:
        raise ValueError('Tried to set %s plugin schema version to %d when '
                         'it should be %d as defined in versioned_base.' %
                         (plugin, version, base_version))
    session = Session()
    try:
        schema = (session.query(PluginSchema)
                  .filter(PluginSchema.plugin == plugin)
                  .first())
        if not schema:
            # First time this plugin's schema is recorded.
            log.debug('Initializing plugin %s schema version to %i' %
                      (plugin, version))
            session.add(PluginSchema(plugin, version))
        elif version < schema.version:
            raise ValueError(
                'Tried to set plugin %s schema version to lower value' %
                plugin)
        elif version != schema.version:
            log.debug('Updating plugin %s schema version to %i' %
                      (plugin, version))
            schema.version = version
        session.commit()
    finally:
        session.close()
Example #9
0
 def emit(self, record):
     """Logging-handler hook: store *record* in the database as a LogEntry.

     A fresh session is opened and closed per record so no connection
     leaks if the write fails.
     """
     session = Session()
     try:
         session.add(LogEntry(record))
         session.commit()
     finally:
         session.close()
Example #10
0
    def queue_add(self, title=None, imdb_id=None, quality='ANY', force=True):
        """Add an item to the queue with the specified quality

        :param title: movie title; looked up via parse_what when missing
        :param imdb_id: imdb id; looked up via parse_what when missing
        :param quality: quality requirement, normalized by validate_quality
        :param force: stored as the queued movie's `immortal` flag
        :returns: dict with title/imdb_id/quality/force of the queued movie
        :raises QueueError: if the movie is already in the queue
        """

        if not title or not imdb_id:
            # We don't have all the info we need to add movie, do a lookup for more info
            result = self.parse_what(imdb_id or title)
            title = result['title']
            imdb_id = result['imdb_id']
        quality = self.validate_quality(quality)

        session = Session()
        try:
            # check if the item is already queued
            item = session.query(QueuedMovie).filter(
                QueuedMovie.imdb_id == imdb_id).first()
            if item:
                # Bug fix: this error path (and any exception above) used to
                # leave the session open, leaking the connection.
                raise QueueError('ERROR: %s is already in the queue' % title)
            #TODO: fix
            item = QueuedMovie(imdb_id=imdb_id,
                               quality=quality,
                               immortal=force,
                               title=title)
            session.add(item)
            session.commit()
            return {
                'title': title,
                'imdb_id': imdb_id,
                'quality': quality,
                'force': force
            }
        finally:
            session.close()
Example #11
0
    def get_login_cookies(self, username, password):
        """Return t411 auth cookies for *username*, using a DB cache.

        A cached account row is returned when present; an expired row is
        deleted first.  NOTE(review): after deleting an expired row its stale
        ``auth`` is still returned - confirm this is intended.
        Otherwise performs an HTTP login, caches the resulting cookies for
        one day and returns them; falls back to empty-string cookies when
        the expected cookies are not all present.

        :raises UrlRewritingError: if the login request itself fails
        """
        url_auth = 'http://www.t411.me/users/login'
        db_session = Session()
        account = db_session.query(torrent411Account).filter(
            torrent411Account.username == username).first()
        if account:
            if account.expiry_time < datetime.now():
                db_session.delete(account)
                db_session.commit()
            log.debug("Cookies found in db!")
            return account.auth
        else:
            log.debug("Getting login cookies from : %s " % url_auth)
            params = urllib.urlencode({
                'login': username,
                'password': password,
                'remember': '1'
            })
            cj = cookielib.CookieJar()
            #           WE NEED A COOKIE HOOK HERE TO AVOID REDIRECT COOKIES
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            #           NEED TO BE SAME USER_AGENT THAN DOWNLOAD LINK
            opener.addheaders = [('User-agent', self.USER_AGENT)]
            try:
                opener.open(url_auth, params)
            except Exception as e:
                raise UrlRewritingError("Connection Error for %s : %s" %
                                        (url_auth, e))

            # Collect the three cookies the site sets on successful login.
            authKey = None
            uid = None
            password = None

            for cookie in cj:
                if cookie.name == "authKey":
                    authKey = cookie.value
                if cookie.name == "uid":
                    uid = cookie.value
                if cookie.name == "pass":
                    password = cookie.value

            if authKey is not None and \
               uid is not None and \
               password is not None:
                authCookie = {
                    'uid': uid,
                    'password': password,
                    'authKey': authKey
                }
                # Cache the cookies for one day.
                db_session.add(
                    torrent411Account(username=username,
                                      auth=authCookie,
                                      expiry_time=datetime.now() +
                                      timedelta(days=1)))
                db_session.commit()
                return authCookie

        # Login did not yield all expected cookies: return empty credentials.
        return {"uid": "", "password": "", "authKey": ""}
Example #12
0
    def get_login_cookies(self, username, password):
        """Return t411 auth cookies for *username*, caching them in the DB.

        A cached account row is returned when present; an expired row is
        deleted first.  NOTE(review): after deleting an expired row its stale
        ``auth`` is still returned - confirm this is intended.
        Otherwise performs an HTTP login (solving a captcha when requested),
        caches the cookies for one day and returns them.  Returns {} when the
        login page does not show a logout link (bad credentials).
        NOTE(review): returns None implicitly when login succeeded but not
        all expected cookies were found - callers should handle that.

        :raises UrlRewritingError: if the login request itself fails
        """
        url_auth = 'http://www.t411.li/users/login'
        db_session = Session()
        account = db_session.query(torrent411Account).filter(
            torrent411Account.username == username).first()
        if account:
            if account.expiry_time < datetime.now():
                db_session.delete(account)
                db_session.commit()
            log.debug("Cookies found in db!")
            return account.auth
        else:
            log.debug("Getting login cookies from : %s " % url_auth)
            params = {'login': username, 'password': password, 'remember': '1'}
            cj = http.cookiejar.CookieJar()
            #           WE NEED A COOKIE HOOK HERE TO AVOID REDIRECT COOKIES
            opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
            #           NEED TO BE SAME USER_AGENT THAN DOWNLOAD LINK
            opener.addheaders = [('User-agent', self.USER_AGENT)]
            login_output = None
            try:
                login_output = opener.open(url_auth, urllib.parse.urlencode(params)).read()
            except Exception as e:
                raise UrlRewritingError("Connection Error for %s : %s" % (url_auth, e))

            if b'confirmer le captcha' in login_output:
                log.warning("Captcha requested for login.")
                login_output = self._solveCaptcha(login_output, url_auth, params, opener)

            if b'logout' in login_output:
                # Collect the three cookies the site sets on successful login.
                authKey = None
                uid = None
                password = None

                for cookie in cj:
                    if cookie.name == "authKey":
                        authKey = cookie.value
                    if cookie.name == "uid":
                        uid = cookie.value
                    if cookie.name == "pass":
                        password = cookie.value

                if authKey is not None and \
                        uid is not None and \
                        password is not None:
                    authCookie = {'uid': uid,
                                  'password': password,
                                  'authKey': authKey
                                  }
                    # Cache the cookies for one day.
                    db_session.add(torrent411Account(username=username,
                                                     auth=authCookie,
                                                     expiry_time=datetime.now() + timedelta(days=1)))
                    db_session.commit()
                    return authCookie
            else:
                log.error("Login failed (Torrent411). Check your login and password.")
                return {}
Example #13
0
    def get_login_cookies(self, username, password):
        """Return t411 auth cookies for *username*, using a DB cache.

        A cached account row is returned when present; an expired row is
        deleted first.  NOTE(review): after deleting an expired row its stale
        ``auth`` is still returned - confirm this is intended.
        Otherwise performs an HTTP login, caches the cookies for one day and
        returns them; falls back to empty-string cookies when the expected
        cookies are not all present.

        :raises UrlRewritingError: if the login request itself fails
        """
        url_auth = 'http://www.t411.me/users/login'
        db_session = Session()
        account = db_session.query(torrent411Account).filter(
            torrent411Account.username == username).first()
        if account:
            if account.expiry_time < datetime.now():
                db_session.delete(account)
                db_session.commit()
            log.debug("Cookies found in db!")
            return account.auth
        else:
            log.debug("Getting login cookies from : %s " % url_auth)
            params = urllib.urlencode({'login': username,
                                       'password': password,
                                       'remember': '1'})
            cj = cookielib.CookieJar()
#           WE NEED A COOKIE HOOK HERE TO AVOID REDIRECT COOKIES
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
#           NEED TO BE SAME USER_AGENT THAN DOWNLOAD LINK
            opener.addheaders = [('User-agent', self.USER_AGENT)]
            try:
                opener.open(url_auth, params)
            except Exception as e:
                raise UrlRewritingError("Connection Error for %s : %s" % (url_auth, e))

            # Collect the three cookies the site sets on successful login.
            authKey = None
            uid = None
            password = None

            for cookie in cj:
                if cookie.name == "authKey":
                    authKey = cookie.value
                if cookie.name == "uid":
                    uid = cookie.value
                if cookie.name == "pass":
                    password = cookie.value

            if authKey is not None and \
               uid is not None and \
               password is not None:
                authCookie = {'uid': uid,
                              'password': password,
                              'authKey': authKey
                              }
                # Cache the cookies for one day.
                db_session.add(torrent411Account(username=username,
                                                 auth=authCookie,
                                                 expiry_time=datetime.now() + timedelta(days=1)))
                db_session.commit()
                return authCookie

        # Login did not yield all expected cookies: return empty credentials.
        return {"uid": "",
                "password": "",
                "authKey": ""
                }
Example #14
0
 def _set_db_last_run(self):
     """Persist self.last_run into the DBTrigger row keyed by self.uid."""
     session = Session()
     try:
         db_trigger = session.query(DBTrigger).get(self.uid)
         if not db_trigger:
             # First run of this trigger: create its row.
             db_trigger = DBTrigger(self.uid)
             session.add(db_trigger)
         db_trigger.last_run = self.last_run
         session.commit()
     finally:
         session.close()
     log.debug('recorded last_run to the database')
Example #15
0
 def _set_db_last_run(self):
     """Write self.last_run to the DBTrigger row for self.uid, creating it if absent."""
     session = Session()
     try:
         db_trigger = session.query(DBTrigger).get(self.uid)
         if not db_trigger:
             # No row for this trigger yet: create one.
             db_trigger = DBTrigger(self.uid)
             session.add(db_trigger)
         db_trigger.last_run = self.last_run
         session.commit()
     finally:
         session.close()
     log.debug("recorded last_run to the database")
Example #16
0
File: seen.py Project: kop1/flexget
    def on_process_start(self, feed):
        """--seen CLI handler: record the given value as seen for all feeds.

        Disables normal feed execution for this run and stores a SeenEntry
        so the value is treated as already seen everywhere.
        """
        if not feed.manager.options.seen:
            return

        feed.manager.disable_feeds()

        session = Session()
        try:
            se = SeenEntry(u'--seen', unicode(feed.name))
            sf = SeenField(u'--seen', unicode(feed.manager.options.seen))
            se.fields.append(sf)
            session.add(se)
            session.commit()
        finally:
            # Bug fix: the session was previously never closed.
            session.close()

        log.info('Added %s as seen. This will affect all feeds.' % feed.manager.options.seen)
Example #17
0
def seen_add(options):
    """CLI handler: mark *options.add_value* as seen across all tasks.

    IMDB URLs are normalized to their imdb id before storing.
    """
    seen_name = options.add_value
    if is_imdb_url(seen_name):
        imdb_id = extract_id(seen_name)
        if imdb_id:
            seen_name = imdb_id

    session = Session()
    try:
        se = SeenEntry(seen_name, 'cli_seen')
        sf = SeenField('cli_seen', seen_name)
        se.fields.append(sf)
        session.add(se)
        session.commit()
    finally:
        # Bug fix: the session was previously never closed.
        session.close()
    console('Added %s as seen. This will affect all tasks.' % seen_name)
Example #18
0
def seen_add(options):
    """CLI handler: mark *options.add_value* as seen across all tasks.

    IMDB URLs are normalized to their imdb id before storing.
    """
    seen_name = options.add_value
    if is_imdb_url(seen_name):
        imdb_id = extract_id(seen_name)
        if imdb_id:
            seen_name = imdb_id

    session = Session()
    try:
        se = SeenEntry(seen_name, 'cli_seen')
        sf = SeenField('cli_seen', seen_name)
        se.fields.append(sf)
        session.add(se)
        session.commit()
    finally:
        # Bug fix: the session was previously never closed.
        session.close()
    console('Added %s as seen. This will affect all tasks.' % seen_name)
Example #19
0
    def on_task_start(self, task, config):
        """Check for a newer FlexGet release and warn when one exists.

        :param task: current task (only required by the plugin hook signature)
        :param config: plugin config; supports 'check_for_dev_version',
            'lookup' ('always' forces a check) and 'interval' (days)
        """
        if not config:
            return

        config = self.prepare_config(config)
        current_version = get_current_flexget_version()

        if config.get("check_for_dev_version") is False and current_version.endswith("dev"):
            log.debug("dev version detected, skipping check")
            return

        always_check = config.get("lookup") == "always"
        interval = config.get("interval")

        session = Session()
        try:
            last_check = session.query(LastVersionCheck).first()
            if not always_check:
                if last_check:
                    time_dif = datetime.now() - last_check.last_check_time
                    should_poll = time_dif.days > interval
                else:
                    should_poll = True

                if not should_poll:
                    log.debug("version check interval not met, skipping check")
                    return

            latest_version = get_latest_flexget_version_number()
            if not latest_version:
                log.warning("Could not get latest version of flexget")
                return
            elif latest_version != current_version:
                log.warning(
                    "You are not running latest Flexget Version. Current is %s and latest is %s",
                    current_version,
                    latest_version,
                )
            if last_check:
                log.debug("updating last check time")
                last_check.update()
            else:
                last_check = LastVersionCheck()
                log.debug("creating instance of last version check in DB")
                session.add(last_check)
            # Bug fix: the session was never committed nor closed, so the
            # last-check row was silently lost and the connection leaked.
            session.commit()
        finally:
            session.close()
Example #20
0
    def on_task_start(self, task, config):
        """Check for a newer FlexGet release and warn when one exists.

        :param task: current task (only required by the plugin hook signature)
        :param config: plugin config; supports 'check_for_dev_version',
            'lookup' ('always' forces a check) and 'interval' (days)
        """
        if not config:
            return

        config = self.prepare_config(config)
        current_version = get_current_flexget_version()

        if config.get('check_for_dev_version'
                      ) is False and current_version.endswith('dev'):
            log.debug('dev version detected, skipping check')
            return

        always_check = config.get('lookup') == 'always'
        interval = config.get('interval')

        session = Session()
        try:
            last_check = session.query(LastVersionCheck).first()
            if not always_check:
                if last_check:
                    time_dif = datetime.now() - last_check.last_check_time
                    should_poll = time_dif.days > interval
                else:
                    should_poll = True

                if not should_poll:
                    log.debug('version check interval not met, skipping check')
                    return

            latest_version = get_latest_flexget_version_number()
            if not latest_version:
                log.warning('Could not get latest version of flexget')
                return
            elif latest_version != current_version:
                log.warning(
                    'You are not running latest Flexget Version. Current is %s and latest is %s',
                    current_version, latest_version)
            if last_check:
                log.debug('updating last check time')
                last_check.update()
            else:
                last_check = LastVersionCheck()
                log.debug('creating instance of last version check in DB')
                session.add(last_check)
            # Bug fix: the session was never committed nor closed, so the
            # last-check row was silently lost and the connection leaked.
            session.commit()
        finally:
            session.close()
Example #21
0
    def on_task_start(self, task, config):
        """Check for a newer FlexGet release and warn when one exists.

        :param task: current task (only required by the plugin hook signature)
        :param config: plugin config; supports 'check_for_dev_version',
            'lookup' ('always' forces a check) and 'interval' (days)
        """
        if not config:
            return

        config = self.prepare_config(config)
        current_version = get_current_flexget_version()

        if config.get('check_for_dev_version') is False and current_version.endswith('dev'):
            log.debug('dev version detected, skipping check')
            return

        always_check = bool(config.get('lookup') == 'always')
        interval = config.get('interval')

        session = Session()
        try:
            last_check = session.query(LastVersionCheck).first()
            if not always_check:
                if last_check:
                    time_dif = datetime.now() - last_check.last_check_time
                    should_poll = time_dif.days > interval
                else:
                    should_poll = True

                if not should_poll:
                    log.debug('version check interval not met, skipping check')
                    return

            latest_version = get_latest_flexget_version_number()
            if not latest_version:
                log.warning('Could not get latest version of flexget')
                return
            elif latest_version != current_version:
                log.warning(
                    'You are not running latest Flexget Version. Current is %s and latest is %s',
                    current_version,
                    latest_version,
                )
            if last_check:
                log.debug('updating last check time')
                last_check.update()
            else:
                last_check = LastVersionCheck()
                log.debug('creating instance of last version check in DB')
                session.add(last_check)
            # Bug fix: the session was never committed nor closed, so the
            # last-check row was silently lost and the connection leaked.
            session.commit()
        finally:
            session.close()
Example #22
0
    def on_process_start(self, task):
        """--seen CLI handler: record the given value as seen for all tasks.

        IMDB URLs are normalized to their imdb id before storing.
        """
        if not task.manager.options.seen:
            return

        task.manager.disable_tasks()

        seen_name = task.manager.options.seen
        if is_imdb_url(seen_name):
            imdb_id = extract_id(seen_name)
            if imdb_id:
                seen_name = imdb_id

        session = Session()
        try:
            se = SeenEntry(u'--seen', unicode(task.name))
            sf = SeenField(u'--seen', seen_name)
            se.fields.append(sf)
            session.add(se)
            session.commit()
        finally:
            # Bug fix: the session was previously never closed.
            session.close()

        log.info('Added %s as seen. This will affect all tasks.' % seen_name)
Example #23
0
    def on_process_start(self, task):
        """--seen CLI handler: record the given value as seen for all tasks.

        IMDB URLs are normalized to their imdb id before storing.
        """
        if not task.manager.options.seen:
            return

        task.manager.disable_tasks()

        seen_name = task.manager.options.seen
        if is_imdb_url(seen_name):
            imdb_id = extract_id(seen_name)
            if imdb_id:
                seen_name = imdb_id

        session = Session()
        try:
            se = SeenEntry(u'--seen', unicode(task.name))
            sf = SeenField(u'--seen', seen_name)
            se.fields.append(sf)
            session.add(se)
            session.commit()
        finally:
            # Bug fix: the session was previously never closed.
            session.close()

        log.info('Added %s as seen. This will affect all tasks.' % seen_name)
Example #24
0
 def begin(self, manager, options):
     """CLI handler: set the first accepted episode for a series.

     Creates the series row if missing, then records options.episode_id as
     its begin episode; only commits when set_series_begin succeeds.
     """
     series_name = options.series_name
     ep_id = options.episode_id
     session = Session()
     try:
         # Bug fix: the query/add previously ran outside try, leaking the
         # session if they raised (module-level begin() already does this).
         series = session.query(Series).filter(Series.name == series_name).first()
         if not series:
             console('Series not yet in database, adding `%s`' % series_name)
             series = Series()
             series.name = series_name
             session.add(series)
         try:
             set_series_begin(series, ep_id)
         except ValueError as e:
             console(e)
         else:
             console('Episodes for `%s` will be accepted starting with `%s`' % (series.name, ep_id))
             session.commit()
     finally:
         session.close()
     manager.config_changed()
Example #25
0
def set_version(plugin, version):
    """Persist the schema *version* for *plugin* in the DB.

    The version must equal the one declared via versioned_base and may
    never move backwards relative to the stored row.

    :raises ValueError: on unknown plugin, mismatched or lowered version
    """
    if plugin not in plugin_schemas:
        raise ValueError('Tried to set schema version for %s plugin with no versioned_base.' % plugin)
    if version != plugin_schemas[plugin]['version']:
        raise ValueError('Tried to set %s plugin schema version not equal to that defined in versioned_base.' % plugin)
    session = Session()
    try:
        schema = (session.query(PluginSchema)
                  .filter(PluginSchema.plugin == plugin)
                  .first())
        if not schema:
            # First time this plugin's schema is recorded.
            log.debug('Initializing plugin %s schema version to %i' % (plugin, version))
            session.add(PluginSchema(plugin, version))
        elif version < schema.version:
            raise ValueError('Tried to set plugin %s schema version to lower value' % plugin)
        elif version != schema.version:
            log.debug('Updating plugin %s schema version to %i' % (plugin, version))
            schema.version = version
        session.commit()
    finally:
        session.close()
Example #26
0
def begin(manager, options):
    """CLI handler: record the first accepted episode id for a series.

    Adds the series row when missing; commits only if set_series_begin
    accepts the episode id, then flags the config as changed.
    """
    name = options.series_name
    ep_id = options.episode_id
    session = Session()
    try:
        series = session.query(Series).filter(Series.name == name).first()
        if series is None:
            console('Series not yet in database, adding `%s`' % name)
            series = Series()
            series.name = name
            session.add(series)
        try:
            set_series_begin(series, ep_id)
        except ValueError as e:
            # Invalid episode id: report and leave the DB untouched.
            console(e)
        else:
            console('Episodes for `%s` will be accepted starting with `%s`' % (series.name, ep_id))
            session.commit()
    finally:
        session.close()
    manager.config_changed()
Example #27
0
 def on_process_start(self, feed, config):
     """Purge remembered entries if the config has changed and write new hash"""
     # No session on process start, make our own
     session = Session()
     try:
         # Delete expired items
         session.query(RememberEntry).filter(RememberEntry.expires < datetime.now()).delete()
         # Generate hash for current config
         config_hash = hashlib.md5(str(feed.config.items())).hexdigest()
         # See if the feed has the same hash as last run
         old_feed = session.query(RememberFeed).filter(RememberFeed.name == feed.name).first()
         if old_feed and (old_feed.hash != config_hash or feed.manager.options.forget_rejected):
             if feed.manager.options.forget_rejected:
                 log.info('Forgetting previous rejections.')
             else:
                 log.verbose('Config has changed since last run, purging remembered entries.')
             session.delete(old_feed)
             old_feed = None
         if not old_feed:
             # Create this feed in the db if not present
             session.add(RememberFeed(name=feed.name, hash=config_hash))
         session.commit()
     finally:
         # Bug fix: the hand-made session was previously never closed.
         session.close()
Example #28
0
def series_begin(manager):
    """Handle the --series-begin startup option.

    Disables normal task execution, then records the given episode id as the
    first accepted episode for the named series (creating the series row when
    missing). Commits only when ``set_series_begin`` accepts the identifier.
    """
    if not manager.options.series_begin:
        return
    manager.disable_tasks()
    series_name, ep_id = manager.options.series_begin
    session = Session()
    try:
        # BUG FIX: the query/add used to run outside the try, so a DB error
        # there leaked the session; everything now happens under the finally.
        series = session.query(Series).filter(Series.name == series_name).first()
        if not series:
            console('Series not yet in database, adding `%s`' % series_name)
            series = Series()
            series.name = series_name
            session.add(series)
        try:
            set_series_begin(series, ep_id)
        except ValueError as e:
            # Bad episode identifier: report and leave the DB untouched.
            console(e)
        else:
            console('Episodes for `%s` will be accepted starting with `%s`' % (series.name, ep_id))
            session.commit()
    finally:
        session.close()
Example #29
0
    def notify(self, title, message, config):
        """Send a notification, retrying previously failed messages first.

        Any messages stored from earlier failed attempts are prepended to the
        current message. On send failure the combined payload is persisted as
        a MessageEntry for a later retry and a PluginError is raised.

        :param title: notification title (currently unused by the send path)
        :param message: message body to deliver
        :param config: plugin configuration
        :raises PluginError: when sending fails
        """
        session = Session()
        try:
            access_token = self._real_init(session, config)

            # Prepend any payloads that previously failed to send.
            failure_message = self._get_failure_message(session, config)
            all_messages = failure_message + message

            if access_token:
                try:
                    self._send_msgs(all_messages, access_token)
                except Exception as e:
                    # Persist the undelivered payload so it can be retried
                    # on the next notify() call.
                    entry = MessageEntry(
                        content=all_messages,
                        failure_time=datetime.now()
                    )
                    session.add(entry)
                    session.commit()
                    raise PluginError(str(e))
                if self.image:
                    self._send_images(access_token)
        finally:
            # BUG FIX: the session was never closed before; release it even
            # when sending raises.
            session.close()
Example #30
0
File: seen.py Project: kop1/flexget
    def migrate(self, feed):
        """Migrates 0.9 session data into new database"""

        # One-shot best-effort copy from the legacy shelve store into the
        # SQLAlchemy-backed seen table.
        session = Session()
        try:
            shelve = feed.manager.shelve_session
            count = 0
            log.info('If this crashes, you can\'t migrate 0.9 data to 1.0 ... sorry')
            for name, data in shelve.iteritems():  # Python 2 dict iteration
                if not 'seen' in data:
                    continue
                seen = data['seen']
                for k, v in seen.iteritems():
                    # NOTE(review): `seen` is a dict here, so `seen.feed` looks
                    # like it would raise AttributeError (silently caught by the
                    # broad except below) -- was `name` intended? TODO confirm.
                    se = SeenEntry(u'N/A', seen.feed, u'migrated')
                    se.fields.append(SeenField(u'unknown', k))
                    session.add(se)
                    count += 1
            session.commit()
            log.info('It worked! Migrated %s seen items' % count)
        except Exception:
            # Deliberately best-effort: any failure aborts the migration
            # without re-raising, leaving only the log message.
            log.critical('It crashed :(')
        finally:
            session.close()
Example #31
0
    def lookup(self, entry, search_allowed=True):
        """
        Perform imdb lookup for entry.

        :param entry: Entry instance
        :param search_allowed: Allow fallback to search
        :raises PluginError: Failure reason
        """

        from flexget.manager import manager

        # At least one of imdb_url, imdb_id or title must be present on the
        # entry to attempt a lookup.
        if entry.get("imdb_url", eval_lazy=False):
            log.debug("No title passed. Lookup for %s" % entry["imdb_url"])
        elif entry.get("imdb_id", eval_lazy=False):
            log.debug("No title passed. Lookup for %s" % entry["imdb_id"])
        elif entry.get("title", eval_lazy=False):
            log.debug("lookup for %s" % entry["title"])
        else:
            raise PluginError("looking up IMDB for entry failed, no title, imdb_url or imdb_id passed.")

        # Set whenever we hit the network (search or page parse) so we can
        # pause between requests at the end.
        take_a_break = False
        session = Session()

        # NOTE(review): this `try` has no matching except/finally in the
        # visible code and the session is never closed here -- the snippet
        # appears truncated. TODO confirm against the full source.
        try:
            # entry sanity checks
            for field in ["imdb_votes", "imdb_score"]:
                if entry.get(field, eval_lazy=False):
                    value = entry[field]
                    if not isinstance(value, (int, float)):
                        raise PluginError("Entry field %s should be a number!" % field)

            # if imdb_id is included, build the url.
            if entry.get("imdb_id", eval_lazy=False) and not entry.get("imdb_url", eval_lazy=False):
                entry["imdb_url"] = make_url(entry["imdb_id"])

            # make sure imdb url is valid
            if entry.get("imdb_url", eval_lazy=False):
                imdb_id = extract_id(entry["imdb_url"])
                if imdb_id:
                    entry["imdb_url"] = make_url(imdb_id)
                else:
                    log.debug("imdb url %s is invalid, removing it" % entry["imdb_url"])
                    del (entry["imdb_url"])

            # no imdb_url, check if there is cached result for it or if the
            # search is known to fail
            if not entry.get("imdb_url", eval_lazy=False):
                result = session.query(SearchResult).filter(SearchResult.title == entry["title"]).first()
                if result:
                    if result.fails and not manager.options.retry:
                        # this movie cannot be found, not worth trying again ...
                        log.debug("%s will fail lookup" % entry["title"])
                        raise PluginError("Title `%s` lookup fails" % entry["title"])
                    else:
                        if result.url:
                            log.trace("Setting imdb url for %s from db" % entry["title"])
                            entry["imdb_url"] = result.url

            # no imdb url, but information required, try searching
            if not entry.get("imdb_url", eval_lazy=False) and search_allowed:
                log.verbose("Searching from imdb `%s`" % entry["title"])

                take_a_break = True
                search = ImdbSearch()
                search_result = search.smart_match(entry["title"])
                if search_result:
                    entry["imdb_url"] = search_result["url"]
                    # store url for this movie, so we don't have to search on
                    # every run
                    result = SearchResult(entry["title"], entry["imdb_url"])
                    session.add(result)
                    log.verbose("Found %s" % (entry["imdb_url"]))
                else:
                    log_once("Imdb lookup failed for %s" % entry["title"], log)
                    # store FAIL for this title
                    result = SearchResult(entry["title"])
                    result.fails = True
                    session.add(result)
                    raise PluginError("Title `%s` lookup failed" % entry["title"])

            # check if this imdb page has been parsed & cached
            movie = (
                session.query(Movie)
                .options(
                    joinedload_all(Movie.genres),
                    joinedload_all(Movie.languages),
                    joinedload_all(Movie.actors),
                    joinedload_all(Movie.directors),
                )
                .filter(Movie.url == entry["imdb_url"])
                .first()
            )

            # determine whether or not movie details needs to be parsed
            req_parse = False
            if not movie:
                req_parse = True
            elif movie.expired:
                req_parse = True

            if req_parse:
                if movie is not None:
                    if movie.expired:
                        log.verbose("Movie `%s` details expired, refreshing ..." % movie.title)
                    # Remove the old movie, we'll store another one later.
                    session.query(Movie).filter(Movie.url == entry["imdb_url"]).delete()

                # search and store to cache
                if "title" in entry:
                    log.verbose("Parsing imdb for `%s`" % entry["title"])
                else:
                    log.verbose("Parsing imdb for `%s`" % entry["imdb_id"])
                try:
                    take_a_break = True
                    movie = self._parse_new_movie(entry["imdb_url"], session)
                except UnicodeDecodeError:
                    log.error(
                        "Unable to determine encoding for %s. Installing chardet library may help." % entry["imdb_url"]
                    )
                    # store cache so this will not be tried again
                    movie = Movie()
                    movie.url = entry["imdb_url"]
                    session.add(movie)
                    raise PluginError("UnicodeDecodeError")
                except ValueError, e:  # Python 2-only exception syntax
                    # TODO: might be a little too broad catch, what was this for anyway? ;P
                    if manager.options.debug:
                        log.exception(e)
                    raise PluginError("Invalid parameter: %s" % entry["imdb_url"], log)

            for att in ["title", "score", "votes", "year", "genres", "languages", "actors", "directors", "mpaa_rating"]:
                log.trace("movie.%s: %s" % (att, getattr(movie, att)))

            # store to entry
            entry.update_using_map(self.field_map, movie)

            # give imdb a little break between requests (see: http://flexget.com/ticket/129#comment:1)
            if take_a_break and not manager.options.debug and not manager.unit_test:
                import time

                time.sleep(3)
Example #32
0
    def lookup(self, entry, search_allowed=True):
        """
        Perform imdb lookup for entry.

        Resolves an imdb url from the entry's imdb_id/imdb_url/title (falling
        back to an imdb search when allowed), then fills the entry's imdb
        fields from the cached Movie row, re-parsing the imdb page when the
        cache is missing or expired.

        :param entry: Entry instance
        :param search_allowed: Allow fallback to search
        :raises PluginError: Failure reason
        """

        from flexget.manager import manager

        # At least one of imdb_id, imdb_url or title must be present.
        if entry.get('imdb_id', eval_lazy=False):
            log.debug('No title passed. Lookup for %s' % entry['imdb_id'])
        elif entry.get('imdb_url', eval_lazy=False):
            log.debug('No title passed. Lookup for %s' % entry['imdb_url'])
        elif entry.get('title', eval_lazy=False):
            log.debug('lookup for %s' % entry['title'])
        else:
            raise plugin.PluginError('looking up IMDB for entry failed, no title, imdb_url or imdb_id passed.')

        session = Session()

        try:
            # entry sanity checks
            for field in ['imdb_votes', 'imdb_score']:
                if entry.get(field, eval_lazy=False):
                    value = entry[field]
                    if not isinstance(value, (int, float)):
                        raise plugin.PluginError('Entry field %s should be a number!' % field)

            # if imdb_id is included, build the url.
            if entry.get('imdb_id', eval_lazy=False) and not entry.get('imdb_url', eval_lazy=False):
                entry['imdb_url'] = make_url(entry['imdb_id'])

            # make sure imdb url is valid
            if entry.get('imdb_url', eval_lazy=False):
                imdb_id = extract_id(entry['imdb_url'])
                if imdb_id:
                    entry['imdb_url'] = make_url(imdb_id)
                else:
                    log.debug('imdb url %s is invalid, removing it' % entry['imdb_url'])
                    del(entry['imdb_url'])

            # no imdb_url, check if there is cached result for it or if the
            # search is known to fail
            if not entry.get('imdb_url', eval_lazy=False):
                result = session.query(SearchResult).\
                    filter(SearchResult.title == entry['title']).first()
                if result:
                    # TODO: 1.2 this should really be checking task.options.retry
                    if result.fails and not manager.options.execute.retry:
                        # this movie cannot be found, not worth trying again ...
                        log.debug('%s will fail lookup' % entry['title'])
                        raise plugin.PluginError('IMDB lookup failed for %s' % entry['title'])
                    else:
                        if result.url:
                            log.trace('Setting imdb url for %s from db' % entry['title'])
                            entry['imdb_url'] = result.url

            # no imdb url, but information required, try searching
            if not entry.get('imdb_url', eval_lazy=False) and search_allowed:
                log.verbose('Searching from imdb `%s`' % entry['title'])

                search = ImdbSearch()
                search_name = entry.get('movie_name', entry['title'], eval_lazy=False)
                search_result = search.smart_match(search_name)
                if search_result:
                    entry['imdb_url'] = search_result['url']
                    # store url for this movie, so we don't have to search on
                    # every run
                    result = SearchResult(entry['title'], entry['imdb_url'])
                    session.add(result)
                    log.verbose('Found %s' % (entry['imdb_url']))
                else:
                    log_once('IMDB lookup failed for %s' % entry['title'], log, logging.WARN)
                    # store FAIL for this title
                    result = SearchResult(entry['title'])
                    result.fails = True
                    session.add(result)
                    raise plugin.PluginError('Title `%s` lookup failed' % entry['title'])

            # check if this imdb page has been parsed & cached
            movie = session.query(Movie).filter(Movie.url == entry['imdb_url']).first()

            # determine whether or not movie details needs to be parsed
            req_parse = False
            if not movie:
                req_parse = True
            elif movie.expired:
                req_parse = True

            if req_parse:
                if movie is not None:
                    if movie.expired:
                        log.verbose('Movie `%s` details expired, refreshing ...' % movie.title)
                    # Remove the old movie, we'll store another one later.
                    session.query(MovieLanguage).filter(MovieLanguage.movie_id == movie.id).delete()
                    session.query(Movie).filter(Movie.url == entry['imdb_url']).delete()

                # search and store to cache
                if 'title' in entry:
                    log.verbose('Parsing imdb for `%s`' % entry['title'])
                else:
                    log.verbose('Parsing imdb for `%s`' % entry['imdb_id'])
                try:
                    movie = self._parse_new_movie(entry['imdb_url'], session)
                except UnicodeDecodeError:
                    log.error('Unable to determine encoding for %s. Installing chardet library may help.' %
                              entry['imdb_url'])
                    # store cache so this will not be tried again
                    movie = Movie()
                    movie.url = entry['imdb_url']
                    session.add(movie)
                    raise plugin.PluginError('UnicodeDecodeError')
                except ValueError as e:
                    # TODO: might be a little too broad catch, what was this for anyway? ;P
                    if manager.options.debug:
                        log.exception(e)
                    raise plugin.PluginError('Invalid parameter: %s' % entry['imdb_url'], log)

            for att in ['title', 'score', 'votes', 'year', 'genres', 'languages', 'actors', 'directors', 'mpaa_rating']:
                log.trace('movie.%s: %s' % (att, getattr(movie, att)))

            # store to entry
            entry.update_using_map(self.field_map, movie)
        finally:
            # Commit even on failure so cached results (including the FAIL
            # marker stored above) are persisted.
            log.trace('committing session')
            session.commit()
            # BUG FIX: the session was committed but never closed, leaking
            # the underlying connection; close it after the commit.
            session.close()
Example #33
0
    def get_login_cookies(self, username, password):
        """Return t411 auth cookies, reusing the DB cache while still valid.

        A cached, non-expired torrent411Account row is returned directly.
        Otherwise a fresh login is performed (solving the captcha when the
        site asks for one) and, on success, the cookies are cached for one
        day and returned.

        :param username: t411 account name
        :param password: t411 account password
        :returns: dict with uid/password/authKey cookies, {} on login
                  failure, or None when the login page lacked the expected
                  cookies (preserving historical behaviour)
        :raises UrlRewritingError: on connection errors
        """
        url_auth = 'http://www.t411.in/users/login'
        db_session = Session()
        account = db_session.query(torrent411Account).filter(
            torrent411Account.username == username).first()
        if account:
            if account.expiry_time >= datetime.now():
                log.debug("Cookies found in db!")
                return account.auth
            # BUG FIX: the expired record used to be deleted yet its stale
            # cookies were still returned; now expiry forces a fresh login.
            db_session.delete(account)
            db_session.commit()

        log.debug("Getting login cookies from : %s " % url_auth)
        params = {'login': username, 'password': password, 'remember': '1'}
        cj = cookielib.CookieJar()
        #           WE NEED A COOKIE HOOK HERE TO AVOID REDIRECT COOKIES
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
        #           NEED TO BE SAME USER_AGENT THAN DOWNLOAD LINK
        opener.addheaders = [('User-agent', self.USER_AGENT)]
        try:
            login_output = opener.open(url_auth,
                                       urllib.urlencode(params)).read()
        except Exception as e:
            raise UrlRewritingError("Connection Error for %s : %s" %
                                    (url_auth, e))

        if b'confirmer le captcha' in login_output:
            log.warn("Captcha requested for login.")
            login_output = self._solveCaptcha(login_output, url_auth,
                                              params, opener)

        if b'logout' not in login_output:
            log.error(
                "Login failed (Torrent411). Check your login and password."
            )
            return {}

        # Successful login: harvest the three cookies we need.
        authKey = None
        uid = None
        password = None
        for cookie in cj:
            if cookie.name == "authKey":
                authKey = cookie.value
            if cookie.name == "uid":
                uid = cookie.value
            if cookie.name == "pass":
                password = cookie.value

        if authKey is not None and \
           uid is not None and \
           password is not None:
            authCookie = {
                'uid': uid,
                'password': password,
                'authKey': authKey
            }
            # Cache for one day so subsequent calls skip the login round-trip.
            db_session.add(
                torrent411Account(username=username,
                                  auth=authCookie,
                                  expiry_time=datetime.now() +
                                  timedelta(days=1)))
            db_session.commit()
            return authCookie
Example #34
0
def lookup_movie(
    title=None, year=None, rottentomatoes_id=None, imdb_id=None, smart_match=None, only_cached=False, session=None
):
    """Do a lookup from Rotten Tomatoes for the movie matching the passed arguments.

    Any combination of criteria can be passed, the most specific criteria specified will be used.

    :param rottentomatoes_id: rottentomatoes_id of desired movie
    :param imdb_id: imdb_id of desired movie
    :param title: title of desired movie
    :param year: release year of desired movie
    :param smart_match: attempt to clean and parse title and year from a string
    :param only_cached: if this is specified, an online lookup will not occur if the movie is not in the cache
    :param session: optionally specify a session to use, if specified, returned Movie will be live in that session
    :returns: The Movie object populated with data from Rotten Tomatoes
    :raises: PluginError if a match cannot be found or there are other problems with the lookup

    """

    if smart_match:
        # If smart_match was specified, and we don't have more specific criteria, parse it into a title and year
        title_parser = MovieParser()
        title_parser.parse(smart_match)
        title = title_parser.name
        year = title_parser.year
        # NOTE(review): the trailing `or title` below is redundant -- when
        # title == "" it is already falsy. Harmless, but confusing.
        if title == "" and not (rottentomatoes_id or imdb_id or title):
            raise PluginError("Failed to parse name from %s" % smart_match)

    if title:
        search_string = title.lower()
        if year:
            search_string = "%s %s" % (search_string, year)
    elif not (rottentomatoes_id or imdb_id):
        raise PluginError("No criteria specified for rotten tomatoes lookup")

    def id_str():
        # Compact description of the lookup criteria, used in log/error text.
        return "<title=%s,year=%s,rottentomatoes_id=%s,imdb_id=%s>" % (title, year, rottentomatoes_id, imdb_id)

    if not session:
        session = Session()

    log.debug("Looking up rotten tomatoes information for %s" % id_str())

    movie = None

    # Cache probes, most specific criterion first: rt id, imdb alias, title.
    if rottentomatoes_id:
        movie = session.query(RottenTomatoesMovie).filter(RottenTomatoesMovie.id == rottentomatoes_id).first()
    if not movie and imdb_id:
        alt_id = (
            session.query(RottenTomatoesAlternateId)
            .filter(RottenTomatoesAlternateId.name.in_(["imdb", "flexget_imdb"]))
            .filter(RottenTomatoesAlternateId.id == imdb_id.lstrip("t"))
            .first()
        )
        if alt_id:
            movie = session.query(RottenTomatoesMovie).filter(RottenTomatoesMovie.id == alt_id.movie_id).first()
    if not movie and title:
        movie_filter = session.query(RottenTomatoesMovie).filter(func.lower(RottenTomatoesMovie.title) == title.lower())
        if year:
            movie_filter = movie_filter.filter(RottenTomatoesMovie.year == year)
        movie = movie_filter.first()
        if not movie:
            log.debug("No matches in movie cache found, checking search cache.")
            found = (
                session.query(RottenTomatoesSearchResult)
                .filter(func.lower(RottenTomatoesSearchResult.search) == search_string)
                .first()
            )
            if found and found.movie:
                log.debug("Movie found in search cache.")
                movie = found.movie
    if movie:
        # Movie found in cache, check if cache has expired.
        if movie.expired and not only_cached:
            log.debug("Cache has expired for %s, attempting to refresh from Rotten Tomatoes." % id_str())
            try:
                # NOTE(review): indexing filter(...)[0] only works on Python 2
                # where filter returns a list; on Python 3 this would raise
                # TypeError. TODO confirm target Python version.
                imdb_alt_id = (
                    movie.alternate_ids
                    and filter(lambda alt_id: alt_id.name in ["imdb", "flexget_imdb"], movie.alternate_ids)[0].id
                )
                if imdb_alt_id:
                    result = movies_alias(imdb_alt_id, "imdb")
                else:
                    result = movies_info(movie.id)
                movie = set_movie_details(movie, session, result)
                session.merge(movie)
            except URLError:
                log.error("Error refreshing movie details from Rotten Tomatoes, cached info being used.")
        else:
            log.debug("Movie %s information restored from cache." % id_str())
    else:
        if only_cached:
            raise PluginError("Movie %s not found from cache" % id_str())
        # There was no movie found in the cache, do a lookup from Rotten Tomatoes
        log.debug("Movie %s not found in cache, looking up from rotten tomatoes." % id_str())
        try:
            # Lookups using imdb_id
            if imdb_id:
                log.debug("Using IMDB alias %s." % imdb_id)
                result = movies_alias(imdb_id, "imdb")
                if result:
                    # Sanity-check the alias hit against the requested title
                    # and year before trusting it.
                    mismatch = []
                    # NOTE(review): "\s" appears in a non-raw string; it works
                    # because Python leaves unknown escapes intact, but r"..."
                    # would be safer for the regex.
                    if (
                        title
                        and difflib.SequenceMatcher(
                            lambda x: x == " ", re.sub("\s+\(.*\)$", "", result["title"].lower()), title.lower()
                        ).ratio()
                        < MIN_MATCH
                    ):
                        mismatch.append("the title (%s <-?-> %s)" % (title, result["title"]))
                    result["year"] = int(result["year"])
                    if year and fabs(result["year"] - year) > 1:
                        mismatch.append("the year (%s <-?-> %s)" % (year, result["year"]))
                        release_year = None
                        if result.get("release_dates", {}).get("theater"):
                            log.debug("Checking year against theater release date")
                            release_year = time.strptime(result["release_dates"].get("theater"), "%Y-%m-%d").tm_year
                            if fabs(release_year - year) > 1:
                                mismatch.append("the theater release (%s)" % release_year)
                        elif result.get("release_dates", {}).get("dvd"):
                            log.debug("Checking year against dvd release date")
                            release_year = time.strptime(result["release_dates"].get("dvd"), "%Y-%m-%d").tm_year
                            if fabs(release_year - year) > 1:
                                mismatch.append("the DVD release (%s)" % release_year)
                    if mismatch:
                        log.warning(
                            "Rotten Tomatoes had an imdb alias for %s but it didn't match %s."
                            % (imdb_id, ", or ".join(mismatch))
                        )
                    else:
                        log.debug("imdb_id %s maps to rt_id %s, checking db for info." % (imdb_id, result["id"]))
                        movie = (
                            session.query(RottenTomatoesMovie)
                            .filter(RottenTomatoesMovie.id == result.get("id"))
                            .first()
                        )
                        if movie:
                            log.debug(
                                "Movie %s was in database, but did not have the imdb_id stored, "
                                "forcing an update" % movie
                            )
                            movie = set_movie_details(movie, session, result)
                            session.merge(movie)
                        else:
                            log.debug("%s was not in database, setting info." % result["title"])
                            movie = RottenTomatoesMovie()
                            movie = set_movie_details(movie, session, result)
                            if not movie:
                                raise PluginError("set_movie_details returned %s" % movie)
                            session.add(movie)
                else:
                    log.debug("IMDB alias %s returned no results." % imdb_id)
            if not movie and rottentomatoes_id:
                result = movies_info(rottentomatoes_id)
                if result:
                    movie = RottenTomatoesMovie()
                    movie = set_movie_details(movie, session, result)
                    session.add(movie)
            if not movie and title:
                log.verbose("Searching from rt `%s`" % search_string)
                results = movies_search(search_string)
                if results:
                    results = results.get("movies")
                    if results:
                        # Rank all hits by title similarity, best first.
                        for movie_res in results:
                            seq = difflib.SequenceMatcher(lambda x: x == " ", movie_res["title"].lower(), title.lower())
                            movie_res["match"] = seq.ratio()
                        results.sort(key=lambda x: x["match"], reverse=True)

                        # Remove all movies below MIN_MATCH, and different year
                        for movie_res in results[:]:

                            if year and movie_res.get("year"):
                                movie_res["year"] = int(movie_res["year"])
                                if movie_res["year"] != year:
                                    release_year = False
                                    if movie_res.get("release_dates", {}).get("theater"):
                                        log.debug("Checking year against theater release date")
                                        release_year = time.strptime(
                                            movie_res["release_dates"].get("theater"), "%Y-%m-%d"
                                        ).tm_year
                                    elif movie_res.get("release_dates", {}).get("dvd"):
                                        log.debug("Checking year against dvd release date")
                                        release_year = time.strptime(
                                            movie_res["release_dates"].get("dvd"), "%Y-%m-%d"
                                        ).tm_year
                                    if not (release_year and release_year == year):
                                        log.debug(
                                            "removing %s - %s (wrong year: %s)"
                                            % (
                                                movie_res["title"],
                                                movie_res["id"],
                                                str(release_year or movie_res["year"]),
                                            )
                                        )
                                        results.remove(movie_res)
                                        continue
                            if movie_res["match"] < MIN_MATCH:
                                log.debug("removing %s (min_match)" % movie_res["title"])
                                results.remove(movie_res)
                                continue

                        if not results:
                            raise PluginError("no appropiate results")

                        if len(results) == 1:
                            log.debug("SUCCESS: only one movie remains")
                        else:
                            # Check min difference between best two hits
                            diff = results[0]["match"] - results[1]["match"]
                            if diff < MIN_DIFF:
                                log.debug(
                                    "unable to determine correct movie, min_diff too small"
                                    "(`%s (%d) - %s` <-?-> `%s (%d) - %s`)"
                                    % (
                                        results[0]["title"],
                                        results[0]["year"],
                                        results[0]["id"],
                                        results[1]["title"],
                                        results[1]["year"],
                                        results[1]["id"],
                                    )
                                )
                                for r in results:
                                    log.debug("remain: %s (match: %s) %s" % (r["title"], r["match"], r["id"]))
                                raise PluginError("min_diff")

                        imdb_alt_id = results[0].get("alternate_ids", {}).get("imdb")
                        if imdb_alt_id:
                            result = movies_alias(imdb_alt_id)
                        else:
                            result = movies_info(results[0].get("id"))

                        if not result:
                            result = results[0]

                        movie = RottenTomatoesMovie()
                        try:
                            movie = set_movie_details(movie, session, result)
                            # NOTE(review): `not filter(...)` is Python 2 logic;
                            # on Python 3 filter() returns an always-truthy
                            # iterator, so this test would always be False.
                            if imdb_id and not filter(
                                lambda alt_id: alt_id.name == "imdb" and alt_id.id == imdb_id.lstrip("t"),
                                movie.alternate_ids,
                            ):
                                log.warning("Adding flexget_imdb alternate id %s for movie %s" % (imdb_id, movie))
                                movie.alternate_ids.append(
                                    RottenTomatoesAlternateId("flexget_imdb", imdb_id.lstrip("t"))
                                )
                            session.add(movie)
                            session.commit()
                        except IntegrityError:
                            log.warning(
                                "Found movie %s in database after search even though we "
                                "already looked, updating it with search result." % movie
                            )
                            session.rollback()
                            movie = (
                                session.query(RottenTomatoesMovie)
                                .filter(RottenTomatoesMovie.id == result["id"])
                                .first()
                            )
                            movie = set_movie_details(movie, session, result)
                            # NOTE(review): same Python 2-only filter()
                            # truthiness test as above.
                            if imdb_id and not filter(
                                lambda alt_id: alt_id.name == "imdb" and alt_id.id == imdb_id.lstrip("t"),
                                movie.alternate_ids,
                            ):
                                log.warning("Adding flexget_imdb alternate id %s for movie %s" % (imdb_id, movie))
                                movie.alternate_ids.append(
                                    RottenTomatoesAlternateId("flexget_imdb", imdb_id.lstrip("t"))
                                )
                            session.merge(movie)
                            session.commit()

                        if title.lower() != movie.title.lower():
                            log.debug("Saving search result for '%s'" % search_string)
                            session.add(RottenTomatoesSearchResult(search=search_string, movie=movie))
        except URLError:
            raise PluginError("Error looking up movie from RottenTomatoes")

    if not movie:
        raise PluginError("No results found from rotten tomatoes for %s" % id_str())
    else:
        # Access attributes to force the relationships to eager load before we detach from session
        for attr in ["alternate_ids", "cast", "directors", "genres", "links", "posters", "release_dates"]:
            getattr(movie, attr)
        session.commit()
        return movie
Example #35
0
class Task(object):
    """
    Represents one task in the configuration.

    **Fires events:**

    * task.execute.before_plugin

      Before a plugin is about to be executed. Note that since this will also include all
      builtin plugins the amount of calls can be quite high

      ``parameters: task, keyword``

    * task.execute.after_plugin

      After a plugin has been executed.

      ``parameters: task, keyword``

    * task.execute.completed

      After task execution has been completed

      ``parameters: task``

    """

    # Maximum number of times a task will re-execute itself when a plugin calls rerun()
    max_reruns = 5

    def __init__(self, manager, name, config=None, options=None):
        """
        :param Manager manager: Manager instance.
        :param string name: Name of the task.
        :param dict config: Task configuration. Defaults to the task's entry in
            the manager config when not given.
        :param options: Execution options; either a namespace or a dict of
            overrides applied on top of the manager's default execute options.
        """
        self.name = unicode(name)
        self.manager = manager
        # raw_config should remain the untouched input config
        if config is None:
            config = manager.config['tasks'].get(name, {})
        # Deep copy so plugins mutating self.config cannot corrupt the manager's config
        self.config = copy.deepcopy(config)
        # Snapshot of config taken right after the 'start' phase; restored on reruns
        self.prepared_config = None
        if options is None:
            options = copy.copy(self.manager.options.execute)
        elif isinstance(options, dict):
            # Merge a plain dict of option overrides onto the default execute namespace
            options_namespace = copy.copy(self.manager.options.execute)
            options_namespace.__dict__.update(options)
            options = options_namespace
        self.options = options

        # simple persistence
        self.simple_persistence = SimpleTaskPersistence(self)

        # not to be reset (survives _reset() so rerun limiting works across reruns)
        self._rerun_count = 0

        self.config_modified = None

        # use reset to init variables when creating
        self._reset()

    @property
    def undecided(self):
        """
        .. deprecated:: Use API v3
        """
        return self.all_entries.undecided

    @property
    def failed(self):
        """
        .. deprecated:: Use API v3
        """
        return self.all_entries.failed

    @property
    def rejected(self):
        """
        .. deprecated:: Use API v3
        """
        return self.all_entries.rejected

    @property
    def accepted(self):
        """
        .. deprecated:: Use API v3
        """
        return self.all_entries.accepted

    @property
    def entries(self):
        """
        .. deprecated:: Use API v3
        """
        return self.all_entries.entries

    @property
    def all_entries(self):
        """
        .. deprecated:: Use API v3
        """
        return self._all_entries

    @property
    def is_rerun(self):
        # NOTE: returns the rerun count (truthy when this execution is a rerun),
        # not a strict bool.
        return self._rerun_count

    # TODO: can we get rid of this now that Tasks are instantiated on demand?
    def _reset(self):
        """Reset task state"""
        log.debug('resetting %s' % self.name)
        # Tasks whose name starts with '_' are disabled by convention
        self.enabled = not self.name.startswith('_')
        self.session = None
        self.priority = 65535

        self.requests = requests.Session()

        # List of all entries in the task
        self._all_entries = EntryContainer()

        self.disabled_phases = []

        # These are just to query what happened in task. Call task.abort to set.
        self.aborted = False
        self.abort_reason = None
        self.silent_abort = False

        self._rerun = False

        # current state
        self.current_phase = None
        self.current_plugin = None

    def __cmp__(self, other):
        # Python 2 ordering: tasks sort by priority
        return cmp(self.priority, other.priority)

    def __str__(self):
        return '<Task(name=%s,aborted=%s)>' % (self.name, self.aborted)

    def disable_phase(self, phase):
        """Disable ``phase`` from execution.

        All disabled phases are re-enabled by :meth:`Task._reset()` after task
        execution has been completed.

        :param string phase: Name of ``phase``
        :raises ValueError: *phase* could not be found.
        """
        if phase not in task_phases:
            raise ValueError('%s is not a valid phase' % phase)
        if phase not in self.disabled_phases:
            log.debug('Disabling %s phase' % phase)
            self.disabled_phases.append(phase)

    def abort(self, reason='Unknown', silent=False):
        """Abort this task execution, no more plugins will be executed except the abort handling ones."""
        self.aborted = True
        self.abort_reason = reason
        self.silent_abort = silent
        if not self.silent_abort:
            log.warning('Aborting task (plugin: %s)' % self.current_plugin)
        else:
            log.debug('Aborting task (plugin: %s)' % self.current_plugin)
        # Unwinds out of the phase loop; execute() catches this to run abort handlers
        raise TaskAbort(reason, silent=silent)

    def find_entry(self, category='entries', **values):
        """
        Find and return :class:`~flexget.entry.Entry` with given attributes from task or None

        :param string category: entries, accepted, rejected or failed. Defaults to entries.
        :param values: Key values of entries to be searched
        :return: Entry or None
        """
        cat = getattr(self, category)
        if not isinstance(cat, EntryIterator):
            raise TypeError('category must be a EntryIterator')
        # Return the first entry matching ALL supplied key/value pairs
        for entry in cat:
            for k, v in values.iteritems():
                if not (k in entry and entry[k] == v):
                    break
            else:
                return entry
        return None

    def plugins(self, phase=None):
        """Get currently enabled plugins.

        :param string phase:
          Optional, limits to plugins currently configured on given phase, sorted in phase order.
        :return:
          An iterator over configured :class:`flexget.plugin.PluginInfo` instances enabled on this task.
        """
        if phase:
            plugins = sorted(get_plugins(phase=phase),
                             key=lambda p: p.phase_handlers[phase],
                             reverse=True)
        else:
            plugins = all_plugins.itervalues()
        # Builtins always run; others run only when present in the task config
        return (p for p in plugins if p.name in self.config or p.builtin)

    def __run_task_phase(self, phase):
        """Executes task phase, ie. call all enabled plugins on the task.

        Fires events:

        * task.execute.before_plugin
        * task.execute.after_plugin

        :param string phase: Name of the phase
        """
        if phase not in phase_methods:
            raise Exception('%s is not a valid task phase' % phase)
        # warn if no inputs, filters or outputs in the task
        if phase in ['input', 'filter', 'output']:
            if not self.manager.unit_test:
                # Check that there is at least one manually configured plugin for these phases
                for p in self.plugins(phase):
                    if not p.builtin:
                        break
                else:
                    log.warning(
                        'Task doesn\'t have any %s plugins, you should add (at least) one!'
                        % phase)

        for plugin in self.plugins(phase):
            # Abort this phase if one of the plugins disables it
            if phase in self.disabled_phases:
                return
            # store execute info, except during entry events
            self.current_phase = phase
            self.current_plugin = plugin.name

            if plugin.api_ver == 1:
                # backwards compatibility
                # pass method only task (old behaviour)
                args = (self, )
            else:
                # pass method task, copy of config (so plugin cannot modify it)
                args = (self, copy.copy(self.config.get(plugin.name)))

            try:
                fire_event('task.execute.before_plugin', self, plugin.name)
                response = self.__run_plugin(plugin, phase, args)
                if phase == 'input' and response:
                    # add entries returned by input to self.all_entries
                    for e in response:
                        e.task = self
                    self.all_entries.extend(response)
            finally:
                # after_plugin fires even when the plugin raised (e.g. TaskAbort)
                fire_event('task.execute.after_plugin', self, plugin.name)

    def __run_plugin(self, plugin, phase, args=None, kwargs=None):
        """
        Execute given plugins phase method, with supplied args and kwargs.
        If plugin throws unexpected exceptions :meth:`abort` will be called.

        :param PluginInfo plugin: Plugin to be executed
        :param string phase: Name of the phase to be executed
        :param args: Passed to the plugin
        :param kwargs: Passed to the plugin
        """
        keyword = plugin.name
        method = plugin.phase_handlers[phase]
        if args is None:
            args = []
        if kwargs is None:
            kwargs = {}

        # log.trace('Running %s method %s' % (keyword, method))
        # call the plugin
        try:
            return method(*args, **kwargs)
        except TaskAbort:
            # Already being handled by abort(); let it propagate to execute()
            raise
        except PluginWarning as warn:
            # check if this warning should be logged only once (may keep repeating)
            if warn.kwargs.get('log_once', False):
                from flexget.utils.log import log_once
                log_once(warn.value, warn.log)
            else:
                warn.log.warning(warn)
        except EntryUnicodeError as eue:
            msg = (
                'Plugin %s tried to create non-unicode compatible entry (key: %s, value: %r)'
                % (keyword, eue.key, eue.value))
            log.critical(msg)
            self.abort(msg)
        except PluginError as err:
            err.log.critical(err.value)
            self.abort(err.value)
        except DependencyError as e:
            msg = (
                'Plugin `%s` cannot be used because dependency `%s` is missing.'
                % (keyword, e.missing))
            log.critical(msg)
            log.debug(e.message)
            self.abort(msg)
        except Warning as e:
            # If warnings have been elevated to errors
            msg = 'Warning during plugin %s: %s' % (keyword, e)
            log.exception(msg)
            self.abort(msg)
        except Exception as e:
            msg = 'BUG: Unhandled error in plugin %s: %s' % (keyword, e)
            log.exception(msg)
            self.abort(msg)

    def rerun(self):
        """Immediately re-run the task after execute has completed,
        task can be re-run up to :attr:`.max_reruns` times."""
        msg = 'Plugin %s has requested task to be ran again after execution has completed.' % self.current_plugin
        # Only print the first request for a rerun to the info log
        log.debug(msg) if self._rerun else log.info(msg)
        if self._rerun_count >= self.max_reruns:
            self._rerun = False
            log.info(
                'Task has been re-run %s times already, stopping for now' %
                self._rerun_count)
            return
        self._rerun = True

    def config_changed(self):
        """
        Sets config_modified flag to True for the remainder of this run.
        Used when the db changes, and all entries need to be reprocessed.
        """
        self.config_modified = True

    @useTaskLogging
    def execute(self):
        """
        Executes the the task.

        If :attr:`.enabled` is False task is not executed. Certain :attr:`.options`
        affect how execution is handled.

        - :attr:`.options.disable_phases` is a list of phases that are not enabled
          for this execution.
        - :attr:`.options.inject` is a list of :class:`Entry` instances used instead
          of running input phase.
        """
        if not self.enabled:
            log.debug('Not running disabled task %s' % self.name)
        if self.options.cron:
            self.manager.db_cleanup()

        self._reset()
        log.debug('executing %s' % self.name)
        # Re-check: a plugin may have disabled the task during _reset/preparation
        if not self.enabled:
            log.debug('task %s disabled during preparation, not running' %
                      self.name)
            return

        # Handle keyword args
        if self.options.learn:
            log.info('Disabling download and output phases because of --learn')
            self.disable_phase('download')
            self.disable_phase('output')
        if self.options.disable_phases:
            # Python 2: map() is eager, so this applies disable_phase to each name
            map(self.disable_phase, self.options.disable_phases)
        if self.options.inject:
            # If entries are passed for this execution (eg. rerun), disable the input phase
            self.disable_phase('input')
            self.all_entries.extend(self.options.inject)

        log.debug('starting session')
        self.session = Session()

        # Save current config hash and set config_modified flag
        # (items are sorted so the hash is stable across dict orderings)
        config_hash = hashlib.md5(str(sorted(self.config.items()))).hexdigest()
        last_hash = self.session.query(TaskConfigHash).filter(
            TaskConfigHash.task == self.name).first()
        if self.is_rerun:
            # Restore the config to state right after start phase
            if self.prepared_config:
                self.config = copy.deepcopy(self.prepared_config)
            else:
                log.error('BUG: No prepared_config on rerun, please report.')
            self.config_modified = False
        elif not last_hash:
            self.config_modified = True
            last_hash = TaskConfigHash(task=self.name, hash=config_hash)
            self.session.add(last_hash)
        elif last_hash.hash != config_hash:
            self.config_modified = True
            last_hash.hash = config_hash
        else:
            self.config_modified = False

        # run phases
        try:
            for phase in task_phases:
                if phase in self.disabled_phases:
                    # log keywords not executed
                    for plugin in self.plugins(phase):
                        if plugin.name in self.config:
                            log.info(
                                'Plugin %s is not executed because %s phase is disabled (e.g. --test)'
                                % (plugin.name, phase))
                    continue
                if phase == 'start' and self.is_rerun:
                    log.debug('skipping task_start during rerun')
                elif phase == 'exit' and self._rerun:
                    log.debug(
                        'not running task_exit yet because task will rerun')
                else:
                    # run all plugins with this phase
                    self.__run_task_phase(phase)
                    if phase == 'start':
                        # Store a copy of the config state after start phase to restore for reruns
                        self.prepared_config = copy.deepcopy(self.config)
        except TaskAbort:
            # Roll back the session before calling abort handlers
            self.session.rollback()
            try:
                self.__run_task_phase('abort')
                # Commit just the abort handler changes if no exceptions are raised there
                self.session.commit()
            except TaskAbort as e:
                log.exception('abort handlers aborted: %s' % e)
            raise
        else:
            for entry in self.all_entries:
                entry.complete()
            log.debug('committing session')
            self.session.commit()
            fire_event('task.execute.completed', self)
        finally:
            # this will cause database rollback on exception
            self.session.close()

        # rerun task
        if self._rerun:
            log.info(
                'Rerunning the task in case better resolution can be achieved.'
            )
            self._rerun_count += 1
            # TODO: Potential optimization is to take snapshots (maybe make the ones backlog uses built in instead of
            # taking another one) after input and just inject the same entries for the rerun
            self.execute()

    def __eq__(self, other):
        # Tasks are considered equal when they share a name
        if hasattr(other, 'name'):
            return self.name == other.name
        return NotImplemented

    def __copy__(self):
        new = type(self)(self.manager, self.name, self.config, self.options)
        # Update all the variables of new instance to match our own
        new.__dict__.update(self.__dict__)
        # Some mutable objects need to be copies
        new.options = copy.copy(self.options)
        new.config = copy.deepcopy(self.config)
        return new

    # Allow task.copy() as a convenience alias for copy.copy(task)
    copy = __copy__
Example #36
0
class Task(object):

    """
    Represents one task in the configuration.

    **Fires events:**

    * task.execute.before_plugin

      Before a plugin is about to be executed. Note that since this will also include all
      builtin plugins the amount of calls can be quite high

      ``parameters: task, keyword``

    * task.execute.after_plugin

      After a plugin has been executed.

      ``parameters: task, keyword``

    * task.execute.completed

      After task execution has been completed

      ``parameters: task``

    """

    # Maximum number of times a task will re-execute itself when a plugin calls rerun()
    max_reruns = 5

    def __init__(self, manager, name, config):
        """
        :param Manager manager: Manager instance.
        :param string name: Name of the task.
        :param dict config: Task configuration.
        """
        self.name = unicode(name)
        # NOTE: config is stored by reference here (not copied); plugins can mutate it
        self.config = config
        self.manager = manager

        # simple persistence
        self.simple_persistence = SimpleTaskPersistence(self)

        # not to be reset (survives _reset() so rerun limiting works across reruns)
        self._rerun_count = 0

        # This should not be used until after process_start, when it is evaluated
        self.config_modified = None

        # use reset to init variables when creating
        self._reset()

    # Make these read-only properties
    all_entries = property(lambda self: self._all_entries)
    entries = property(lambda self: self.all_entries.entries)
    accepted = property(lambda self: self.all_entries.accepted)
    rejected = property(lambda self: self.all_entries.rejected)
    failed = property(lambda self: self.all_entries.failed)

    @property
    def is_rerun(self):
        # NOTE: returns the rerun count (truthy when this execution is a rerun),
        # not a strict bool.
        return self._rerun_count

    def _reset(self):
        """Reset task state"""
        log.debug('resetting %s' % self.name)
        self.enabled = True
        self.session = None
        self.priority = 65535

        self.requests = requests.Session()

        # List of all entries in the task
        self._all_entries = EntryContainer(task=self)

        self.disabled_phases = []

        # TODO: task.abort() should be done by using exception? not a flag that has to be checked everywhere
        self._abort = False
        self._abort_reason = None
        self._silent_abort = False

        self._rerun = False

        # current state
        self.current_phase = None
        self.current_plugin = None

    def __cmp__(self, other):
        # Python 2 ordering: tasks sort by priority
        return cmp(self.priority, other.priority)

    def __str__(self):
        return '<Task(name=%s,aborted=%s)>' % (self.name, str(self.aborted))

    @property
    def aborted(self):
        # Silent aborts are not reported as aborted to outside observers
        return self._abort and not self._silent_abort

    @property
    def abort_reason(self):
        return self._abort_reason

    @property
    def undecided(self):
        """Iterate over undecided entries"""
        return (entry for entry in self.entries if not entry in self.accepted and entry not in self.rejected)

    def disable_phase(self, phase):
        """Disable ``phase`` from execution.

        All disabled phases are re-enabled by :meth:`Task._reset()` after task
        execution has been completed.

        :param string phase: Name of ``phase``
        :raises ValueError: *phase* could not be found.
        """
        if phase not in task_phases:
            raise ValueError('%s is not a valid phase' % phase)
        if phase not in self.disabled_phases:
            log.debug('Disabling %s phase' % phase)
            self.disabled_phases.append(phase)

    def abort(self, reason='Unknown', **kwargs):
        """Abort this task execution, no more plugins will be executed after the current one exists."""
        # Idempotent: only the first abort call takes effect
        if self._abort:
            return
        self._abort_reason = reason
        if not kwargs.get('silent', False):
            log.info('Aborting task (plugin: %s)' % self.current_plugin)
            self._silent_abort = False
        else:
            log.debug('Aborting task (plugin: %s)' % self.current_plugin)
            self._silent_abort = True
        # Run the abort phase before we set the _abort flag
        self._abort = True
        self.__run_task_phase('abort')

    def find_entry(self, category='entries', **values):
        """
        Find and return :class:`~flexget.entry.Entry` with given attributes from task or None

        :param string category: entries, accepted, rejected or failed. Defaults to entries.
        :param values: Key values of entries to be searched
        :return: Entry or None
        """
        cat = getattr(self, category)
        if not isinstance(cat, EntryIterator):
            raise TypeError('category must be a EntryIterator')
        # Return the first entry matching ALL supplied key/value pairs
        for entry in cat:
            for k, v in values.iteritems():
                if not (k in entry and entry[k] == v):
                    break
            else:
                return entry
        return None

    def plugins(self, phase=None):
        """Get currently enabled plugins.

        :param string phase:
          Optional, limits to plugins currently configured on given phase, sorted in phase order.
        :return:
          An iterator over configured :class:`flexget.plugin.PluginInfo` instances enabled on this task.
        """
        if phase:
            plugins = sorted(get_plugins_by_phase(phase), key=lambda p: p.phase_handlers[phase], reverse=True)
        else:
            plugins = all_plugins.itervalues()
        # Builtins always run; others run only when present in the task config
        return (p for p in plugins if p.name in self.config or p.builtin)

    def __run_task_phase(self, phase):
        """Executes task phase, ie. call all enabled plugins on the task.

        Fires events:

        * task.execute.before_plugin
        * task.execute.after_plugin

        :param string phase: Name of the phase
        """
        if phase not in task_phases + ['abort', 'process_start', 'process_end']:
            raise Exception('%s is not a valid task phase' % phase)
        # warn if no inputs, filters or outputs in the task
        if phase in ['input', 'filter', 'output']:
            if not self.manager.unit_test:
                # Check that there is at least one manually configured plugin for these phases
                for p in self.plugins(phase):
                    if not p.builtin:
                        break
                else:
                    log.warning('Task doesn\'t have any %s plugins, you should add (at least) one!' % phase)

        for plugin in self.plugins(phase):
            # Abort this phase if one of the plugins disables it
            if phase in self.disabled_phases:
                return
            # store execute info, except during entry events
            self.current_phase = phase
            self.current_plugin = plugin.name

            if plugin.api_ver == 1:
                # backwards compatibility
                # pass method only task (old behaviour)
                args = (self,)
            else:
                # pass method task, copy of config (so plugin cannot modify it)
                args = (self, copy.copy(self.config.get(plugin.name)))

            try:
                fire_event('task.execute.before_plugin', self, plugin.name)
                response = self.__run_plugin(plugin, phase, args)
                if phase == 'input' and response:
                    # add entries returned by input to self.entries
                    self.all_entries.extend(response)
            finally:
                # after_plugin fires even when the plugin raised
                fire_event('task.execute.after_plugin', self, plugin.name)

            # Make sure we abort if any plugin sets our abort flag
            if self._abort and phase != 'abort':
                return

    def _run_entry_phase(self, phase, entry, **kwargs):
        # TODO: entry events are not very elegant, refactor into real (new) events or something ...
        if phase not in ['accept', 'reject', 'fail']:
            raise Exception('Not a valid entry phase')
        phase_plugins = self.plugins(phase)
        for plugin in phase_plugins:
            self.__run_plugin(plugin, phase, (self, entry), kwargs)

    def __run_plugin(self, plugin, phase, args=None, kwargs=None):
        """
        Execute given plugins phase method, with supplied args and kwargs.
        If plugin throws unexpected exceptions :meth:`abort` will be called.

        :param PluginInfo plugin: Plugin to be executed
        :param string phase: Name of the phase to be executed
        :param args: Passed to the plugin
        :param kwargs: Passed to the plugin
        """
        keyword = plugin.name
        method = plugin.phase_handlers[phase]
        if args is None:
            args = []
        if kwargs is None:
            kwargs = {}

        # log.trace('Running %s method %s' % (keyword, method))
        # call the plugin
        try:
            return method(*args, **kwargs)
        except PluginWarning as warn:
            # check if this warning should be logged only once (may keep repeating)
            if warn.kwargs.get('log_once', False):
                from flexget.utils.log import log_once
                log_once(warn.value, warn.log)
            else:
                warn.log.warning(warn)
        except EntryUnicodeError as eue:
            msg = ('Plugin %s tried to create non-unicode compatible entry (key: %s, value: %r)' %
                   (keyword, eue.key, eue.value))
            log.critical(msg)
            self.abort(msg)
        except PluginError as err:
            err.log.critical(err.value)
            self.abort(err.value)
        except DependencyError as e:
            msg = ('Plugin `%s` cannot be used because dependency `%s` is missing.' %
                   (keyword, e.missing))
            log.critical(msg)
            log.debug(e.message)
            self.abort(msg)
        except Exception as e:
            msg = 'BUG: Unhandled error in plugin %s: %s' % (keyword, e)
            log.exception(msg)
            self.abort(msg)
            # don't handle plugin errors gracefully with unit test
            if self.manager.unit_test:
                raise

    def rerun(self):
        """Immediately re-run the task after execute has completed,
        task can be re-run up to :attr:`.max_reruns` times."""
        self._rerun = True
        log.info('Plugin %s has requested task to be ran again after execution has completed.' %
                 self.current_plugin)

    def config_changed(self):
        """Forces config_modified flag to come out true on next run. Used when the db changes, and all
        entries need to be reprocessed."""
        log.debug('Marking config as changed.')
        # Reuse the task session if execution is in progress, otherwise open a temporary one
        session = self.session or Session()
        task_hash = session.query(TaskConfigHash).filter(TaskConfigHash.task == self.name).first()
        if task_hash:
            # Empty hash guarantees a mismatch on the next run's comparison
            task_hash.hash = ''
        self.config_modified = True
        # If we created our own session, commit and close it.
        if not self.session:
            session.commit()
            session.close()

    @useTaskLogging
    def execute(self, disable_phases=None, entries=None):
        """Executes the task.

        :param list disable_phases: Disable given phases names during execution
        :param list entries: Entries to be used in execution instead
            of using the input. Disables input phase.
        """

        log.debug('executing %s' % self.name)

        # Store original config state to be restored if a rerun is needed
        config_backup = copy.deepcopy(self.config)

        self._reset()
        # Handle keyword args
        if disable_phases:
            # Python 2: map() is eager, so this applies disable_phase to each name
            map(self.disable_phase, disable_phases)
        if entries:
            # If entries are passed for this execution (eg. rerun), disable the input phase
            self.disable_phase('input')
            self.all_entries.extend(entries)

        # validate configuration
        errors = self.validate()
        if self._abort:  # todo: bad practice
            return
        if errors and self.manager.unit_test:  # todo: bad practice
            raise Exception('configuration errors')
        if self.manager.options.validate:
            if not errors:
                log.info('Task \'%s\' passed' % self.name)
            self.enabled = False
            return

        log.debug('starting session')
        self.session = Session()

        # Save current config hash and set config_modified flag
        config_hash = hashlib.md5(str(self.config.items())).hexdigest()
        last_hash = self.session.query(TaskConfigHash).filter(TaskConfigHash.task == self.name).first()
        if self.is_rerun:
            # Make sure on rerun config is not marked as modified
            self.config_modified = False
        elif not last_hash:
            self.config_modified = True
            last_hash = TaskConfigHash(task=self.name, hash=config_hash)
            self.session.add(last_hash)
        elif last_hash.hash != config_hash:
            self.config_modified = True
            last_hash.hash = config_hash
        else:
            self.config_modified = False

        try:
            # run phases
            for phase in task_phases:
                if phase in self.disabled_phases:
                    # log keywords not executed
                    for plugin in self.plugins(phase):
                        if plugin.name in self.config:
                            log.info('Plugin %s is not executed because %s phase is disabled (e.g. --test)' %
                                     (plugin.name, phase))
                    continue

                # run all plugins with this phase
                self.__run_task_phase(phase)

                # if abort flag has been set task should be aborted now
                # since this calls return rerun will not be done
                if self._abort:
                    return

            log.debug('committing session, abort=%s' % self._abort)
            self.session.commit()
            fire_event('task.execute.completed', self)
        finally:
            # this will cause database rollback on exception and task.abort
            self.session.close()

        # rerun task
        if self._rerun:
            if self._rerun_count >= self.max_reruns:
                log.info('Task has been rerunning already %s times, stopping for now' % self._rerun_count)
                # reset the counter for future runs (necessary only with webui)
                self._rerun_count = 0
            else:
                log.info('Rerunning the task in case better resolution can be achieved.')
                self._rerun_count += 1
                # Restore config to original state before running again
                self.config = config_backup
                self.execute(disable_phases=disable_phases, entries=entries)

        # Clean up entries after the task has executed to reduce ram usage, #1652
        # TODO: This doesn't work with unified entries, not sure best replacement
        """if not self.manager.unit_test:
            log.debug('Clearing all entries from task.')
            self.entries = []
            self.rejected = []
            self.failed = []"""

    def _process_start(self):
        """Execute process_start phase"""
        self.__run_task_phase('process_start')

    def _process_end(self):
        """Execute terminate phase for this task"""
        if self.manager.options.validate:
            log.debug('No process_end phase with --check')
            return
        self.__run_task_phase('process_end')

    def validate(self):
        """Called during task execution. Validates config, prints errors and aborts task if invalid."""
        errors = self.validate_config(self.config)
        # log errors and abort
        if errors:
            log.critical('Task \'%s\' has configuration errors:' % self.name)
            for error in errors:
                log.error(error)
            # task has errors, abort it
            self.abort('\n'.join(errors))
        return errors

    @staticmethod
    def validate_config(config):
        """Plugin configuration validation. Return list of error messages that were detected."""
        validate_errors = []
        # validate config is a dictionary
        if not isinstance(config, dict):
            validate_errors.append('Config is not a dictionary.')
            return validate_errors
        # validate all plugins
        for keyword in config:
            # Keys starting with '_' are internal/reserved, not plugin names
            if keyword.startswith('_'):
                continue
            try:
                plugin = get_plugin_by_name(keyword)
            except:
                validate_errors.append('Unknown plugin \'%s\'' % keyword)
                continue
            if hasattr(plugin.instance, 'validator'):
                try:
                    validator = plugin.instance.validator()
                except TypeError as e:
                    log.critical('Invalid validator method in plugin %s' % keyword)
                    log.exception(e)
                    continue
                if not validator.name == 'root':
                    # if validator is not root type, add root validator as it's parent
                    validator = validator.add_root_parent()
                if not validator.validate(config[keyword]):
                    for msg in validator.errors.messages:
                        validate_errors.append('%s %s' % (keyword, msg))
            else:
                log.warning('Used plugin %s does not support validating. Please notify author!' % keyword)

        return validate_errors
Example #37
0
class Task(object):

    """
    Represents one task in the configuration.

    **Fires events:**

    * task.execute.before_plugin

      Before a plugin is about to be executed. Note that since this will also include all
      builtin plugins the amount of calls can be quite high

      ``parameters: task, keyword``

    * task.execute.after_plugin

      After a plugin has been executed.

      ``parameters: task, keyword``

    * task.execute.completed

      After task execution has been completed

      ``parameters: task``

    """

    # Upper bound on consecutive rerun() requests honored within one execute cycle
    max_reruns = 5

    def __init__(self, manager, name, config=None, options=None):
        """
        :param Manager manager: Manager instance.
        :param string name: Name of the task.
        :param dict config: Task configuration.
        :param options: Either an argparse-style namespace or a dict of
          overrides applied on top of the manager's execute options.
        """
        self.name = unicode(name)
        self.manager = manager
        # raw_config should remain the untouched input config
        if config is None:
            config = manager.config['tasks'].get(name, {})
        self.config = copy.deepcopy(config)
        self.prepared_config = None
        if options is None:
            options = copy.copy(self.manager.options.execute)
        elif isinstance(options, dict):
            options_namespace = copy.copy(self.manager.options.execute)
            options_namespace.__dict__.update(options)
            options = options_namespace
        self.options = options

        # simple persistence
        self.simple_persistence = SimpleTaskPersistence(self)

        # not to be reset
        self._rerun_count = 0

        self.config_modified = None

        # use reset to init variables when creating
        self._reset()

    @property
    def undecided(self):
        """
        .. deprecated:: Use API v3
        """
        return self.all_entries.undecided

    @property
    def failed(self):
        """
        .. deprecated:: Use API v3
        """
        return self.all_entries.failed

    @property
    def rejected(self):
        """
        .. deprecated:: Use API v3
        """
        return self.all_entries.rejected

    @property
    def accepted(self):
        """
        .. deprecated:: Use API v3
        """
        return self.all_entries.accepted

    @property
    def entries(self):
        """
        .. deprecated:: Use API v3
        """
        return self.all_entries.entries

    @property
    def all_entries(self):
        """
        .. deprecated:: Use API v3
        """
        return self._all_entries

    @property
    def is_rerun(self):
        """Number of reruns performed so far this execute cycle (truthy when rerunning)."""
        return self._rerun_count

    # TODO: can we get rid of this now that Tasks are instantiated on demand?
    def _reset(self):
        """Reset task state"""
        log.debug('resetting %s' % self.name)
        # tasks whose name starts with '_' are considered disabled
        self.enabled = not self.name.startswith('_')
        self.session = None
        self.priority = 65535

        self.requests = requests.Session()

        # List of all entries in the task
        self._all_entries = EntryContainer()

        self.disabled_phases = []

        # These are just to query what happened in task. Call task.abort to set.
        self.aborted = False
        self.abort_reason = None
        self.silent_abort = False

        self._rerun = False

        # current state
        self.current_phase = None
        self.current_plugin = None

    def __cmp__(self, other):
        # Python 2 rich comparison: tasks order by their priority value
        return cmp(self.priority, other.priority)

    def __str__(self):
        return '<Task(name=%s,aborted=%s)>' % (self.name, self.aborted)

    def disable_phase(self, phase):
        """Disable ``phase`` from execution.

        All disabled phases are re-enabled by :meth:`Task._reset()` after task
        execution has been completed.

        :param string phase: Name of ``phase``
        :raises ValueError: *phase* could not be found.
        """
        if phase not in task_phases:
            raise ValueError('%s is not a valid phase' % phase)
        if phase not in self.disabled_phases:
            log.debug('Disabling %s phase' % phase)
            self.disabled_phases.append(phase)

    def abort(self, reason='Unknown', silent=False):
        """Abort this task execution, no more plugins will be executed except the abort handling ones.

        :raises TaskAbort: always; unwinds out of plugin/phase execution.
        """
        self.aborted = True
        self.abort_reason = reason
        self.silent_abort = silent
        if not self.silent_abort:
            log.warning('Aborting task (plugin: %s)' % self.current_plugin)
        else:
            log.debug('Aborting task (plugin: %s)' % self.current_plugin)
        raise TaskAbort(reason, silent=silent)

    def find_entry(self, category='entries', **values):
        """
        Find and return :class:`~flexget.entry.Entry` with given attributes from task or None

        :param string category: entries, accepted, rejected or failed. Defaults to entries.
        :param values: Key values of entries to be searched
        :return: Entry or None
        """
        cat = getattr(self, category)
        if not isinstance(cat, EntryIterator):
            raise TypeError('category must be a EntryIterator')
        # return the first entry that matches every given key/value pair
        for entry in cat:
            for k, v in values.iteritems():
                if not (k in entry and entry[k] == v):
                    break
            else:
                return entry
        return None

    def plugins(self, phase=None):
        """Get currently enabled plugins.

        :param string phase:
          Optional, limits to plugins currently configured on given phase, sorted in phase order.
        :return:
          An iterator over configured :class:`flexget.plugin.PluginInfo` instances enabled on this task.
        """
        if phase:
            plugins = sorted(get_plugins(phase=phase), key=lambda p: p.phase_handlers[phase], reverse=True)
        else:
            plugins = all_plugins.itervalues()
        # builtins run regardless of whether they appear in the task config
        return (p for p in plugins if p.name in self.config or p.builtin)

    def __run_task_phase(self, phase):
        """Executes task phase, ie. call all enabled plugins on the task.

        Fires events:

        * task.execute.before_plugin
        * task.execute.after_plugin

        :param string phase: Name of the phase
        """
        if phase not in phase_methods:
            raise Exception('%s is not a valid task phase' % phase)
        # warn if no inputs, filters or outputs in the task
        if phase in ['input', 'filter', 'output']:
            if not self.manager.unit_test:
                # Check that there is at least one manually configured plugin for these phases
                for p in self.plugins(phase):
                    if not p.builtin:
                        break
                else:
                    log.warning('Task doesn\'t have any %s plugins, you should add (at least) one!' % phase)

        for plugin in self.plugins(phase):
            # Abort this phase if one of the plugins disables it
            if phase in self.disabled_phases:
                return
            # store execute info, except during entry events
            self.current_phase = phase
            self.current_plugin = plugin.name

            if plugin.api_ver == 1:
                # backwards compatibility
                # pass method only task (old behaviour)
                args = (self,)
            else:
                # pass method task, copy of config (so plugin cannot modify it)
                args = (self, copy.copy(self.config.get(plugin.name)))

            try:
                fire_event('task.execute.before_plugin', self, plugin.name)
                response = self.__run_plugin(plugin, phase, args)
                if phase == 'input' and response:
                    # add entries returned by input to self.all_entries
                    for e in response:
                        e.task = self
                    self.all_entries.extend(response)
            finally:
                # after_plugin fires even when the plugin raised/aborted
                fire_event('task.execute.after_plugin', self, plugin.name)

    def __run_plugin(self, plugin, phase, args=None, kwargs=None):
        """
        Execute given plugins phase method, with supplied args and kwargs.
        If plugin throws unexpected exceptions :meth:`abort` will be called.

        :param PluginInfo plugin: Plugin to be executed
        :param string phase: Name of the phase to be executed
        :param args: Passed to the plugin
        :param kwargs: Passed to the plugin
        """
        keyword = plugin.name
        method = plugin.phase_handlers[phase]
        if args is None:
            args = []
        if kwargs is None:
            kwargs = {}

        # log.trace('Running %s method %s' % (keyword, method))
        # call the plugin
        try:
            return method(*args, **kwargs)
        except TaskAbort:
            # abort() was already called; just propagate
            raise
        except PluginWarning as warn:
            # check if this warning should be logged only once (may keep repeating)
            if warn.kwargs.get('log_once', False):
                from flexget.utils.log import log_once
                log_once(warn.value, warn.log)
            else:
                warn.log.warning(warn)
        except EntryUnicodeError as eue:
            msg = ('Plugin %s tried to create non-unicode compatible entry (key: %s, value: %r)' %
                   (keyword, eue.key, eue.value))
            log.critical(msg)
            self.abort(msg)
        except PluginError as err:
            err.log.critical(err.value)
            self.abort(err.value)
        except DependencyError as e:
            msg = ('Plugin `%s` cannot be used because dependency `%s` is missing.' %
                   (keyword, e.missing))
            log.critical(msg)
            log.debug(e.message)
            self.abort(msg)
        except Warning as e:
            # If warnings have been elevated to errors
            msg = 'Warning during plugin %s: %s' % (keyword, e)
            log.exception(msg)
            self.abort(msg)
        except Exception as e:
            msg = 'BUG: Unhandled error in plugin %s: %s' % (keyword, e)
            log.exception(msg)
            self.abort(msg)

    def rerun(self):
        """Immediately re-run the task after execute has completed,
        task can be re-run up to :attr:`.max_reruns` times."""
        msg = 'Plugin %s has requested task to be ran again after execution has completed.' % self.current_plugin
        # Only print the first request for a rerun to the info log
        log.debug(msg) if self._rerun else log.info(msg)
        if self._rerun_count >= self.max_reruns:
            self._rerun = False
            log.info('Task has been re-run %s times already, stopping for now' % self._rerun_count)
            return
        self._rerun = True

    def config_changed(self):
        """
        Sets config_modified flag to True for the remainder of this run.
        Used when the db changes, and all entries need to be reprocessed.
        """
        self.config_modified = True

    @useTaskLogging
    def execute(self):
        """
        Executes the the task.

        If :attr:`.enabled` is False task is not executed. Certain :attr:`.options`
        affect how execution is handled.

        - :attr:`.options.disable_phases` is a list of phases that are not enabled
          for this execution.
        - :attr:`.options.inject` is a list of :class:`Entry` instances used instead
          of running input phase.
        """
        # NOTE: no early return here; the enabled check after _reset() below
        # performs the actual skip, this first check only logs.
        if not self.enabled:
            log.debug('Not running disabled task %s' % self.name)
        if self.options.cron:
            self.manager.db_cleanup()

        self._reset()
        log.debug('executing %s' % self.name)
        if not self.enabled:
            log.debug('task %s disabled during preparation, not running' % self.name)
            return

        # Handle keyword args
        if self.options.learn:
            log.info('Disabling download and output phases because of --learn')
            self.disable_phase('download')
            self.disable_phase('output')
        if self.options.disable_phases:
            map(self.disable_phase, self.options.disable_phases)
        if self.options.inject:
            # If entries are passed for this execution (eg. rerun), disable the input phase
            self.disable_phase('input')
            self.all_entries.extend(self.options.inject)

        log.debug('starting session')
        self.session = Session()

        # Save current config hash and set config_modified flag
        config_hash = hashlib.md5(str(sorted(self.config.items()))).hexdigest()
        last_hash = self.session.query(TaskConfigHash).filter(TaskConfigHash.task == self.name).first()
        if self.is_rerun:
            # Restore the config to state right after start phase
            if self.prepared_config:
                self.config = copy.deepcopy(self.prepared_config)
            else:
                log.error('BUG: No prepared_config on rerun, please report.')
            self.config_modified = False
        elif not last_hash:
            self.config_modified = True
            last_hash = TaskConfigHash(task=self.name, hash=config_hash)
            self.session.add(last_hash)
        elif last_hash.hash != config_hash:
            self.config_modified = True
            last_hash.hash = config_hash
        else:
            self.config_modified = False

        # run phases
        try:
            for phase in task_phases:
                if phase in self.disabled_phases:
                    # log keywords not executed
                    for plugin in self.plugins(phase):
                        if plugin.name in self.config:
                            log.info('Plugin %s is not executed because %s phase is disabled (e.g. --test)' %
                                     (plugin.name, phase))
                    continue
                if phase == 'start' and self.is_rerun:
                    log.debug('skipping task_start during rerun')
                elif phase == 'exit' and self._rerun:
                    log.debug('not running task_exit yet because task will rerun')
                else:
                    # run all plugins with this phase
                    self.__run_task_phase(phase)
                    if phase == 'start':
                        # Store a copy of the config state after start phase to restore for reruns
                        self.prepared_config = copy.deepcopy(self.config)
        except TaskAbort:
            # Roll back the session before calling abort handlers
            self.session.rollback()
            try:
                self.__run_task_phase('abort')
                # Commit just the abort handler changes if no exceptions are raised there
                self.session.commit()
            except TaskAbort as e:
                log.exception('abort handlers aborted: %s' % e)
            raise
        else:
            for entry in self.all_entries:
                entry.complete()
            log.debug('committing session')
            self.session.commit()
            fire_event('task.execute.completed', self)
        finally:
            # this will cause database rollback on exception
            self.session.close()

        # rerun task
        if self._rerun:
            log.info('Rerunning the task in case better resolution can be achieved.')
            self._rerun_count += 1
            # TODO: Potential optimization is to take snapshots (maybe make the ones backlog uses built in instead of
            # taking another one) after input and just inject the same entries for the rerun
            self.execute()

    def __eq__(self, other):
        # Tasks are considered equal when their names match
        if hasattr(other, 'name'):
            return self.name == other.name
        return NotImplemented

    def __copy__(self):
        new = type(self)(self.manager, self.name, self.config, self.options)
        # Update all the variables of new instance to match our own
        new.__dict__.update(self.__dict__)
        # Some mutable objects need to be copies
        new.options = copy.copy(self.options)
        new.config = copy.deepcopy(self.config)
        return new

    copy = __copy__
Example #38
0
    def migrate2(self):
        session = Session()

        try:
            from progressbar import ProgressBar, Percentage, Bar, ETA
        except:
            print 'Critical: progressbar library not found, try running `bin/easy_install progressbar` ?'
            return

        class Seen(Base):

            __tablename__ = 'seen'

            id = Column(Integer, primary_key=True)
            field = Column(String)
            value = Column(String, index=True)
            task = Column('feed', String)
            added = Column(DateTime)

            def __init__(self, field, value, task):
                self.field = field
                self.value = value
                self.task = task
                self.added = datetime.now()

            def __str__(self):
                return '<Seen(%s=%s)>' % (self.field, self.value)

        print ''

        # REPAIR / REMOVE DUPLICATES
        index = 0
        removed = 0
        total = session.query(Seen).count() + 1

        widgets = ['Repairing - ', ETA(), ' ', Percentage(), ' ', Bar(left='[', right=']')]
        bar = ProgressBar(widgets=widgets, maxval=total).start()

        for seen in session.query(Seen).all():
            index += 1
            if index % 10 == 0:
                bar.update(index)
            amount = 0
            for dupe in session.query(Seen).filter(Seen.value == seen.value):
                amount += 1
                if amount > 1:
                    removed += 1
                    session.delete(dupe)
        bar.finish()

        # MIGRATE
        total = session.query(Seen).count() + 1
        widgets = ['Upgrading - ', ETA(), ' ', Percentage(), ' ', Bar(left='[', right=']')]
        bar = ProgressBar(widgets=widgets, maxval=total).start()

        index = 0
        for seen in session.query(Seen).all():
            index += 1
            if not index % 10:
                bar.update(index)
            se = SeenEntry(u'N/A', seen.task, u'migrated')
            se.added = seen.added
            se.fields.append(SeenField(seen.field, seen.value))
            session.add(se)
        bar.finish()

        session.execute('drop table seen;')
        session.commit()
def lookup_movie(title=None, year=None, rottentomatoes_id=None, imdb_id=None, smart_match=None,
                 only_cached=False, session=None):
    """
    Do a lookup from Rotten Tomatoes for the movie matching the passed arguments.
    Any combination of criteria can be passed, the most specific criteria specified will be used.

    :param rottentomatoes_id: rottentomatoes_id of desired movie
    :param imdb_id: imdb_id of desired movie
    :param title: title of desired movie
    :param year: release year of desired movie
    :param smart_match: attempt to clean and parse title and year from a string
    :param only_cached: if this is specified, an online lookup will not occur if the movie is not in the cache
    :param session: optionally specify a session to use, if specified, returned Movie will be live in that session
    :returns: The Movie object populated with data from Rotten Tomatoes
    :raises: PluginError if a match cannot be found or there are other problems with the lookup

    """

    if smart_match:
        # If smart_match was specified, and we don't have more specific criteria, parse it into a title and year
        title_parser = MovieParser()
        title_parser.parse(smart_match)
        title = title_parser.name
        year = title_parser.year
        # (the redundant `or title` in the original condition is dropped:
        # this branch is only reachable when title == '')
        if title == '' and not (rottentomatoes_id or imdb_id):
            raise PluginError('Failed to parse name from %s' % smart_match)

    if title:
        search_string = title.lower()
        if year:
            search_string = '%s %s' % (search_string, year)
    elif not (rottentomatoes_id or imdb_id):
        raise PluginError('No criteria specified for rotten tomatoes lookup')

    def id_str():
        # compact description of all lookup criteria, for log messages
        return '<title=%s,year=%s,rottentomatoes_id=%s,imdb_id=%s>' % (title, year, rottentomatoes_id, imdb_id)

    if not session:
        session = Session()

    log.debug('Looking up rotten tomatoes information for %s' % id_str())

    movie = None

    # Try to lookup from cache
    if rottentomatoes_id:
        movie = session.query(RottenTomatoesMovie).\
            filter(RottenTomatoesMovie.id == rottentomatoes_id).first()
    if not movie and imdb_id:
        alt_id = session.query(RottenTomatoesAlternateId).\
            filter(RottenTomatoesAlternateId.name.in_(['imdb', 'flexget_imdb'])).\
            filter(RottenTomatoesAlternateId.id == imdb_id.lstrip('t')).first()
        if alt_id:
            movie = session.query(RottenTomatoesMovie).filter(RottenTomatoesMovie.id == alt_id.movie_id).first()
    if not movie and title:
        movie_filter = session.query(RottenTomatoesMovie).filter(func.lower(RottenTomatoesMovie.title) == title.lower())
        if year:
            movie_filter = movie_filter.filter(RottenTomatoesMovie.year == year)
        movie = movie_filter.first()
        if not movie:
            log.debug('No matches in movie cache found, checking search cache.')
            found = session.query(RottenTomatoesSearchResult).\
                filter(func.lower(RottenTomatoesSearchResult.search) == search_string).first()
            if found and found.movie:
                log.debug('Movie found in search cache.')
                movie = found.movie
    if movie:
        # Movie found in cache, check if cache has expired.
        if movie.expired and not only_cached:
            log.debug('Cache has expired for %s, attempting to refresh from Rotten Tomatoes.' % id_str())
            try:
                imdb_alt_id = movie.alternate_ids and filter(
                    lambda alt_id: alt_id.name in ['imdb', 'flexget_imdb'], movie.alternate_ids)[0].id
                if imdb_alt_id:
                    result = movies_alias(imdb_alt_id, 'imdb')
                else:
                    result = movies_info(movie.id)
                movie = _set_movie_details(movie, session, result)
                session.merge(movie)
            except URLError:
                log.error('Error refreshing movie details from Rotten Tomatoes, cached info being used.')
        else:
            log.debug('Movie %s information restored from cache.' % id_str())
    else:
        if only_cached:
            raise PluginError('Movie %s not found from cache' % id_str())
        # There was no movie found in the cache, do a lookup from Rotten Tomatoes
        log.debug('Movie %s not found in cache, looking up from rotten tomatoes.' % id_str())
        try:
            # Lookups using imdb_id
            # TODO: extract to method
            if imdb_id:
                log.debug('Using IMDB alias %s.' % imdb_id)
                result = movies_alias(imdb_id, 'imdb')
                if result:
                    mismatch = []
                    # Only compute the title similarity when a title was given.
                    # The original computed the ratio unconditionally, calling
                    # title.lower() and crashing with AttributeError when the
                    # lookup was done by imdb_id alone.
                    if title and difflib.SequenceMatcher(
                            lambda x: x == ' ',
                            re.sub(r'\s+\(.*\)$', '', result['title'].lower()),
                            title.lower()).ratio() < MIN_MATCH:
                        mismatch.append('the title (%s <-?-> %s)' % (title, result['title']))
                    result['year'] = int(result['year'])
                    if year and fabs(result['year'] - year) > 1:
                        mismatch.append('the year (%s <-?-> %s)' % (year, result['year']))
                        release_year = None
                        if result.get('release_dates', {}).get('theater'):
                            log.debug('Checking year against theater release date')
                            release_year = time.strptime(result['release_dates'].get('theater'), '%Y-%m-%d').tm_year
                            if fabs(release_year - year) > 1:
                                mismatch.append('the theater release (%s)' % release_year)
                        elif result.get('release_dates', {}).get('dvd'):
                            log.debug('Checking year against dvd release date')
                            release_year = time.strptime(result['release_dates'].get('dvd'), '%Y-%m-%d').tm_year
                            if fabs(release_year - year) > 1:
                                mismatch.append('the DVD release (%s)' % release_year)
                    if mismatch:
                        log.warning('Rotten Tomatoes had an imdb alias for %s but it didn\'t match %s.' %
                                    (imdb_id, ', or '.join(mismatch)))
                    else:
                        log.debug('imdb_id %s maps to rt_id %s, checking db for info.' % (imdb_id, result['id']))
                        movie = session.query(RottenTomatoesMovie).\
                            filter(RottenTomatoesMovie.id == result.get('id')).first()
                        if movie:
                            log.debug('Movie %s was in database, but did not have the imdb_id stored, '
                                      'forcing an update' % movie)
                            movie = _set_movie_details(movie, session, result)
                            session.merge(movie)
                        else:
                            log.debug('%s was not in database, setting info.' % result['title'])
                            movie = RottenTomatoesMovie()
                            movie = _set_movie_details(movie, session, result)
                            if not movie:
                                raise PluginError('set_movie_details returned %s' % movie)
                            session.add(movie)
                else:
                    log.debug('IMDB alias %s returned no results.' % imdb_id)

            if not movie and rottentomatoes_id:
                result = movies_info(rottentomatoes_id)
                if result:
                    movie = RottenTomatoesMovie()
                    movie = _set_movie_details(movie, session, result)
                    session.add(movie)

            if not movie and title:
                # TODO: Extract to method
                log.verbose('Searching from rt `%s`' % search_string)
                results = movies_search(search_string)
                if results:
                    results = results.get('movies')
                    if results:
                        # score every search result by title similarity, best first
                        for movie_res in results:
                            seq = difflib.SequenceMatcher(
                                lambda x: x == ' ', movie_res['title'].lower(), title.lower())
                            movie_res['match'] = seq.ratio()
                        results.sort(key=lambda x: x['match'], reverse=True)

                        # Remove all movies below MIN_MATCH, and different year
                        for movie_res in results[:]:

                            if year and movie_res.get('year'):
                                movie_res['year'] = int(movie_res['year'])
                                if movie_res['year'] != year:
                                    release_year = False
                                    if movie_res.get('release_dates', {}).get('theater'):
                                        log.debug('Checking year against theater release date')
                                        release_year = time.strptime(movie_res['release_dates'].get('theater'),
                                                                     '%Y-%m-%d').tm_year
                                    elif movie_res.get('release_dates', {}).get('dvd'):
                                        log.debug('Checking year against dvd release date')
                                        release_year = time.strptime(movie_res['release_dates'].get('dvd'),
                                                                     '%Y-%m-%d').tm_year
                                    if not (release_year and release_year == year):
                                        log.debug('removing %s - %s (wrong year: %s)' %
                                                  (movie_res['title'], movie_res['id'],
                                                   str(release_year or movie_res['year'])))
                                        results.remove(movie_res)
                                        continue
                            if movie_res['match'] < MIN_MATCH:
                                log.debug('removing %s (min_match)' % movie_res['title'])
                                results.remove(movie_res)
                                continue

                        if not results:
                            raise PluginError('no appropiate results')

                        if len(results) == 1:
                            log.debug('SUCCESS: only one movie remains')
                        else:
                            # Check min difference between best two hits
                            diff = results[0]['match'] - results[1]['match']
                            if diff < MIN_DIFF:
                                log.debug('unable to determine correct movie, min_diff too small'
                                          '(`%s (%d) - %s` <-?-> `%s (%d) - %s`)' %
                                          (results[0]['title'], results[0]['year'], results[0]['id'],
                                           results[1]['title'], results[1]['year'], results[1]['id']))
                                for r in results:
                                    log.debug('remain: %s (match: %s) %s' % (r['title'], r['match'], r['id']))
                                raise PluginError('min_diff')

                        imdb_alt_id = results[0].get('alternate_ids', {}).get('imdb')
                        if imdb_alt_id:
                            result = movies_alias(imdb_alt_id)
                        else:
                            result = movies_info(results[0].get('id'))

                        if not result:
                            result = results[0]

                        movie = RottenTomatoesMovie()
                        movie = _set_movie_details(movie, session, result)
                        if imdb_id and not filter(
                            lambda alt_id: alt_id.name == 'imdb' and alt_id.id == imdb_id.lstrip('t'),
                                movie.alternate_ids):  # TODO: get rid of these confusing lambdas
                            log.warning('Adding flexget_imdb alternate id %s for movie %s' %
                                        (imdb_id, movie))
                            movie.alternate_ids.append(RottenTomatoesAlternateId('flexget_imdb',
                                                                                 imdb_id.lstrip('t')))
                        session.add(movie)
                        session.commit()

                        if title.lower() != movie.title.lower():
                            log.debug('Saving search result for \'%s\'' % search_string)
                            session.add(RottenTomatoesSearchResult(search=search_string, movie=movie))
        except URLError:
            raise PluginError('Error looking up movie from RottenTomatoes')

    if not movie:
        raise PluginError('No results found from rotten tomatoes for %s' % id_str())
    else:
        # Access attributes to force the relationships to eager load before we detach from session
        for attr in ['alternate_ids', 'cast', 'directors', 'genres', 'links', 'posters', 'release_dates']:
            getattr(movie, attr)
        session.commit()
        return movie
Example #40
0
class Task(object):

    """
    Represents one task in the configuration.

    **Fires events:**

    * task.execute.before_plugin

      Before a plugin is about to be executed. Note that since this will also include all
      builtin plugins the amount of calls can be quite high

      ``parameters: task, keyword``

    * task.execute.after_plugin

      After a plugin has been executed.

      ``parameters: task, keyword``

    * task.execute.completed

      After task execution has been completed

      ``parameters: task``

    """

    # Maximum number of times a task will rerun itself after a plugin calls rerun()
    max_reruns = 5

    def __init__(self, manager, name, config):
        """
        :param Manager manager: Manager instance.
        :param string name: Name of the task.
        :param dict config: Task configuration.
        """
        self.name = unicode(name)
        self.config = config
        self.manager = manager

        # simple persistence
        self.simple_persistence = SimpleTaskPersistence(self)

        # not to be reset between reruns (_reset() is called at the start of each execute)
        self._rerun_count = 0

        # This should not be used until after process_start, when it is evaluated
        self.config_modified = None

        # use reset to init variables when creating
        self._reset()

    # Make these read-only properties
    all_entries = property(lambda self: self._all_entries)
    entries = property(lambda self: self.all_entries.entries)
    accepted = property(lambda self: self.all_entries.accepted)
    rejected = property(lambda self: self.all_entries.rejected)
    failed = property(lambda self: self.all_entries.failed)

    @property
    def is_rerun(self):
        # Returns the rerun count: 0 (falsy) on the first run, truthy on any rerun
        return self._rerun_count

    def _reset(self):
        """Reset task state"""
        log.debug('resetting %s' % self.name)
        self.enabled = True
        self.session = None
        self.priority = 65535

        # Fresh requests session per execution so cookies/state don't leak between runs
        self.requests = requests.Session()

        # List of all entries in the task
        self._all_entries = EntryContainer(task=self)

        self.disabled_phases = []

        # TODO: task.abort() should be done by using exception? not a flag that has to be checked everywhere
        self._abort = False
        self._abort_reason = None
        self._silent_abort = False

        self._rerun = False

        # current state
        self.current_phase = None
        self.current_plugin = None

    def __cmp__(self, other):
        # Tasks order by priority (Python 2 rich-comparison fallback)
        return cmp(self.priority, other.priority)

    def __str__(self):
        return '<Task(name=%s,aborted=%s)>' % (self.name, str(self.aborted))

    @property
    def aborted(self):
        # A silent abort (e.g. --validate) is not reported as aborted
        return self._abort and not self._silent_abort

    @property
    def abort_reason(self):
        return self._abort_reason

    @property
    def undecided(self):
        """Iterate over undecided entries"""
        return (entry for entry in self.entries if not entry in self.accepted and entry not in self.rejected)

    def disable_phase(self, phase):
        """Disable ``phase`` from execution.

        All disabled phases are re-enabled by :meth:`Task._reset()` after task
        execution has been completed.

        :param string phase: Name of ``phase``
        :raises ValueError: *phase* could not be found.
        """
        if phase not in task_phases:
            raise ValueError('%s is not a valid phase' % phase)
        if phase not in self.disabled_phases:
            log.debug('Disabling %s phase' % phase)
            self.disabled_phases.append(phase)

    def abort(self, reason='Unknown', **kwargs):
        """Abort this task execution, no more plugins will be executed after the current one exits.

        :param string reason: Reason recorded in :attr:`abort_reason`.
        :param kwargs: ``silent=True`` suppresses the info-level log and marks
            the abort as silent (so :attr:`aborted` stays False).
        """
        if self._abort:
            return
        self._abort_reason = reason
        if not kwargs.get('silent', False):
            log.info('Aborting task (plugin: %s)' % self.current_plugin)
            self._silent_abort = False
        else:
            log.debug('Aborting task (plugin: %s)' % self.current_plugin)
            self._silent_abort = True
        # Set the flag first; __run_task_phase explicitly exempts the 'abort'
        # phase from the abort check, so the abort phase still runs
        self._abort = True
        self.__run_task_phase('abort')

    def find_entry(self, category='entries', **values):
        """
        Find and return :class:`~flexget.entry.Entry` with given attributes from task or None

        :param string category: entries, accepted, rejected or failed. Defaults to entries.
        :param values: Key values of entries to be searched
        :return: Entry or None
        """
        cat = getattr(self, category)
        if not isinstance(cat, EntryIterator):
            raise TypeError('category must be a EntryIterator')
        # Return the first entry matching ALL given key/value pairs
        for entry in cat:
            for k, v in values.iteritems():
                if not (k in entry and entry[k] == v):
                    break
            else:
                return entry
        return None

    def plugins(self, phase=None):
        """Get currently enabled plugins.

        :param string phase:
          Optional, limits to plugins currently configured on given phase, sorted in phase order.
        :return:
          An iterator over configured :class:`flexget.plugin.PluginInfo` instances enabled on this task.
        """
        if phase:
            plugins = sorted(get_plugins_by_phase(phase), key=lambda p: p.phase_handlers[phase], reverse=True)
        else:
            plugins = all_plugins.itervalues()
        # A plugin is enabled when it is configured on this task or is a builtin
        return (p for p in plugins if p.name in self.config or p.builtin)

    def __run_task_phase(self, phase):
        """Executes task phase, ie. call all enabled plugins on the task.

        Fires events:

        * task.execute.before_plugin
        * task.execute.after_plugin

        :param string phase: Name of the phase
        """
        if phase not in task_phases + ['abort', 'process_start', 'process_end']:
            raise Exception('%s is not a valid task phase' % phase)
        # warn if no inputs, filters or outputs in the task
        if phase in ['input', 'filter', 'output']:
            if not self.manager.unit_test:
                # Check that there is at least one manually configured plugin for these phases
                for p in self.plugins(phase):
                    if not p.builtin:
                        break
                else:
                    log.warning('Task doesn\'t have any %s plugins, you should add (at least) one!' % phase)

        for plugin in self.plugins(phase):
            # Abort this phase if one of the plugins disables it
            if phase in self.disabled_phases:
                return
            # store execute info, except during entry events
            self.current_phase = phase
            self.current_plugin = plugin.name

            if plugin.api_ver == 1:
                # backwards compatibility
                # pass method only task (old behaviour)
                args = (self,)
            else:
                # pass method task, copy of config (so plugin cannot modify it)
                args = (self, copy.copy(self.config.get(plugin.name)))

            try:
                fire_event('task.execute.before_plugin', self, plugin.name)
                response = self.__run_plugin(plugin, phase, args)
                if phase == 'input' and response:
                    # add entries returned by input to self.entries
                    self.all_entries.extend(response)
            finally:
                # after_plugin fires even if the plugin raised
                fire_event('task.execute.after_plugin', self, plugin.name)

            # Make sure we abort if any plugin sets our abort flag
            if self._abort and phase != 'abort':
                return

    def _run_entry_phase(self, phase, entry, **kwargs):
        # TODO: entry events are not very elegant, refactor into real (new) events or something ...
        if phase not in ['accept', 'reject', 'fail']:
            raise Exception('Not a valid entry phase')
        phase_plugins = self.plugins(phase)
        for plugin in phase_plugins:
            self.__run_plugin(plugin, phase, (self, entry), kwargs)

    def __run_plugin(self, plugin, phase, args=None, kwargs=None):
        """
        Execute given plugins phase method, with supplied args and kwargs.
        If plugin throws unexpected exceptions :meth:`abort` will be called.

        :param PluginInfo plugin: Plugin to be executed
        :param string phase: Name of the phase to be executed
        :param args: Passed to the plugin
        :param kwargs: Passed to the plugin
        """
        keyword = plugin.name
        method = plugin.phase_handlers[phase]
        if args is None:
            args = []
        if kwargs is None:
            kwargs = {}

        # log.trace('Running %s method %s' % (keyword, method))
        # call the plugin
        try:
            return method(*args, **kwargs)
        except PluginWarning as warn:
            # check if this warning should be logged only once (may keep repeating)
            if warn.kwargs.get('log_once', False):
                from flexget.utils.log import log_once
                log_once(warn.value, warn.log)
            else:
                warn.log.warning(warn)
        except EntryUnicodeError as eue:
            msg = ('Plugin %s tried to create non-unicode compatible entry (key: %s, value: %r)' %
                   (keyword, eue.key, eue.value))
            log.critical(msg)
            self.abort(msg)
        except PluginError as err:
            err.log.critical(err.value)
            self.abort(err.value)
        except DependencyError as e:
            msg = ('Plugin `%s` cannot be used because dependency `%s` is missing.' %
                   (keyword, e.missing))
            log.critical(msg)
            log.debug(e.message)
            self.abort(msg)
        except Exception as e:
            msg = 'BUG: Unhandled error in plugin %s: %s' % (keyword, e)
            log.exception(msg)
            self.abort(msg)
            # don't handle plugin errors gracefully with unit test
            if self.manager.unit_test:
                raise

    def rerun(self):
        """Immediately re-run the task after execute has completed,
        task can be re-run up to :attr:`.max_reruns` times."""
        self._rerun = True
        log.info('Plugin %s has requested task to be ran again after execution has completed.' %
                 self.current_plugin)

    def config_changed(self):
        """Forces config_modified flag to come out true on next run. Used when the db changes, and all
        entries need to be reprocessed."""
        log.debug('Marking config as changed.')
        # Reuse the task's session when inside execution, otherwise open a temporary one
        session = self.session or Session()
        task_hash = session.query(TaskConfigHash).filter(TaskConfigHash.task == self.name).first()
        if task_hash:
            # Clearing the stored hash guarantees a mismatch on the next execute
            task_hash.hash = ''
        self.config_modified = True
        # If we created our own session, commit and close it.
        if not self.session:
            session.commit()
            session.close()

    @useTaskLogging
    def execute(self, disable_phases=None, entries=None):
        """Executes the task.

        :param list disable_phases: Disable given phases names during execution
        :param list entries: Entries to be used in execution instead
            of using the input. Disables input phase.
        """

        log.debug('executing %s' % self.name)

        # Store original config state to be restored if a rerun is needed
        config_backup = copy.deepcopy(self.config)

        self._reset()
        # Handle keyword args
        if disable_phases:
            map(self.disable_phase, disable_phases)
        if entries:
            # If entries are passed for this execution (eg. rerun), disable the input phase
            self.disable_phase('input')
            self.all_entries.extend(entries)

        # validate configuration
        errors = self.validate()
        if self._abort:  # todo: bad practice
            return
        if errors and self.manager.unit_test:  # todo: bad practice
            raise Exception('configuration errors')
        if self.manager.options.validate:
            if not errors:
                log.info('Task \'%s\' passed' % self.name)
            self.enabled = False
            return

        log.debug('starting session')
        self.session = Session()

        # Save current config hash and set config_modified flag
        config_hash = hashlib.md5(str(sorted(self.config.items()))).hexdigest()
        last_hash = self.session.query(TaskConfigHash).filter(TaskConfigHash.task == self.name).first()
        if self.is_rerun:
            # Make sure on rerun config is not marked as modified
            self.config_modified = False
        elif not last_hash:
            self.config_modified = True
            last_hash = TaskConfigHash(task=self.name, hash=config_hash)
            self.session.add(last_hash)
        elif last_hash.hash != config_hash:
            self.config_modified = True
            last_hash.hash = config_hash
        else:
            self.config_modified = False

        try:
            # run phases
            for phase in task_phases:
                if phase in self.disabled_phases:
                    # log keywords not executed
                    for plugin in self.plugins(phase):
                        if plugin.name in self.config:
                            log.info('Plugin %s is not executed because %s phase is disabled (e.g. --test)' %
                                     (plugin.name, phase))
                    continue

                # run all plugins with this phase
                self.__run_task_phase(phase)

                # if abort flag has been set task should be aborted now
                # since this calls return rerun will not be done
                if self._abort:
                    return

            log.debug('committing session, abort=%s' % self._abort)
            self.session.commit()
            fire_event('task.execute.completed', self)
        finally:
            # this will cause database rollback on exception and task.abort
            self.session.close()

        # rerun task
        if self._rerun:
            if self._rerun_count >= self.max_reruns:
                log.info('Task has been rerunning already %s times, stopping for now' % self._rerun_count)
                # reset the counter for future runs (necessary only with webui)
                self._rerun_count = 0
            else:
                log.info('Rerunning the task in case better resolution can be achieved.')
                self._rerun_count += 1
                # Restore config to original state before running again
                self.config = config_backup
                self.execute(disable_phases=disable_phases, entries=entries)

        # Clean up entries after the task has executed to reduce ram usage, #1652
        # TODO: This doesn't work with unified entries, not sure best replacement
        """if not self.manager.unit_test:
            log.debug('Clearing all entries from task.')
            self.entries = []
            self.rejected = []
            self.failed = []"""

    def _process_start(self):
        """Execute process_start phase"""
        self.__run_task_phase('process_start')

    def _process_end(self):
        """Execute terminate phase for this task"""
        if self.manager.options.validate:
            log.debug('No process_end phase with --check')
            return
        self.__run_task_phase('process_end')

    def validate(self):
        """Called during task execution. Validates config, prints errors and aborts task if invalid."""
        errors = self.validate_config(self.config)
        # log errors and abort
        if errors:
            log.critical('Task \'%s\' has configuration errors:' % self.name)
            for error in errors:
                log.error(error.error_with_path)
            # task has errors, abort it
            self.abort('\n'.join(e.error_with_path for e in errors))
        return errors

    @staticmethod
    def validate_config(config):
        schema = plugin_schemas(context='task')
        # Don't validate commented out plugins
        schema['patternProperties'] = {'^_': {}}
        validator = config_schema.SchemaValidator(schema)
        return validator.process_config(config)
Example #41
0
    def lookup(self, entry, search_allowed=True):
        """
        Perform imdb lookup for entry.

        :param entry: Entry instance
        :param search_allowed: Allow fallback to search
        :raises PluginError: Failure reason
        """

        from flexget.manager import manager

        if entry.get('imdb_url', eval_lazy=False):
            log.debug('No title passed. Lookup for %s' % entry['imdb_url'])
        elif entry.get('imdb_id', eval_lazy=False):
            log.debug('No title passed. Lookup for %s' % entry['imdb_id'])
        elif entry.get('title', eval_lazy=False):
            log.debug('lookup for %s' % entry['title'])
        else:
            raise PluginError('looking up IMDB for entry failed, no title, imdb_url or imdb_id passed.')

        session = Session()

        try:
            # entry sanity checks
            for field in ['imdb_votes', 'imdb_score']:
                if entry.get(field, eval_lazy=False):
                    value = entry[field]
                    if not isinstance(value, (int, float)):
                        raise PluginError('Entry field %s should be a number!' % field)

            # if imdb_id is included, build the url.
            if entry.get('imdb_id', eval_lazy=False) and not entry.get('imdb_url', eval_lazy=False):
                entry['imdb_url'] = make_url(entry['imdb_id'])

            # make sure imdb url is valid
            if entry.get('imdb_url', eval_lazy=False):
                imdb_id = extract_id(entry['imdb_url'])
                if imdb_id:
                    entry['imdb_url'] = make_url(imdb_id)
                else:
                    log.debug('imdb url %s is invalid, removing it' % entry['imdb_url'])
                    del entry['imdb_url']

            # no imdb_url, check if there is cached result for it or if the
            # search is known to fail
            if not entry.get('imdb_url', eval_lazy=False):
                result = session.query(SearchResult).\
                    filter(SearchResult.title == entry['title']).first()
                if result:
                    if result.fails and not manager.options.retry:
                        # this movie cannot be found, not worth trying again ...
                        log.debug('%s will fail lookup' % entry['title'])
                        raise PluginError('Title `%s` lookup fails' % entry['title'])
                    else:
                        if result.url:
                            log.trace('Setting imdb url for %s from db' % entry['title'])
                            entry['imdb_url'] = result.url

            # no imdb url, but information required, try searching
            if not entry.get('imdb_url', eval_lazy=False) and search_allowed:
                log.verbose('Searching from imdb `%s`' % entry['title'])

                search = ImdbSearch()
                search_name = entry.get('movie_name', entry['title'], eval_lazy=False)
                search_result = search.smart_match(search_name)
                if search_result:
                    entry['imdb_url'] = search_result['url']
                    # store url for this movie, so we don't have to search on
                    # every run
                    result = SearchResult(entry['title'], entry['imdb_url'])
                    session.add(result)
                    log.verbose('Found %s' % (entry['imdb_url']))
                else:
                    log_once('Imdb lookup failed for %s' % entry['title'], log)
                    # store FAIL for this title
                    result = SearchResult(entry['title'])
                    result.fails = True
                    session.add(result)
                    raise PluginError('Title `%s` lookup failed' % entry['title'])

            # check if this imdb page has been parsed & cached
            movie = session.query(Movie).\
                options(joinedload_all(Movie.genres),
                    joinedload_all(Movie.languages),
                    joinedload_all(Movie.actors),
                    joinedload_all(Movie.directors)).\
                filter(Movie.url == entry['imdb_url']).first()

            # determine whether or not movie details needs to be parsed
            # (missing from the cache entirely, or cached copy has expired)
            req_parse = False
            if not movie:
                req_parse = True
            elif movie.expired:
                req_parse = True

            if req_parse:
                if movie is not None:
                    if movie.expired:
                        log.verbose('Movie `%s` details expired, refreshing ...' % movie.title)
                    # Remove the old movie, we'll store another one later.
                    session.query(MovieLanguage).filter(MovieLanguage.movie_id == movie.id).delete()
                    session.query(Movie).filter(Movie.url == entry['imdb_url']).delete()

                # search and store to cache
                if 'title' in entry:
                    log.verbose('Parsing imdb for `%s`' % entry['title'])
                else:
                    log.verbose('Parsing imdb for `%s`' % entry['imdb_id'])
                try:
                    movie = self._parse_new_movie(entry['imdb_url'], session)
                except UnicodeDecodeError:
                    log.error('Unable to determine encoding for %s. Installing chardet library may help.' %
                              entry['imdb_url'])
                    # store cache so this will not be tried again
                    movie = Movie()
                    movie.url = entry['imdb_url']
                    session.add(movie)
                    raise PluginError('UnicodeDecodeError')
                except ValueError as e:
                    # TODO: might be a little too broad catch, what was this for anyway? ;P
                    if manager.options.debug:
                        log.exception(e)
                    raise PluginError('Invalid parameter: %s' % entry['imdb_url'], log)

            for att in ['title', 'score', 'votes', 'year', 'genres', 'languages', 'actors', 'directors', 'mpaa_rating']:
                log.trace('movie.%s: %s' % (att, getattr(movie, att)))

            # store to entry
            entry.update_using_map(self.field_map, movie)
        finally:
            log.trace('committing session')
            session.commit()
            # Close the session so the db connection is returned to the pool
            # (was previously leaked: commit without close)
            session.close()
Example #42
0
    def build(self, task):
        """Render a per-site details report table to ``details_report.png``.

        For every entry the current values from the db are combined with the
        change since the last run; totals are appended as a final row and the
        whole table is drawn with matplotlib.

        :param task: Task instance whose entries carry ``site_name``/``details``.
        """
        if not (plt and pd):
            logger.warning('Dependency does not exist: [matplotlib, pandas]')
            return
        if not task.accepted and not task.failed:
            return

        session = Session()

        columns = [
            'site',
            'downloaded',
            'uploaded',
            'share_ratio',
            'points',
            'seeding',
            'leeching',
            'hr',
        ]

        data = {'sort_column': [], 'default_order': []}

        total_details = {}

        total_changed = {}

        for column in columns:
            data[column] = []
            total_details[column] = 0
            total_changed[column] = 0

        # default_order preserves the original entry order when sort values tie
        order = len(task.all_entries)

        for entry in task.all_entries:
            data['default_order'].append(order)
            order = order - 1
            user_details_db = self._get_user_details(session,
                                                     entry['site_name'])
            if user_details_db is None:
                # First time this site is seen: seed a zeroed db row
                user_details_db = UserDetailsEntry(site=entry['site_name'],
                                                   downloaded=0,
                                                   uploaded=0,
                                                   share_ratio=0,
                                                   points=0,
                                                   seeding=0,
                                                   leeching=0,
                                                   hr=0)
                session.add(user_details_db)
                session.commit()
                user_details_db = self._get_user_details(
                    session, entry['site_name'])

            # failed
            if not entry.get('details'):
                for column in columns:
                    value = getattr(user_details_db, column)
                    if entry.failed:
                        # '*' marks stale values from a failed fetch
                        data[column].append(
                            self.buid_data_text(column, value) + '*')
                    else:
                        data[column].append(self.buid_data_text(column, value))
                    if not entry.get('do_not_count') and column not in [
                            'site'
                    ]:
                        self.count(total_details, column, value)
                data['sort_column'].append(0)
                continue

            # now
            details_now = {}
            for key, value in entry['details'].items():
                details_now[key] = self.transfer_data(key, value)

            # changed
            details_changed = {}
            for key, value_now in details_now.items():
                if value_now != '*':
                    details_changed[key] = value_now - getattr(
                        user_details_db, key)
                else:
                    details_changed[key] = '*'
            # sites sort by uploaded delta; unknown deltas sort last
            if details_changed['uploaded'] == '*':
                data['sort_column'].append(0)
            else:
                data['sort_column'].append(details_changed['uploaded'])

            # append to data
            data['site'].append(entry['site_name'])
            for column in columns:
                if column == 'site':
                    continue
                data[column].append('{}{}'.format(
                    self.buid_data_text(column,
                                        getattr(user_details_db, column)),
                    self.buid_data_text(column,
                                        details_changed[column],
                                        append=True)))
                if total_details.get(column) is None:
                    total_details[column] = 0
                if total_changed.get(column) is None:
                    total_changed[column] = 0
                if not entry.get('do_not_count') and column not in [
                        'share_ratio', 'points'
                ]:
                    total_details[column] = total_details[column] + getattr(
                        user_details_db, column)
                    if details_changed[column] != '*':
                        total_changed[column] = total_changed[
                            column] + details_changed[column]

            # update db
            for key, value in details_now.items():
                if value != '*':
                    setattr(user_details_db, key, value)
            session.commit()

        # All db work is done; close the session so the connection is returned
        # to the pool (was previously leaked: never closed)
        session.close()

        data['site'].append('total')
        for column in columns:
            if column == 'site':
                continue
            data[column].append('{}{}'.format(
                self.buid_data_text(column, total_details[column]),
                self.buid_data_text(column, total_changed[column],
                                    append=True)))
        # totals row always sorts first
        data['sort_column'].append(float('inf'))
        data['default_order'].append(float('inf'))
        df = pd.DataFrame(data)
        df.sort_values(by=['sort_column', 'default_order'],
                       ascending=False,
                       inplace=True)
        df.drop(columns=['sort_column', 'default_order'], inplace=True)
        line_count = len(data['site'])
        fig = plt.figure(figsize=(8, line_count / 1.8))
        plt.axis('off')
        # cell background: red for decreases, green for increases, yellow for stale
        colors = []
        for x in df.values:
            cc = []
            for y in x:
                if '-' in y and 'm-team' not in y:
                    cc.append('#f38181')
                elif '+' in y:
                    cc.append('#95e1d3')
                elif '*' in y:
                    cc.append('#eff48e')
                else:
                    cc.append('white')
            colors.append(cc)
        col_widths = [0.14, 0.16, 0.16, 0.14, 0.14, 0.1, 0.1, 0.06]
        table = plt.table(cellText=df.values,
                          cellColours=colors,
                          bbox=[0, 0, 1, 1],
                          colLabels=df.columns,
                          colWidths=col_widths,
                          loc='best')
        table.auto_set_font_size(False)
        table.set_fontsize(10)
        fig.tight_layout()
        plt.title(datetime.now().replace(microsecond=0))
        plt.savefig('details_report.png', bbox_inches='tight', dpi=300)
Example #43
0
    def migrate2(self):
        session = Session()

        try:
            from progressbar import ProgressBar, Percentage, Bar, ETA
        except:
            print 'Critical: progressbar library not found, try running `bin/easy_install progressbar` ?'
            return

        class Seen(Base):

            __tablename__ = 'seen'

            id = Column(Integer, primary_key=True)
            field = Column(String)
            value = Column(String, index=True)
            task = Column('feed', String)
            added = Column(DateTime)

            def __init__(self, field, value, task):
                self.field = field
                self.value = value
                self.task = task
                self.added = datetime.now()

            def __str__(self):
                return '<Seen(%s=%s)>' % (self.field, self.value)

        print ''

        # REPAIR / REMOVE DUPLICATES
        index = 0
        removed = 0
        total = session.query(Seen).count() + 1

        widgets = [
            'Repairing - ',
            ETA(), ' ',
            Percentage(), ' ',
            Bar(left='[', right=']')
        ]
        bar = ProgressBar(widgets=widgets, maxval=total).start()

        for seen in session.query(Seen).all():
            index += 1
            if index % 10 == 0:
                bar.update(index)
            amount = 0
            for dupe in session.query(Seen).filter(Seen.value == seen.value):
                amount += 1
                if amount > 1:
                    removed += 1
                    session.delete(dupe)
        bar.finish()

        # MIGRATE
        total = session.query(Seen).count() + 1
        widgets = [
            'Upgrading - ',
            ETA(), ' ',
            Percentage(), ' ',
            Bar(left='[', right=']')
        ]
        bar = ProgressBar(widgets=widgets, maxval=total).start()

        index = 0
        for seen in session.query(Seen).all():
            index += 1
            if not index % 10:
                bar.update(index)
            se = SeenEntry(u'N/A', seen.task, u'migrated')
            se.added = seen.added
            se.fields.append(SeenField(seen.field, seen.value))
            session.add(se)
        bar.finish()

        session.execute('drop table seen;')
        session.commit()
Example #44
0
    def lookup(self, entry, search_allowed=True):
        """Perform imdb lookup for entry.
        Raises PluginError with failure reason."""

        from flexget.manager import manager

        if entry.get('imdb_url', lazy=False):
            log.debug('No title passed. Lookup for %s' % entry['imdb_url'])
        elif entry.get('imdb_id', lazy=False):
            log.debug('No title passed. Lookup for %s' % entry['imdb_id'])
        elif entry.get('title', lazy=False):
            log.debug('lookup for %s' % entry['title'])
        else:
            raise PluginError('looking up IMDB for entry failed, no title, imdb_url or imdb_id passed.')

        take_a_break = False
        session = Session()

        try:
            # entry sanity checks
            for field in ['imdb_votes', 'imdb_score']:
                if entry.get(field, lazy=False):
                    value = entry[field]
                    if not isinstance(value, (int, float)):
                        raise PluginError('Entry field %s should be a number!' % field)

            # if imdb_id is included, build the url.
            if entry.get('imdb_id', lazy=False) and not entry.get('imdb_url', lazy=False):
                entry['imdb_url'] = make_url(entry['imdb_id'])

            # make sure imdb url is valid
            if entry.get('imdb_url', lazy=False):
                imdb_id = extract_id(entry['imdb_url'])
                if imdb_id:
                    entry['imdb_url'] = make_url(imdb_id)
                else:
                    log.debug('imdb url %s is invalid, removing it' % entry['imdb_url'])
                    del(entry['imdb_url'])

            # no imdb_url, check if there is cached result for it or if the
            # search is known to fail
            if not entry.get('imdb_url', lazy=False):
                result = session.query(SearchResult).\
                         filter(SearchResult.title == entry['title']).first()
                if result:
                    if result.fails and not manager.options.retry:
                        # this movie cannot be found, not worth trying again ...
                        log.debug('%s will fail lookup' % entry['title'])
                        raise PluginError('Title `%s` lookup fails' % entry['title'])
                    else:
                        if result.url:
                            log.trace('Setting imdb url for %s from db' % entry['title'])
                            entry['imdb_url'] = result.url

            # no imdb url, but information required, try searching
            if not entry.get('imdb_url', lazy=False) and search_allowed:
                log.verbose('Searching from imdb `%s`' % entry['title'])

                take_a_break = True
                search = ImdbSearch()
                search_result = search.smart_match(entry['title'])
                if search_result:
                    entry['imdb_url'] = search_result['url']
                    # store url for this movie, so we don't have to search on
                    # every run
                    result = SearchResult(entry['title'], entry['imdb_url'])
                    session.add(result)
                    log.verbose('Found %s' % (entry['imdb_url']))
                else:
                    log_once('Imdb lookup failed for %s' % entry['title'], log)
                    # store FAIL for this title
                    result = SearchResult(entry['title'])
                    result.fails = True
                    session.add(result)
                    raise PluginError('Title `%s` lookup failed' % entry['title'])


            # check if this imdb page has been parsed & cached
            movie = session.query(Movie).\
                options(joinedload_all(Movie.genres, Movie.languages,
                Movie.actors, Movie.directors)).\
                filter(Movie.url == entry['imdb_url']).first()

            refresh_interval = 2
            if movie:
                if movie.year:
                    age = (datetime.now().year - movie.year)
                    refresh_interval += age * 5
                    log.debug('cached movie `%s` age %i refresh interval %i days' % (movie.title, age, refresh_interval))

            if not movie or movie.updated is None or \
               movie.updated < datetime.now() - timedelta(days=refresh_interval):
                # Remove the old movie, we'll store another one later.
                session.query(Movie).filter(Movie.url == entry['imdb_url']).delete()
                # search and store to cache
                if 'title' in entry:
                    log.verbose('Parsing imdb for `%s`' % entry['title'])
                else:
                    log.verbose('Parsing imdb for `%s`' % entry['imdb_id'])
                try:
                    take_a_break = True
                    imdb = ImdbParser()
                    imdb.parse(entry['imdb_url'])
                    # store to database
                    movie = Movie()
                    movie.photo = imdb.photo
                    movie.title = imdb.name
                    movie.score = imdb.score
                    movie.votes = imdb.votes
                    movie.year = imdb.year
                    movie.mpaa_rating = imdb.mpaa_rating
                    movie.plot_outline = imdb.plot_outline
                    movie.url = entry['imdb_url']
                    for name in imdb.genres:
                        genre = session.query(Genre).\
                            filter(Genre.name == name).first()
                        if not genre:
                            genre = Genre(name)
                        movie.genres.append(genre) # pylint:disable=E1101
                    for name in imdb.languages:
                        language = session.query(Language).\
                            filter(Language.name == name).first()
                        if not language:
                            language = Language(name)
                        movie.languages.append(language) # pylint:disable=E1101
                    for imdb_id, name in imdb.actors.iteritems():
                        actor = session.query(Actor).\
                            filter(Actor.imdb_id == imdb_id).first()
                        if not actor:
                            actor = Actor(imdb_id, name)
                        movie.actors.append(actor) # pylint:disable=E1101
                    for imdb_id, name in imdb.directors.iteritems():
                        director = session.query(Director).\
                            filter(Director.imdb_id == imdb_id).first()
                        if not director:
                            director = Director(imdb_id, name)
                        movie.directors.append(director) # pylint:disable=E1101
                    # so that we can track how long since we've updated the info later
                    movie.updated = datetime.now()
                    session.add(movie)

                except UnicodeDecodeError:
                    log.error('Unable to determine encoding for %s. Installing chardet library may help.' % entry['imdb_url'])
                    # store cache so this will not be tried again
                    movie = Movie()
                    movie.url = entry['imdb_url']
                    session.add(movie)
                    raise PluginError('UnicodeDecodeError')
                except ValueError, e:
                    # TODO: might be a little too broad catch, what was this for anyway? ;P
                    if manager.options.debug:
                        log.exception(e)
                    raise PluginError('Invalid parameter: %s' % entry['imdb_url'], log)

            for att in ['title', 'score', 'votes', 'year', 'genres', 'languages', 'actors', 'directors', 'mpaa_rating']:
                log.trace('movie.%s: %s' % (att, getattr(movie, att)))

            # store to entry
            entry.update_using_map(self.field_map, movie)

            # give imdb a little break between requests (see: http://flexget.com/ticket/129#comment:1)
            if (take_a_break and
                not manager.options.debug and
                not manager.unit_test):
                import time
                time.sleep(3)
Example #45
0
def lookup_movie(title=None, year=None, rottentomatoes_id=None, smart_match=None,
                 only_cached=False, session=None):
    """
    Do a lookup from Rotten Tomatoes for the movie matching the passed arguments.
    Any combination of criteria can be passed, the most specific criteria specified will be used.

    :param rottentomatoes_id: rottentomatoes_id of desired movie
    :param string title: title of desired movie
    :param year: release year of desired movie
    :param smart_match: attempt to clean and parse title and year from a string
    :param only_cached: if this is specified, an online lookup will not occur if the movie is not in the cache
    :param session: optionally specify a session to use, if specified, returned Movie will be live in that session
    :returns: The Movie object populated with data from Rotten Tomatoes
    :raises: PluginError if a match cannot be found or there are other problems with the lookup

    """

    if smart_match:
        # If smart_match was specified, and we don't have more specific criteria, parse it into a title and year
        title_parser = MovieParser()
        title_parser.parse(smart_match)
        title = title_parser.name
        year = title_parser.year
        if title == '' and not (rottentomatoes_id or title):
            raise PluginError('Failed to parse name from %s' % smart_match)

    # Case-insensitive search key, optionally including the year; this same
    # key is used for the RottenTomatoesSearchResult cache below.
    if title:
        search_string = title.lower()
        if year:
            search_string = '%s %s' % (search_string, year)
    elif not rottentomatoes_id:
        raise PluginError('No criteria specified for rotten tomatoes lookup')

    def id_str():
        # Human-readable identifier of the lookup criteria, for log/error messages.
        return '<title=%s,year=%s,rottentomatoes_id=%s>' % (title, year, rottentomatoes_id)

    # NOTE(review): when no session is passed, one is created here and never
    # explicitly closed -- verify whether the caller/Session teardown handles that.
    if not session:
        session = Session()

    log.debug('Looking up rotten tomatoes information for %s' % id_str())

    movie = None

    # Try to lookup from cache
    # Order of preference: by rottentomatoes_id, then by title(+year), then
    # via a previously-saved search result for the same search string.
    if rottentomatoes_id:
        movie = session.query(RottenTomatoesMovie).\
            filter(RottenTomatoesMovie.id == rottentomatoes_id).first()
    if not movie and title:
        movie_filter = session.query(RottenTomatoesMovie).filter(func.lower(RottenTomatoesMovie.title) == title.lower())
        if year:
            movie_filter = movie_filter.filter(RottenTomatoesMovie.year == year)
        movie = movie_filter.first()
        if not movie:
            log.debug('No matches in movie cache found, checking search cache.')
            found = session.query(RottenTomatoesSearchResult).\
                filter(func.lower(RottenTomatoesSearchResult.search) == search_string).first()
            if found and found.movie:
                log.debug('Movie found in search cache.')
                movie = found.movie
    if movie:
        # Movie found in cache, check if cache has expired.
        if movie.expired and not only_cached:
            log.debug('Cache has expired for %s, attempting to refresh from Rotten Tomatoes.' % id_str())
            try:
                # movies_info presumably fetches details from the RT API -- on
                # network failure we deliberately fall back to the stale cache.
                result = movies_info(movie.id)
                movie = _set_movie_details(movie, session, result)
                session.merge(movie)
            except URLError:
                log.error('Error refreshing movie details from Rotten Tomatoes, cached info being used.')
        else:
            log.debug('Movie %s information restored from cache.' % id_str())
    else:
        if only_cached:
            raise PluginError('Movie %s not found from cache' % id_str())
        # There was no movie found in the cache, do a lookup from Rotten Tomatoes
        log.debug('Movie %s not found in cache, looking up from rotten tomatoes.' % id_str())
        try:
            # Direct lookup by id first; title search only as a fallback.
            if not movie and rottentomatoes_id:
                result = movies_info(rottentomatoes_id)
                if result:
                    movie = RottenTomatoesMovie()
                    movie = _set_movie_details(movie, session, result)
                    session.add(movie)

            if not movie and title:
                # TODO: Extract to method
                log.verbose('Searching from rt `%s`' % search_string)
                results = movies_search(search_string)
                if results:
                    results = results.get('movies')
                    if results:
                        # Score every candidate by title similarity, best first.
                        for movie_res in results:
                            seq = difflib.SequenceMatcher(
                                lambda x: x == ' ', movie_res['title'].lower(), title.lower())
                            movie_res['match'] = seq.ratio()
                        results.sort(key=lambda x: x['match'], reverse=True)

                        # Remove all movies below MIN_MATCH, and different year
                        for movie_res in results[:]:

                            if year and movie_res.get('year'):
                                movie_res['year'] = int(movie_res['year'])
                                if movie_res['year'] != year:
                                    # Listed year differs; give the candidate a second
                                    # chance against its theater/dvd release year.
                                    release_year = False
                                    if movie_res.get('release_dates', {}).get('theater'):
                                        log.debug('Checking year against theater release date')
                                        release_year = time.strptime(movie_res['release_dates'].get('theater'),
                                                                     '%Y-%m-%d').tm_year
                                    elif movie_res.get('release_dates', {}).get('dvd'):
                                        log.debug('Checking year against dvd release date')
                                        release_year = time.strptime(movie_res['release_dates'].get('dvd'),
                                                                     '%Y-%m-%d').tm_year
                                    if not (release_year and release_year == year):
                                        log.debug('removing %s - %s (wrong year: %s)' %
                                                  (movie_res['title'], movie_res['id'],
                                                   str(release_year or movie_res['year'])))
                                        results.remove(movie_res)
                                        continue
                            if movie_res['match'] < MIN_MATCH:
                                log.debug('removing %s (min_match)' % movie_res['title'])
                                results.remove(movie_res)
                                continue

                        if not results:
                            raise PluginError('no appropiate results')

                        if len(results) == 1:
                            log.debug('SUCCESS: only one movie remains')
                        else:
                            # Check min difference between best two hits
                            # Refuse ambiguous matches rather than guess wrong.
                            diff = results[0]['match'] - results[1]['match']
                            if diff < MIN_DIFF:
                                log.debug('unable to determine correct movie, min_diff too small'
                                          '(`%s (%d) - %s` <-?-> `%s (%d) - %s`)' %
                                          (results[0]['title'], results[0]['year'], results[0]['id'],
                                           results[1]['title'], results[1]['year'], results[1]['id']))
                                for r in results:
                                    log.debug('remain: %s (match: %s) %s' % (r['title'], r['match'], r['id']))
                                raise PluginError('min_diff')

                        # Fetch the full details for the winner; fall back to
                        # the (sparser) search result if the info call fails.
                        result = movies_info(results[0].get('id'))

                        if not result:
                            result = results[0]

                        movie = session.query(RottenTomatoesMovie).filter(
                            RottenTomatoesMovie.id == result['id']).first()

                        if not movie:
                            movie = RottenTomatoesMovie()
                            movie = _set_movie_details(movie, session, result)
                            session.add(movie)
                            session.commit()


                        # Remember which movie this search string resolved to,
                        # so future lookups skip the online search.
                        if title.lower() != movie.title.lower():
                            log.debug('Saving search result for \'%s\'' % search_string)
                            session.add(RottenTomatoesSearchResult(search=search_string, movie=movie))
        except URLError:
            raise PluginError('Error looking up movie from RottenTomatoes')

    if not movie:
        raise PluginError('No results found from rotten tomatoes for %s' % id_str())
    else:
        # Access attributes to force the relationships to eager load before we detach from session
        for attr in ['alternate_ids', 'cast', 'directors', 'genres', 'links', 'posters', 'release_dates']:
            getattr(movie, attr)
        session.commit()
        return movie
Example #46
0
def lookup_movie(title=None, year=None, rottentomatoes_id=None, imdb_id=None, smart_match=None, only_cached=False, session=None):
    """Do a lookup from Rotten Tomatoes for the movie matching the passed arguments.

    Any combination of criteria can be passed, the most specific criteria specified will be used.

    :param rottentomatoes_id: rottentomatoes_id of desired movie
    :param imdb_id: imdb_id of desired movie
    :param title: title of desired movie
    :param year: release year of desired movie
    :param smart_match: attempt to clean and parse title and year from a string
    :param only_cached: if this is specified, an online lookup will not occur if the movie is not in the cache
    :param session: optionally specify a session to use, if specified, returned Movie will be live in that session
    :returns: The Movie object populated with data from Rotten Tomatoes
    :raises: PluginError if a match cannot be found or there are other problems with the lookup

    """

    if smart_match:
        # If smart_match was specified, and we don't have more specific criteria, parse it into a title and year
        title_parser = MovieParser()
        title_parser.parse(smart_match)
        title = title_parser.name
        year = title_parser.year
        if title == '' and not (rottentomatoes_id or imdb_id or title):
            # BUGFIX: this previously interpolated `raw_name`, an undefined
            # name, so the intended PluginError surfaced as a NameError.
            raise PluginError('Failed to parse name from %s' % smart_match)

    # Case-insensitive search key (optionally with year) used both for the
    # online search and the RottenTomatoesSearchResult cache.
    if title:
        search_string = title.lower()
        if year:
            search_string = '%s %s' % (search_string, year)
    elif not (rottentomatoes_id or imdb_id):
        raise PluginError('No criteria specified for rotten tomatoes lookup')

    def id_str():
        # Human-readable identifier of the lookup criteria, for log/error messages.
        return '<title=%s,year=%s,rottentomatoes_id=%s,imdb_id=%s>' % (title, year, rottentomatoes_id, imdb_id)

    if not session:
        session = Session()

    log.debug('Looking up rotten tomatoes information for %s' % id_str())

    movie = None

    # Cache lookup order: rottentomatoes_id, then imdb alternate id, then
    # title(+year), then a previously-saved search result.
    if rottentomatoes_id:
        movie = session.query(RottenTomatoesMovie).\
                filter(RottenTomatoesMovie.id == rottentomatoes_id).first()
    if not movie and imdb_id:
        alt_id = session.query(RottenTomatoesAlternateId).\
                filter(RottenTomatoesAlternateId.name.in_(['imdb', 'flexget_imdb'])).\
                filter(RottenTomatoesAlternateId.id == imdb_id.lstrip('t')).first()
        if alt_id:
            movie = session.query(RottenTomatoesMovie).filter(RottenTomatoesMovie.id == alt_id.movie_id).first()
    if not movie and title:
        movie_filter = session.query(RottenTomatoesMovie).filter(func.lower(RottenTomatoesMovie.title) == title.lower())
        if year:
            movie_filter = movie_filter.filter(RottenTomatoesMovie.year == year)
        movie = movie_filter.first()
        if not movie:
            found = session.query(RottenTomatoesSearchResult). \
                    filter(func.lower(RottenTomatoesSearchResult.search) == search_string).first()
            if found and found.movie:
                movie = found.movie
    if movie:
        # Movie found in cache, check if cache has expired.
        if movie.expired and not only_cached:
            log.debug('Cache has expired for %s, attempting to refresh from Rotten Tomatoes.' % id_str())
            try:
                # BUGFIX: the original `filter(...)[0]` raised IndexError when
                # alternate_ids was non-empty but held no imdb-style alias
                # (only URLError is caught here). Guard the empty case.
                imdb_alts = [alt_id for alt_id in (movie.alternate_ids or [])
                             if alt_id.name in ['imdb', 'flexget_imdb']]
                imdb_alt_id = imdb_alts[0].id if imdb_alts else None
                if imdb_alt_id:
                    result = movies_alias(imdb_alt_id, 'imdb')
                else:
                    result = movies_info(movie.id)
                movie = set_movie_details(movie, session, result)
                session.merge(movie)
            except URLError:
                log.error('Error refreshing movie details from Rotten Tomatoes, cached info being used.')
        else:
            log.debug('Movie %s information restored from cache.' % id_str())
    else:
        if only_cached:
            raise PluginError('Movie %s not found from cache' % id_str())
        # There was no movie found in the cache, do a lookup from Rotten Tomatoes
        log.debug('Movie %s not found in cache, looking up from rotten tomatoes.' % id_str())
        try:
            # Lookups using imdb_id
            if imdb_id:
                log.debug('Using IMDB alias %s.' % imdb_id)
                result = movies_alias(imdb_id, 'imdb')
                if result:
                    # Sanity-check the alias result against the requested
                    # title/year; reject it if it looks like a mismatch.
                    mismatch = []
                    if title and difflib.SequenceMatcher(lambda x: x == ' ', re.sub('\s+\(.*\)$', '', result['title'].lower()),
                            title.lower()).ratio() < MIN_MATCH:
                        mismatch.append('the title (%s <-?-> %s)' % (title, result['title']))
                    if year and fabs(result['year'] - year) > 1:
                        mismatch.append('the year (%s <-?-> %s)' % (year, result['year']))
                        release_year = None
                        if result.get('release_dates', {}).get('theater'):
                            log.debug('Checking year against theater release date')
                            release_year = time.strptime(result['release_dates'].get('theater'), '%Y-%m-%d').tm_year
                            if fabs(release_year - year) > 1:
                                mismatch.append('the theater release (%s)' % release_year)
                        elif result.get('release_dates', {}).get('dvd'):
                            log.debug('Checking year against dvd release date')
                            release_year = time.strptime(result['release_dates'].get('dvd'), '%Y-%m-%d').tm_year
                            if fabs(release_year - year) > 1:
                                mismatch.append('the DVD release (%s)' % release_year)
                    if mismatch:
                        log.warning('Rotten Tomatoes had an imdb alias for %s but it didn\'t match %s.' % \
                            (imdb_id, ', or '.join(mismatch)))
                    else:
                        log.debug('imdb_id %s maps to rt_id %s, checking db for info.' % (imdb_id, result['id']))
                        movie = session.query(RottenTomatoesMovie).filter(RottenTomatoesMovie.id == result.get('id')).first()
                        if movie:
                            log.debug('Movie %s was in database, but did not have the imdb_id stored, '
                                    'forcing an update' % movie)
                            movie = set_movie_details(movie, session, result)
                            session.merge(movie)
                        else:
                            log.debug('%s was not in database, setting info.' % result['title'])
                            movie = RottenTomatoesMovie()
                            movie = set_movie_details(movie, session, result)
                            if not movie:
                                raise PluginError('set_movie_details returned %s' % movie)
                            session.add(movie)
                else:
                    log.debug('IMDB alias %s returned no results.' % imdb_id)
            if not movie and rottentomatoes_id:
                result = movies_info(rottentomatoes_id)
                if result:
                    movie = RottenTomatoesMovie()
                    movie = set_movie_details(movie, session, result)
                    session.add(movie)
            if not movie and title:
                log.verbose('Searching from rt `%s`' % search_string)
                results = movies_search(search_string)
                if results:
                    results = results.get('movies')
                    if results:
                        # Score candidates by title similarity, best first.
                        for movie_res in results:
                            seq = difflib.SequenceMatcher(lambda x: x == ' ',
                                    movie_res['title'].lower(), title.lower())
                            movie_res['match'] = seq.ratio()
                        results.sort(key=lambda x: x['match'], reverse=True)

                        # Remove all movies below MIN_MATCH, and different year
                        for movie_res in results[:]:

                            if year and movie_res.get('year') and movie_res['year'] != year:
                                # Listed year differs; give the candidate a second
                                # chance against its theater/dvd release year.
                                release_year = False
                                if movie_res.get('release_dates', {}).get('theater'):
                                    log.debug('Checking year against theater release date')
                                    release_year = time.strptime(movie_res['release_dates'].get('theater'), '%Y-%m-%d').tm_year
                                elif movie_res.get('release_dates', {}).get('dvd'):
                                    log.debug('Checking year against dvd release date')
                                    release_year = time.strptime(movie_res['release_dates'].get('dvd'), '%Y-%m-%d').tm_year
                                if not (release_year and release_year == year):
                                    log.debug('removing %s - %s (wrong year: %s)' % (movie_res['title'],
                                        movie_res['id'], str(release_year or movie_res['year'])))
                                    results.remove(movie_res)
                                    continue
                            if movie_res['match'] < MIN_MATCH:
                                log.debug('removing %s (min_match)' % movie_res['title'])
                                results.remove(movie_res)
                                continue

                        if not results:
                            raise PluginError('no appropiate results')

                        if len(results) == 1:
                            log.debug('SUCCESS: only one movie remains')
                        else:
                            # Check min difference between best two hits;
                            # refuse ambiguous matches rather than guess wrong.
                            diff = results[0]['match'] - results[1]['match']
                            if diff < MIN_DIFF:
                                log.debug('unable to determine correct movie, min_diff too small'
                                        '(`%s (%d) - %s` <-?-> `%s (%d) - %s`)' %
                                        (results[0]['title'], results[0]['year'], results[0]['id'],
                                            results[1]['title'], results[1]['year'], results[1]['id']))
                                for r in results:
                                    log.debug('remain: %s (match: %s) %s' % (r['title'], r['match'],
                                        r['id']))
                                raise PluginError('min_diff')

                        # Prefer the imdb-alias endpoint for full details; fall
                        # back to the (sparser) search result if info fails.
                        imdb_alt_id = results[0].get('alternate_ids', {}).get('imdb')
                        if imdb_alt_id:
                            result = movies_alias(imdb_alt_id)
                        else:
                            result = movies_info(results[0].get('id'))

                        if not result:
                            result = results[0]

                        movie = RottenTomatoesMovie()
                        try:
                            movie = set_movie_details(movie, session, result)
                            if imdb_id and not filter(lambda alt_id: alt_id.name == 'imdb' and
                                    alt_id.id == imdb_id.lstrip('t'), movie.alternate_ids):
                                log.warning('Adding flexget_imdb alternate id %s for movie %s' %
                                        (imdb_id, movie))
                                movie.alternate_ids.append(RottenTomatoesAlternateId('flexget_imdb',\
                                        imdb_id.lstrip('t')))
                            session.add(movie)
                        except IntegrityError:
                            # Race: the movie appeared in the db between our
                            # earlier lookups and this add; merge instead.
                            log.warning('Found movie %s in database after search even though we '
                                'already looked, updating it with search result.' % movie)
                            session.rollback()
                            movie = session.query(RottenTomatoesMovie).filter(RottenTomatoesMovie.id == result['id']).first()
                            movie = set_movie_details(movie, session, result)
                            if imdb_id and not filter(lambda alt_id: alt_id.name == 'imdb' and
                                    alt_id.id == imdb_id.lstrip('t'), movie.alternate_ids):
                                log.warning('Adding flexget_imdb alternate id %s for movie %s' %
                                        (imdb_id, movie))
                                movie.alternate_ids.append(RottenTomatoesAlternateId('flexget_imdb',\
                                        imdb_id.lstrip('t')))
                            session.merge(movie)

                        # Remember which movie this search string resolved to,
                        # so future lookups skip the online search.
                        if title.lower() != movie.title.lower():
                            log.debug('Saving search result for \'%s\'' % search_string)
                            session.add(RottenTomatoesSearchResult(search=search_string, movie=movie))
        except URLError:
            raise PluginError('Error looking up movie from RottenTomatoes')

    if not movie:
        raise PluginError('No results found from rotten tomatoes for %s' % id_str())
    else:
        # Access attributes to force the relationships to eager load before we detach from session
        for attr in ['alternate_ids', 'cast', 'directors', 'genres', 'links', 'posters', 'release_dates']:
            getattr(movie, attr)
        session.commit()
        return movie