def on_feed_exit(self, feed, config):
    """Record accepted movie downloads so later propers can be detected."""
    log.debug('check for learning')
    for entry in feed.accepted:
        # Nothing to record without an imdb id.
        if 'imdb_id' not in entry:
            log.debug('`%s` does not have imdb_id' % entry['title'])
            continue
        # Extract quality and proper revision from the release title.
        parser = MovieParser()
        parser.data = entry['title']
        parser.parse()
        quality = parser.quality.name
        log.debug('quality: %s' % quality)
        log.debug('imdb_id: %s' % entry['imdb_id'])
        log.debug('proper count: %s' % parser.proper_count)
        # Is this exact (movie, quality, proper revision) already stored?
        query = feed.session.query(ProperMovie)
        query = query.filter(ProperMovie.imdb_id == entry['imdb_id'])
        query = query.filter(ProperMovie.quality == quality)
        query = query.filter(ProperMovie.proper_count == parser.proper_count)
        proper_movie = query.first()
        if proper_movie:
            log.debug('%s already exists' % proper_movie)
            continue
        # New download: persist it for future proper checks.
        record = ProperMovie()
        record.title = entry['title']
        record.feed = feed.name
        record.imdb_id = entry['imdb_id']
        record.quality = quality
        record.proper_count = parser.proper_count
        feed.session.add(record)
        log.debug('added %s' % record)
def on_task_output(self, task, config):
    """Record accepted movie downloads in the proper-tracking database."""
    log.debug('check for learning')
    for entry in task.accepted:
        if 'imdb_id' not in entry:
            log.debug('`%s` does not have imdb_id' % entry['title'])
            continue
        # Pull quality / proper info out of the release title.
        title_parser = MovieParser()
        title_parser.data = entry['title']
        title_parser.parse()
        quality = title_parser.quality.name
        log.debug('quality: %s' % quality)
        log.debug('imdb_id: %s' % entry['imdb_id'])
        log.debug('proper count: %s' % title_parser.proper_count)
        existing = (task.session.query(ProperMovie)
                    .filter(ProperMovie.imdb_id == entry['imdb_id'])
                    .filter(ProperMovie.quality == quality)
                    .filter(ProperMovie.proper_count == title_parser.proper_count)
                    .first())
        if existing:
            log.debug('%s already exists' % existing)
        else:
            # First time this exact release was downloaded: store it.
            pm = ProperMovie()
            pm.title = entry['title']
            pm.task = task.name
            pm.imdb_id = entry['imdb_id']
            pm.quality = quality
            pm.proper_count = title_parser.proper_count
            task.session.add(pm)
            log.debug('added %s' % pm)
class MovieComparator(StringComparator):
    """Compares two strings for similarity based on extracted movie title, year and quality."""

    def __init__(self):
        # Year and quality extracted from each compared string. An empty
        # Quality() instance is the 'unknown' sentinel; it is falsy, which
        # matches() relies on below.
        self.a_year, self.b_year = None, None
        self.a_quality, self.b_quality = qualities.Quality(), qualities.Quality()
        self.parser = MovieParser()
        super(MovieComparator, self).__init__(cutoff=0.9)

    def set_seq1(self, a):
        """Set first string for comparison."""
        # Compare on the parsed movie name only; remember year/quality for matches().
        self.parser.parse(a)
        super(MovieComparator, self).set_seq1(self.parser.name)
        self.a_year = self.parser.year
        self.a_quality = self.parser.quality

    def set_seq2(self, b):
        """Set second string for comparison."""
        self.parser.parse(b)
        super(MovieComparator, self).set_seq2(self.parser.name)
        self.b_year = self.parser.year
        self.b_quality = self.parser.quality

    def matches(self, other=None):
        """Compare the two strings, return True if they appear to be the same movie.

        :param other: String to compare against. If not specified, last specified
            string will be used.
        :return: True if match is close enough.
        """
        result = super(MovieComparator, self).matches(other)
        # Only enforce quality agreement when seq1 actually carried a quality
        # (an empty Quality() is falsy).
        if self.a_quality:
            if self.a_quality != self.b_quality:
                return False
        # Years must agree when both sides supplied one.
        if self.a_year and self.b_year:
            if self.a_year != self.b_year:
                # TODO: Make this fuzzier? tmdb and imdb years do not always match
                return False
        return result

    def search_string(self):
        """Return a cleaned string based on seq1 that can be used for searching."""
        result = self.a
        # NOTE: py2-only `unicode` builtin — this class predates py3 support.
        if isinstance(result, unicode):
            # Convert to combined form for better search results
            result = normalize('NFC', result)
        if self.a_year:
            result += ' %s' % self.a_year
        if self.a_quality:
            # Shorten some quality strings in search because of multiple acceptable forms
            if '720p' in self.a_quality.name:
                result += ' 720p'
            elif '1080p' in self.a_quality.name:
                result += ' 1080p'
            else:
                result += ' %s' % self.a_quality
        return result
class MovieComparator(StringComparator):
    """String comparator aware of movie metadata.

    Each input is reduced to its bare movie name before fuzzy comparison;
    extracted year and quality must also agree for a positive match.
    """

    def __init__(self):
        self.a_year = None
        self.b_year = None
        self.a_quality = qualities.UNKNOWN
        self.b_quality = qualities.UNKNOWN
        self.parser = MovieParser()
        super(MovieComparator, self).__init__(cutoff=0.9)

    def set_seq1(self, a):
        """Parse *a*, using its bare movie name as the first sequence."""
        self.parser.parse(a)
        super(MovieComparator, self).set_seq1(self.parser.name)
        self.a_year, self.a_quality = self.parser.year, self.parser.quality

    def set_seq2(self, b):
        """Parse *b*, using its bare movie name as the second sequence."""
        self.parser.parse(b)
        super(MovieComparator, self).set_seq2(self.parser.name)
        self.b_year, self.b_quality = self.parser.year, self.parser.quality

    def matches(self, other=None):
        """Return True when the two strings appear to be the same movie.

        :param other: String to compare against. When omitted, the last
            string set is used.
        :return: True if the match is close enough.
        """
        result = super(MovieComparator, self).matches(other)
        # A known quality on seq1 must be matched exactly by seq2.
        if self.a_quality > qualities.UNKNOWN and self.a_quality != self.b_quality:
            return False
        # TODO: Make this fuzzier? tmdb and imdb years do not always match
        if self.a_year and self.b_year and self.a_year != self.b_year:
            return False
        return result

    def search_string(self):
        """Return a cleaned version of seq1 suitable for use as a search query."""
        terms = self.a
        if isinstance(terms, unicode):
            # Convert to combined form for better search results
            terms = normalize('NFC', terms)
        if self.a_year:
            terms += ' %s' % self.a_year
        if self.a_quality > qualities.UNKNOWN:
            # Shorten some quality strings in search because of multiple acceptable forms
            quality_name = self.a_quality.name
            if '720p' in quality_name:
                terms += ' 720p'
            elif '1080p' in quality_name:
                terms += ' 1080p'
            else:
                terms += ' %s' % self.a_quality
        return terms
def parse_movie(self, data, **kwargs):
    """Parse a movie release title and return the populated parser.

    :param data: raw release title to parse
    :param kwargs: extra options (currently only logged)
    :return: the MovieParser instance after parsing
    """
    log.debug('Parsing movie: `%s` kwargs: %s', data, kwargs)
    # BUG FIX: time.clock() measured CPU time on Unix (not wall time) and was
    # removed in Python 3.8; time.time() gives correct wall-clock timing.
    start = time.time()
    parser = MovieParser()
    try:
        parser.parse(data)
    except ParseWarning as pw:
        # Parse warnings are informational; log once and keep the partial result.
        log_once(pw.value, logger=log)
    end = time.time()
    log.debug('Parsing result: %s (in %s ms)', parser, (end - start) * 1000)
    return parser
def smart_match(self, raw_name):
    """Clean up a messy raw name and use the extracted information to find the best match."""
    from flexget.utils.titles.movie import MovieParser
    parser = MovieParser()
    parser.data = raw_name
    parser.parse()
    title, year = parser.name, parser.year
    # An empty parsed name means the title could not be interpreted at all.
    if title == '':
        log.critical('Failed to parse name from %s' % raw_name)
        return None
    log.debug('smart_match name=%s year=%s' % (title, str(year)))
    return self.best_match(title, year)
def parse_movie(self, data, **kwargs):
    """Parse a movie release title into a MovieParseResult.

    :param data: raw release title to parse
    :param kwargs: extra options (currently only logged)
    :return: MovieParseResult with the extracted name, year, quality and proper count
    """
    log.debug('Parsing movie: `%s` kwargs: %s', data, kwargs)
    # BUG FIX: time.clock() measured CPU time on Unix (not wall time) and was
    # removed in Python 3.8; time.time() gives correct wall-clock timing.
    start = time.time()
    parser = MovieParser()
    try:
        parser.parse(data)
    except ParseWarning as pw:
        # Parse warnings are informational; log once and keep the partial result.
        log_once(pw.value, logger=log)
    result = MovieParseResult(data=data,
                              name=parser.name,
                              year=parser.year,
                              quality=parser.quality,
                              proper_count=parser.proper_count)
    end = time.time()
    log.debug('Parsing result: %s (in %s ms)', parser, (end - start) * 1000)
    return result
def parse_movie(self, data, **kwargs):
    """Parse a movie release title and package the outcome as a MovieParseResult."""
    log.debug('Parsing movie: `%s` kwargs: %s', data, kwargs)
    start = preferred_clock()
    parser = MovieParser()
    try:
        parser.parse(data)
    except ParseWarning as pw:
        # Warnings from the parser are non-fatal; report once and continue.
        log_once(pw.value, logger=log)
    # valid mirrors whether a movie name could actually be extracted.
    fields = dict(data=data,
                  name=parser.name,
                  year=parser.year,
                  quality=parser.quality,
                  proper_count=parser.proper_count,
                  valid=bool(parser.name))
    result = MovieParseResult(**fields)
    log.debug('Parsing result: %s (in %s ms)', parser, (preferred_clock() - start) * 1000)
    return result
def parse(self, data, type_=None, name=None, **kwargs):
    """Parse a release title as an episode or a movie.

    :param data: raw release title to parse
    :param type_: PARSER_EPISODE or PARSER_MOVIE (anything else is treated as movie)
    :param name: optional known series/movie name, forwarded to the parser
    :param kwargs: extra parser options; 'metainfo' enables a speculative
        series parse with fallback to movie parsing
    :return: the populated SeriesParser or MovieParser instance
    """
    internal_parser = None
    if name:
        kwargs['name'] = name
    # pop() with a default replaces the old try/except KeyError dance.
    metainfo = kwargs.pop('metainfo', False)
    if metainfo and type_ == PARSER_EPISODE:
        # Speculative series parse; on failure fall back to movie parsing.
        internal_parser = SeriesParser(**kwargs)
        internal_parser.assume_quality = types.MethodType(
            assume_quality_func, internal_parser)
        if self.parse_serie(internal_parser, data):
            return internal_parser
        else:
            # BUG FIX: original read `type_ == PARSER_MOVIE`, a no-op
            # comparison; the assignment is required for the fallback to
            # actually switch to movie parsing.
            type_ = PARSER_MOVIE
    if type_ == PARSER_EPISODE:
        internal_parser = SeriesParser(**kwargs)
    else:
        # Both the explicit movie case and any unknown type_ use MovieParser.
        internal_parser = MovieParser()
    internal_parser.assume_quality = types.MethodType(
        assume_quality_func, internal_parser)
    internal_parser.parse(data)
    return internal_parser
parser.data = cur_filename parser.parse log.debug(parser.id_type) if parser.id_type == 'ep': entry['series_id'] = ''.join([ 'S', str(parser.season).rjust(2, str('0')), 'E', str(parser.episode).rjust(2, str('0')) ]) elif parser.id_type == 'sequence': entry['series_id'] = parser.episode elif parser.id_type and parser.id: entry['series_id'] = parser.id else: from flexget.utils.titles.movie import MovieParser parser = MovieParser() parser.data = cur_filename parser.parse() log.info(parser) testname = parser.name testyear = parser.year parser.data = entry['title'] parser.parse() log.info(parser) if len(parser.name) > len(testname): entry['name'] = parser.name entry['movie_name'] = parser.name else: entry['name'] = testname entry['movie_name'] = testname if parser.year:
def on_task_filter(self, task, config):
    """Reject accepted entries whose imdb id was found among the configured
    library paths (each directory name is parsed and looked up on imdb)."""
    if not task.accepted:
        log.debug('nothing accepted, aborting')
        return
    config = self.build_config(config)
    imdb_lookup = plugin.get_plugin_by_name('imdb_lookup').instance
    # counters feeding the summary message at the end
    incompatible_dirs = 0
    incompatible_entries = 0
    count_entries = 0
    count_dirs = 0
    # list of imdb ids gathered from paths / cache
    imdb_ids = []
    for folder in config:
        folder = path(folder).expanduser()
        # see if this path has already been scanned
        if folder in self.cache:
            log.verbose('Using cached scan for %s ...' % folder)
            imdb_ids.extend(self.cache[folder])
            continue
        path_ids = []
        if not folder.isdir():
            log.critical('Path %s does not exist' % folder)
            continue
        log.verbose('Scanning path %s ...' % folder)
        # Help debugging by removing a lot of noise
        #logging.getLogger('movieparser').setLevel(logging.WARNING)
        #logging.getLogger('imdb_lookup').setLevel(logging.WARNING)
        # scan through
        # TODO: add also video files?
        for item in folder.walkdirs(errors='warn'):
            if item.name.lower() in self.skip:
                continue
            count_dirs += 1
            # Parse the directory name into a movie title, then resolve it
            # to an imdb id via the imdb_lookup plugin.
            movie = MovieParser()
            movie.parse(item.name)
            try:
                imdb_id = imdb_lookup.imdb_id_lookup(movie_title=movie.name,
                                                     raw_title=item.name,
                                                     session=task.session)
                if imdb_id in path_ids:
                    log.trace('duplicate %s' % item)
                    continue
                if imdb_id is not None:
                    log.trace('adding: %s' % imdb_id)
                    path_ids.append(imdb_id)
            except plugin.PluginError as e:
                # Lookup failures are counted but do not abort the scan.
                log.trace('%s lookup failed (%s)' % (item, e.value))
                incompatible_dirs += 1
        # store to cache and extend to found list
        self.cache[folder] = path_ids
        imdb_ids.extend(path_ids)
    log.debug('-- Start filtering entries ----------------------------------')
    # do actual filtering
    for entry in task.accepted:
        count_entries += 1
        if not entry.get('imdb_id', eval_lazy=False):
            try:
                imdb_lookup.lookup(entry)
            except plugin.PluginError as e:
                log.trace('entry %s imdb failed (%s)' % (entry['title'], e.value))
                incompatible_entries += 1
                continue
        # actual filtering
        if entry['imdb_id'] in imdb_ids:
            entry.reject('movie exists')
    if incompatible_dirs or incompatible_entries:
        log.verbose('There were some incompatible items. %s of %s entries '
                    'and %s of %s directories could not be verified.' %
                    (incompatible_entries, count_entries, incompatible_dirs, count_dirs))
    log.debug('-- Finished filtering entries -------------------------------')
def __init__(self):
    """Initialise per-sequence metadata and the underlying string matcher."""
    # Year/quality of each compared string; UNKNOWN until a sequence is set.
    self.a_year = None
    self.b_year = None
    self.a_quality = qualities.UNKNOWN
    self.b_quality = qualities.UNKNOWN
    self.parser = MovieParser()
    super(MovieComparator, self).__init__(cutoff=0.9)
def on_task_filter(self, task, config):
    """Reject accepted entries whose imdb id was found among the configured
    library paths (each sub-directory name is parsed and looked up on imdb)."""
    if not task.accepted:
        log.debug('nothing accepted, aborting')
        return
    config = self.build_config(config)
    imdb_lookup = get_plugin_by_name('imdb_lookup').instance
    # counters feeding the summary message at the end
    incompatible_dirs = 0
    incompatible_entries = 0
    count_entries = 0
    count_dirs = 0
    # list of imdb ids gathered from paths / cache
    imdb_ids = []
    for path in config:
        # see if this path has already been scanned
        if path in self.cache:
            log.verbose('Using cached scan for %s ...' % path)
            imdb_ids.extend(self.cache[path])
            continue
        path_ids = []
        # with unicode it crashes on some paths ..
        path = str(os.path.expanduser(path))
        if not os.path.exists(path):
            log.critical('Path %s does not exist' % path)
            continue
        log.verbose('Scanning path %s ...' % path)
        # Help debugging by removing a lot of noise
        #logging.getLogger('movieparser').setLevel(logging.WARNING)
        #logging.getLogger('imdb_lookup').setLevel(logging.WARNING)
        # scan through
        for root, dirs, files in os.walk(path):
            # convert filelists into utf-8 to avoid unicode problems
            dirs = [x.decode('utf-8', 'ignore') for x in dirs]
            # files = [x.decode('utf-8', 'ignore') for x in files]
            # TODO: add also video files?
            for item in dirs:
                if item.lower() in self.skip:
                    continue
                count_dirs += 1
                # Parse the directory name into a movie title, then resolve
                # it to an imdb id via the imdb_lookup plugin.
                movie = MovieParser()
                movie.parse(item)
                try:
                    imdb_id = imdb_lookup.imdb_id_lookup(
                        movie_title=movie.name, raw_title=item,
                        session=task.session)
                    if imdb_id in path_ids:
                        log.trace('duplicate %s' % item)
                        continue
                    if imdb_id is not None:
                        log.trace('adding: %s' % imdb_id)
                        path_ids.append(imdb_id)
                except PluginError as e:
                    # Lookup failures are counted but do not abort the scan.
                    log.trace('%s lookup failed (%s)' % (item, e.value))
                    incompatible_dirs += 1
        # store to cache and extend to found list
        self.cache[path] = path_ids
        imdb_ids.extend(path_ids)
    log.debug(
        '-- Start filtering entries ----------------------------------')
    # do actual filtering
    for entry in task.accepted:
        count_entries += 1
        if not entry.get('imdb_id', eval_lazy=False):
            try:
                imdb_lookup.lookup(entry)
            except PluginError as e:
                log.trace('entry %s imdb failed (%s)' % (entry['title'], e.value))
                incompatible_entries += 1
                continue
        # actual filtering
        if entry['imdb_id'] in imdb_ids:
            entry.reject('movie exists')
    if incompatible_dirs or incompatible_entries:
        log.verbose('There were some incompatible items. %s of %s entries '
                    'and %s of %s directories could not be verified.' %
                    (incompatible_entries, count_entries, incompatible_dirs, count_dirs))
    log.debug(
        '-- Finished filtering entries -------------------------------')
def on_feed_filter(self, feed, config):
    """Accept propers of previously downloaded movies, optionally only within
    a configured time window after the original download.

    NOTE(review): uses Python 2-only `except X, e` syntax; left untouched here.
    """
    log.debug('check for enforcing')
    # parse config
    if isinstance(config, bool):
        # configured a boolean false, disable plugin
        if not config:
            return
        # configured a boolean true, disable timeframe
        timeframe = None
    else:
        # parse time window, e.g. '2 weeks' -> timedelta(weeks=2)
        amount, unit = config.split(' ')
        log.debug('amount: %s unit: %s' % (repr(amount), repr(unit)))
        params = {unit: int(amount)}
        try:
            timeframe = timedelta(**params)
        except TypeError:
            # Unit was not a valid timedelta keyword.
            raise PluginError('Invalid time format', log)
    # throws DependencyError if not present aborting feed
    imdb_lookup = get_plugin_by_name('imdb_lookup').instance
    for entry in feed.entries:
        if 'imdb_id' not in entry:
            try:
                imdb_lookup.lookup(entry)
            except PluginError, pe:
                # Without an imdb id the entry cannot be checked; skip it.
                log_once(pe.value)
                continue
        parser = MovieParser()
        parser.data = entry['title']
        parser.parse()
        quality = parser.quality.name
        log.debug('quality: %s' % quality)
        log.debug('imdb_id: %s' % entry['imdb_id'])
        log.debug('current proper count: %s' % parser.proper_count)
        # Highest proper revision previously downloaded for this movie+quality.
        proper_movie = feed.session.query(ProperMovie).\
            filter(ProperMovie.imdb_id == entry['imdb_id']).\
            filter(ProperMovie.quality == quality).\
            order_by(desc(ProperMovie.proper_count)).first()
        if not proper_movie:
            log.debug('no previous download recorded for %s' % entry['imdb_id'])
            continue
        highest_proper_count = proper_movie.proper_count
        log.debug('highest_proper_count: %i' % highest_proper_count)
        accept_proper = False
        if parser.proper_count > highest_proper_count:
            log.debug('proper detected: %s ' % proper_movie)
            if timeframe is None:
                accept_proper = True
            else:
                # Only accept the proper while the window since the original
                # download has not elapsed.
                expires = proper_movie.added + timeframe
                log.debug('propers timeframe: %s' % timeframe)
                log.debug('added: %s' % proper_movie.added)
                log.debug('propers ignore after: %s' % str(expires))
                if datetime.now() < expires:
                    accept_proper = True
                else:
                    log.verbose('Proper `%s` has past it\'s expiration time' % entry['title'])
        if accept_proper:
            log.info('Accepting proper version previously downloaded movie `%s`' % entry['title'])
            # TODO: does this need to be called?
            # fire_event('forget', entry['imdb_url'])
            fire_event('forget', entry['imdb_id'])
            feed.accept(entry, 'proper version of previously downloaded movie')
def __init__(self):
    """Initialise per-sequence metadata and the underlying string matcher."""
    # Year/quality of each compared string; separate empty Quality()
    # instances serve as the 'unknown' sentinels.
    self.a_year = None
    self.b_year = None
    self.a_quality = qualities.Quality()
    self.b_quality = qualities.Quality()
    self.parser = MovieParser()
    super(MovieComparator, self).__init__(cutoff=0.9)
# Fold the trailing 4 characters (presumably a year — TODO confirm) into
# parentheses: 'Title 2010' -> 'Title (2010)'.
entry['series_name'] = ''.join([entry['series_name'][0:-4], '(',
                                entry['series_name'][-4:], ')'])
log.verbose(entry['series_name'])
parser.data = cur_filename
# BUG FIX: was `parser.parse` without parentheses — a bare attribute
# reference that never invoked parsing, leaving the parser unpopulated.
parser.parse()
log.debug(parser.id_type)
if parser.id_type == 'ep':
    entry['series_id'] = ''.join(['S', str(parser.season).rjust(2, str('0')),
                                  'E', str(parser.episode).rjust(2, str('0'))])
elif parser.id_type == 'sequence':
    entry['series_id'] = parser.episode
elif parser.id_type and parser.id:
    entry['series_id'] = parser.id
else:
    # Not recognised as a series: try parsing as a movie instead, using the
    # longer of the names parsed from the file and the entry title.
    from flexget.utils.titles.movie import MovieParser
    parser = MovieParser()
    parser.data = cur_file
    parser.parse()
    log.info(parser)
    testname = parser.name
    testyear = parser.year
    parser.data = entry['title']
    parser.parse()
    log.info(parser)
    if len(parser.name) > len(testname):
        entry['name'] = parser.name
        entry['movie_name'] = parser.name
    else:
        entry['name'] = testname
        entry['movie_name'] = testname
    entry['year'] = parser.year
def on_task_filter(self, task, config):
    """Accept propers of previously downloaded movies, optionally only within
    a configured time window after the original download."""
    log.debug('check for enforcing')
    # parse config
    if isinstance(config, bool):
        # configured a boolean false, disable plugin
        if not config:
            return
        # configured a boolean true, disable timeframe
        timeframe = None
    else:
        # parse time window
        log.debug('interval: %s' % config)
        try:
            timeframe = parse_timedelta(config)
        except ValueError:
            raise plugin.PluginError('Invalid time format', log)
    # throws DependencyError if not present aborting task
    imdb_lookup = plugin.get_plugin_by_name('imdb_lookup').instance
    for entry in task.entries:
        parser = MovieParser()
        parser.data = entry['title']
        parser.parse()
        # if we have imdb_id already evaluated
        if entry.get('imdb_id', None, eval_lazy=False) is None:
            try:
                # TODO: fix imdb_id_lookup, cumbersome that it returns None and or throws exception
                # Also it's crappy name!
                imdb_id = imdb_lookup.imdb_id_lookup(
                    movie_title=parser.name, raw_title=entry['title'])
                if imdb_id is None:
                    continue
                entry['imdb_id'] = imdb_id
            except plugin.PluginError as pe:
                # Without an imdb id the entry cannot be checked; skip it.
                log_once(pe.value)
                continue
        quality = parser.quality.name
        log.debug('quality: %s' % quality)
        log.debug('imdb_id: %s' % entry['imdb_id'])
        log.debug('current proper count: %s' % parser.proper_count)
        # Highest proper revision previously downloaded for this movie+quality.
        proper_movie = task.session.query(ProperMovie).\
            filter(ProperMovie.imdb_id == entry['imdb_id']).\
            filter(ProperMovie.quality == quality).\
            order_by(desc(ProperMovie.proper_count)).first()
        if not proper_movie:
            log.debug('no previous download recorded for %s' % entry['imdb_id'])
            continue
        highest_proper_count = proper_movie.proper_count
        log.debug('highest_proper_count: %i' % highest_proper_count)
        accept_proper = False
        if parser.proper_count > highest_proper_count:
            log.debug('proper detected: %s ' % proper_movie)
            if timeframe is None:
                accept_proper = True
            else:
                # Only accept the proper while the window since the original
                # download has not elapsed.
                expires = proper_movie.added + timeframe
                log.debug('propers timeframe: %s' % timeframe)
                log.debug('added: %s' % proper_movie.added)
                log.debug('propers ignore after: %s' % str(expires))
                if datetime.now() < expires:
                    accept_proper = True
                else:
                    log.verbose(
                        'Proper `%s` has past it\'s expiration time' % entry['title'])
        if accept_proper:
            log.info(
                'Accepting proper version previously downloaded movie `%s`' % entry['title'])
            # TODO: does this need to be called?
            # fire_event('forget', entry['imdb_url'])
            fire_event('forget', entry['imdb_id'])
            entry.accept('proper version of previously downloaded movie')
def on_feed_filter(self, feed, config):
    """Accept propers of previously downloaded movies, optionally only within
    a configured time window after the original download.

    NOTE(review): uses Python 2-only `except X, e` syntax; left untouched here.
    """
    log.debug('check for enforcing')
    # parse config
    if isinstance(config, bool):
        # configured a boolean false, disable plugin
        if not config:
            return
        # configured a boolean true, disable timeframe
        timeframe = None
    else:
        # parse time window
        log.debug('interval: %s' % config)
        try:
            timeframe = parse_timedelta(config)
        except ValueError:
            raise PluginError('Invalid time format', log)
    # throws DependencyError if not present aborting feed
    imdb_lookup = get_plugin_by_name('imdb_lookup').instance
    for entry in feed.entries:
        parser = MovieParser()
        parser.data = entry['title']
        parser.parse()
        # if we have imdb_id already evaluated
        if entry.get('imdb_id', None, eval_lazy=False) is None:
            try:
                # TODO: fix imdb_id_lookup, cumbersome that it returns None and or throws exception
                # Also it's crappy name!
                imdb_id = imdb_lookup.imdb_id_lookup(movie_title=parser.name,
                                                     raw_title=entry['title'])
                if imdb_id is None:
                    continue
                entry['imdb_id'] = imdb_id
            except PluginError, pe:
                # Without an imdb id the entry cannot be checked; skip it.
                log_once(pe.value)
                continue
        quality = parser.quality.name
        log.debug('quality: %s' % quality)
        log.debug('imdb_id: %s' % entry['imdb_id'])
        log.debug('current proper count: %s' % parser.proper_count)
        # Highest proper revision previously downloaded for this movie+quality.
        proper_movie = feed.session.query(ProperMovie).\
            filter(ProperMovie.imdb_id == entry['imdb_id']).\
            filter(ProperMovie.quality == quality).\
            order_by(desc(ProperMovie.proper_count)).first()
        if not proper_movie:
            log.debug('no previous download recorded for %s' % entry['imdb_id'])
            continue
        highest_proper_count = proper_movie.proper_count
        log.debug('highest_proper_count: %i' % highest_proper_count)
        accept_proper = False
        if parser.proper_count > highest_proper_count:
            log.debug('proper detected: %s ' % proper_movie)
            if timeframe is None:
                accept_proper = True
            else:
                # Only accept the proper while the window since the original
                # download has not elapsed.
                expires = proper_movie.added + timeframe
                log.debug('propers timeframe: %s' % timeframe)
                log.debug('added: %s' % proper_movie.added)
                log.debug('propers ignore after: %s' % str(expires))
                if datetime.now() < expires:
                    accept_proper = True
                else:
                    log.verbose('Proper `%s` has past it\'s expiration time' % entry['title'])
        if accept_proper:
            log.info('Accepting proper version previously downloaded movie `%s`' % entry['title'])
            # TODO: does this need to be called?
            # fire_event('forget', entry['imdb_url'])
            fire_event('forget', entry['imdb_id'])
            feed.accept(entry, 'proper version of previously downloaded movie')
def on_task_filter(self, task, config):
    """Scan configured library paths for existing movies (by imdb id).

    NOTE(review): this chunk ends mid-function (the entry-filtering half is
    not visible here); uses Python 2-only `except X, e` syntax.
    """
    if not task.accepted:
        log.debug('nothing accepted, aborting')
        return
    config = self.build_config(config)
    imdb_lookup = get_plugin_by_name('imdb_lookup').instance
    # counters for the (not visible here) summary message
    incompatible_dirs = 0
    incompatible_entries = 0
    count_entries = 0
    count_dirs = 0
    # list of imdb ids gathered from paths / cache
    imdb_ids = []
    for path in config:
        # see if this path has already been scanned
        if path in self.cache:
            log.verbose('Using cached scan for %s ...' % path)
            imdb_ids.extend(self.cache[path])
            continue
        path_ids = []
        # with unicode it crashes on some paths ..
        path = str(os.path.expanduser(path))
        if not os.path.exists(path):
            log.critical('Path %s does not exist' % path)
            continue
        log.verbose('Scanning path %s ...' % path)
        # Help debugging by removing a lot of noise
        #logging.getLogger('movieparser').setLevel(logging.WARNING)
        #logging.getLogger('imdb_lookup').setLevel(logging.WARNING)
        # scan through
        for root, dirs, files in os.walk(path):
            # convert filelists into utf-8 to avoid unicode problems
            dirs = [x.decode('utf-8', 'ignore') for x in dirs]
            # files = [x.decode('utf-8', 'ignore') for x in files]
            # TODO: add also video files?
            for item in dirs:
                if item.lower() in self.skip:
                    continue
                count_dirs += 1
                # Parse the directory name into a movie title, then resolve
                # it to an imdb id via the imdb_lookup plugin.
                movie = MovieParser()
                movie.parse(item)
                try:
                    imdb_id = imdb_lookup.imdb_id_lookup(movie_title=movie.name,
                                                         raw_title=item,
                                                         session=task.session)
                    if imdb_id in path_ids:
                        log.trace('duplicate %s' % item)
                        continue
                    if imdb_id is not None:
                        log.trace('adding: %s' % imdb_id)
                        path_ids.append(imdb_id)
                except PluginError, e:
                    # Lookup failures are counted but do not abort the scan.
                    log.trace('%s lookup failed (%s)' % (item, e.value))
                    incompatible_dirs += 1
        # store to cache and extend to found list
        self.cache[path] = path_ids
        imdb_ids.extend(path_ids)
def on_task_filter(self, task, config):
    """Reject accepted entries whose imdb id was found among the configured
    library paths (each directory name is parsed and looked up on imdb)."""
    if not task.accepted:
        log.debug('nothing accepted, aborting')
        return
    config = self.build_config(config)
    imdb_lookup = plugin.get_plugin_by_name('imdb_lookup').instance
    # counters feeding the summary message at the end
    incompatible_dirs = 0
    incompatible_entries = 0
    count_entries = 0
    count_dirs = 0
    # list of imdb ids gathered from paths / cache
    imdb_ids = []
    for folder in config:
        folder = path(folder).expanduser()
        # see if this path has already been scanned
        if folder in self.cache:
            log.verbose('Using cached scan for %s ...' % folder)
            imdb_ids.extend(self.cache[folder])
            continue
        path_ids = []
        if not folder.isdir():
            log.critical('Path %s does not exist' % folder)
            continue
        log.verbose('Scanning path %s ...' % folder)
        # Help debugging by removing a lot of noise
        #logging.getLogger('movieparser').setLevel(logging.WARNING)
        #logging.getLogger('imdb_lookup').setLevel(logging.WARNING)
        # scan through
        # TODO: add also video files?
        for item in folder.walkdirs(errors='warn'):
            if item.name.lower() in self.skip:
                continue
            count_dirs += 1
            # Parse the directory name into a movie title, then resolve it
            # to an imdb id via the imdb_lookup plugin.
            movie = MovieParser()
            movie.parse(item.name)
            try:
                imdb_id = imdb_lookup.imdb_id_lookup(
                    movie_title=movie.name, raw_title=item.name,
                    session=task.session)
                if imdb_id in path_ids:
                    log.trace('duplicate %s' % item)
                    continue
                if imdb_id is not None:
                    log.trace('adding: %s' % imdb_id)
                    path_ids.append(imdb_id)
            except plugin.PluginError as e:
                # Lookup failures are counted but do not abort the scan.
                log.trace('%s lookup failed (%s)' % (item, e.value))
                incompatible_dirs += 1
        # store to cache and extend to found list
        self.cache[folder] = path_ids
        imdb_ids.extend(path_ids)
    log.debug(
        '-- Start filtering entries ----------------------------------')
    # do actual filtering
    for entry in task.accepted:
        count_entries += 1
        if not entry.get('imdb_id', eval_lazy=False):
            try:
                imdb_lookup.lookup(entry)
            except plugin.PluginError as e:
                log.trace('entry %s imdb failed (%s)' % (entry['title'], e.value))
                incompatible_entries += 1
                continue
        # actual filtering
        if entry['imdb_id'] in imdb_ids:
            entry.reject('movie exists')
    if incompatible_dirs or incompatible_entries:
        log.verbose('There were some incompatible items. %s of %s entries '
                    'and %s of %s directories could not be verified.' %
                    (incompatible_entries, count_entries, incompatible_dirs, count_dirs))
    log.debug(
        '-- Finished filtering entries -------------------------------')
def on_task_filter(self, task, config):
    """Reject accepted entries whose imdb id was found among the configured
    library paths (each sub-directory name is parsed and looked up on imdb)."""
    if not task.accepted:
        log.debug("nothing accepted, aborting")
        return
    config = self.build_config(config)
    imdb_lookup = plugin.get_plugin_by_name("imdb_lookup").instance
    # counters feeding the summary message at the end
    incompatible_dirs = 0
    incompatible_entries = 0
    count_entries = 0
    count_dirs = 0
    # list of imdb ids gathered from paths / cache
    imdb_ids = []
    for path in config:
        # see if this path has already been scanned
        if path in self.cache:
            log.verbose("Using cached scan for %s ..." % path)
            imdb_ids.extend(self.cache[path])
            continue
        path_ids = []
        # with unicode it crashes on some paths ..
        path = str(os.path.expanduser(path))
        if not os.path.exists(path):
            log.critical("Path %s does not exist" % path)
            continue
        log.verbose("Scanning path %s ..." % path)
        # Help debugging by removing a lot of noise
        # logging.getLogger('movieparser').setLevel(logging.WARNING)
        # logging.getLogger('imdb_lookup').setLevel(logging.WARNING)
        # scan through (following symlinks)
        for root, dirs, files in os.walk(path, followlinks=True):
            # convert filelists into utf-8 to avoid unicode problems
            dirs = [x.decode("utf-8", "ignore") for x in dirs]
            # files = [x.decode('utf-8', 'ignore') for x in files]
            # TODO: add also video files?
            for item in dirs:
                if item.lower() in self.skip:
                    continue
                count_dirs += 1
                # Parse the directory name into a movie title, then resolve
                # it to an imdb id via the imdb_lookup plugin.
                movie = MovieParser()
                movie.parse(item)
                try:
                    imdb_id = imdb_lookup.imdb_id_lookup(
                        movie_title=movie.name, raw_title=item, session=task.session
                    )
                    if imdb_id in path_ids:
                        log.trace("duplicate %s" % item)
                        continue
                    if imdb_id is not None:
                        log.trace("adding: %s" % imdb_id)
                        path_ids.append(imdb_id)
                except plugin.PluginError as e:
                    # Lookup failures are counted but do not abort the scan.
                    log.trace("%s lookup failed (%s)" % (item, e.value))
                    incompatible_dirs += 1
        # store to cache and extend to found list
        self.cache[path] = path_ids
        imdb_ids.extend(path_ids)
    log.debug("-- Start filtering entries ----------------------------------")
    # do actual filtering
    for entry in task.accepted:
        count_entries += 1
        if not entry.get("imdb_id", eval_lazy=False):
            try:
                imdb_lookup.lookup(entry)
            except plugin.PluginError as e:
                log.trace("entry %s imdb failed (%s)" % (entry["title"], e.value))
                incompatible_entries += 1
                continue
        # actual filtering
        if entry["imdb_id"] in imdb_ids:
            entry.reject("movie exists")
    if incompatible_dirs or incompatible_entries:
        log.verbose(
            "There were some incompatible items. %s of %s entries "
            "and %s of %s directories could not be verified."
            % (incompatible_entries, count_entries, incompatible_dirs, count_dirs)
        )
    log.debug("-- Finished filtering entries -------------------------------")