def database_status(ctx):
    """
    Displays details on the current database store.

    Scans the 'cache/search' directory found under the configured work_dir
    for files ending in SQLITE_DATABASE_EXTENSION and prints one line per
    database found containing its filename, its size (as formatted by
    bytes_to_strsize()) and a flags column.  The 'W' flag is printed when
    the database filename matches the name of a watched group.

    ctx: a dictionary-like context; its 'NNTPSettings' entry must provide
         a `work_dir` attribute and a `session()` method.

    This function terminates the process by calling exit():
      - with status 1 if no database session could be acquired
      - with status 0 if the watched groups could not be queried (the
        database is presumed uninitialized) or if no databases were found
    """
    db_path = join(ctx['NNTPSettings'].work_dir, 'cache', 'search')
    logger.debug('Scanning %s for databases...' % db_path)
    with pushd(db_path, create_if_missing=True):
        results = find(
            db_path,
            suffix_filter=SQLITE_DATABASE_EXTENSION,
            fsinfo=True,
            max_depth=1,
        )

    # Use our Database first if it exists
    session = ctx['NNTPSettings'].session()
    if not session:
        logger.error('Could not acquire a database connection.')
        exit(1)

    # PEP8 E712 does not allow us to make a comparison to a boolean value
    # using the == instead of the keyword 'is'.  However SQLAlchemy
    # requires us to do just that because the == operator is what builds
    # the SQL expression.  So to get around the pep8 error, we just define
    # a variable equal to True and then we can compare to it
    pep8_e712 = True

    try:
        # Get a list of watched groups (group name -> group id)
        groups = dict(
            session.query(Group.name, Group.id)
            .filter(Group.watch == pep8_e712).all())

    except OperationalError:
        # The query could not be executed at all
        logger.warning('The database does not appear to be initialized.')
        logger.info('Try running: "nr db init" first.')
        exit(0)

    # A truthiness check covers both an empty dictionary and a None
    # response from find(); len(None) would otherwise raise a TypeError
    if not results:
        logger.info('There are no groups configured to be watched.')
        exit(0)

    # Only the file details are needed (not the path used as the key);
    # values() behaves the same under both Python 2 and Python 3 whereas
    # iteritems() only exists in Python 2
    for meta in results.values():
        flags = ''
        if meta['filename'] in groups:
            # This database belongs to a group being watched
            flags += 'W'

        print('%-65s %-10s %s' % (
            meta['filename'],
            bytes_to_strsize(meta['size']),
            flags,
        ))
def database_status(ctx):
    """
    Displays details on the current database store.

    Scans the 'cache/search' directory found under the configured work_dir
    for files ending in SQLITE_DATABASE_EXTENSION and prints one line per
    database found containing its filename, its size (as formatted by
    bytes_to_strsize()) and a flags column.  The 'W' flag is printed when
    the database filename matches the name of a watched group.

    ctx: a dictionary-like context; its 'NNTPSettings' entry must provide
         a `work_dir` attribute and a `session()` method.

    This function terminates the process by calling exit():
      - with status 1 if no database session could be acquired
      - with status 0 if the watched groups could not be queried (the
        database is presumed uninitialized) or if no databases were found
    """
    db_path = join(ctx['NNTPSettings'].work_dir, 'cache', 'search')
    logger.debug('Scanning %s for databases...' % db_path)
    with pushd(db_path, create_if_missing=True):
        results = find(
            db_path,
            suffix_filter=SQLITE_DATABASE_EXTENSION,
            fsinfo=True,
            max_depth=1,
        )

    # Use our Database first if it exists
    session = ctx['NNTPSettings'].session()
    if not session:
        logger.error('Could not acquire a database connection.')
        exit(1)

    # PEP8 E712 does not allow us to make a comparison to a boolean value
    # using the == instead of the keyword 'is'.  However SQLAlchemy
    # requires us to do just that because the == operator is what builds
    # the SQL expression.  So to get around the pep8 error, we just define
    # a variable equal to True and then we can compare to it
    pep8_e712 = True

    try:
        # Get a list of watched groups (group name -> group id)
        groups = dict(
            session.query(Group.name, Group.id)
            .filter(Group.watch == pep8_e712).all())

    except OperationalError:
        # The query could not be executed at all
        logger.warning('The database does not appear to be initialized.')
        logger.info('Try running: "nr db init" first.')
        exit(0)

    # A truthiness check covers both an empty dictionary and a None
    # response from find(); len(None) would otherwise raise a TypeError
    if not results:
        logger.info('There are no groups configured to be watched.')
        exit(0)

    # Only the file details are needed (not the path used as the key);
    # values() behaves the same under both Python 2 and Python 3 whereas
    # iteritems() only exists in Python 2
    for meta in results.values():
        flags = ''
        if meta['filename'] in groups:
            # This database belongs to a group being watched
            flags += 'W'

        print('%-65s %-10s %s' % (
            meta['filename'],
            bytes_to_strsize(meta['size']),
            flags,
        ))
def watch_dir(self, path, regex=None, prefix=None, suffix=None,
              ignore=None, case_sensitive=True, seconds=15):
    """Monitors a directory for files that have been added/changed

        path: is the path to monitor
        regex: passed to find() as its regex_filter
        prefix: passed to find() as its prefix_filter
        suffix: passed to find() as its suffix_filter
        ignore: is a sortedset of files already parsed; entries may be
                either the full path (as returned by this function) or
                just the basename of a file
        case_sensitive: passed to find() as is
        seconds: the minimum number of seconds that must separate a
                 file's 'created' and 'modified' timestamps before the
                 file is reported.
                 NOTE(review): this does not compare against the current
                 time; confirm that is the intended "untouched" check.

    Returns the ignore set with the full path of every newly reported
    file added to it.  A new sortedset is created when ignore is None.
    """
    if ignore is None:
        ignore = sortedset()

    findings = find(
        path, fsinfo=True,
        regex_filter=regex,
        prefix_filter=prefix,
        suffix_filter=suffix,
        case_sensitive=case_sensitive,
    )

    if not findings:
        # find() hands back None when it is given invalid arguments (such
        # as a regular expression that does not compile); there is nothing
        # new to report in that case or when nothing matched.
        return ignore

    # The set returned below is populated with full paths, so we must
    # test the full path here; otherwise feeding the returned set back in
    # on the next call would report the very same files again.  The
    # basename check is preserved for callers that track basenames.
    findings = [
        (p, f['size'], f['created'], f['modified'])
        for p, f in findings.items()
        if (f['modified'] - f['created']).total_seconds() >= seconds
        and p not in ignore
        and f['basename'] not in ignore
    ]

    # Sort list by modified date (the last entry of each tuple)
    findings.sort(key=lambda x: x[3])

    for f in findings:
        # Log the path itself rather than the whole tuple
        logger.info('Created %s (size=%s)' % (
            f[0], bytes_to_strsize(f[1]),
        ))
        # Add to our filter list
        ignore.add(f[0])

    # Return our ignore list (which is actually also a found list)
    return ignore
def test_find_depth(self):
    """
    Test the depth (min_depth/max_depth) part of the find function
    """
    # Temporary directory to work with
    work_dir = join(self.tmp_dir, 'Utils_Test.find', 'depth')

    # Create some depth to test within:
    #   /depth01.jpeg
    #   /level02/depth02.jpeg
    #   /level02/level03/depth03.jpeg
    #   /level02/level03/level04/depth04.jpeg
    #   ...
    work_dir_depth = work_dir
    assert self.touch(join(work_dir, 'depth01.jpeg')) is True
    for idx in range(2, 11):
        work_dir_depth = join(work_dir_depth, 'level%.2d' % idx)
        assert self.touch(
            join(work_dir_depth, 'depth%.2d.jpeg' % idx),
        ) is True

    # Just to give us a ballpark of the total files (and depth) we're
    # looking at here:
    results = find(
        work_dir,
        suffix_filter='.jpeg',
        case_sensitive=True,
    )
    assert isinstance(results, dict)
    assert len(results) == 10

    # Search only the first level
    results = find(
        work_dir,
        suffix_filter='.jpeg',
        max_depth=1,
        case_sensitive=True,
    )
    assert isinstance(results, dict)
    assert len(results) == 1
    # next(iter()) fetches the only key under both Python 2 and Python 3;
    # indexing the result of keys() only works in Python 2
    assert 'depth01.jpeg' == basename(next(iter(results)))

    # Search from the fifth level on
    results = find(
        work_dir,
        suffix_filter='.jpeg',
        min_depth=5,
        case_sensitive=True,
    )
    assert isinstance(results, dict)
    # Why 6? Because we're starting at (and including) the 5th level
    # level 5 = +1
    # level 6 = +1 (2)
    # level 7 = +1 (3)
    # level 8 = +1 (4)
    # level 9 = +1 (5)
    # level 10 = +1 (6)
    assert len(results) == 6

    # Double check that our files are in fact in relation to the depth
    # we expect them to be at:
    found = [basename(x) for x in results]
    for idx in range(5, 11):
        assert 'depth%.2d.jpeg' % idx in found

    # Search only the second level
    results = find(
        work_dir,
        suffix_filter='.jpeg',
        min_depth=2,
        max_depth=2,
        case_sensitive=True,
    )
    assert isinstance(results, dict)
    assert len(results) == 1
    assert 'depth02.jpeg' == basename(next(iter(results)))

    # Search the 3rd and 4th levels only
    results = find(
        work_dir,
        suffix_filter='.jpeg',
        min_depth=3,
        max_depth=4,
        case_sensitive=True,
    )
    assert isinstance(results, dict)
    assert len(results) == 2
    found = [basename(x) for x in results]
    assert 'depth03.jpeg' in found
    assert 'depth04.jpeg' in found

    # if min_depth > max_depth you'll get a None type
    assert find(
        work_dir,
        suffix_filter='.jpeg',
        min_depth=5,
        max_depth=4,
        case_sensitive=True,
    ) is None

    # Create some more depth levels to test that we scan all directories of
    # all levels when requested.
    #   /level02b/depth02.jpeg
    #   /level02b/level03b/depth03.jpeg
    #   /level02b/level03b/level04b/depth04.jpeg
    #   ...
    # This runs in parallel with the directories already created above
    work_dir_depth = work_dir
    for idx in range(2, 11):
        work_dir_depth = join(work_dir_depth, 'level%.2db' % idx)
        assert self.touch(
            join(work_dir_depth, 'depth%.2d.jpeg' % idx),
        ) is True

    # Just to give us a ballpark of the total files (and depth) we're
    # looking at here:
    results = find(
        work_dir,
        suffix_filter='.jpeg',
        case_sensitive=True,
    )
    assert isinstance(results, dict)
    # Not 20 (because no extra file was created on depth level 1)
    assert len(results) == 19

    # Search only the second level
    results = find(
        work_dir,
        suffix_filter='.jpeg',
        min_depth=2,
        max_depth=2,
        case_sensitive=True,
    )
    assert isinstance(results, dict)
    # there should be 2 now
    assert len(results) == 2
    for k in results:
        # 2 directories now each with the same filename
        assert 'depth02.jpeg' == basename(k)

    # Create a 'level12' and a 'level13' directory below the deepest
    # directory created above; but store nothing in 'level12'
    work_dir_12 = join(work_dir_depth, 'level%.2d' % 12)
    assert mkdir(work_dir_12) is True
    work_dir_13 = join(work_dir_12, 'level%.2d' % 13)
    assert self.touch(
        join(work_dir_13, 'depth%.2d.jpeg' % 13),
    ) is True

    # Search the 'level12' directory which contains no files
    # ('level13' does but we're explicitly not looking there)
    results = find(
        work_dir_12,
        min_depth=1,
        max_depth=1,
    )
    # even with no results we should get a dictionary response
    assert isinstance(results, dict)
    # there should be 0 now
    assert len(results) == 0
def test_find_regex(self):
    """
    Test the regex part of the find function
    """
    # Temporary directory to work with
    work_dir = join(self.tmp_dir, 'Utils_Test.find', 'regex')

    # Create 10 temporary files
    for idx in range(1, 11):
        assert self.touch(join(work_dir, 'file%.3d.mpg' % idx)) is True

    # Create 10 more temporary files
    for idx in range(1, 11):
        assert self.touch(
            join(work_dir, 'file%.3d-extra.mpeg' % idx),
        ) is True

    # Create some other random entries of close names (+4 files)
    assert self.touch(join(work_dir, 'File000.mpg')) is True
    assert self.touch(join(work_dir, 'File000-EXTRA.nfo')) is True
    assert self.touch(join(work_dir, 'unknown.MPEG')) is True
    assert self.touch(join(work_dir, 'README.txt')) is True

    # At this point we have our temporary directory filled with 24 files.

    # NOTE: every pattern below is written as a raw string so that the
    # backslash reaches the regex engine untouched; '\.' in a regular
    # string literal is an invalid escape sequence that newer Python
    # versions warn about.

    # Case insensitive results
    results = find(
        work_dir,
        regex_filter=r'.*\.mpe?g$',
        case_sensitive=False,
    )
    assert isinstance(results, dict)
    assert len(results) == 22

    # Case sensitive results won't pick up on unknown.MPEG
    results = find(
        work_dir,
        regex_filter=r'.*\.mpe?g$',
        case_sensitive=True,
    )
    assert isinstance(results, dict)
    assert len(results) == 21

    # You can also just compile the regular expression yourself and pass
    # that in if you'd rather
    _regex = re.compile(r'.*\.TXT', re.I)
    results = find(work_dir, regex_filter=_regex)
    assert isinstance(results, dict)
    # Case insensitive re.I was passed in, so we will match on README.txt
    assert len(results) == 1

    # Invalid regular expressions will always yield a None return value
    # and not a dictionary.
    assert find(work_dir, regex_filter='((((()') is None

    # You can chain multiple regular expressions together using
    # sets, lists and tuples; here is a list example
    results = find(
        work_dir,
        regex_filter=[
            r'.*\.mpe?g$',
            r'.*\.txt$',
        ],
        case_sensitive=True,
    )
    assert isinstance(results, dict)
    assert len(results) == 22

    # tuple example
    results = find(
        work_dir,
        regex_filter=(
            r'.*\.mpe?g$',
            r'.*\.txt$',
            r'^unknown.*',
        ),
        case_sensitive=True,
    )
    assert isinstance(results, dict)
    assert len(results) == 23

    # Finally, here is a set() example; an actual set is passed in so
    # that this container type is really exercised
    results = find(
        work_dir,
        regex_filter=set([
            r'.*\.mpe?g$',
            r'.*\.nfo$',
            r'.*\.txt$',
            r'^unknown.*',
        ]),
        case_sensitive=True,
    )
    assert isinstance(results, dict)
    assert len(results) == 24
def test_find_suffix(self):
    """
    Test the suffix part of the find function
    """
    # Temporary directory to work with
    work_dir = join(self.tmp_dir, 'Utils_Test.find', 'suffix')

    # Create 10 temporary files
    for idx in range(1, 11):
        assert self.touch(join(work_dir, 'file%.3d.mkv' % idx)) is True

    # Create 10 more temporary files
    for idx in range(1, 11):
        assert self.touch(
            join(work_dir, 'file%.3d-extra.mkv' % idx),
        ) is True

    # Create some other random entries of close names (+4 files)
    assert self.touch(join(work_dir, 'File000.mkv')) is True
    assert self.touch(join(work_dir, 'File000-EXTRA.nfo')) is True
    assert self.touch(join(work_dir, 'unknown.MKV')) is True
    assert self.touch(join(work_dir, 'README')) is True

    # At this point we have our temporary directory filled with 24 files.

    # Case insensitive results
    results = find(work_dir, suffix_filter='mkv', case_sensitive=False)
    assert isinstance(results, dict)
    assert len(results) == 22

    # Case sensitive results won't pick up on unknown.MKV
    results = find(work_dir, suffix_filter='mkv', case_sensitive=True)
    assert isinstance(results, dict)
    assert len(results) == 21

    # We can also pass in a tuple of suffixes which will cause us to hit
    # more matches ('ME' matches the tail end of README)
    results = find(
        work_dir,
        suffix_filter=('MKV', 'ME'),
        case_sensitive=True,
    )
    assert isinstance(results, dict)
    assert len(results) == 2

    # support list of suffixes
    results = find(
        work_dir,
        suffix_filter=['nfo', 'mkv', 'README'],
        case_sensitive=True,
    )
    assert isinstance(results, dict)
    assert len(results) == 23

    # support set of suffixes; an actual set is passed in so that this
    # container type is really exercised
    results = find(
        work_dir,
        suffix_filter=set(['nfo', 'mkv', 'MKV', 'README']),
        case_sensitive=True,
    )
    assert isinstance(results, dict)
    assert len(results) == 24
def test_unrar(self):
    """
    Tests the un-raring of content by encoding a set of files into a
    single rar archive and then decoding that archive again
    """
    # The folder our codec works in and the folder our test files are
    # generated in
    work_dir = join(self.tmp_dir, 'CodecRar_Test.rar.single', 'work')
    source_dir = join(self.tmp_dir, 'CodecRar_Test.rar.single', 'source')

    # The names of the dummy files we will archive
    expected_names = ['DSC_IMG%.3d.jpeg' % idx for idx in range(10)]

    # Initialize Codec
    codec = CodecRar(work_dir=work_dir)

    for name in expected_names:
        # Generate the file in our source directory ...
        source_file = join(source_dir, name)
        self.touch(source_file, size='100K', random=True)

        # ... and queue it for encoding
        codec.add(source_file)

    # Compress everything that was queued
    content = codec.encode()

    # All of our content should have landed in one single .rar file
    assert isinstance(content, sortedset)
    assert len(content) == 1

    # Extract what we just compressed
    decoded = codec.decode(content)
    assert isinstance(decoded, sortedset)
    assert len(decoded) == 1

    # NOTE: decoded[0] is intentionally never bound to a local variable;
    # a lingering reference would keep the object alive beyond the
    # `del decoded` performed at the end of this test.
    assert isinstance(decoded[0], NNTPBinaryContent)

    # Decoded content is always attached!
    assert decoded[0].is_attached() is True

    # The path is the directory holding the contents of every rar
    # provided (in the same hierarchy they were provided in); only one
    # rar was provided, so only one was opened
    decoded_path = decoded[0].path()
    assert isdir(decoded_path)

    # Extraction always occurs in a different directory
    assert decoded_path != source_dir

    # We expect to find the very same 10 fake images we created
    results = find(search_dir=decoded_path)
    assert len(results) == 10

    # Any entry will do for determining the directory the files were
    # extracted into
    extract_dir = dirname(next(iter(results)))
    for name in expected_names:
        assert join(extract_dir, name) in results

    # Destroying our decoded object should take its content along with it
    del decoded
    assert isdir(decoded_path) is False
def test_7z_uncompress(self):
    """
    Tests the uncompressing of content by encoding a set of files with
    Codec7Zip and then decoding the resulting archive again
    """
    # Generate temporary folder to work with; this shares its parent
    # directory with the source folder defined below (it was previously
    # named after the rar test this one was modelled on)
    work_dir = join(self.tmp_dir, 'Codec7Zip_Test.7z.single', 'work')

    # Initialize Codec
    cr = Codec7Zip(work_dir=work_dir)

    # Now we want to prepare a source folder
    source_dir = join(
        self.tmp_dir, 'Codec7Zip_Test.7z.single', 'source'
    )

    # create some dummy file entries
    for i in range(0, 10):
        # Create some temporary files to work with in our source
        # directory
        tmp_file = join(source_dir, 'DSC_IMG%.3d.jpeg' % i)
        self.touch(tmp_file, size='100K', random=True)

        # Add our file to the encoding process
        cr.add(tmp_file)

    # Now we want to compress this content
    content = cr.encode()

    # We should have successfully encoded our content into
    # one single archive
    assert isinstance(content, sortedset)
    assert len(content) == 1

    # Now we want to extract the content
    decoded = cr.decode(content)
    assert isinstance(decoded, sortedset)
    assert len(decoded) == 1

    # NOTE: decoded[0] is intentionally never bound to a local variable;
    # a lingering reference would keep the object alive beyond the
    # `del decoded` performed at the end of this test.
    assert isinstance(decoded[0], NNTPBinaryContent)

    # Decoded content is always attached!
    assert decoded[0].is_attached() is True
    decoded_path = decoded[0].path()

    # It's actually the directory containing the contents of all
    # the archives provided in the same hierarchy they were provided in;
    # since we only provided one archive, we only opened it
    assert isdir(decoded_path)

    # Extracted content always occurs in a different directory
    assert decoded_path != source_dir

    # In fact it should be the same 10 fake images we created
    results = find(search_dir=decoded_path)
    assert len(results) == 10

    # Just grab the first item from the list so we can get the
    # temporary path
    tmp_path = dirname(next(iter(results)))

    for i in range(0, len(results)):
        tmp_name = 'DSC_IMG%.3d.jpeg' % i
        assert join(tmp_path, tmp_name) in results

    # Now if we destroy our decoded object, we should also lose
    # its content
    del decoded
    assert isdir(decoded_path) is False
def test_find_depth(self):
    """
    Test the depth (min_depth/max_depth) part of the find function
    """
    # Temporary directory to work with
    work_dir = join(self.tmp_dir, 'Utils_Test.find', 'depth')

    # Create some depth to test within:
    #   /depth01.jpeg
    #   /level02/depth02.jpeg
    #   /level02/level03/depth03.jpeg
    #   /level02/level03/level04/depth04.jpeg
    #   ...
    work_dir_depth = work_dir
    assert self.touch(join(work_dir, 'depth01.jpeg')) is True
    for idx in range(2, 11):
        work_dir_depth = join(work_dir_depth, 'level%.2d' % idx)
        assert self.touch(
            join(work_dir_depth, 'depth%.2d.jpeg' % idx),
        ) is True

    # Just to give us a ballpark of the total files (and depth) we're
    # looking at here:
    results = find(
        work_dir,
        suffix_filter='.jpeg',
        case_sensitive=True,
    )
    assert isinstance(results, dict)
    assert len(results) == 10

    # Search only the first level
    results = find(
        work_dir,
        suffix_filter='.jpeg',
        max_depth=1,
        case_sensitive=True,
    )
    assert isinstance(results, dict)
    assert len(results) == 1
    # next(iter()) fetches the only key under both Python 2 and Python 3;
    # indexing the result of keys() only works in Python 2
    assert 'depth01.jpeg' == basename(next(iter(results)))

    # Search from the fifth level on
    results = find(
        work_dir,
        suffix_filter='.jpeg',
        min_depth=5,
        case_sensitive=True,
    )
    assert isinstance(results, dict)
    # Why 6? Because we're starting at (and including) the 5th level
    # level 5 = +1
    # level 6 = +1 (2)
    # level 7 = +1 (3)
    # level 8 = +1 (4)
    # level 9 = +1 (5)
    # level 10 = +1 (6)
    assert len(results) == 6

    # Double check that our files are in fact in relation to the depth
    # we expect them to be at:
    found = [basename(x) for x in results]
    for idx in range(5, 11):
        assert 'depth%.2d.jpeg' % idx in found

    # Search only the second level
    results = find(
        work_dir,
        suffix_filter='.jpeg',
        min_depth=2,
        max_depth=2,
        case_sensitive=True,
    )
    assert isinstance(results, dict)
    assert len(results) == 1
    assert 'depth02.jpeg' == basename(next(iter(results)))

    # Search the 3rd and 4th levels only
    results = find(
        work_dir,
        suffix_filter='.jpeg',
        min_depth=3,
        max_depth=4,
        case_sensitive=True,
    )
    assert isinstance(results, dict)
    assert len(results) == 2
    found = [basename(x) for x in results]
    assert 'depth03.jpeg' in found
    assert 'depth04.jpeg' in found

    # if min_depth > max_depth you'll get a None type
    assert find(
        work_dir,
        suffix_filter='.jpeg',
        min_depth=5,
        max_depth=4,
        case_sensitive=True,
    ) is None

    # Create some more depth levels to test that we scan all directories of
    # all levels when requested.
    #   /level02b/depth02.jpeg
    #   /level02b/level03b/depth03.jpeg
    #   /level02b/level03b/level04b/depth04.jpeg
    #   ...
    # This runs in parallel with the directories already created above
    work_dir_depth = work_dir
    for idx in range(2, 11):
        work_dir_depth = join(work_dir_depth, 'level%.2db' % idx)
        assert self.touch(
            join(work_dir_depth, 'depth%.2d.jpeg' % idx),
        ) is True

    # Just to give us a ballpark of the total files (and depth) we're
    # looking at here:
    results = find(
        work_dir,
        suffix_filter='.jpeg',
        case_sensitive=True,
    )
    assert isinstance(results, dict)
    # Not 20 (because no extra file was created on depth level 1)
    assert len(results) == 19

    # Search only the second level
    results = find(
        work_dir,
        suffix_filter='.jpeg',
        min_depth=2,
        max_depth=2,
        case_sensitive=True,
    )
    assert isinstance(results, dict)
    # there should be 2 now
    assert len(results) == 2
    for k in results:
        # 2 directories now each with the same filename
        assert 'depth02.jpeg' == basename(k)

    # Create a 'level12' and a 'level13' directory below the deepest
    # directory created above; but store nothing in 'level12'
    work_dir_12 = join(work_dir_depth, 'level%.2d' % 12)
    assert mkdir(work_dir_12) is True
    work_dir_13 = join(work_dir_12, 'level%.2d' % 13)
    assert self.touch(
        join(work_dir_13, 'depth%.2d.jpeg' % 13),
    ) is True

    # Search the 'level12' directory which contains no files
    # ('level13' does but we're explicitly not looking there)
    results = find(
        work_dir_12,
        min_depth=1,
        max_depth=1,
    )
    # even with no results we should get a dictionary response
    assert isinstance(results, dict)
    # there should be 0 now
    assert len(results) == 0
def test_find_regex(self):
    """
    Test the regex part of the find function
    """
    # Temporary directory to work with
    work_dir = join(self.tmp_dir, 'Utils_Test.find', 'regex')

    # Create 10 temporary files
    for idx in range(1, 11):
        assert self.touch(join(work_dir, 'file%.3d.mpg' % idx)) is True

    # Create 10 more temporary files
    for idx in range(1, 11):
        assert self.touch(
            join(work_dir, 'file%.3d-extra.mpeg' % idx),
        ) is True

    # Create some other random entries of close names (+4 files)
    assert self.touch(join(work_dir, 'File000.mpg')) is True
    assert self.touch(join(work_dir, 'File000-EXTRA.nfo')) is True
    assert self.touch(join(work_dir, 'unknown.MPEG')) is True
    assert self.touch(join(work_dir, 'README.txt')) is True

    # At this point we have our temporary directory filled with 24 files.

    # NOTE: every pattern below is written as a raw string so that the
    # backslash reaches the regex engine untouched; '\.' in a regular
    # string literal is an invalid escape sequence that newer Python
    # versions warn about.

    # Case insensitive results
    results = find(
        work_dir,
        regex_filter=r'.*\.mpe?g$',
        case_sensitive=False,
    )
    assert isinstance(results, dict)
    assert len(results) == 22

    # Case sensitive results won't pick up on unknown.MPEG
    results = find(
        work_dir,
        regex_filter=r'.*\.mpe?g$',
        case_sensitive=True,
    )
    assert isinstance(results, dict)
    assert len(results) == 21

    # You can also just compile the regular expression yourself and pass
    # that in if you'd rather
    _regex = re.compile(r'.*\.TXT', re.I)
    results = find(work_dir, regex_filter=_regex)
    assert isinstance(results, dict)
    # Case insensitive re.I was passed in, so we will match on README.txt
    assert len(results) == 1

    # Invalid regular expressions will always yield a None return value
    # and not a dictionary.
    assert find(work_dir, regex_filter='((((()') is None

    # You can chain multiple regular expressions together using
    # sets, lists and tuples; here is a list example
    results = find(
        work_dir,
        regex_filter=[
            r'.*\.mpe?g$',
            r'.*\.txt$',
        ],
        case_sensitive=True,
    )
    assert isinstance(results, dict)
    assert len(results) == 22

    # tuple example
    results = find(
        work_dir,
        regex_filter=(
            r'.*\.mpe?g$',
            r'.*\.txt$',
            r'^unknown.*',
        ),
        case_sensitive=True,
    )
    assert isinstance(results, dict)
    assert len(results) == 23

    # Finally, here is a set() example; an actual set is passed in so
    # that this container type is really exercised
    results = find(
        work_dir,
        regex_filter=set([
            r'.*\.mpe?g$',
            r'.*\.nfo$',
            r'.*\.txt$',
            r'^unknown.*',
        ]),
        case_sensitive=True,
    )
    assert isinstance(results, dict)
    assert len(results) == 24
def test_find_suffix(self):
    """
    Test the suffix part of the find function
    """
    # Temporary directory to work with
    work_dir = join(self.tmp_dir, 'Utils_Test.find', 'suffix')

    # Create 10 temporary files
    for idx in range(1, 11):
        assert self.touch(join(work_dir, 'file%.3d.mkv' % idx)) is True

    # Create 10 more temporary files
    for idx in range(1, 11):
        assert self.touch(
            join(work_dir, 'file%.3d-extra.mkv' % idx),
        ) is True

    # Create some other random entries of close names (+4 files)
    assert self.touch(join(work_dir, 'File000.mkv')) is True
    assert self.touch(join(work_dir, 'File000-EXTRA.nfo')) is True
    assert self.touch(join(work_dir, 'unknown.MKV')) is True
    assert self.touch(join(work_dir, 'README')) is True

    # At this point we have our temporary directory filled with 24 files.

    # Case insensitive results
    results = find(work_dir, suffix_filter='mkv', case_sensitive=False)
    assert isinstance(results, dict)
    assert len(results) == 22

    # Case sensitive results won't pick up on unknown.MKV
    results = find(work_dir, suffix_filter='mkv', case_sensitive=True)
    assert isinstance(results, dict)
    assert len(results) == 21

    # We can also pass in a tuple of suffixes which will cause us to hit
    # more matches ('ME' matches the tail end of README)
    results = find(
        work_dir,
        suffix_filter=('MKV', 'ME'),
        case_sensitive=True,
    )
    assert isinstance(results, dict)
    assert len(results) == 2

    # support list of suffixes
    results = find(
        work_dir,
        suffix_filter=['nfo', 'mkv', 'README'],
        case_sensitive=True,
    )
    assert isinstance(results, dict)
    assert len(results) == 23

    # support set of suffixes; an actual set is passed in so that this
    # container type is really exercised
    results = find(
        work_dir,
        suffix_filter=set(['nfo', 'mkv', 'MKV', 'README']),
        case_sensitive=True,
    )
    assert isinstance(results, dict)
    assert len(results) == 24