def on_snapopen_action(self):
    """Show the snap-open window rooted at the best available directory.

    Root preference order: the filebrowser plugin's root, then the EDDT
    plugin's root, then whatever ``self._rootdir`` already holds (cwd).
    The window title reflects which integration supplied the root.
    """
    fbroot = self.get_filebrowser_root()
    if fbroot:  # covers both None and "" (original checked them separately)
        self._rootdir = fbroot
        self._snapopen_window.set_title("Snap open (Filebrowser integration)")
    else:
        eddtroot = self.get_eddt_root()
        if eddtroot:
            self._rootdir = eddtroot
            self._snapopen_window.set_title("Snap open (EDDT integration)")
        else:
            self._snapopen_window.set_title("Snap open (cwd): " + self._rootdir)

    self._snapopen_window.show()
    self._glade_entry_name.select_region(0, -1)
    self._glade_entry_name.grab_focus()

    # clears the place holder list for all the files
    # (stray trailing semicolon removed)
    self._filelist.clear()

    def callback(filename):
        name = os.path.basename(filename)
        self._filelist.append([name, filename])

    # fetches the data from the current root
    finder = FileFinder(self._rootdir)
    finder.start(callback)
def on_pattern_entry(self, widget, event):
    """Key handler for the pattern entry: refilter the hit list.

    Up/Down fall through so the list widget can navigate; Return opens
    the current selection; any other key re-runs the search with the
    current text as a ``*pattern*`` glob.
    """
    oldtitle = self._snapopen_window.get_title().replace(" * too many hits", "")
    if event.keyval in (gtk.keysyms.Up, gtk.keysyms.Down):
        return
    if event.keyval == gtk.keysyms.Return:
        self.open_selected_item(event)
        return
    pattern = self._glade_entry_name.get_text()
    # TODO: respect self._show_hidden
    # TODO: respect max_result
    self._liststore.clear()
    if len(pattern) > 0:
        def callback(filename):
            name = os.path.basename(filename)
            self._liststore.append([name, filename])
        self._snapopen_window.set_title("Searching ... ")
        finder = FileFinder(self._rootdir, "*" + pattern + "*")
        finder.start(callback)
    else:
        self._snapopen_window.set_title("Enter pattern ... ")
    # NOTE(review): this immediately overwrites the status title set just
    # above — confirm whether the transient titles are ever visible.
    self._snapopen_window.set_title(oldtitle)

    # Auto-select the first row when nothing is selected yet.
    selected = []
    self._hit_list.get_selection().selected_foreach(self.foreach, selected)
    if not selected:
        # renamed from `iter` to avoid shadowing the builtin
        first_iter = self._liststore.get_iter_first()
        if first_iter is not None:
            self._hit_list.get_selection().select_iter(first_iter)
def test_video_finder_finds_right_files(self, monkeypatch):
    """FileFinder must report exactly the video files seen during the walk."""
    walk_entries = [
        ('/Users/admin/downloads', [], []),
        ('/Users/admin/downloads/fooDir', [], ['bar.txt', 'fooBar.avi']),
        ('/Users/admin/downloads/binDir', [], ['boo.wmv', 'big.txt']),
        ('/Users/admin/downloads/tooDir', [], ['Big.mp4', 'another.txt']),
        ('/Users/admin/downloads/somDir', [], ['som.mkv', 'som.txt', 'som.srt']),
        ('/Users/admin/downloads/pinDir', ['Sample'], ['pin1.mkv'] * 8),
    ]

    def mock_directory(walk_directory):
        yield from walk_entries

    monkeypatch.setattr(os.path, 'isdir', lambda x: True)
    monkeypatch.setattr(DirectoryWalker, 'walk_directory', mock_directory)

    video_finder = FileFinder().find_all_files_in_directory("directory")
    video_files = video_finder.get_files_of_specific_types(self.vid_ext)

    # 1 avi + 1 wmv + 1 mp4 + 1 mkv + 8 duplicated mkv names = 12
    assert len(video_files) == 12
def on_pattern_entry(self, widget, event):
    """Key handler for the pattern entry: refilter the hit list.

    Up/Down fall through so the list widget can navigate; Return opens
    the current selection; any other key re-runs the search with the
    current text as a ``*pattern*`` glob.
    """
    oldtitle = self._snapopen_window.get_title().replace(" * too many hits", "")
    if event.keyval in (gtk.keysyms.Up, gtk.keysyms.Down):
        return
    if event.keyval == gtk.keysyms.Return:
        self.open_selected_item(event)
        return
    pattern = self._glade_entry_name.get_text()
    # TODO: respect self._show_hidden
    # TODO: respect max_result
    self._liststore.clear()
    if len(pattern) > 0:
        def callback(filename):
            name = os.path.basename(filename)
            self._liststore.append([name, filename])
        self._snapopen_window.set_title("Searching ... ")
        finder = FileFinder(self._rootdir, "*" + pattern + "*")
        finder.start(callback)
    else:
        self._snapopen_window.set_title("Enter pattern ... ")
    # NOTE(review): this immediately overwrites the status title set just
    # above — confirm whether the transient titles are ever visible.
    self._snapopen_window.set_title(oldtitle)

    # Auto-select the first row when nothing is selected yet.
    selected = []
    self._hit_list.get_selection().selected_foreach(self.foreach, selected)
    if not selected:
        # renamed from `iter` to avoid shadowing the builtin
        first_iter = self._liststore.get_iter_first()
        if first_iter is not None:
            self._hit_list.get_selection().select_iter(first_iter)
class TestFileFinder(unittest.TestCase):
    """Tests for FileFinder's size-indexed file cache and lookups."""

    def setUp(self):
        self.file_finder = FileFinder()

    # TODO add more testcases to FileFinder
    # TODO start to use generate_random_binary for file contents

    @tempdir()
    def test_cache_files_by_size(self, temp_dir):
        for name, payload in (('a/b/c', b'1234'),
                              ('a/b/d', b'12345'),
                              ('a/b/e', b'123456')):
            temp_dir.write(name, payload)

        self.file_finder.cache_files_by_size([temp_dir.path])

        base = temp_dir.path
        expected = {
            4: [os.path.join(base, 'a', 'b', 'c')],
            5: [os.path.join(base, 'a', 'b', 'd')],
            6: [os.path.join(base, 'a', 'b', 'e')],
        }
        self.assertDictEqual(expected, self.file_finder.files_by_size)

    @tempdir()
    def test_cache_files_by_size_differentiates_two_same_named_subfolders(
            self, temp_dir):
        # Same leaf name under two different parents must stay distinct.
        temp_dir.write('a/b1/f1', b'12345')
        temp_dir.write('b/b1/f1', b'123456789')

        self.file_finder.cache_files_by_size([temp_dir.path])

        base = temp_dir.path
        self.assertDictEqual(
            {
                5: [os.path.join(base, 'a', 'b1', 'f1')],
                9: [os.path.join(base, 'b', 'b1', 'f1')],
            },
            self.file_finder.files_by_size)

    @tempdir()
    def test_find_candidate_files_from_cache(self, temp_dir):
        temp_dir.write('a/a1/a.a1.f1', b'1234')
        temp_dir.write('b/b1/b.b1.f1', b'1234567')
        temp_dir.write('c/c1/c.c1.f1', b'123456789')
        temp_dir.write('c/c1/c.c1.f2', b'123456789')

        self.file_finder.cache_files_by_size([temp_dir.path])

        base = temp_dir.path
        find = self.file_finder.find_candidate_files_matching_size_from_cache
        self.assertListEqual(
            [os.path.join(base, 'a', 'a1', 'a.a1.f1')], find(4))
        self.assertListEqual(
            [os.path.join(base, 'b', 'b1', 'b.b1.f1')], find(7))
        # Two files of equal size are both candidates.
        self.assertListEqual(
            [os.path.join(base, 'c', 'c1', 'c.c1.f1'),
             os.path.join(base, 'c', 'c1', 'c.c1.f2')],
            find(9))
def test_listing_directory_with_only_files_and_with_filter(self):
    """Only files matching the glob are reported when a filter is given."""
    for name in ("file1", "file2.txt", "file3.txt", "file4"):
        self.touch(name)
    FileFinder(self.tempdir, '*.txt').start(self.callback)
    self.assertEqual(self.filelist, {'file2.txt', 'file3.txt'})
def test_cyclic_symlinks_are_not_followed(self):
    """A directory symlink loop must not cause infinite recursion."""
    self.touch("subdir/file.txt")
    self.touch("foo/test.py")
    loop_link = os.path.join(self.tempdir, "subdir", "subdir")
    os.symlink("../subdir", loop_link)
    FileFinder(self.tempdir).start(self.callback)
    self.assertEqual(self.filelist, {'subdir/file.txt', 'foo/test.py'})
def test_listing_file_symlinks(self):
    """Symlinks to plain files are listed alongside their targets."""
    self.touch("subdir/file1.txt")
    link_path = os.path.join(self.tempdir, "subdir", "file2.txt")
    os.symlink("file1.txt", link_path)
    FileFinder(self.tempdir).start(self.callback)
    self.assertEqual(self.filelist, {'subdir/file1.txt', 'subdir/file2.txt'})
def Main(self, forceSearchFiles):
    """Run the daemon's main loop until interrupted.

    Optionally triggers an initial file scan, wires up filesystem
    watching (when watchdog is available) and the RPC listener, then
    blocks on a condition variable and processes work in _LoopInside()
    each time it is signalled.  Exits on KeyboardInterrupt and waits
    for outstanding move threads before returning.
    """
    self.__logger.debug("Starting main daemon operations...")
    if forceSearchFiles:
        # Constructed for its side effects only — the instance is discarded.
        FileFinder(self, self.__serverConfig.Folders,
                   self.__serverConfig.FileExtensions)
    if watchdogPresent:
        # Recursively watch every configured folder with this object as
        # the event handler.
        for folder in self.__serverConfig.Folders:
            self.__logger.debug("watching folder: %s", folder)
            self.__observer.schedule(self, folder, True)
        self.__observer.start()
    self.__RPCDispatcher = RPCClientListener(self, self.__serverConfig.Port)
    # NOTE(review): the condition lock is acquired here and apparently
    # never released on exit — confirm that is intentional for shutdown.
    self.__cond.acquire()
    while True:
        try:
            # Sleep until some producer notifies the condition, then
            # handle whatever work has accumulated.
            self.__cond.wait()
            self._LoopInside()
        except KeyboardInterrupt:
            break
        except Exception as e:
            # Log and keep the daemon alive on unexpected errors.
            self.__logger.exception("Error: %s" % e)
    self.__logger.debug(
        "Shutting down. Waiting running threads to finish.")
    for moveThread in self.__fileMoveThreads:
        moveThread.join()
def test_read_permissions_are_ignored(self):
    """An unreadable subdirectory is silently skipped, not an error."""
    self.touch("subdir/file.txt")
    subdir = os.path.join(self.tempdir, "subdir")
    os.chmod(subdir, 0)  # remove all permissions
    try:
        FileFinder(self.tempdir).start(self.callback)
        self.assertEqual(self.filelist, set())
    finally:
        # Restore owner permissions so teardown can remove the tree.
        os.chmod(subdir, stat.S_IRWXU)
def test_listing_files_in_subdirectories_and_with_filter(self):
    """The filter applies recursively across subdirectories."""
    for relpath in ("hello/world.txt", "foo/bar.txt",
                    "foo/baz.txt", "foo/banana.jpg"):
        self.touch(relpath)
    FileFinder(self.tempdir, '*.txt').start(self.callback)
    expected = {'hello/world.txt', 'foo/bar.txt', 'foo/baz.txt'}
    self.assertEqual(self.filelist, expected)
def test_correct_relative_paths_are_passed_to_callbacks(self):
    """A symlink is reported by its own relative name, not its target's."""
    outside_dir = tempfile.mkdtemp()
    try:
        target = os.path.join(outside_dir, "file.txt")
        open(target, 'w').close()
        os.symlink(target, os.path.join(self.tempdir, "symlink.txt"))
        FileFinder(self.tempdir).start(self.callback)
        # symlink.txt — not the target's name file.txt
        self.assertEqual(self.filelist, {'symlink.txt'})
    finally:
        shutil.rmtree(outside_dir)
def test_ignore_file(self):
    """Names listed in a .snapopen_ignore file are pruned from results."""
    for relpath in ("README.TXT",
                    "ruby/gc.c",
                    "ruby/rdoc/index.html",
                    "ruby/rdoc/methods.html",
                    "ruby/rdoc/classes/String.html",
                    "ruby/rdoc/classes/Array.html",
                    "ruby/rdoc/classes/File/Stat.html"):
        self.touch(relpath)
    ignore_path = os.path.join(self.tempdir, "ruby", "rdoc",
                               ".snapopen_ignore")
    with open(ignore_path, 'w') as ignore_file:
        ignore_file.write("index.html\n")
        ignore_file.write("classes\n")
    FileFinder(self.tempdir).start(self.callback)
    self.assertEqual(
        self.filelist,
        {'README.TXT', 'ruby/gc.c', 'ruby/rdoc/methods.html'})
def main():
    """Entry point: parse CLI args, find torrent files, run recovery."""
    arg_parser = TorrentRecovery.setup_parser()
    args_dict = TorrentRecovery.create_args_dict(arg_parser)
    # create_args_dict drops falsy values, so key presence means the flag
    # was set; a conditional expression returning True/False is redundant.
    verbose = 'verbose' in args_dict
    logger = TorrentRecovery.init_logger(verbose)

    media_dirs = args_dict['media_dirs']
    torrentfiles_dir = args_dict['torrent_files_dir']
    destination_dir = args_dict['destination_dir']

    torrentfiles_list = FileFinder.find_torrent_files(torrentfiles_dir)
    logger.info('Found %d torrent files in %s', len(torrentfiles_list),
                torrentfiles_dir)
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug('torrentfiles: %s', pprint.pformat(torrentfiles_list))

    torrent_provider = DefaultTorrentDataProvider(torrentfiles_list)
    recovery = TorrentRecovery(media_dirs, destination_dir, torrent_provider)
    recovery.start()
class TorrentRecovery:
    """Re-associates on-disk media files with their torrent metadata.

    Caches candidate files by size, then validates each torrent's pieces
    (20-byte SHA1 hashes) against data produced by a Generator.
    """

    def __init__(self, media_dirs, dest_dir, torrent_data_provider):
        """Store configuration and create the size-cache FileFinder."""
        self.log = logging.getLogger('torrent_recovery')
        self.media_dirs = media_dirs
        self.dest_dir = dest_dir
        self.torrent_data_provider = torrent_data_provider
        self.fileFinder = FileFinder()

    def start(self):
        """Cache files by size, then process every provided torrent."""
        self.log.info('Starting to cache files by size....')
        self.fileFinder.cache_files_by_size(self.media_dirs)
        for idx, info in enumerate(self.torrent_data_provider.generator()):
            # NOTE(review): `file` is not defined in this scope (it is the
            # Python 2 builtin) — this probably should log info['name'];
            # confirm against the provider's API.
            self.log.info('processing %d out of %d, file: %s', idx + 1,
                          self.torrent_data_provider.get_file_count(), file)
            self.log.info('===============================================')
            self.process_torrent(info)
            self.log.info('===============================================')

    def process_torrent(self, info):
        """Validate one torrent's pieces against candidate files on disk.

        Walks the pieces produced by the Generator, comparing each SHA1
        digest with the expected hash stream from the metainfo, seeking
        past the hashes of skipped (unwanted) files.
        """
        if self.log.isEnabledFor(logging.DEBUG):
            self.log.debug('metainfo: %s', pprint.pformat(info))
        self.generator = Generator(info, self.fileFinder, self.media_dirs,
                                   self.dest_dir)
        # info['pieces'] is the concatenation of 20-byte SHA1 piece hashes.
        pieces = StringIO.StringIO(info['pieces'])
        # Iterate through pieces
        last_file_pos = 0
        for piece in self.generator.pieces_generator():
            if self.generator.torrent_corrupted:
                self.log.warning('torrent corrupted: %s', info['name'])
                break
            # Compare piece hash with expected hash
            piece_hash = hashlib.sha1(piece).digest()
            # seek the offset (skip unwanted files)
            if self.generator.new_candidate:
                #save the actual position of pieces corresponding to the
                #0th byte of any relevant file
                last_file_pos = pieces.tell()
                # Each skipped piece accounts for 20 bytes of hash data.
                pieces.seek(
                    self.generator.get_last_number_of_skipped_pieces() * 20,
                    os.SEEK_CUR)
            if piece_hash != pieces.read(20):
                # Mismatch: mark corruption and rewind to the last known
                # good file boundary.
                self.generator.corruption()
                pieces.seek(last_file_pos)
        # ensure we've read all pieces
        if pieces.read():
            self.generator.corruption()

    @staticmethod
    def setup_parser():
        """Build the argparse parser for the recovery CLI."""
        parser = argparse.ArgumentParser()
        parser.add_argument('-v', '--verbose', action='store_true',
                            dest='verbose', default=None, required=False,
                            help='more verbose log')
        parser.add_argument('--media_dirs', type=check_file, required=True,
                            nargs='+',
                            help='list of dirs where media is')
        parser.add_argument('--torrent_files_dir', type=check_file,
                            required=True,
                            help='a dir where torrentfiles are')
        parser.add_argument('--destination_dir', type=check_file,
                            required=True,
                            help='a dir where the moved data will reside')
        return parser

    @staticmethod
    def create_args_dict(arg_parser):
        """Parse CLI args and return them as a dict without falsy values."""
        args = arg_parser.parse_args()
        log = logging.getLogger('torrent_recovery')
        log.debug(args)
        argsDict = vars(args)
        # #deletes null keys
        argsDict = dict((k, v) for k, v in argsDict.items() if v)
        if log.isEnabledFor(logging.DEBUG):
            log.info("args dict: %s", pprint.pformat(argsDict))
        return argsDict

    @staticmethod
    def init_logger(verbose):
        """Configure the 'torrent_recovery' logger with file + console output.

        The file handler always logs DEBUG; the console handler logs INFO,
        or DEBUG when *verbose* is truthy.
        """
        # TODO use dictConfig instead
        #https://docs.python.org/2/library/logging.config.html#logging-config-api
        #FORMAT = '%(asctime)-15s %(message)s'
        #logging.basicConfig(format=FORMAT, level=logging.DEBUG)
        logger = logging.getLogger('torrent_recovery')
        logger.setLevel(logging.DEBUG)
        # create file handler which logs even debug messages
        fh = logging.FileHandler('torrent_recovery.log')
        fh.setLevel(logging.DEBUG)
        # create console handler with a higher log level
        ch = logging.StreamHandler()
        ch.setLevel(logging.INFO)
        if verbose:
            ch.setLevel(logging.DEBUG)
        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        fh.setFormatter(formatter)
        ch.setFormatter(formatter)
        # add the handlers to the logger
        logger.addHandler(fh)
        logger.addHandler(ch)
        return logger
def test_video_finder_asked_to_search_nonexistent_dir(self, monkeypatch):
    """A path that is not a directory raises NotADirectoryError."""
    monkeypatch.setattr(os.path, 'isdir', lambda _path: False)
    finder = FileFinder()
    with pytest.raises(NotADirectoryError):
        finder.find_all_files_in_directory("fake_directory")
def test_listing_empty_directory(self):
    """An empty root directory produces no callbacks."""
    FileFinder(self.tempdir).start(self.callback)
    self.assertEqual(set(), self.filelist)
def test_listing_directory_with_only_files(self):
    """Every plain file in the root is reported when no filter is given."""
    for name in ("file1", "file2"):
        self.touch(name)
    FileFinder(self.tempdir).start(self.callback)
    self.assertEqual(self.filelist, {'file1', 'file2'})
def setUp(self):
    # Fresh FileFinder per test so cached state never leaks between tests.
    self.file_finder = FileFinder()
def setUp(self):
    """Create the (path, length) record type and a fresh finder per test."""
    self.PathAndLength = namedtuple('PathAndLength', ['path', 'length'])
    self.file_finder = FileFinder()
def test_filter_matches_full_relative_path(self):
    """The glob matches against the root-relative path, not the basename."""
    self.touch("foo/bar.txt")
    self.touch("unrelated/hmpf.txt")
    FileFinder(self.tempdir, 'foo/*.txt').start(self.callback)
    self.assertEqual({'foo/bar.txt'}, self.filelist)
def __init__(self, media_dirs, dest_dir, torrent_data_provider):
    """Store recovery configuration and create the size-cache FileFinder."""
    self.log = logging.getLogger('torrent_recovery')
    self.media_dirs = media_dirs
    self.dest_dir = dest_dir
    self.torrent_data_provider = torrent_data_provider
    self.fileFinder = FileFinder()
class TestFileOffset(unittest.TestCase):
    """Tests for Generator's computation of skip-offset byte ranges.

    Each test writes candidate files sized to match the "wanted" torrent
    entries, then checks that Generator.offsets contains (start, end)
    byte ranges covering only the entries with no size match on disk.
    """

    def setUp(self):
        self.PathAndLength = namedtuple('PathAndLength', 'path length')
        self.file_finder = FileFinder()

    @tempdir()
    def test_determine_offsets_skip_only_the_first_file(self, temp_dir):
        # Candidates exist for sizes 123454 and 123455 — but not 123.
        temp_dir.write(
            'a/b1/a.b1.f1',
            ContentManager.generate_random_binary(123454).getvalue())
        temp_dir.write(
            'a/b1/a.b1.f2',
            ContentManager.generate_random_binary(123454).getvalue())
        temp_dir.write(
            'b/b1/b.b1.f1',
            ContentManager.generate_random_binary(123455).getvalue())
        temp_dir.write('c/c1/c.c1.f1.nfo',
                       ContentManager.generate_random_binary(12).getvalue())
        temp_dir.write('c/c1/c.c1.f2.txt',
                       ContentManager.generate_random_binary(12).getvalue())
        self.file_finder.cache_files_by_size([temp_dir.path])
        paths = [
            self.PathAndLength('shouldskip1.txt', 123),
            self.PathAndLength('path2', 123454),
            self.PathAndLength('path3', 123455)
        ]
        mock_torrent = MockTorrentFile('name', paths, 1234567)
        generator = Generator(mock_torrent.meta_info, self.file_finder, None,
                              None)
        # Only the first (unmatched) file is skipped: bytes [0, 123).
        self.assertEqual(1, len(generator.offsets))
        self.assertEqual((0, 123), generator.offsets[0])

    @tempdir()
    def test_determine_offsets_skip_multiple_files(self, temp_dir):
        temp_dir.write(
            'a/b1/a.b1.f1',
            ContentManager.generate_random_binary(125000).getvalue())
        temp_dir.write(
            'a/b1/a.b1.f2',
            ContentManager.generate_random_binary(125000).getvalue())
        temp_dir.write(
            'b/b1/b.b1.f1',
            ContentManager.generate_random_binary(135000).getvalue())
        temp_dir.write('c/c1/c.c1.f1.nfo',
                       ContentManager.generate_random_binary(12).getvalue())
        temp_dir.write('c/c1/c.c1.f2.txt',
                       ContentManager.generate_random_binary(12).getvalue())
        self.file_finder.cache_files_by_size([temp_dir.path])
        # Alternating unmatched / matched entries → three separate skips.
        paths = [
            self.PathAndLength('shouldskip1.txt', 122),
            self.PathAndLength('path2', 125000),
            self.PathAndLength('shouldskip2.jpg', 123),
            self.PathAndLength('path3', 135000),
            self.PathAndLength('shouldskip3.nfo', 124)
        ]
        mock_torrent = MockTorrentFile('name', paths, 1234567)
        generator = Generator(mock_torrent.meta_info, self.file_finder, None,
                              None)
        # Offsets are cumulative byte positions within the torrent stream.
        offset2_start = 122 + 125000
        offset3_start = 122 + 125000 + 123 + 135000
        self.assertEqual(3, len(generator.offsets))
        self.assertEqual((0, 122), generator.offsets[0])
        self.assertEqual((offset2_start, offset2_start + 123),
                         generator.offsets[1])
        self.assertEqual((offset3_start, offset3_start + 124),
                         generator.offsets[2])

    @tempdir()
    def test_determine_offsets_offset_consecutive_skip_count_as_one_offset(
            self, temp_dir):
        temp_dir.write(
            'a/b1/a.b1.f1',
            ContentManager.generate_random_binary(125000).getvalue())
        temp_dir.write(
            'a/b1/a.b1.f2',
            ContentManager.generate_random_binary(125000).getvalue())
        temp_dir.write(
            'b/b1/b.b1.f1',
            ContentManager.generate_random_binary(135000).getvalue())
        temp_dir.write('c/c1/c.c1.f1.nfo',
                       ContentManager.generate_random_binary(12).getvalue())
        temp_dir.write('c/c1/c.c1.f2.txt',
                       ContentManager.generate_random_binary(12).getvalue())
        self.file_finder.cache_files_by_size([temp_dir.path])
        # Two adjacent unmatched files must merge into a single range.
        paths = [
            self.PathAndLength('shouldskip1.txt', 122),
            self.PathAndLength('shouldskip2.jpg', 123),
            self.PathAndLength('path3', 135000),
            self.PathAndLength('shouldskip3.nfo', 124)
        ]
        mock_torrent = MockTorrentFile('name', paths, 1234567)
        generator = Generator(mock_torrent.meta_info, self.file_finder, None,
                              None)
        offset2_start = 122 + 123 + 135000
        self.assertEqual(2, len(generator.offsets))
        self.assertEqual((0, 122 + 123), generator.offsets[0])
        self.assertEqual((offset2_start, offset2_start + 124),
                         generator.offsets[1])

    @tempdir()
    def test_determine_offsets_skip_multiple_files_and_last_file_if_no_candidates_found(
            self, temp_dir):
        temp_dir.write(
            'a/b1/a.b1.f1',
            ContentManager.generate_random_binary(125000).getvalue())
        temp_dir.write(
            'a/b1/a.b1.f2',
            ContentManager.generate_random_binary(125000).getvalue())
        temp_dir.write(
            'b/b1/b.b1.f1',
            ContentManager.generate_random_binary(135000).getvalue())
        temp_dir.write('c/c1/c.c1.f1.nfo',
                       ContentManager.generate_random_binary(12).getvalue())
        temp_dir.write('c/c1/c.c1.f2.txt',
                       ContentManager.generate_random_binary(12).getvalue())
        self.file_finder.cache_files_by_size([temp_dir.path])
        # Two trailing unmatched files merge into the final skip range.
        paths = [
            self.PathAndLength('shouldskip1.txt', 122),
            self.PathAndLength('path2', 125000),
            self.PathAndLength('shouldskip2.jpg', 123),
            self.PathAndLength('path3', 135000),
            self.PathAndLength('shouldskip3.nfo', 124),
            self.PathAndLength('shouldskip4.mp3', 23456)
        ]
        mock_torrent = MockTorrentFile('name', paths, 1234567)
        generator = Generator(mock_torrent.meta_info, self.file_finder, None,
                              None)
        offset2_start = 122 + 125000
        offset3_start = 122 + 125000 + 123 + 135000
        offset3_end = offset3_start + 124 + 23456
        self.assertEqual(3, len(generator.offsets))
        self.assertEqual((0, 122), generator.offsets[0])
        self.assertEqual((offset2_start, offset2_start + 123),
                         generator.offsets[1])
        self.assertEqual((offset3_start, offset3_end), generator.offsets[2])

    @tempdir()
    def test_two_consecutive_not_wanted_files_present_and_nothing_else(
            self, temp_dir):
        # On-disk sizes (1, 2) do not match the torrent entries (1000, 2000),
        # so everything is skipped as one range.
        temp_dir.write('a/b1/a.b1.sfv',
                       ContentManager.generate_random_binary(1).getvalue())
        temp_dir.write('a/b1/a.b1.nfo',
                       ContentManager.generate_random_binary(2).getvalue())
        self.file_finder.cache_files_by_size([temp_dir.path])
        paths = [
            self.PathAndLength('shouldskip1.sfv', 1000),
            self.PathAndLength('shouldskip2.nfo', 2000)
        ]
        mock_torrent = MockTorrentFile('name', paths, 123)
        generator = Generator(mock_torrent.meta_info, self.file_finder, None,
                              None)
        self.assertEqual(1, len(generator.offsets))
        self.assertEqual((0, 3000), generator.offsets[0])