def get_node_syncer(
    local_dir: str,
    remote_dir: Optional[str] = None,
    sync_function: Optional[Union[Callable, str, bool, Type[Syncer]]] = None,
):
    """Return the syncer responsible for ``(local_dir, remote_dir)``.

    Syncers are cached in the module-level ``_syncers`` mapping, keyed only
    by ``(local_dir, remote_dir)``. If an entry already exists for that pair
    it is returned as-is and ``sync_function`` is not consulted.

    Args:
        local_dir: Source directory for syncing.
        remote_dir: Target directory for syncing. If empty or not provided
            (and ``sync_function`` is not a ``Syncer`` subclass), the
            returned NodeSyncer wraps the ``NOOP`` client.
        sync_function: Selects how ``local_dir`` is synced to ``remote_dir``:

            * ``"auto"``: treated exactly like ``None``.
            * a ``Syncer`` subclass: instantiated as
              ``sync_function(local_dir, remote_dir, None)`` and returned
              directly instead of a NodeSyncer.
            * ``False``: the NodeSyncer wraps the ``NOOP`` client.
            * any other truthy value except ``True`` (a string template or
              a callable): the client comes from
              ``get_sync_client(sync_function)``.
            * ``True`` or ``None``: an rsync command client is used if
              ``get_rsync_template_if_available()`` returns a template,
              otherwise a ``RemoteTaskClient``.

    Returns:
        The cached or newly created syncer for ``(local_dir, remote_dir)``.
    """
    if sync_function == "auto":
        # "auto" is an alias for "not specified": auto-detect below.
        sync_function = None

    cache_key = (local_dir, remote_dir)

    # Reuse a previously created syncer for this directory pair.
    if cache_key in _syncers:
        return _syncers[cache_key]

    # A Syncer subclass is constructed directly; no sync client is chosen.
    if isclass(sync_function) and issubclass(sync_function, Syncer):
        custom_syncer = sync_function(local_dir, remote_dir, None)
        _syncers[cache_key] = custom_syncer
        return custom_syncer

    if not remote_dir or sync_function is False:
        # Nothing to sync to, or syncing explicitly disabled.
        client = NOOP
    elif sync_function and sync_function is not True:
        # String template or callable supplied by the caller.
        client = get_sync_client(sync_function)
    else:
        # sync_function is True or None: fall back to the default client.
        rsync_template = get_rsync_template_if_available()
        if rsync_template:
            # The same template is passed for both client arguments.
            client = CommandBasedClient(rsync_template, rsync_template)
            client.set_logdir(local_dir)
        else:
            client = RemoteTaskClient()

    node_syncer = NodeSyncer(local_dir, remote_dir, client)
    _syncers[cache_key] = node_syncer
    return node_syncer
def testSyncRemoteTaskOnlyDifferences(self):
    """Check that RemoteTaskClient re-syncs only new and modified files.

    A source directory with several nested files is created and synced to
    two separate directories on the same node, once with ``sync_up`` and
    once with ``sync_down``. After confirming the initial transfer, one
    source file is rewritten (with content of the same length) and one new
    file is added. Both syncs are then repeated, and the file stats and
    tarball held by the client are inspected to confirm that only the
    edited and the new file were packed.
    """
    source_dir, up_dir, down_dir = (tempfile.mkdtemp() for _ in range(3))
    for scratch_dir in (source_dir, up_dir, down_dir):
        self.addCleanup(shutil.rmtree, scratch_dir)

    def write_source(content, *parts):
        # Create or overwrite a text file below the source directory.
        with open(os.path.join(source_dir, *parts), "wt") as handle:
            handle.write(content)

    def assert_content(root, expected, *parts):
        # The file at ``parts`` below ``root`` must hold exactly ``expected``.
        with open(os.path.join(root, *parts), "rt") as handle:
            self.assertEqual(handle.read(), expected)

    def assert_initial_sync(target_dir):
        # Assume that we synced everything up to second level
        deepest_file = os.path.join(target_dir, "A", "a1", "level_a2.txt")
        self.assertTrue(
            os.path.exists(deepest_file),
            msg=f"Contents: {os.listdir(target_dir)}",
        )
        assert_content(target_dir, "Level A2\n", "A", "a1", "level_a2.txt")

    def assert_incremental_sync(target_dir, stats, tar_bytes):
        edited_rel = os.path.join("A", "a1", "level_a2.txt")
        added_rel = os.path.join("A", "level_a1x.txt")
        untouched_rel = os.path.join("A", "level_a1.txt")

        # Existing file should have new content
        assert_content(target_dir, "Level X2\n", "A", "a1", "level_a2.txt")
        # New file should be there
        assert_content(target_dir, "Level A1X\n", "A", "level_a1x.txt")
        # Old file should be there
        assert_content(target_dir, "Level B1\n", "B", "level_b1.txt")

        # In the target dir, level_a1x was not contained
        # (the stats appear to describe what the target held before this
        # sync -- TODO confirm against RemoteTaskClient)
        self.assertIn(edited_rel, stats)
        self.assertNotIn(added_rel, stats)

        # Inspect tarball
        with tarfile.open(fileobj=io.BytesIO(tar_bytes)) as archive:
            packed_names = archive.getnames()
            self.assertIn(edited_rel, packed_names)
            self.assertIn(added_rel, packed_names)
            self.assertNotIn(untouched_rel, packed_names)
            # 6 directories (including root) + 2 files
            self.assertEqual(len(packed_names), 8, msg=str(packed_names))

    # Build the source tree: three nested directories and four files.
    for subdirs in (("A", "a1"), ("A", "a2"), ("B", "b1")):
        os.makedirs(os.path.join(source_dir, *subdirs))
    write_source("Level 0\n", "level_0.txt")
    write_source("Level A1\n", "A", "level_a1.txt")
    write_source("Level A2\n", "A", "a1", "level_a2.txt")
    write_source("Level B1\n", "B", "level_b1.txt")

    node_ip = ray.util.get_node_ip_address()

    # ``_store_remotes=True`` presumably makes the client keep the
    # ``_stored_*`` references read further down -- verify in the client.
    client = RemoteTaskClient(_store_remotes=True)

    # Sync everything up
    client.sync_up(source=source_dir, target=(node_ip, up_dir))
    client.wait()
    assert_initial_sync(up_dir)

    # Sync everything down
    client.sync_down(source=(node_ip, source_dir), target=down_dir)
    client.wait()
    assert_initial_sync(down_dir)

    # Now, edit some stuff in our source. Then confirm only these
    # edited files are synced
    write_source("Level X2\n", "A", "a1", "level_a2.txt")  # Same length
    write_source("Level A1X\n", "A", "level_a1x.txt")  # New file

    # Sync up
    client.sync_up(source=source_dir, target=(node_ip, up_dir))
    # Hi-jack futures (fetched before ``wait()``, as in the original order)
    up_stats = ray.get(client._stored_files_stats)
    up_tar_bytes = ray.get(
        client._stored_pack_actor_ref.get_full_data.remote())
    client.wait()
    assert_incremental_sync(up_dir, up_stats, up_tar_bytes)

    # Sync down
    client.sync_down(source=(node_ip, source_dir), target=down_dir)
    # Hi-jack futures
    down_stats = ray.get(client._stored_files_stats)
    down_tar_bytes = ray.get(
        client._stored_pack_actor_ref.get_full_data.remote())
    client.wait()
    assert_incremental_sync(down_dir, down_stats, down_tar_bytes)