Example #1
0
File: syncer.py Project: alipay/ray
def get_node_syncer(
    local_dir: str,
    remote_dir: Optional[str] = None,
    sync_function: Optional[Union[Callable, str, bool, Type[Syncer]]] = None,
):
    """Returns the syncer registered for ``(local_dir, remote_dir)``.

    Syncers are cached in the module-level ``_syncers`` mapping, keyed by
    the ``(local_dir, remote_dir)`` tuple. When an entry already exists it
    is returned unchanged, whatever ``sync_function`` was passed on this
    call. Otherwise a new syncer is built, stored and returned.

    Args:
        local_dir: Source directory for syncing.
        remote_dir: Target directory for syncing. If empty or not provided
            (and ``sync_function`` is not a ``Syncer`` subclass), the
            created NodeSyncer uses the ``NOOP`` client.
        sync_function: Selects how syncing is performed:

            * a ``Syncer`` subclass: instantiated directly as
              ``sync_function(local_dir, remote_dir, None)``;
            * ``False``: the created NodeSyncer uses the ``NOOP`` client;
            * a non-empty string (a command template for the syncer to
              run) or a callable: handed to ``get_sync_client``;
            * ``True``, ``None`` or ``"auto"``: uses rsync if a template
              is available, otherwise remote-task based syncing.

    Returns:
        The cached or newly created syncer: a NodeSyncer, or an instance
        of ``sync_function`` when a ``Syncer`` subclass was given.
    """
    # "auto" is simply an alias for the auto-detected default.
    if sync_function == "auto":
        sync_function = None

    cache_key = (local_dir, remote_dir)

    # A previously created syncer for this directory pair always wins.
    if cache_key in _syncers:
        return _syncers[cache_key]

    # Custom Syncer class: it replaces the NodeSyncer wrapper entirely.
    if isclass(sync_function) and issubclass(sync_function, Syncer):
        custom_syncer = sync_function(local_dir, remote_dir, None)
        _syncers[cache_key] = custom_syncer
        return _syncers[cache_key]

    if not remote_dir or sync_function is False:
        # Nothing to sync to, or syncing explicitly disabled.
        client = NOOP
    elif sync_function is True or not sync_function:
        # Default behaviour: prefer rsync, fall back to remote tasks.
        rsync_template = get_rsync_template_if_available()
        if not rsync_template:
            client = RemoteTaskClient()
        else:
            # The same template serves as both the up and the down command.
            client = CommandBasedClient(rsync_template, rsync_template)
            client.set_logdir(local_dir)
    else:
        # Command template string or user-supplied sync callable.
        client = get_sync_client(sync_function)

    node_syncer = NodeSyncer(local_dir, remote_dir, client)
    _syncers[cache_key] = node_syncer
    return _syncers[cache_key]
Example #2
0
    def testSyncRemoteTaskOnlyDifferences(self):
        """Checks that RemoteTaskClient re-syncs only new and modified files.

        A source directory tree with several files is created and synced
        to two separate target directories on the same node, once via
        ``sync_up`` and once via ``sync_down``. Both targets are checked
        for the expected nested file and its content.

        One source file is then rewritten (same length, different content)
        and one new file is added. After syncing up and down again, the
        test inspects the file stats and the tarball stored on the client
        and asserts that the tarball carries the changed and the new file
        but not an unchanged one.
        """
        source_dir = tempfile.mkdtemp()
        up_dir = tempfile.mkdtemp()
        down_dir = tempfile.mkdtemp()
        for tmp_dir in (source_dir, up_dir, down_dir):
            self.addCleanup(shutil.rmtree, tmp_dir)

        # Paths relative to the sync roots that the assertions refer to.
        changed_rel = os.path.join("A", "a1", "level_a2.txt")
        added_rel = os.path.join("A", "level_a1x.txt")
        untouched_rel = os.path.join("A", "level_a1.txt")
        old_rel = os.path.join("B", "level_b1.txt")

        def write_source_file(rel_path, text):
            # (Over)write a text file below the source directory.
            with open(os.path.join(source_dir, rel_path), "wt") as fp:
                fp.write(text)

        def assert_content(root, rel_path, expected):
            # The file below ``root`` must contain exactly ``expected``.
            with open(os.path.join(root, rel_path), "rt") as fp:
                self.assertEqual(fp.read(), expected)

        def check_full_sync(target_root):
            # The initial sync must have reached the second directory level.
            self.assertTrue(
                os.path.exists(os.path.join(target_root, changed_rel)),
                msg=f"Contents: {os.listdir(target_root)}",
            )
            assert_content(target_root, changed_rel, "Level A2\n")

        def check_incremental_sync(target_root, files_stats, tarball):
            # Rewritten file has the new content, the added file arrived,
            # and a file from the first sync is still in place.
            assert_content(target_root, changed_rel, "Level X2\n")
            assert_content(target_root, added_rel, "Level A1X\n")
            assert_content(target_root, old_rel, "Level B1\n")

            # In the target dir, level_a1x was not contained
            self.assertIn(changed_rel, files_stats)
            self.assertNotIn(added_rel, files_stats)

            # The tarball holds only what had to be transferred.
            with tarfile.open(fileobj=io.BytesIO(tarball)) as tar:
                files_in_tar = tar.getnames()
                self.assertIn(changed_rel, files_in_tar)
                self.assertIn(added_rel, files_in_tar)
                self.assertNotIn(untouched_rel, files_in_tar)
                # 6 directories (including root) + 2 files
                self.assertEqual(len(files_in_tar), 8, msg=str(files_in_tar))

        # Build the source tree: three nested directories, four files.
        for subdirs in (("A", "a1"), ("A", "a2"), ("B", "b1")):
            os.makedirs(os.path.join(source_dir, *subdirs))
        initial_files = (
            ("level_0.txt", "Level 0\n"),
            (untouched_rel, "Level A1\n"),
            (changed_rel, "Level A2\n"),
            (old_rel, "Level B1\n"),
        )
        for rel_path, text in initial_files:
            write_source_file(rel_path, text)

        node_ip = ray.util.get_node_ip_address()
        client = RemoteTaskClient(_store_remotes=True)

        def grab_stats_and_tarball():
            # Hi-jack the futures stored on the client before waiting.
            stats = ray.get(client._stored_files_stats)
            packed = ray.get(
                client._stored_pack_actor_ref.get_full_data.remote())
            client.wait()
            return stats, packed

        # First round: sync everything up, then everything down.
        client.sync_up(source=source_dir, target=(node_ip, up_dir))
        client.wait()
        check_full_sync(up_dir)

        client.sync_down(source=(node_ip, source_dir), target=down_dir)
        client.wait()
        check_full_sync(down_dir)

        # Now edit the source: one rewritten file, one brand-new file.
        write_source_file(changed_rel, "Level X2\n")  # Same length
        write_source_file(added_rel, "Level A1X\n")  # New file

        # Second round, up: only the differences should be shipped.
        client.sync_up(source=source_dir, target=(node_ip, up_dir))
        up_stats, up_tarball = grab_stats_and_tarball()
        check_incremental_sync(up_dir, up_stats, up_tarball)

        # Second round, down: same expectations for the other target.
        client.sync_down(source=(node_ip, source_dir), target=down_dir)
        down_stats, down_tarball = grab_stats_and_tarball()
        check_incremental_sync(down_dir, down_stats, down_tarball)