예제 #1
0
    def sync_to_worker_if_possible(self):
        """Push the driver's local log directory to the worker with rsync.

        Returns immediately when the worker IP equals the local IP. The sync
        is also skipped, with an error logged, when the SSH key or SSH user
        is unavailable (logged only while ``_log_sync_warned`` is unset) or
        when no ``rsync`` executable can be found. Otherwise this blocks
        until the rsync subprocess exits; its stdout goes to
        ``self.logfile``.
        """
        global _log_sync_warned

        # Same IP as this node: there is nothing to copy over.
        if self.worker_ip == self.local_ip:
            return

        key = get_ssh_key()
        user = get_ssh_user()
        credentials_missing = key is None or user is None
        if credentials_missing:
            # Report the misconfiguration once, then stay quiet.
            if not _log_sync_warned:
                logger.error("Log sync requires cluster to be setup with "
                             "`ray up`.")
                _log_sync_warned = True
            return

        rsync_found = distutils.spawn.find_executable("rsync")
        if not rsync_found:
            logger.error("Log sync requires rsync to be installed.")
            return

        local_path = '{}/'.format(self.local_dir)
        remote_path = '{}@{}:{}/'.format(user, self.worker_ip, self.local_dir)
        command_template = (
            """rsync -savz -e "ssh -i {} -o ConnectTimeout=120s """
            """-o StrictHostKeyChecking=no" {} {}""")
        final_cmd = command_template.format(
            quote(key), quote(local_path), quote(remote_path))

        logger.info("Syncing results to %s", str(self.worker_ip))
        # Run through the shell and block until rsync has finished.
        subprocess.Popen(
            final_cmd, shell=True, stdout=self.logfile).wait()
예제 #2
0
파일: log_sync.py 프로젝트: zhouhh2017/ray
    def sync_to_worker_if_possible(self):
        """Rsync the local log directory from the driver to the worker.

        The sync is skipped when:
          * the worker IP is the same as the local IP;
          * ``get_ssh_key()`` or ``get_ssh_user()`` returns None (an error
            is logged unless ``_log_sync_warned`` is already set);
          * no ``rsync`` executable is found (an error is logged).

        Otherwise the rsync command is run through the shell with stdout
        redirected to ``self.logfile``, and this call waits for it to exit.
        """
        global _log_sync_warned

        if self.worker_ip == self.local_ip:
            return

        ssh_key, ssh_user = get_ssh_key(), get_ssh_user()
        if ssh_key is None or ssh_user is None:
            if _log_sync_warned:
                # Already reported on an earlier call.
                return
            logger.error("Log sync requires cluster to be setup with "
                         "`ray up`.")
            _log_sync_warned = True
            return

        if not distutils.spawn.find_executable("rsync"):
            logger.error("Log sync requires rsync to be installed.")
            return

        driver_dir = "{}/".format(self.local_dir)
        worker_dir = "{}@{}:{}/".format(
            ssh_user, self.worker_ip, self.local_dir)
        # Build the ssh transport first, then embed it in rsync's -e option.
        ssh_transport = ("ssh -i {} -o ConnectTimeout=120s "
                         "-o StrictHostKeyChecking=no").format(quote(ssh_key))
        final_cmd = 'rsync -savz -e "{}" {} {}'.format(
            ssh_transport, quote(driver_dir), quote(worker_dir))

        logger.info("Syncing results to %s", str(self.worker_ip))
        rsync_proc = subprocess.Popen(
            final_cmd, shell=True, stdout=self.logfile)
        rsync_proc.wait()
예제 #3
0
    def sync_now(self, force=False):
        """Start a log sync in a shell subprocess without waiting for it.

        Up to two shell commands are chained with ``&&``: an rsync that pulls
        ``self.local_dir`` from the worker to this node (only when the worker
        IP differs from the local IP), followed by the command returned by
        ``self.get_remote_sync_cmd()`` when ``self.remote_dir`` is set and no
        ``self.sync_func`` is configured. When ``self.sync_func`` is
        configured it is called directly instead, and any exception it raises
        is logged rather than propagated.

        Args:
            force: Applies when a previously started sync process has not
                finished. If True that process is killed and a new sync is
                started; otherwise a warning is logged and nothing is
                started.
        """
        self.last_sync_time = time.time()
        if not self.worker_ip:
            logger.debug("Worker ip unknown, skipping log sync for {}".format(
                self.local_dir))
            return

        pull_cmd = None  # stays None when the worker is this node
        if self.worker_ip != self.local_ip:
            ssh_key = get_ssh_key()
            ssh_user = get_ssh_user()
            if ssh_key is None or ssh_user is None:
                logger.error("Log sync requires cluster to be setup with "
                             "`ray create_or_update`.")
                return
            if not distutils.spawn.find_executable("rsync"):
                logger.error("Log sync requires rsync to be installed.")
                return
            worker_src = '{}@{}:{}/'.format(
                ssh_user, self.worker_ip, self.local_dir)
            local_dst = '{}/'.format(self.local_dir)
            rsync_template = (
                """rsync -savz -e "ssh -i {} -o ConnectTimeout=120s """
                """-o StrictHostKeyChecking=no" {} {}""")
            pull_cmd = rsync_template.format(
                quote(ssh_key), quote(worker_src), quote(local_dst))

        push_cmd = None
        if self.remote_dir:
            if self.sync_func:
                # A custom sync function replaces the shell upload command.
                try:
                    self.sync_func(self.local_dir, self.remote_dir)
                except Exception:
                    logger.exception("Sync function failed.")
            else:
                push_cmd = self.get_remote_sync_cmd()

        previous = self.sync_process
        if previous:
            previous.poll()
            if previous.returncode is None:
                if not force:
                    logger.warning("Last sync is still in progress, skipping.")
                    return
                previous.kill()

        # Keep only the commands that are actually needed, pull first.
        commands = [cmd for cmd in (pull_cmd, push_cmd) if cmd]
        if commands:
            final_cmd = " && ".join(commands)
            logger.debug("Running log sync: {}".format(final_cmd))
            self.sync_process = subprocess.Popen(final_cmd, shell=True)
예제 #4
0
파일: log_sync.py 프로젝트: jamescasbon/ray
    def sync_now(self, force=False):
        """Launch a non-blocking sync of ``self.local_dir``.

        The method records the sync time, then assembles a shell command:

        1. If the worker IP differs from the local IP, an rsync over ssh
           that copies ``self.local_dir`` from the worker to this node.
        2. If ``self.remote_dir`` is set, either ``self.sync_func`` is
           invoked immediately (failures are logged, not raised) or the
           command from ``self.get_remote_sync_cmd()`` is appended.

        The commands are joined with ``&&`` and started with
        ``subprocess.Popen``; the process handle is kept in
        ``self.sync_process``.

        Args:
            force: If the previous sync process is still running, kill it
                when True; when False, log a warning and return without
                launching a new sync.
        """
        self.last_sync_time = time.time()
        if not self.worker_ip:
            logger.debug("Worker ip unknown, skipping log sync for {}".format(
                self.local_dir))
            return

        if self.worker_ip == self.local_ip:
            fetch_cmd = None  # don't need to rsync
        else:
            key, user = get_ssh_key(), get_ssh_user()
            if key is None or user is None:
                logger.error("Log sync requires cluster to be setup with "
                             "`ray create_or_update`.")
                return
            if not distutils.spawn.find_executable("rsync"):
                logger.error("Log sync requires rsync to be installed.")
                return
            origin = '{}@{}:{}/'.format(user, self.worker_ip, self.local_dir)
            destination = '{}/'.format(self.local_dir)
            fetch_cmd = (
                """rsync -savz -e "ssh -i {} -o ConnectTimeout=120s """
                """-o StrictHostKeyChecking=no" {} {}"""
            ).format(quote(key), quote(origin), quote(destination))

        if not self.remote_dir:
            upload_cmd = None
        elif self.sync_func:
            upload_cmd = None
            try:
                self.sync_func(self.local_dir, self.remote_dir)
            except Exception:
                logger.exception("Sync function failed.")
        else:
            upload_cmd = self.get_remote_sync_cmd()

        if self.sync_process:
            self.sync_process.poll()
            still_running = self.sync_process.returncode is None
            if still_running and force:
                self.sync_process.kill()
            elif still_running:
                logger.warning("Last sync is still in progress, skipping.")
                return

        # Chain both commands when both exist; otherwise use whichever one
        # is available (possibly none).
        if fetch_cmd and upload_cmd:
            final_cmd = fetch_cmd + " && " + upload_cmd
        else:
            final_cmd = fetch_cmd or upload_cmd
        if final_cmd:
            logger.debug("Running log sync: {}".format(final_cmd))
            self.sync_process = subprocess.Popen(final_cmd, shell=True)
예제 #5
0
    def sync_now(self, force=False):
        """Start a log sync in a shell subprocess without waiting for it.

        Up to two commands are chained with ``&&``:

        * an rsync over ssh that copies ``self.local_dir`` from the worker
          to this node, used only when the worker IP differs from the local
          IP;
        * an upload of ``self.local_dir`` to ``self.remote_dir`` using
          ``aws s3 sync`` or ``gsutil rsync -r``, chosen by the prefix of
          ``self.remote_dir``.

        A ``remote_dir`` with an unrecognized prefix is reported and the
        upload step is skipped (previously this raised UnboundLocalError).

        Args:
            force: If the previous sync process is still running, kill it
                when True; when False, print a warning and return without
                starting a new sync.
        """
        self.last_sync_time = time.time()
        if not self.worker_ip:
            print("Worker ip unknown, skipping log sync for {}".format(
                self.local_dir))
            return

        if self.worker_ip == self.local_ip:
            worker_to_local_sync_cmd = None  # don't need to rsync
        else:
            ssh_key = get_ssh_key()
            ssh_user = get_ssh_user()
            if ssh_key is None or ssh_user is None:
                print("Error: log sync requires cluster to be setup with "
                      "`ray create_or_update`.")
                return
            if not distutils.spawn.find_executable("rsync"):
                print("Error: log sync requires rsync to be installed.")
                return
            # Quote each complete path argument exactly once. Wrapping the
            # output of quote() in literal single quotes (as before) breaks
            # the command for any path that actually needs quoting.
            source = "{}@{}:{}/".format(ssh_user, self.worker_ip,
                                        self.local_dir)
            target = "{}/".format(self.local_dir)
            worker_to_local_sync_cmd = ((
                """rsync -avz -e "ssh -i {} -o ConnectTimeout=120s """
                """-o StrictHostKeyChecking=no" {} {}""").format(
                    quote(ssh_key), quote(source), quote(target)))

        # Default to "no upload" so the name is bound on every path.
        local_to_remote_sync_cmd = None
        if self.remote_dir:
            if self.remote_dir.startswith(S3_PREFIX):
                local_to_remote_sync_cmd = ("aws s3 sync {} {}".format(
                    quote(self.local_dir), quote(self.remote_dir)))
            elif self.remote_dir.startswith(GCS_PREFIX):
                local_to_remote_sync_cmd = ("gsutil rsync -r {} {}".format(
                    quote(self.local_dir), quote(self.remote_dir)))
            else:
                print("Error: unsupported remote dir {}, skipping "
                      "upload.".format(self.remote_dir))

        if self.sync_process:
            self.sync_process.poll()
            if self.sync_process.returncode is None:
                if force:
                    self.sync_process.kill()
                else:
                    print("Warning: last sync is still in progress, skipping")
                    return

        commands = [
            cmd for cmd in (worker_to_local_sync_cmd,
                            local_to_remote_sync_cmd) if cmd
        ]
        if commands:
            final_cmd = " && ".join(commands)
            print("Running log sync: {}".format(final_cmd))
            self.sync_process = subprocess.Popen(final_cmd, shell=True)
예제 #6
0
 def _remote_path(self):
     """Return ``<ssh_user>@<worker_ip>:<remote_dir>/``, or None.

     None is returned when ``self.has_remote_target()`` is false, or when
     ``get_ssh_user()`` yields None; in the latter case an error is logged
     unless ``_log_sync_warned`` is already set.
     """
     global _log_sync_warned
     user = get_ssh_user()
     if not self.has_remote_target():
         return None
     if user is not None:
         return "{}@{}:{}/".format(user, self.worker_ip, self._remote_dir)
     # No SSH user available: report it once, then stay silent.
     if not _log_sync_warned:
         logger.error("Syncer requires cluster to be setup with `ray up`.")
         _log_sync_warned = True
     return None
예제 #7
0
파일: log_sync.py 프로젝트: x-malet/ray
 def _remote_path(self):
     """Build the ``user@ip:dir/`` string for the worker's remote directory.

     Returns None when ``self._check_valid_worker_ip()`` fails or when no
     SSH user is available. A missing SSH user is logged as an error the
     first time only, tracked through ``_log_sync_warned``.
     """
     global _log_sync_warned
     user = get_ssh_user()
     if self._check_valid_worker_ip():
         if user is not None:
             return "{}@{}:{}/".format(
                 user, self.worker_ip, self._remote_dir)
         if not _log_sync_warned:
             logger.error("Log sync requires cluster to be setup with "
                          "`ray up`.")
             _log_sync_warned = True
     return None
예제 #8
0
 def _remote_path(self) -> Optional[Union[str, Tuple[str, str]]]:
     """Describe the remote sync location on the worker.

     Returns:
         None when ``self.has_remote_target()`` is false or when
         ``get_ssh_user()`` returns None (an error is logged for the latter
         unless ``_log_sync_warned`` is already set). Otherwise the tuple
         ``(worker_ip, remote_dir)`` if ``self._pass_ip_path_tuples`` is
         truthy, else the string ``<ssh_user>@<worker_ip>:<remote_dir>/``.
     """
     global _log_sync_warned
     login = get_ssh_user()
     if not self.has_remote_target():
         return None
     if login is None:
         if _log_sync_warned:
             # Already reported on an earlier call.
             return None
         logger.error("Syncer requires cluster to be setup with `ray up`.")
         _log_sync_warned = True
         return None
     return (
         (self.worker_ip, self._remote_dir)
         if self._pass_ip_path_tuples
         else "{}@{}:{}/".format(login, self.worker_ip, self._remote_dir)
     )