Example 1
    def setup_distributed(self, mode, cluster):
        """Sets up TensorFLow distributed environment and initializes the model.
        Args:
            urls (str): the URLs that each node uses to connect.
            world_rank (int): the index of the runner.
            world_size (int): the total number of runners.
        """
        self.cluster = cluster
        tc = BarrierTaskContext.get()
        self.rank = self._get_rank(cluster, tc)
        print("cluster is: ", cluster)

        import os
        os.environ["TF_CONFIG"] = json.dumps({
            'cluster': {
                'worker': cluster
            },
            'task': {
                'type': 'worker',
                'index': self.rank
            }
        })
        ips = set([node.split(":")[0] for node in cluster])
        os.environ["no_proxy"] = ",".join(ips)

        self.strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy(
        )

        # For use in model.evaluate()
        self.local_model = None
        self.backend = "tf-distributed"
Example 2
  def _run(it):
    from pyspark import BarrierTaskContext

    for i in it:
      worker_num = i

    if use_barrier:
      # use BarrierTaskContext to get placement of all nodes
      barrier_ctx = BarrierTaskContext.get()
      tasks = barrier_ctx.getTaskInfos()
      nodes = [t.address for t in tasks]
      num_workers = len(nodes)
    else:
      nodes = None
      num_workers = num_executors

    # use the placement info to help allocate GPUs
    # note: defaults to CPU if no GPUs present
    num_gpus = tf_args.num_gpus if 'num_gpus' in tf_args else 1
    util.single_node_env(num_gpus=num_gpus, worker_index=worker_num, nodes=nodes)

    # run the user map_fn
    ctx = TFSparkNode.TFNodeContext()
    ctx.defaultFS = defaultFS
    ctx.worker_num = worker_num
    ctx.executor_id = worker_num
    ctx.num_workers = num_workers

    map_fn(tf_args, ctx)

    # return a dummy iterator (since we have to use mapPartitions)
    return [0]
Example 3
    def _run(it):
        # "it" yields the executor index, in [0, num_executors - 1]
        from pyspark import BarrierTaskContext

        for i in it:
            worker_num = i  # executor index

        # barrier is a new scheduling model introduced in Spark 2.4,
        # added so that distributed deep learning can be embedded into Spark.
        # A barrier stage assigns an id to every task, making it easy for tasks to interact with each other.
        # use BarrierTaskContext to get placement of all nodes
        ctx = BarrierTaskContext.get()
        tasks = ctx.getTaskInfos()
        nodes = [t.address for t in tasks]

        # use the placement info to help allocate GPUs
        num_gpus = tf_args.num_gpus if 'num_gpus' in tf_args else 1  # GPUs per task as specified by the user; defaults to 1
        # set the environment variables,
        # and assign a free GPU to this task if one is available
        util.single_node_env(num_gpus=num_gpus,
                             worker_index=worker_num,
                             nodes=nodes)

        # run the user map_fn
        ctx = TFSparkNode.TFNodeContext()
        ctx.defaultFS = defaultFS
        ctx.worker_num = worker_num
        ctx.executor_id = worker_num  # executor index
        ctx.num_workers = len(nodes)  # number of executors

        map_fn(tf_args, ctx)

        # return a dummy iterator (since we have to use mapPartitions)
        return [0]
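
The comments above describe Spark's barrier scheduling mode. For orientation, here is a minimal driver-side sketch of how such a per-partition function gets launched as a barrier stage; the SparkContext, the partition count, and the simplified report_placement stand-in are assumptions, not part of the snippet above.

# Minimal driver-side sketch (assumptions: a local SparkContext and a simplified
# stand-in for the _run closure above).
from pyspark import SparkContext, BarrierTaskContext

sc = SparkContext.getOrCreate()
num_executors = 2

def report_placement(it):
    tc = BarrierTaskContext.get()
    # Every barrier task can see the addresses of all tasks in the stage.
    yield [info.address for info in tc.getTaskInfos()]

# One partition per executor; barrier() schedules all tasks to start together.
node_rdd = sc.parallelize(range(num_executors), num_executors)
print(node_rdd.barrier().mapPartitions(report_placement).collect())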
Example 4
 def f(iterator):
     try:
         taskContext = BarrierTaskContext.get()
     except Exception:
         yield -1
     else:
         yield taskContext.partitionId()
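
This helper is commonly used to detect whether the code is running inside a barrier stage: BarrierTaskContext.get() succeeds only there, and the except branch yields -1 otherwise. A short usage sketch follows, assuming a SparkContext named sc and that f is available at module scope; the outputs in the comments are indicative only.

# Usage sketch (assumptions: a SparkContext named `sc`, and `f` defined at
# module scope exactly as above).
rdd = sc.parallelize(range(4), 4)

# Inside a barrier stage each task yields its partition id.
print(rdd.barrier().mapPartitions(f).collect())   # indicatively [0, 1, 2, 3]
# In a regular stage the lookup fails, so each task yields -1.
print(rdd.mapPartitions(f).collect())             # indicatively [-1, -1, -1, -1]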
Example 5
        def _start_ray_services(iter):
            tc = BarrierTaskContext.get()
            # The addresses are sorted by partitionId according to the comments
            # Partition 0 is the Master
            task_addrs = [taskInfo.address for taskInfo in tc.getTaskInfos()]
            print(task_addrs)
            master_ip = task_addrs[0].split(":")[0]
            print("current address {}".format(task_addrs[tc.partitionId()]))
            print("master address {}".format(master_ip))
            redis_address = "{}:{}".format(master_ip, self.redis_port)
            process_info = None
            if tc.partitionId() == 0:
                print("partition id is : {}".format(tc.partitionId()))
                process_info = self._start_ray_node(
                    command=self._gen_master_command(), tag="ray-master")
                process_info.master_addr = redis_address

            tc.barrier()
            if tc.partitionId() != 0:
                print("partition id is : {}".format(tc.partitionId()))
                process_info = self._start_ray_node(
                    command=RayServiceFuncGenerator._get_raylet_command(
                        redis_address=redis_address,
                        ray_exec=self.ray_exec,
                        password=self.password,
                        ray_node_cpu_cores=self.ray_node_cpu_cores,
                        labels=self.labels,
                        object_store_memory=self.object_store_memory,
                        extra_params=self.extra_params),
                    tag="raylet")
            yield process_info
Example 6
 def _start_ray_services(iter):
     tc = BarrierTaskContext.get()
     # The addresses are sorted by partitionId according to the comments
     # Partition 0 is the Master
     task_addrs = [taskInfo.address for taskInfo in tc.getTaskInfos()]
     print(task_addrs)
     master_ip = task_addrs[0].split(":")[0]
     print("current address {}".format(task_addrs[tc.partitionId()]))
     print("master address {}".format(master_ip))
     redis_address = "{}:{}".format(master_ip, self.redis_port)
     if tc.partitionId() == 0:
         print("partition id is : {}".format(tc.partitionId()))
         process_info = self._start_ray_node(command=self._gen_master_command(),
                                             tag="ray-master",
                                             wait_after=self.waiting_time_sec)
         process_info.master_addr = redis_address
         yield process_info
     else:
         print("partition id is : {}".format(tc.partitionId()))
         process_info = self._start_ray_node(
             command=self._get_raylet_command(redis_address=redis_address),
             tag="raylet",
             wait_before=self.waiting_time_sec)
         yield process_info
     tc.barrier()
Example 7
        def context(iterator):
            tp = TaskContext.get().partitionId()
            try:
                bp = BarrierTaskContext.get().partitionId()
            except Exception:
                bp = -1

            yield (tp, bp, os.getpid())
Example 8
 def info_fn(iter):
     tc = BarrierTaskContext.get()
     task_addrs = [
         taskInfo.address.split(":")[0]
         for taskInfo in tc.getTaskInfos()
     ]
     yield task_addrs
     tc.barrier()
Example 9
        def _train_booster(pandas_df_iter):
            """
            Takes in an RDD partition and outputs a booster for that partition after going through
            the Rabit Ring protocol
            """
            from pyspark import BarrierTaskContext

            context = BarrierTaskContext.get()
            context.barrier()

            if use_gpu:
                booster_params["gpu_id"] = (context.partitionId() if is_local
                                            else _get_gpu_id(context))

            _rabit_args = ""
            if context.partitionId() == 0:
                _rabit_args = str(_get_rabit_args(context, num_workers))

            messages = context.allGather(message=str(_rabit_args))
            _rabit_args = _get_args_from_message_list(messages)
            evals_result = {}
            with RabitContext(_rabit_args, context):
                dtrain, dval = None, []
                if has_validation:
                    dtrain, dval = _convert_partition_data_to_dmatrix(
                        pandas_df_iter,
                        has_weight,
                        has_validation,
                        has_base_margin,
                        dmatrix_kwargs=dmatrix_kwargs,
                    )
                    # TODO: Question: do we need to add dtrain to dval list ?
                    dval = [(dtrain, "training"), (dval, "validation")]
                else:
                    dtrain = _convert_partition_data_to_dmatrix(
                        pandas_df_iter,
                        has_weight,
                        has_validation,
                        has_base_margin,
                        dmatrix_kwargs=dmatrix_kwargs,
                    )

                booster = worker_train(
                    params=booster_params,
                    dtrain=dtrain,
                    evals=dval,
                    evals_result=evals_result,
                    **train_call_kwargs_params,
                )
            context.barrier()

            if context.partitionId() == 0:
                yield pd.DataFrame(
                    data={
                        "config": [booster.save_config()],
                        "booster": [booster.save_raw("json").decode("utf-8")]
                    })
Example 10
        def wrapped_train_fn(_):
            import json
            import logging
            import os
            import socket
            from contextlib import closing
            from pyspark import BarrierTaskContext

            # Sets the TF_CONFIG env var so TF servers
            # can communicate with each other
            def set_tf_config(context):
                addrs = [
                    e.address.split(':')[0] for e in context.getTaskInfos()
                ]
                my_addr = addrs[context.partitionId()]
                with closing(socket.socket(socket.AF_INET,
                                           socket.SOCK_STREAM)) as my_sock:
                    my_sock.bind(('', 0))
                    _, my_port = my_sock.getsockname()
                    my_endpoint = "{}:{}".format(my_addr, my_port)
                    worker_endpoints = context.allGather(my_endpoint)
                cluster = {'worker': worker_endpoints}
                tf_config = {
                    'cluster': cluster,
                    'task': {
                        'type': 'worker',
                        'index': context.partitionId()
                    }
                }
                os.environ['TF_CONFIG'] = json.dumps(tf_config)

            # Sets the CUDA_VISIBLE_DEVICES env var so only
            # the appropriate GPUS are used
            def set_gpus(context):
                gpus_owned = MirroredStrategyRunner._get_gpus_owned(
                    context.resources(), gpu_resource_name)

                my_num_gpus = (num_slots //
                               num_tasks) + (context.partitionId() <
                                             (num_slots % num_tasks))
                gpu_addresses = [
                    str(e) for e in random.sample(gpus_owned, my_num_gpus)
                ]
                logging.info(f'Using GPU addresses: {gpu_addresses}')
                os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(gpu_addresses)

            context = BarrierTaskContext.get()
            if use_gpu:
                set_gpus(context)
            else:
                os.environ['CUDA_VISIBLE_DEVICES'] = ''
            set_tf_config(context)
            result = run_tensorflow_program(train_fn, use_custom_strategy,
                                            **kwargs)
            if context.partitionId() == 0:
                return [result]
            return [None]
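
set_tf_config above combines getTaskInfos() (for the worker host addresses) with allGather() (for the port each task picked) to build the TF_CONFIG environment variable that MultiWorkerMirroredStrategy reads. Purely as an illustration, with hypothetical addresses and ports, the value written for the task at partition index 1 of a two-task stage would be assembled like this:

import json

# Illustration only: hypothetical worker endpoints for a two-task barrier stage.
worker_endpoints = ["10.0.0.1:41237", "10.0.0.2:45811"]
partition_id = 1  # this task's index within the barrier stage

tf_config = {
    "cluster": {"worker": worker_endpoints},
    "task": {"type": "worker", "index": partition_id},
}
# set_tf_config() above writes the equivalent JSON string into os.environ["TF_CONFIG"].
print(json.dumps(tf_config))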
Example 11
 def train_fn():
     from pyspark import BarrierTaskContext
     context = BarrierTaskContext.get()
     cuda_state = os.environ['CUDA_VISIBLE_DEVICES']
     if cuda_state:
         num_gpus = len(os.environ['CUDA_VISIBLE_DEVICES'].split(','))
     else:
         num_gpus = 0
     return [int(e) for e in context.allGather(str(num_gpus))]
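
This train_fn matches the shape expected by spark-tensorflow-distributor's MirroredStrategyRunner, which launches it inside a barrier stage through a wrapper like wrapped_train_fn above and surfaces the partition-0 result to the driver. A hedged usage sketch, assuming that library is installed and GPUs are exposed to Spark tasks; the num_slots value is arbitrary.

# Usage sketch (assumptions: spark-tensorflow-distributor is installed and the
# cluster exposes GPU resources to Spark tasks; num_slots is arbitrary here).
from spark_tensorflow_distributor import MirroredStrategyRunner

# Each barrier task runs train_fn and allGathers its visible-GPU count; per
# wrapped_train_fn above, only the partition-0 task returns a real result.
gpu_counts = MirroredStrategyRunner(num_slots=2).run(train_fn)
print(gpu_counts)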
Example 12
        def _start_ray_services(iter):
            from pyspark import BarrierTaskContext
            from zoo.util.utils import get_node_ip
            tc = BarrierTaskContext.get()
            current_ip = get_node_ip()
            print("current address {}".format(current_ip))
            print("master address {}".format(master_ip))
            redis_address = "{}:{}".format(master_ip, self.redis_port)
            process_info = None
            base_path = tempfile.gettempdir()
            ray_master_flag_path = os.path.join(base_path,
                                                self.ray_master_flag)
            if current_ip == master_ip:  # Start the ray master.
                # It is possible that multiple executors are on one node. In this case,
                # the first executor that gets the lock would be the master and it would
                # create a flag to indicate the master has initialized.
                # The flag file is removed when ray start processes finish so that this
                # won't affect other programs.
                ray_master_lock_path = os.path.join(base_path,
                                                    self.ray_master_lock)
                with filelock.FileLock(ray_master_lock_path):
                    if not os.path.exists(ray_master_flag_path):
                        print("partition id is : {}".format(tc.partitionId()))
                        process_info = self._start_ray_node(
                            command=self._gen_master_command(),
                            tag="ray-master")
                        process_info.master_addr = redis_address
                        os.mknod(ray_master_flag_path)

            tc.barrier()
            if not process_info:  # Start raylets.
                # Add a lock to avoid starting multiple raylets on one node at the same time.
                # See this issue: https://github.com/ray-project/ray/issues/10154
                raylet_lock_path = os.path.join(base_path, self.raylet_lock)
                with filelock.FileLock(raylet_lock_path):
                    print("partition id is : {}".format(tc.partitionId()))
                    process_info = self._start_ray_node(
                        command=RayServiceFuncGenerator._get_raylet_command(
                            redis_address=redis_address,
                            ray_exec=self.ray_exec,
                            password=self.password,
                            ray_node_cpu_cores=self.ray_node_cpu_cores,
                            labels=self.labels,
                            object_store_memory=self.object_store_memory,
                            extra_params=self.extra_params),
                        tag="raylet")
                    kill_redundant_log_monitors(redis_address=redis_address)

            if os.path.exists(ray_master_flag_path):
                os.remove(ray_master_flag_path)
            yield process_info
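
The file lock plus flag file above is what lets several executors on one host agree on a single ray master: whichever task acquires the lock first starts the master and creates the flag, and the others see the flag and fall through to starting raylets. Below is a stripped-down sketch of just that pattern; the paths, flag name, and start_fn are hypothetical.

import os
import tempfile
import filelock

# Stripped-down sketch of the lock + flag pattern above (hypothetical names).
flag_path = os.path.join(tempfile.gettempdir(), "ray_master_started.flag")
lock_path = os.path.join(tempfile.gettempdir(), "ray_master_started.lock")

def maybe_start_master(start_fn):
    """Run start_fn() in at most one process per host; return True if this process ran it."""
    with filelock.FileLock(lock_path):
        if os.path.exists(flag_path):
            return False              # another process on this host already started it
        start_fn()
        os.mknod(flag_path)           # mark the master as started on this host
        return True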
Example 13
    def test_barrier_infos(self):
        """
        Verify that BarrierTaskContext.getTaskInfos() returns a list of all task infos in the
        barrier stage.
        """
        rdd = self.sc.parallelize(range(10), 4)

        def f(iterator):
            yield sum(iterator)

        taskInfos = rdd.barrier().mapPartitions(f).map(lambda x: BarrierTaskContext.get()
                                                       .getTaskInfos()).collect()
        self.assertTrue(len(taskInfos) == 4)
        self.assertTrue(len(taskInfos[0]) == 4)
Example 15
def find_ip_and_port(pre_iter):
    tc = BarrierTaskContext.get()
    address = tc.getTaskInfos()[tc.partitionId()].address.split(":")[0]
    with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
        s.bind(("", 0))
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        tc.barrier()
        free_ip_port = f"{address}:{s.getsockname()[1]}"
    return [free_ip_port]
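
The socket here stays bound across tc.barrier(), so each task keeps its chosen port reserved until every task has picked one; only then are the ports released and reported. A driver-side sketch of collecting one free "ip:port" per task follows (the SparkContext, the worker count, and module-scope availability of find_ip_and_port are assumptions):

# Driver-side sketch (assumptions: a SparkContext named `sc`, an arbitrary
# num_workers, and find_ip_and_port defined at module scope as above).
num_workers = 2
rdd = sc.parallelize(range(num_workers), num_workers)
# One "ip:port" string per barrier task, each port free at the moment all
# tasks synchronized at tc.barrier().
addresses = rdd.barrier().mapPartitions(find_ip_and_port).collect()
print(addresses)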
Example 16
        def _start_ray_services(iter):
            from pyspark import BarrierTaskContext
            tc = BarrierTaskContext.get()
            # The addresses are sorted by partitionId according to the comments
            # Partition 0 is the Master
            task_addrs = [taskInfo.address for taskInfo in tc.getTaskInfos()]
            print(task_addrs)
            master_ip = task_addrs[0].split(":")[0]
            print("current address {}".format(task_addrs[tc.partitionId()]))
            print("master address {}".format(master_ip))
            redis_address = "{}:{}".format(master_ip, self.redis_port)
            process_info = None
            if tc.partitionId() == 0:
                print("partition id is : {}".format(tc.partitionId()))
                process_info = self._start_ray_node(
                    command=self._gen_master_command(), tag="ray-master")
                process_info.master_addr = redis_address

            tc.barrier()
            if tc.partitionId() != 0:
                import tempfile
                import filelock

                base_path = tempfile.gettempdir()
                lock_path = os.path.join(base_path, "ray_on_spark_start.lock")
                with filelock.FileLock(lock_path):
                    print("partition id is : {}".format(tc.partitionId()))
                    process_info = self._start_ray_node(
                        command=RayServiceFuncGenerator._get_raylet_command(
                            redis_address=redis_address,
                            ray_exec=self.ray_exec,
                            password=self.password,
                            ray_node_cpu_cores=self.ray_node_cpu_cores,
                            labels=self.labels,
                            object_store_memory=self.object_store_memory,
                            extra_params=self.extra_params),
                        tag="raylet")
                    kill_redundant_log_monitors(redis_address=redis_address)

            yield process_info
Example 17
 def context_barrier(x):
     tc = BarrierTaskContext.get()
     time.sleep(random.randint(1, 5) * 2)
     tc.barrier()
     return time.time()
Example 18
 def context_barrier(x):
     tc = BarrierTaskContext.get()
     time.sleep(random.randint(1, 10))
     tc.barrier()
     return time.time()
Example 19
 def context_barrier(x):
     tc = BarrierTaskContext.get()
     time.sleep(random.randint(1, 10))
     tc.barrier()
     return (time.time(), os.getpid())
Example 20
 def context_barrier(x):
     tc = BarrierTaskContext.get()
     time.sleep(random.randint(1, 10))
     out = tc.allGather(str(tc.partitionId()))
     pids = [int(e) for e in out]
     return pids
Example 21
def find_ip_and_port(pre_iter):
    tc = BarrierTaskContext.get()
    free_port = find_free_port(tc)
    return [free_port]