Example #1
# Imports assumed for the npu_bridge / TF 1.x environment these examples
# target; `util` is a project-local helper module providing a global
# key/value store.
from hccl.manage.api import (get_local_rank_id, get_local_rank_size,
                             get_rank_id, get_rank_size)
from npu_bridge.estimator import npu_ops
from tensorflow.core.protobuf import config_pb2
from tensorflow.core.protobuf.rewriter_config_pb2 import RewriterConfig
from tensorflow.python.client import session


def init_resource(config=None):
    """Initialize the NPU system and return a session plus its shutdown op."""
    # isinstance() already accepts subclasses, so the original's extra
    # issubclass() test was redundant; a single check is equivalent.
    if not isinstance(config, config_pb2.ConfigProto):
        config = config_pb2.ConfigProto()

    npu_optimizer = None
    for custom_optimizer in config.graph_options.rewrite_options.custom_optimizers:
        if custom_optimizer.name == 'NpuOptimizer':
            npu_optimizer = custom_optimizer
            break
    if not npu_optimizer:
        npu_optimizer = config.graph_options.rewrite_options.custom_optimizers.add()
        npu_optimizer.name = 'NpuOptimizer'
        config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF

    config.allow_soft_placement = True
    config.log_device_placement = False
    config.graph_options.rewrite_options.remapping = RewriterConfig.OFF
    config.graph_options.optimizer_options.global_jit_level = config_pb2.OptimizerOptions.OFF

    util.global_dict_init()
    npu_init = npu_ops.initialize_system()    # op that brings the NPU system up
    npu_shutdown = npu_ops.shutdown_system()  # op that tears it down again

    sess = session.Session(config=config)
    sess.run(npu_init)
    npu_rank_id = get_rank_id()
    npu_local_rank_id = get_local_rank_id()
    npu_rank_size = get_rank_size()
    npu_local_rank_size = get_local_rank_size()
    util.set_value("npu_rank_id", npu_rank_id)
    util.set_value("npu_local_rank_id", npu_local_rank_id)
    util.set_value("npu_rank_size", npu_rank_size)
    util.set_value("npu_local_rank_size", npu_local_rank_size)
    return sess, npu_shutdown
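
For context, a minimal driver sketch for Example #1; the training body is a placeholder, and running the returned npu_shutdown op before closing the session mirrors the pattern the examples themselves use:

sess, npu_shutdown = init_resource()
try:
    pass  # build the graph and run training steps against `sess` here
finally:
    sess.run(npu_shutdown)  # release the NPU system before exiting
    sess.close()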
Example #2
def _init_distributed_setting(self):
    if not self.distributed:
        return
    self._world_size = hvd.size() if zeus.is_gpu_device() else get_rank_size()
    self._rank_id = hvd.rank() if zeus.is_gpu_device() else get_rank_id()
    self._local_rank_id = hvd.local_rank() if zeus.is_gpu_device() else get_local_rank_id()
Example #3
def _init_distributed_setting(self):
    if not self.distributed:
        return
    if zeus.is_npu_device():
        self.npu_init = npu_ops.initialize_system()
        self.npu_shutdown = npu_ops.shutdown_system()
        self.sess.run(self.npu_init)
    self._world_size = hvd.size() if zeus.is_gpu_device() else get_rank_size()
    self._rank_id = hvd.rank() if zeus.is_gpu_device() else get_rank_id()
    self._local_rank_id = hvd.local_rank() if zeus.is_gpu_device() else get_local_rank_id()
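
Example #3 leaves self.npu_shutdown pending. A teardown counterpart would run it through the same session; the method name below is illustrative, not taken from the source:

def _shutdown_distributed_setting(self):
    # Hypothetical mirror of _init_distributed_setting: releases the NPU
    # system that initialize_system() brought up.
    if self.distributed and zeus.is_npu_device():
        self.sess.run(self.npu_shutdown)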
Example #4
# Relies on the same imports as Example #1.
def init_resource():
    util.global_dict_init()
    npu_init = npu_ops.initialize_system()
    npu_shutdown = npu_ops.shutdown_system()
    config = config_pb2.ConfigProto(allow_soft_placement=True,
                                    log_device_placement=False)
    custom_op = config.graph_options.rewrite_options.custom_optimizers.add()
    custom_op.name = "NpuOptimizer"
    config.graph_options.rewrite_options.remapping = RewriterConfig.OFF
    sess = session.Session(config=config)
    sess.run(npu_init)
    npu_rank_id = get_rank_id()
    npu_local_rank_id = get_local_rank_id()
    npu_rank_size = get_rank_size()
    util.set_value("npu_rank_id", npu_rank_id)
    util.set_value("npu_local_rank_id", npu_local_rank_id)
    util.set_value("npu_rank_size", npu_rank_size)
    return sess, npu_shutdown
Example #5
    def _init_distributed_setting(self):
        if not self.distributed:
            return

        if zeus.is_npu_device():
            from npu_bridge.estimator import npu_ops
            self.npu_init = npu_ops.initialize_system()
            self.npu_shutdown = npu_ops.shutdown_system()
            self.sess.run(self.npu_init)

        if zeus.is_gpu_device():
            # Import lazily so NPU-only environments need not install Horovod.
            import horovod.tensorflow as hvd
            self._world_size = hvd.size()
            self._rank_id = hvd.rank()
            self._local_rank_id = hvd.local_rank()
        elif zeus.is_npu_device():
            from hccl.manage.api import get_local_rank_id
            from hccl.manage.api import get_rank_size
            from hccl.manage.api import get_rank_id
            self._world_size = get_rank_size()
            self._rank_id = get_rank_id()
            self._local_rank_id = get_local_rank_id()
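
Horovod's rank queries only return valid values after horovod.tensorflow.init() has run, so the GPU branch above assumes initialization happened at process startup, along these lines:

# Hypothetical GPU-path startup; hvd.init() must precede hvd.size()/hvd.rank().
import horovod.tensorflow as hvd

hvd.init()
print(hvd.size(), hvd.rank(), hvd.local_rank())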
Example #6
def _experimental_distribute_dataset(self, dataset):
    # Give each process a disjoint 1/world_size slice of the dataset.
    return dataset.shard(get_rank_size(), get_rank_id())
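
Dataset.shard(num_shards, index) keeps every num_shards-th element starting at index, which is what gives each rank a disjoint slice above. A standalone sketch with illustrative constants standing in for the HCCL queries (TF 2.x eager mode for brevity):

import tensorflow as tf

rank_size, rank_id = 2, 0  # stand-ins for get_rank_size() / get_rank_id()
dataset = tf.data.Dataset.range(8).shard(rank_size, rank_id)
print(list(dataset.as_numpy_iterator()))  # rank 0 sees [0, 2, 4, 6]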