def init_resource(config=None):
    """Initialize NPU resources and return the session plus the shutdown op."""
    if not isinstance(config, config_pb2.ConfigProto):
        config = config_pb2.ConfigProto()
    # Reuse an existing NpuOptimizer entry if the caller already configured one.
    npu_optimizer = None
    for custom_optimizer in config.graph_options.rewrite_options.custom_optimizers:
        if custom_optimizer.name == 'NpuOptimizer':
            npu_optimizer = custom_optimizer
            break
    if not npu_optimizer:
        npu_optimizer = config.graph_options.rewrite_options.custom_optimizers.add()
        npu_optimizer.name = 'NpuOptimizer'
    # Disable graph rewrites and XLA JIT that conflict with the NPU optimizer.
    config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF
    config.allow_soft_placement = True
    config.log_device_placement = False
    config.graph_options.rewrite_options.remapping = RewriterConfig.OFF
    config.graph_options.optimizer_options.global_jit_level = config_pb2.OptimizerOptions.OFF
    util.global_dict_init()
    npu_init = npu_ops.initialize_system()
    npu_shutdown = npu_ops.shutdown_system()
    sess = session.Session(config=config)
    sess.run(npu_init)
    # Cache the HCCL rank information in the global dict.
    npu_rank_id = get_rank_id()
    npu_local_rank_id = get_local_rank_id()
    npu_rank_size = get_rank_size()
    npu_local_rank_size = get_local_rank_size()
    util.set_value("npu_rank_id", npu_rank_id)
    util.set_value("npu_local_rank_id", npu_local_rank_id)
    util.set_value("npu_rank_size", npu_rank_size)
    util.set_value("npu_local_rank_size", npu_local_rank_size)
    return sess, npu_shutdown
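# Usage sketch (not part of the original source): assumes the init_resource() above
# and an Ascend/NPU runtime. The caller runs the returned shutdown op before closing
# the session so the device resources are released.
sess, npu_shutdown = init_resource()
try:
    pass  # ... build the graph and run training steps with `sess` ...
finally:
    sess.run(npu_shutdown)
    sess.close()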
def _init_distributed_setting(self):
    if not self.distributed:
        return
    self._world_size = hvd.size() if zeus.is_gpu_device() else get_rank_size()
    self._rank_id = hvd.rank() if zeus.is_gpu_device() else get_rank_id()
    self._local_rank_id = hvd.local_rank() if zeus.is_gpu_device() else get_local_rank_id()
def _init_distributed_setting(self):
    if not self.distributed:
        return
    if zeus.is_npu_device():
        self.npu_init = npu_ops.initialize_system()
        self.npu_shutdown = npu_ops.shutdown_system()
        self.sess.run(self.npu_init)
    self._world_size = hvd.size() if zeus.is_gpu_device() else get_rank_size()
    self._rank_id = hvd.rank() if zeus.is_gpu_device() else get_rank_id()
    self._local_rank_id = hvd.local_rank() if zeus.is_gpu_device() else get_local_rank_id()
def init_resource():
    """Initialize NPU resources and return the session plus the shutdown op."""
    util.global_dict_init()
    npu_init = npu_ops.initialize_system()
    npu_shutdown = npu_ops.shutdown_system()
    config = config_pb2.ConfigProto(allow_soft_placement=True,
                                    log_device_placement=False)
    custom_op = config.graph_options.rewrite_options.custom_optimizers.add()
    custom_op.name = "NpuOptimizer"
    config.graph_options.rewrite_options.remapping = RewriterConfig.OFF
    sess = session.Session(config=config)
    sess.run(npu_init)
    npu_rank_id = get_rank_id()
    npu_local_rank_id = get_local_rank_id()
    npu_rank_size = get_rank_size()
    util.set_value("npu_rank_id", npu_rank_id)
    util.set_value("npu_local_rank_id", npu_local_rank_id)
    util.set_value("npu_rank_size", npu_rank_size)
    return sess, npu_shutdown
def _init_distributed_setting(self):
    if not self.distributed:
        return
    if zeus.is_npu_device():
        from npu_bridge.estimator import npu_ops
        self.npu_init = npu_ops.initialize_system()
        self.npu_shutdown = npu_ops.shutdown_system()
        self.sess.run(self.npu_init)
    if zeus.is_gpu_device():
        # Horovod is only needed (and may only be installed) on GPU hosts.
        import horovod.tensorflow as hvd
        self._world_size = hvd.size()
        self._rank_id = hvd.rank()
        self._local_rank_id = hvd.local_rank()
    elif zeus.is_npu_device():
        from hccl.manage.api import get_local_rank_id
        from hccl.manage.api import get_rank_id
        from hccl.manage.api import get_rank_size
        self._world_size = get_rank_size()
        self._rank_id = get_rank_id()
        self._local_rank_id = get_local_rank_id()
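# Matching teardown sketch (assumption, not in the original excerpt): a trainer that
# runs initialize_system() here would execute the stored shutdown op once training
# finishes; `_shutdown_distributed` is a hypothetical hook name.
def _shutdown_distributed(self):
    if self.distributed and zeus.is_npu_device():
        self.sess.run(self.npu_shutdown)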
def _experimental_distribute_dataset(self, dataset):
    # Give each HCCL rank a disjoint, interleaved slice of the dataset.
    return dataset.shard(get_rank_size(), get_rank_id())
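# Sharding sketch (hypothetical values, standard tf.data semantics): with a world
# size of 4 and rank id 1, shard(4, 1) keeps elements 1, 5, 9, ... so the four
# ranks read disjoint, interleaved slices of the same input.
import tensorflow as tf

example = tf.data.Dataset.range(8).shard(num_shards=4, index=1)
# -> yields 1, 5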