def load(self, model_path, batch_size=0):
    """
    Load an OpenVINO model.

    :param model_path: String. The file path to the OpenVINO IR xml file.
    :param batch_size: Int. Set batch size, default is 0 (use the model's default batch size).
    :return:
    """
    self.node_num, self.core_num = get_node_and_core_number()
    self.path = model_path
    if batch_size != 0:
        self.batch_size = batch_size
    else:
        # Fall back to the batch dimension recorded in the IR xml: the first
        # dim of the first layer's output port.
        import xml.etree.ElementTree as ET
        tree = ET.parse(model_path)
        root = tree.getroot()
        shape_item = root.find('./layers/layer/output/port/dim[1]')
        if shape_item is None:
            raise ValueError(
                "Invalid OpenVINO IR xml file, please check your model_path")
        self.batch_size = int(shape_item.text)
    self.model = InferenceModel(supported_concurrent_num=self.core_num)
    # batch_size may still be 0 here, in which case load_openvino uses the
    # model's default batch size.
    self.model.load_openvino(
        model_path=model_path,
        weight_path=model_path[:model_path.rindex(".")] + ".bin",
        batch_size=batch_size)
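
# Usage sketch for the `load` method above (not part of the original source;
# the instance name `est` and the paths are hypothetical):
#
#   est.load("/path/to/model.xml")                # batch size parsed from the IR xml
#   est.load("/path/to/model.xml", batch_size=8)  # explicit batch size
#
# The matching "/path/to/model.bin" weight file is resolved automatically by
# swapping the xml extension.
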
def partition(data, num_shards=None):
    """
    Partition local in-memory data and form a SparkXShards.

    :param data: np.ndarray, a tuple, list, dict of np.ndarray, or a nested
           structure made of tuple, list, dict with ndarray as the leaf value.
    :param num_shards: the number of shards that the data will be partitioned into.
    :return: a SparkXShards
    """
    sc = init_nncontext()
    node_num, core_num = get_node_and_core_number()
    shard_num = node_num * core_num if num_shards is None else num_shards
    import numpy as np
    type_err_msg = """
The types supported in zoo.orca.data.XShards.partition are
1. np.ndarray
2. a tuple, list, dict of np.ndarray
3. nested structure made of tuple, list, dict with ndarray as the leaf value

But got data of type {}
""".format(type(data))
    supported_types = {list, tuple, dict}
    if isinstance(data, np.ndarray):
        if data.shape[0] < shard_num:
            raise ValueError(
                "The length of data {} is smaller than the total number "
                "of shards {}. Please adjust the num_shards option to be "
                "at most {}.".format(data.shape[0], shard_num, data.shape[0]))
        arrays = np.array_split(data, shard_num)
        rdd = sc.parallelize(arrays)
    else:
        assert type(data) in supported_types, type_err_msg
        flattened = nest.flatten(data)
        data_length = len(flattened[0])
        data_to_be_shard = []
        if data_length < shard_num:
            raise ValueError(
                "The length of data {} is smaller than the total number "
                "of shards {}. Please adjust the num_shards option to be "
                "at most {}.".format(data_length, shard_num, data_length))
        for i in range(shard_num):
            data_to_be_shard.append([])
        for x in flattened:
            assert len(x) == data_length, \
                "the ndarrays in data must all have the same size in the first dimension, " \
                "got first ndarray of size {} and another of size {}".format(
                    data_length, len(x))
            x_parts = np.array_split(x, shard_num)
            for idx, x_part in enumerate(x_parts):
                data_to_be_shard[idx].append(x_part)
        # Restore the original nested structure for each shard.
        data_to_be_shard = [nest.pack_sequence_as(data, shard)
                            for shard in data_to_be_shard]
        rdd = sc.parallelize(data_to_be_shard)
    data_shards = SparkXShards(rdd)
    return data_shards
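
# A minimal, Spark-free sketch of the sharding logic in `partition` above.
# Illustrative only: `_local_shard_demo` is a hypothetical helper, plain
# lists/dicts stand in for nest.flatten / nest.pack_sequence_as, and the
# shard count is hard-coded where the real code uses get_node_and_core_number().
def _local_shard_demo():
    import numpy as np
    data = {"x": np.arange(8).reshape(8, 1), "y": np.arange(8)}
    shard_num = 2
    flattened = [data["x"], data["y"]]                  # nest.flatten(data)
    parts = [np.array_split(x, shard_num) for x in flattened]
    # Regroup column-wise per shard, restoring the dict structure
    # (the role of nest.pack_sequence_as).
    shards = [{"x": px, "y": py} for px, py in zip(*parts)]
    return shards  # two dicts, each holding one half of every array
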
def __init__(self, dataset, batch_size,
             batch_per_thread,
             validation_dataset=None,
             intra_threads=None, inter_threads=None):
    node_num, core_num = get_node_and_core_number()
    self.intra_threads = intra_threads
    self.inter_threads = inter_threads
    if intra_threads is None:
        self.intra_threads = core_num
    if inter_threads is None:
        self.inter_threads = 1

    if batch_size > 0:
        num_parts = dataset.xshards.num_partitions()
        if num_parts != node_num:
            dataset.xshards = dataset.xshards.repartition(node_num)
        assert batch_size % node_num == 0, \
            "batch_size should be a multiple of node_num, got" \
            " batch_size {}, node_num {}".format(batch_size, node_num)
        batch_per_shard = batch_size // node_num
        self.drop_remainder = True
    elif batch_per_thread > 0:
        batch_per_shard = batch_per_thread
        self.drop_remainder = False
    else:
        raise ValueError("one of batch_size or batch_per_thread must be larger than 0")

    self.rdd = dataset.as_graph_rdd(batch_per_shard,
                                    drop_remainder=self.drop_remainder).cache()
    meta_info = self.rdd.map(lambda x: x[1]).first()
    tensor_structure = meta_info["tensor_structure"]
    self.init_op_name = meta_info["init_op_name"]
    self.output_names = meta_info["output_names"]
    self.output_types = meta_info["output_types"]
    self.table_init_op = meta_info["table_init_op"]

    if validation_dataset is not None:
        self.val_rdd = validation_dataset.as_graph_rdd(batch_per_shard, False).cache()
        meta_info = self.val_rdd.map(lambda x: x[1]).first()
        self.val_init_op_name = meta_info["init_op_name"]
        self.val_output_names = meta_info["output_names"]
        self.val_output_types = meta_info["output_types"]
    else:
        self.val_rdd = None
        self.val_init_op_name = None
        self.val_output_names = None
        self.val_output_types = None

    super().__init__(tensor_structure, batch_size=batch_size,
                     batch_per_thread=batch_per_thread,
                     hard_code_batch_size=False)
    self.shard_index_op_name = None
    self.validation_dataset = validation_dataset
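
# Sketch of the per-shard batch arithmetic used in __init__ above. The values
# are assumed for illustration; node_num normally comes from
# get_node_and_core_number(). With batch_size > 0 the remainder is dropped
# (training); with batch_per_thread > 0 every sample is kept (inference).
def _batch_per_shard_demo(batch_size=32, node_num=4):
    assert batch_size % node_num == 0  # otherwise __init__ above raises
    return batch_size // node_num      # 32 // 4 -> 8 samples per shard per step
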
def load(self, model_path):
    """
    Load an OpenVINO model.

    :param model_path: String. The file path to the OpenVINO IR xml file.
    :return:
    """
    self.node_num, self.core_num = get_node_and_core_number()
    assert isinstance(model_path, str), "The model_path should be a string."
    assert os.path.exists(model_path), "The model_path should exist."
    with open(model_path, 'rb') as file:
        self.model_bytes = file.read()
    # The weights live in a .bin file next to the IR xml file.
    with open(model_path[:model_path.rindex(".")] + ".bin", 'rb') as file:
        self.weight_bytes = file.read()
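
# Quick standalone check of the weight-path derivation used in `load` above
# (the path is hypothetical; pure string manipulation, safe to run anywhere):
#
#   p = "/models/resnet.xml"
#   p[:p.rindex(".")] + ".bin"   # -> "/models/resnet.bin"
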
def partition(data):
    """
    Partition local in-memory data and form a SparkXShards.

    :param data: np.ndarray, a tuple, list, dict of np.ndarray, or a nested
           structure made of tuple, list, dict with ndarray as the leaf value.
    :return: a SparkXShards
    """
    sc = init_nncontext()
    node_num, core_num = get_node_and_core_number()
    total_core_num = node_num * core_num
    import numpy as np
    type_err_msg = """
The types supported in zoo.orca.data.XShards.partition are
1. np.ndarray
2. a tuple, list, dict of np.ndarray
3. nested structure made of tuple, list, dict with ndarray as the leaf value

But got data of type {}
""".format(type(data))
    supported_types = {list, tuple, dict}
    if isinstance(data, np.ndarray):
        arrays = np.array_split(data, total_core_num)
        rdd = sc.parallelize(arrays)
    else:
        assert type(data) in supported_types, type_err_msg
        flattened = nest.flatten(data)
        data_length = len(flattened[0])
        data_to_be_shard = []
        for i in range(total_core_num):
            data_to_be_shard.append([])
        for x in flattened:
            assert len(x) == data_length, \
                "the ndarrays in data must all have the same size in the first dimension, " \
                "got first ndarray of size {} and another of size {}".format(
                    data_length, len(x))
            x_parts = np.array_split(x, total_core_num)
            for idx, x_part in enumerate(x_parts):
                data_to_be_shard[idx].append(x_part)
        # Restore the original nested structure for each shard.
        data_to_be_shard = [nest.pack_sequence_as(data, shard)
                            for shard in data_to_be_shard]
        rdd = sc.parallelize(data_to_be_shard)
    data_shards = SparkXShards(rdd)
    return data_shards
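
# Why the num_shards variant of `partition` above adds a length check while
# this one does not: np.array_split silently pads with empty arrays when asked
# for more sections than there are rows, which would yield empty shards here.
# A standalone illustration (plain numpy, hypothetical sizes):
#
#   import numpy as np
#   np.array_split(np.arange(3), 4)
#   # -> [array([0]), array([1]), array([2]), array([], dtype=...)]
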
def __init__(self, *, model_path, batch_size=0):
    """
    Create an OpenVINO model from an IR xml file.

    :param model_path: String. The file path to the OpenVINO IR xml file.
    :param batch_size: Int. Set batch size, default is 0 (use the model's default batch size).
    """
    self.node_num, self.core_num = get_node_and_core_number()
    self.path = model_path
    if batch_size != 0:
        self.batch_size = batch_size
    else:
        # Fall back to the batch dimension recorded in the IR xml: the first
        # dim of the first layer's output port.
        import xml.etree.ElementTree as ET
        tree = ET.parse(model_path)
        root = tree.getroot()
        shape_item = root.find('./layers/layer/output/port/dim[1]')
        if shape_item is None:
            raise ValueError(
                "Invalid OpenVINO IR xml file, please check your model_path")
        self.batch_size = int(shape_item.text)
    self.model = InferenceModel(supported_concurrent_num=self.core_num)
    # batch_size may still be 0 here, in which case load_openvino uses the
    # model's default batch size.
    self.model.load_openvino(
        model_path=model_path,
        weight_path=model_path[:model_path.rindex(".")] + ".bin",
        batch_size=batch_size)
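
# Standalone illustration of the batch-size lookup used above, with a tiny
# inline IR-like xml (hypothetical layer layout; real IR files are far larger):
def _parse_batch_size_demo():
    import xml.etree.ElementTree as ET
    demo_xml = ("<net><layers><layer><output>"
                "<port><dim>4</dim><dim>3</dim><dim>224</dim><dim>224</dim></port>"
                "</output></layer></layers></net>")
    root = ET.fromstring(demo_xml)
    dim = root.find('./layers/layer/output/port/dim[1]')  # first dim = batch
    return int(dim.text)  # -> 4
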