Example #1
    def load(self, model_path, batch_size=0):
        """
        Load an OpenVINO model.

        :param model_path: String. The file path to the OpenVINO IR xml file.
        :param batch_size: Int. The batch size; default is 0 (use the model's default batch size, as recorded in the IR xml file).
        :return:
        """
        self.node_num, self.core_num = get_node_and_core_number()
        self.path = model_path
        if batch_size != 0:
            self.batch_size = batch_size
        else:
            import xml.etree.ElementTree as ET
            tree = ET.parse(model_path)
            root = tree.getroot()
            shape_item = root.find('./layers/layer/output/port/dim[1]')
            if shape_item is None:
                raise ValueError(
                    "Invalid openVINO IR xml file, please check your model_path"
                )
            self.batch_size = int(shape_item.text)
        self.model = InferenceModel(supported_concurrent_num=self.core_num)
        self.model.load_openvino(
            model_path=model_path,
            weight_path=model_path[:model_path.rindex(".")] + ".bin",
            batch_size=batch_size)
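A minimal usage sketch for this `load` method, assuming it lives on an OpenVINO estimator class (the class name `Estimator` and the file path below are illustrative, not taken from the snippet):

    # Hypothetical usage; `Estimator` and the path are assumptions.
    est = Estimator()
    est.load("/path/to/model.xml")  # batch_size=0: batch size is read from the IR xml
    # or: est.load("/path/to/model.xml", batch_size=16) to set it explicitly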
Example #2
    def partition(data, num_shards=None):
        """
        Partition local in-memory data and form a SparkXShards.
        :param data: np.ndarray, a tuple, list, dict of np.ndarray, or a nested structure
        made of tuple, list, dict with ndarray as the leaf value
        :param num_shards: the number of shards that the data will be partitioned into;
        defaults to node_num * core_num when not specified
        :return: a SparkXShards
        """
        sc = init_nncontext()
        node_num, core_num = get_node_and_core_number()
        shard_num = node_num * core_num if num_shards is None else num_shards
        import numpy as np
        type_err_msg = """
The types supported in zoo.orca.data.XShards.partition are
1. np.ndarray
2. a tuple, list, dict of np.ndarray
3. nested structure made of tuple, list, dict with ndarray as the leaf value

But got data of type {}
        """.format(type(data))
        supported_types = {list, tuple, dict}
        if isinstance(data, np.ndarray):
            if data.shape[0] < shard_num:
                raise ValueError(
                    "The length of data {} is smaller than the total number "
                    "of shards {}. Please adjust the num_shards option to be "
                    "at most {}.".format(data.shape[0], shard_num,
                                         data.shape[0]))
            arrays = np.array_split(data, shard_num)
            rdd = sc.parallelize(arrays)
        else:
            assert type(data) in supported_types, type_err_msg
            flattened = nest.flatten(data)
            data_length = len(flattened[0])
            data_to_be_shard = []
            if data_length < shard_num:
                raise ValueError(
                    "The length of data {} is smaller than the total number "
                    "of shards {}. Please adjust the num_shards option to be "
                    "at most {}.".format(data_length, shard_num, data_length))
            for i in range(shard_num):
                data_to_be_shard.append([])
            for x in flattened:
                assert len(x) == data_length, \
                    "the ndarrays in data must all have the same size in first dimension, " \
                    "got first ndarray of size {} and another {}".format(data_length, len(x))
                x_parts = np.array_split(x, shard_num)
                for idx, x_part in enumerate(x_parts):
                    data_to_be_shard[idx].append(x_part)

            data_to_be_shard = [
                nest.pack_sequence_as(data, shard)
                for shard in data_to_be_shard
            ]
            rdd = sc.parallelize(data_to_be_shard)

        data_shards = SparkXShards(rdd)
        return data_shards
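A short usage sketch, assuming this `partition` is the static method referenced in the error message above as zoo.orca.data.XShards.partition (the input dict and shard count are illustrative):

    import numpy as np
    from zoo.orca.data import XShards

    # A dict of ndarrays sharing the same first dimension, split into 4 shards.
    data = {"x": np.random.rand(100, 10),
            "y": np.random.randint(0, 2, size=(100,))}
    shards = XShards.partition(data, num_shards=4)  # returns a SparkXShards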
Example #3
    def __init__(self, dataset, batch_size,
                 batch_per_thread,
                 validation_dataset=None, intra_threads=None, inter_threads=None):

        node_num, core_num = get_node_and_core_number()

        self.intra_threads = intra_threads
        self.inter_threads = inter_threads
        if intra_threads is None:
            self.intra_threads = core_num

        if inter_threads is None:
            self.inter_threads = 1

        if batch_size > 0:
            num_parts = dataset.xshards.num_partitions()
            if num_parts != node_num:
                dataset.xshards = dataset.xshards.repartition(node_num)
            assert batch_size % node_num == 0, \
                "batch_size should be a multiple of num_shards, got" \
                " batch_size {}, node_num {}".format(batch_size, node_num)
            batch_per_shard = batch_size // node_num
            self.drop_remainder = True
        elif batch_per_thread > 0:
            batch_per_shard = batch_per_thread
            self.drop_remainder = False
        else:
            raise ValueError("one of batch_size or batch_per_thread must be larger than 0")

        self.rdd = dataset.as_graph_rdd(batch_per_shard,
                                        drop_remainder=self.drop_remainder).cache()
        meta_info = self.rdd.map(lambda x: x[1]).first()
        tensor_structure = meta_info["tensor_structure"]
        self.init_op_name = meta_info["init_op_name"]
        self.output_names = meta_info["output_names"]
        self.output_types = meta_info["output_types"]
        self.table_init_op = meta_info["table_init_op"]

        if validation_dataset is not None:
            self.val_rdd = validation_dataset.as_graph_rdd(batch_per_shard, False).cache()
            meta_info = self.val_rdd.map(lambda x: x[1]).first()
            self.val_init_op_name = meta_info["init_op_name"]
            self.val_output_names = meta_info["output_names"]
            self.val_output_types = meta_info["output_types"]
        else:
            self.val_rdd = None
            self.val_init_op_name = None
            self.val_output_names = None
            self.val_output_types = None

        super().__init__(tensor_structure, batch_size=batch_size,
                         batch_per_thread=batch_per_thread,
                         hard_code_batch_size=False)
        self.shard_index_op_name = None
        self.validation_dataset = validation_dataset
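To make the batching arithmetic above concrete: a global batch_size is spread evenly over node_num shards, which is why the code asserts divisibility before computing batch_per_shard. A toy check mirroring that logic (the numbers are illustrative only):

    node_num = 4
    batch_size = 32
    assert batch_size % node_num == 0         # same check as in __init__
    batch_per_shard = batch_size // node_num  # each shard consumes 8 records per step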
Example #4
    def load(self, model_path):
        """
        Load an OpenVINO model.

        :param model_path: String. The file path to the OpenVINO IR xml file.
        :return:
        """
        self.node_num, self.core_num = get_node_and_core_number()
        assert isinstance(model_path, str), "The model_path should be a string."
        assert os.path.exists(model_path), "The model_path should exist."
        with open(model_path, 'rb') as file:
            self.model_bytes = file.read()

        with open(model_path[:model_path.rindex(".")] + ".bin", 'rb') as file:
            self.weight_bytes = file.read()
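The weight file path is derived from model_path by swapping the .xml extension for .bin, so the two IR files are expected to sit next to each other. A quick illustration of that string manipulation (the path is hypothetical):

    model_path = "/tmp/resnet50.xml"
    weight_path = model_path[:model_path.rindex(".")] + ".bin"
    # weight_path == "/tmp/resnet50.bin"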
Example #5
    def partition(data):
        """
        Partition local in-memory data and form a SparkXShards.
        :param data: np.ndarray, a tuple, list, dict of np.ndarray, or a nested structure
        made of tuple, list, dict with ndarray as the leaf value
        :return: a SparkXShards
        """
        sc = init_nncontext()
        node_num, core_num = get_node_and_core_number()
        total_core_num = node_num * core_num
        import numpy as np
        type_err_msg = """
The types supported in zoo.orca.data.XShards.partition are
1. np.ndarray
2. a tuple, list, dict of np.ndarray
3. nested structure made of tuple, list, dict with ndarray as the leaf value

But got data of type {}
        """.format(type(data))
        supported_types = {list, tuple, dict}
        if isinstance(data, np.ndarray):
            arrays = np.array_split(data, total_core_num)
            rdd = sc.parallelize(arrays)
        else:
            assert type(data) in supported_types, type_err_msg
            flattened = nest.flatten(data)
            data_length = len(flattened[0])
            data_to_be_shard = []
            for i in range(total_core_num):
                data_to_be_shard.append([])
            for x in flattened:
                assert len(x) == data_length, \
                    "the ndarrays in data must all have the same size in first dimension, " \
                    "got first ndarray of size {} and another {}".format(data_length, len(x))
                x_parts = np.array_split(x, total_core_num)
                for idx, x_part in enumerate(x_parts):
                    data_to_be_shard[idx].append(x_part)

            data_to_be_shard = [
                nest.pack_sequence_as(data, shard)
                for shard in data_to_be_shard
            ]
            rdd = sc.parallelize(data_to_be_shard)

        data_shards = SparkXShards(rdd)
        return data_shards
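This variant always produces node_num * core_num shards. The splitting it performs on nested structures can be reproduced with plain NumPy, sketched below for a dict input (the data and shard count are illustrative; nest.flatten and nest.pack_sequence_as generalize the same idea to arbitrary nesting):

    import numpy as np

    data = {"x": np.arange(20).reshape(10, 2), "y": np.arange(10)}
    total_core_num = 4
    # Split each leaf array along the first dimension, then regroup by shard index.
    parts = {k: np.array_split(v, total_core_num) for k, v in data.items()}
    shards = [{k: parts[k][i] for k in data} for i in range(total_core_num)]
    # len(shards) == 4; shards[0]["x"].shape[0] == shards[0]["y"].shape[0] == 3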
Example #6
    def __init__(self, *, model_path, batch_size=0):
        self.node_num, self.core_num = get_node_and_core_number()
        self.path = model_path
        if batch_size != 0:
            self.batch_size = batch_size
        else:
            import xml.etree.ElementTree as ET
            tree = ET.parse(model_path)
            root = tree.getroot()
            shape_item = root.find('./layers/layer/output/port/dim[1]')
            if shape_item is None:
                raise ValueError(
                    "Invalid openVINO IR xml file, please check your model_path"
                )
            self.batch_size = int(shape_item.text)
        self.model = InferenceModel(supported_concurrent_num=self.core_num)
        self.model.load_openvino(
            model_path=model_path,
            weight_path=model_path[:model_path.rindex(".")] + ".bin",
            batch_size=batch_size)
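Unlike Example #1, this constructor makes its arguments keyword-only, so the model path must be passed by name. A hypothetical construction call (the class name is an assumption, not taken from the snippet):

    # `OpenVINOModel` is a placeholder name for the class this __init__ belongs to.
    model = OpenVINOModel(model_path="/path/to/model.xml", batch_size=0)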