Example #1
def load_param_into_net(net, parameter_dict):
    """
    Loads parameters into network.

    Args:
        net (Cell): Cell network.
        parameter_dict (dict): Parameter dict.

    Raises:
        TypeError: Argument is not a Cell, or parameter_dict is not a Parameter dict.
    """
    if not isinstance(net, nn.Cell):
        logger.error("Failed to combine the net and the parameters.")
        msg = ("Argument net should be a Cell, but got {}.".format(type(net)))
        raise TypeError(msg)

    if not isinstance(parameter_dict, dict):
        logger.error("Failed to combine the net and the parameters.")
        msg = ("Argument parameter_dict should be a dict, but got {}.".format(
            type(parameter_dict)))
        raise TypeError(msg)

    logger.info("Execute load parameter into net process.")
    net.init_parameters_data()
    param_not_load = []
    for _, param in net.parameters_and_names():
        if param.name in parameter_dict:
            new_param = parameter_dict[param.name]
            if not isinstance(new_param, Parameter):
                logger.error("Failed to combine the net and the parameters.")
                msg = (
                    "Argument parameter_dict element should be a Parameter, but got {}."
                    .format(type(new_param)))
                raise TypeError(msg)
            param.init_data()
            _update_param(param, new_param)
        else:
            param_not_load.append(param.name)

    if param_not_load:
        _load_dismatch_prefix_params(net, parameter_dict, param_not_load)

    logger.debug("Params not matched(in net but not in parameter_dict):")
    for param_name in param_not_load:
        logger.debug("%s", param_name)

    logger.info(
        "Load parameter into net finished; {} parameters have not been loaded.".
        format(len(param_not_load)))
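A minimal usage sketch for the function above. The network class and checkpoint path are placeholders; `load_checkpoint` is assumed to return a dict of `Parameter` objects, and import paths may vary by MindSpore version.

import mindspore.nn as nn
from mindspore.train.serialization import load_checkpoint, load_param_into_net

class TinyNet(nn.Cell):  # placeholder network; any nn.Cell works
    def __init__(self):
        super(TinyNet, self).__init__()
        self.fc = nn.Dense(4, 2)

    def construct(self, x):
        return self.fc(x)

net = TinyNet()
param_dict = load_checkpoint("./tiny.ckpt")  # illustrative checkpoint path
load_param_into_net(net, param_dict)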
Example #2
def _fill_image_summary(tag: str,
                        np_value,
                        summary_image,
                        input_format='NCHW'):
    """
    Package the image summary.

    Args:
        tag (str): Summary tag describe.
        np_value (Type): Summary data type.
        summary_image (Tensor): The tensor of summary.
        input_format (str): Data sort order index. Default: 'NCHW'.

    Returns:
        Summary, return image summary content.
    """
    logger.debug(f"Set({tag}) the image summary value")
    if np_value.ndim != 4 or np_value.shape[1] not in (1, 3):
        logger.error(
            f"The value is not Image, tag = {tag}, ndim = {np_value.ndim}, shape={np_value.shape}"
        )
        return False

    if np_value.ndim != len(input_format):
        logger.error(
            f"The tensor with ndim({np_value.ndim}) can't be converted to format({input_format}) "
            f"because the number of dimensions does not match"
        )
        return False

    # convert the tensor format
    tensor = _convert_image_format(np_value, input_format)

    # convert the tensor dtype
    # Do not assume that user passes in values in [0, 255], use data type to detect
    scale_factor = 1
    if tensor.dtype == np.uint8:
        scale_factor = 1
    elif np.max(tensor) <= 1 and np.min(tensor) >= 0:
        scale_factor = 255
    tensor = tensor.astype(np.float32)
    tensor = (tensor * scale_factor).astype(np.uint8)

    # create the image summary
    height, width, channel, image_string = _make_image(tensor)
    summary_image.height = height
    summary_image.width = width
    summary_image.colorspace = channel
    summary_image.encoded_image = image_string
    return True
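The dtype-detection step above can be exercised on its own; a small sketch of the same heuristic in plain NumPy (the helper name is illustrative):

import numpy as np

def scale_to_uint8(tensor):
    # uint8 input is assumed to already be in [0, 255]; floats in [0, 1] are stretched to [0, 255].
    scale_factor = 1
    if tensor.dtype != np.uint8 and np.max(tensor) <= 1 and np.min(tensor) >= 0:
        scale_factor = 255
    return (tensor.astype(np.float32) * scale_factor).astype(np.uint8)

print(scale_to_uint8(np.array([0.0, 0.5, 1.0])))                # [  0 127 255]
print(scale_to_uint8(np.array([0, 128, 255], dtype=np.uint8)))  # unchanged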
Example #3
def _updata(param):
    if param in replace:
        return replace[param]
    layout = None
    set_sliced = False
    if auto_parallel_mode:
        set_sliced = True
        if param.name not in self.parameter_layout_dict:
            logger.debug("Layout dict does not contain the key %s.",
                         param.name)
        else:
            layout = self.parameter_layout_dict[param.name]
    new_p = param.init_data(layout, set_sliced=set_sliced)
    replace[param] = new_p
    return new_p
Example #4
    def record(self, step, train_network=None, plugin_filter=None):
        """
        Record the summary.

        Args:
            step (int): Represents training step number.
            train_network (Cell): The network to call the callback.
            plugin_filter (Optional[Callable[[str], bool]]): The filter function, \
                which is used to filter out plugins from being written by returning False.

        Returns:
            bool, whether the record process is successful or not.

        Examples:
            >>> with SummaryRecord(log_dir="./summary_dir", file_prefix="xxx_", file_suffix="_yyy") as summary_record:
            ...     summary_record.record(step=2)
        """
        logger.debug("SummaryRecord step is %r.", step)
        if self._closed:
            logger.error("The record writer is closed.")
            return False
        if not isinstance(step, int) or isinstance(step, bool):
            raise ValueError("`step` should be int")
        # Set the current summary of train step
        if self.network is not None and not self.has_graph:
            graph_proto = self.network.get_func_graph_proto()
            if graph_proto is None and train_network is not None:
                graph_proto = train_network.get_func_graph_proto()
            if graph_proto is None:
                logger.error("Failed to get proto for graph")
            else:
                self._event_writer.write({'graph': [{'step': step, 'value': graph_proto}]})
                self.has_graph = True
                if not _summary_tensor_cache:
                    return True

        if self._mode == 'train':
            self._add_summary_tensor_data()

        if not plugin_filter:
            self._event_writer.write(self._consume_data_pool(step))
        else:
            filtered = {}
            for plugin, datalist in self._consume_data_pool(step).items():
                if plugin_filter(plugin):
                    filtered[plugin] = datalist
            self._event_writer.write(filtered)
        return True
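A short sketch of `plugin_filter` in use, assuming `summary_record` is an open `SummaryRecord`: the callable receives a plugin name and returns False for data that should be dropped. The plugin name string 'image' here is illustrative.

# Write everything for this step except image summaries.
summary_record.record(step=10, plugin_filter=lambda plugin: plugin != 'image')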
Example #5
def _parse_tag_format(tag: str):
    """
    Parse the tag.

    Args:
        tag (str): Format: xxx[:Scalar] xxx[:Image] xxx[:Tensor].

    Returns:
        Tuple, (SummaryType, summary_tag).
    """

    summary_type = SummaryType.INVALID
    summary_tag = tag
    if tag is None:
        logger.error("The tag is None")
        return summary_type, summary_tag

    # search the slice
    slice_begin = FORMAT_BEGIN_SLICE
    slice_end = FORMAT_END_SLICE
    index = tag.rfind(slice_begin)
    if index == -1:
        logger.error("The tag(%s) does not contain the type suffix.", tag)
        return summary_type, summary_tag

    # slice the tag
    summary_tag = tag[:index]

    # check the slice end
    if tag[-1:] != slice_end:
        logger.error("The tag(%s) has a malformed ending.", tag)
        return summary_type, summary_tag

    # check the type
    type_str = tag[index + 2:-1]
    logger.debug("The summary_tag is = %r", summary_tag)
    logger.debug("The type_str value is = %r", type_str)
    if type_str == FORMAT_SCALAR_STR:
        summary_type = SummaryType.SCALAR
    elif type_str == FORMAT_TENSOR_STR:
        summary_type = SummaryType.TENSOR
    elif type_str == FORMAT_IMAGE_STR:
        summary_type = SummaryType.IMAGE
    else:
        logger.error("The tag(%s) type is invalid.", tag)
        summary_type = SummaryType.INVALID

    return summary_type, summary_tag
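Assuming `FORMAT_BEGIN_SLICE = '[:'` and `FORMAT_END_SLICE = ']'` (consistent with the `tag[index + 2:-1]` slice above; the constants themselves are not shown in this excerpt), the parse behaves like:

# _parse_tag_format("loss[:Scalar]")     -> (SummaryType.SCALAR, "loss")
# _parse_tag_format("input[:Image]")     -> (SummaryType.IMAGE, "input")
# _parse_tag_format("weights[:Tensor]")  -> (SummaryType.TENSOR, "weights")
# _parse_tag_format("no_suffix")         -> (SummaryType.INVALID, "no_suffix")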
Example #6
    def get_namespace_symbol(self, var: str):
        """Get symbol type and namespace and symbol."""
        if var in self.closure_namespace:
            logger.debug("in closure_namespace")
            return self.closure_namespace, var
        if var in self.global_namespace:
            logger.debug("in global_namespace")
            value = self.global_namespace[var]
            if isinstance(
                    value, type(abs)
            ) and self.global_namespace[var] not in convert_object_map:
                error_info = f"The builtin function '{var}' is not supported in graph mode."
                return None, var, error_info
            return self.global_namespace, var
        error_info = f"The name '{var}' is not defined."
        return None, var, error_info
Example #7
def load_param_into_net(net, parameter_dict):
    """
    Loads parameters into network.

    Args:
        net (Cell): Cell network.
        parameter_dict (dict): Parameter dict.

    Raises:
        TypeError: Argument is not a Cell, or parameter_dict is not a Parameter dict.
    """
    if not isinstance(net, nn.Cell):
        logger.error("Failed to combine the net and the parameters.")
        msg = ("Argument net should be a Cell, but got {}.".format(type(net)))
        raise TypeError(msg)

    if not isinstance(parameter_dict, dict):
        logger.error("Failed to combine the net and the parameters.")
        msg = ("Argument parameter_dict should be a dict, but got {}.".format(type(parameter_dict)))
        raise TypeError(msg)

    logger.info("Execute parameter into net process.")
    param_name_net_not_have = []
    for name in parameter_dict:
        b_par_dict_have_par_of_net = False
        for _, param in net.parameters_and_names():
            if name == param.name:
                b_par_dict_have_par_of_net = True
                # Layerwise-parallel parameter data loaded from a checkpoint file
                # is complete (merged) data and needs to be split.
                if param.layerwise_parallel:
                    new_param = parameter_dict[param.name]
                    _load_tensor_for_layerwise(new_param, param)
                break
        if not b_par_dict_have_par_of_net:
            param_name_net_not_have.append(name)

    param_name_param_dict_not_have = []
    for _, param in net.parameters_and_names():
        if param.name in parameter_dict:
            new_param = parameter_dict[param.name]

            if not isinstance(new_param, Parameter):
                logger.error("Failed to combine the net and the parameters.")
                msg = ("Argument parameter_dict element should be a Parameter, but got {}.".format(type(new_param)))
                raise TypeError(msg)
            _update_param(param, new_param)
        else:
            param_name_param_dict_not_have.append(param.name)

    logger.debug("Params not matched(in net but not in parameter_dict):")
    for paramname in param_name_param_dict_not_have:
        logger.debug("%s", paramname)
    logger.debug("Params not matched(in parameter_dict but not in net):")
    for paramname in param_name_net_not_have:
        logger.debug("%s", paramname)
    logger.info("Load parameter into net process finish.")
Example #8
def package_summary_event(data_id, step):
    """
    Package the summary to event protobuffer.

    Args:
        data_id (Number): Summary data id.
        step (Number): The record step index.

    Returns:
        Summary, the summary event.
    """
    data_list = get_summary_data(data_id)
    if data_list is None:
        logger.error("The step(%r) does not have record data.", step)
        data_list = []
    del_summary_data(data_id)
    # create the event of summary
    summary_event = Event()
    summary = summary_event.summary

    for value in data_list:
        tag = value["name"]
        data = value["data"]
        summary_type = value["type"]

        # get the summary type and parse the tag
        if summary_type is SummaryType.SCALAR:
            logger.debug("Now process Scalar summary, tag = %r", tag)
            summary_value = summary.value.add()
            summary_value.tag = tag
            summary_value.scalar_value = _get_scalar_summary(tag, data)
        elif summary_type is SummaryType.TENSOR:
            logger.debug("Now process Tensor summary, tag = %r", tag)
            summary_value = summary.value.add()
            summary_value.tag = tag
            summary_tensor = summary_value.tensor
            _get_tensor_summary(tag, data, summary_tensor)
        elif summary_type is SummaryType.IMAGE:
            logger.debug("Now process Image summary, tag = %r", tag)
            summary_value = summary.value.add()
            summary_value.tag = tag
            summary_image = summary_value.image
            _get_image_summary(tag, data, summary_image,
                               MS_IMAGE_TENSOR_FORMAT)
        elif summary_type is SummaryType.HISTOGRAM:
            logger.debug("Now process Histogram summary, tag = %r", tag)
            summary_value = summary.value.add()
            summary_value.tag = tag
            summary_histogram = summary_value.histogram
            _fill_histogram_summary(tag, data, summary_histogram)
        else:
            # The data is invalid, skip it.
            logger.error("Summary type is invalid, tag = %r", tag)
            continue

    summary_event.wall_time = time.time()
    summary_event.step = int(step)
    return summary_event
Example #9
def package_summary_event(data_list, step):
    """
    Package the summary to event protobuffer.

    Args:
        data_list (list): The summary data list.
        step (Number): The record step index.

    Returns:
        Summary, the summary event.
    """
    # create the event of summary
    summary_event = Event()
    summary = summary_event.summary
    summary_event.wall_time = time.time()
    summary_event.step = int(step)

    for value in data_list:
        summary_type = value["_type"]
        data = value["data"]
        tag = value["name"]

        logger.debug("Now process %r summary, tag = %r", summary_type, tag)

        summary_value = summary.value.add()
        summary_value.tag = tag
        # get the summary type and parse the tag
        if summary_type == 'Scalar':
            if not _fill_scalar_summary(tag, data, summary_value):
                del summary.value[-1]
        elif summary_type == 'Tensor':
            _fill_tensor_summary(tag, data, summary_value.tensor)
        elif summary_type == 'Image':
            if not _fill_image_summary(tag, data, summary_value.image,
                                       MS_IMAGE_TENSOR_FORMAT):
                del summary.value[-1]
        elif summary_type == 'Histogram':
            _fill_histogram_summary(tag, data, summary_value.histogram)
        else:
            # The data is invalid, skip it.
            logger.error("Summary type(%r) is invalid, tag = %r", summary_type,
                         tag)
            del summary.value[-1]

    return summary_event
Example #10
    def parse(self):
        """Parse the function or method."""
        logger.debug("fn = %r", self.fn)
        tree = None
        if isinstance(self.fn, (types.FunctionType, types.MethodType)):
            original_src = inspect.getsource(self.fn)
            hexstr = hashlib.sha256(original_src.encode()).hexdigest()
            tree = Parser.ast_cache.get(hexstr)
            if not tree:
                src = dedent(original_src)
                self.col_offset = \
                    len(original_src.split('\n')[0]) - len(src.split('\n')[0])
                logger.debug("get source = %s", src)
                tree = asttokens.ASTTokens(src, parse=True).tree
                Parser.ast_cache[hexstr] = tree
        else:
            logger.error("Fn type is invalid")
        return tree
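The caching scheme above (key the parse tree by a SHA-256 of the source text) can be reproduced with the standard library alone; a sketch using `ast` in place of `asttokens`:

import ast
import hashlib
import inspect
from textwrap import dedent

_ast_cache = {}

def parse_cached(fn):
    original_src = inspect.getsource(fn)
    hexstr = hashlib.sha256(original_src.encode()).hexdigest()
    tree = _ast_cache.get(hexstr)
    if tree is None:
        tree = ast.parse(dedent(original_src))
        _ast_cache[hexstr] = tree
    return tree

def f(x):
    return x + 1

assert parse_cached(f) is parse_cached(f)  # the second call is a cache hit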
Example #11
def get_object_key(obj):
    """Return the function key: module + name."""
    obj_key = ""
    if hasattr(obj, "__name__"):
        if hasattr(obj, "cell_init_args"):
            obj_key = "%s_ID" % (str(obj.__class__.__name__) + str(obj.__name__) + obj.cell_init_args)
        obj_id = "%s_ID%d" % (str(obj.__class__.__name__) + str(obj.__name__), id(obj))
    else:
        if hasattr(obj, "cell_init_args"):
            obj_key = "%s_ID" % (str(obj.__class__.__name__) + obj.cell_init_args)
        obj_id = "%s_ID%d" % (str(obj.__class__.__name__), id(obj))
    logger.debug("obj_key %s obj_id = %s", obj_key, obj_id)

    # Bound methods of different instances can share the same id.
    if isinstance(obj, types.MethodType):
        method_instance = obj.__self__
        instance_id = "%s_ID%d" % (str(method_instance.__class__.__name__), id(method_instance))
        obj_id = instance_id + obj_id + str(obj.__hash__())
    return obj_id, obj_key
Example #12
    def record(self, step, train_network=None):
        """
        Record the summary.

        Args:
            step (int): Represents training step number.
            train_network (Cell): The network that called the callback.

        Returns:
            bool, whether the record process is successful or not.

        Examples:
            >>> with SummaryRecord(log_dir="./summary_dir", file_prefix="xxx_", file_suffix="_yyy") as summary_record:
            ...     summary_record.record(step=2)
        """
        logger.debug("SummaryRecord step is %r.", step)
        if self._closed:
            logger.error("The record writer is closed.")
            return False
        if not isinstance(step, int) or isinstance(step, bool):
            raise ValueError("`step` should be int")
        # Set the current summary of train step
        if self.network is not None and not self.has_graph:
            graph_proto = self.network.get_func_graph_proto()
            if graph_proto is None and train_network is not None:
                graph_proto = train_network.get_func_graph_proto()
            if graph_proto is None:
                logger.error("Failed to get proto for graph")
            else:
                self._event_writer.write(
                    {'graph': [{
                        'step': step,
                        'value': graph_proto
                    }]})
                self.has_graph = True
                if not _summary_tensor_cache:
                    return True

        if self._mode == 'train':
            self._add_summary_tensor_data()

        self._event_writer.write(self._consume_data_pool(step))
        return True
Example #13
def _make_directory(path: str):
    """Make directory."""
    if path is None or not isinstance(path, str) or path.strip() == "":
        logger.error("The path(%r) is an invalid type.", path)
        raise TypeError("Input path is invalid type")

    path = os.path.realpath(path)
    logger.debug("The abs path is %r", path)

    if os.path.exists(path):
        real_path = path
    else:
        logger.debug("The directory(%s) doesn't exist, will create it", path)
        try:
            os.makedirs(path, exist_ok=True)
            real_path = path
        except PermissionError as e:
            logger.error("No write permission on the directory(%r), error = %r", path, e)
            raise TypeError("No write permission on the directory.")
    return real_path
Example #14
def _fill_histogram_summary(tag: str, np_value: np.ndarray, summary) -> None:
    """
    Package the histogram summary.

    Args:
        tag (str): The summary tag.
        np_value (np.ndarray): Summary data.
        summary (summary_pb2.Summary.Histogram): Summary histogram data.
    """
    logger.debug("Set(%r) the histogram summary value", tag)
    # Default bucket for tensor with no valid data.
    ma_value = np.ma.masked_invalid(np_value)
    total, valid = np_value.size, ma_value.count()
    invalids = []
    for isfn in np.isnan, np.isposinf, np.isneginf:
        if total - valid > sum(invalids):
            count = np.count_nonzero(isfn(np_value))
            invalids.append(count)
        else:
            invalids.append(0)

    summary.count = total
    summary.nan_count, summary.pos_inf_count, summary.neg_inf_count = invalids
    if not valid:
        logger.warning(
            'There are no valid values in the ndarray(size=%d, shape=%s)',
            total, np_value.shape)
        # summary.{min, max, sum} are 0s by default, no need to explicitly set
    else:
        summary.min = ma_value.min()
        summary.max = ma_value.max()
        summary.sum = ma_value.sum()
        bins = _calc_histogram_bins(valid)
        range_ = summary.min, summary.max
        hists, edges = np.histogram(np_value, bins=bins, range=range_)

        for hist, edge1, edge2 in zip(hists, edges, edges[1:]):
            bucket = summary.buckets.add()
            bucket.width = edge2 - edge1
            bucket.count = hist
            bucket.left = edge1
Example #15
    def expand_expr_statement(self, node):
        """
        Process the expr statement and expand it.

        Returns:
            tuple, (True, expr.value, target) when the call can be expanded,
            (True, expr.value) for other calls, or (False, None, None).
        """
        if isinstance(node, ast.Expr) and hasattr(node, "value"):
            expr_value = node.value
            if isinstance(expr_value, ast.Call):
                func = expr_value.func
                if isinstance(func, ast.Attribute) and \
                        hasattr(func, "attr") and \
                        hasattr(func, "value"):
                    method = func.attr
                    target = func.value
                    if method in parse_expr_statement_white_list:
                        logger.debug("Expand expr, target:%s, method:%s", target, method)
                        return True, expr_value, target
                return True, expr_value
        return False, None, None
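A stdlib sketch of the node shape being matched here (an expression statement whose value is a method call on some target):

import ast

node = ast.parse("self.append(x)").body[0]
assert isinstance(node, ast.Expr) and isinstance(node.value, ast.Call)
func = node.value.func
assert isinstance(func, ast.Attribute)
print(func.attr)             # 'append' -- the method name
print(ast.dump(func.value))  # the call target, e.g. Name(id='self', ...)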
Example #16
def test_cifar10():
    """
    dataset parameter
    """
    logger.info("Test dataset parameter")
    data_dir_10 = "../data/dataset/testCifar10Data"
    num_repeat = 2
    batch_size = 32
    limit_dataset = 100
    # apply dataset operations
    data1 = ds.Cifar10Dataset(data_dir_10, num_samples=limit_dataset)
    data1 = data1.repeat(num_repeat)
    data1 = data1.batch(batch_size, True)
    num_epoch = 5
    # iter1 will always assume there is a next epoch and never shut down.
    iter1 = data1.create_tuple_iterator()
    epoch_count = 0
    sample_count = 0
    for _ in range(num_epoch):
        row_count = 0
        for _ in iter1:
            # in this example, each row consists of the "image" and "label" columns
            row_count += 1
        assert row_count == int(limit_dataset * num_repeat / batch_size)
        logger.debug("row_count: ", row_count)
        epoch_count += 1
        sample_count += row_count
    assert epoch_count == num_epoch
    logger.debug("total epochs: ", epoch_count)
    assert sample_count == int(
        limit_dataset * num_repeat / batch_size) * num_epoch
    logger.debug("total sample: ", sample_count)
Example #17
def _convert_function_arguments(fn, *args):
    """
    Process the fn default parameters.

    Args:
        fn (Function): The function to be parsed.
        args (tuple): The parameters of the function.

    Returns:
        tuple, (converted, arguments_dict, parse_method).
    """
    arguments_dict = OrderedDict()
    parse_method = None
    if isinstance(fn, (types.FunctionType, types.MethodType)):
        parse_method = fn.__name__
        index = 0
        for value in args:
            arguments_dict[f'arg{index}'] = value
            index = index + 1
        logger.debug("fn(%r) full parameters dict is: %r", fn, arguments_dict)
        converted = True
    else:
        logger.warning("Find error: fn isn't function or method")
        converted = False
    return converted, arguments_dict, parse_method
Example #18
    def load_parameter_slice(self, params):
        """
        Replace parameters with sliced tensors by parallel strategies.

        Please refer to the usage in source code of `mindspore.common._Executor.compile`.

        Args:
            params (dict): The parameters dictionary used for init data graph.
        """
        if params is None:
            for key in self.parameters_dict():
                tensor = self.parameters_dict()[key].data
                if key not in self.parameter_layout_dict:
                    logger.info("layout dict does not contain the key %s", key)
                    continue
                if self.parameters_dict()[key].sliced:
                    logger.debug("Param %s is already sliced.", key)
                    continue
                layout = self.parameter_layout_dict[key]
                new_tensor = _load_tensor_by_layout(tensor, layout)
                self.parameters_dict()[key].set_parameter_data(new_tensor)
                self.parameters_dict()[key].sliced = True
        elif isinstance(params, OrderedDict):
            for key in params:
                tensor = params[key].data
                if key not in self.parameter_layout_dict:
                    logger.info("layout dict does not contain the key %s", key)
                    continue
                if params[key].sliced:
                    logger.debug("Param %s is already sliced.", key)
                    continue
                layout = self.parameter_layout_dict[key]
                new_tensor = _load_tensor_by_layout(tensor, layout)
                params[key].set_parameter_data(new_tensor)
                params[key].sliced = True
        else:
            raise TypeError(
                'Parameters need OrderedDict type, but got {}'.format(
                    type(params)))
Example #19
def _load_dismatch_prefix_params(net, parameter_dict, param_not_load):
    """When some net parameters are not loaded, try to infer a prefix and continue loading."""
    prefix_name = ""
    longest_name = param_not_load[0]
    while prefix_name != longest_name and param_not_load:
        logger.debug("Count: {} parameters have not been loaded, trying to continue.".format(len(param_not_load)))
        longest_name = sorted(param_not_load, key=len, reverse=True)[0]
        prefix_name = longest_name
        for net_param_name in param_not_load:
            for dict_name in parameter_dict:
                if dict_name.endswith(net_param_name):
                    tmp_name = dict_name[:-len(net_param_name)]
                    prefix_name = prefix_name if len(prefix_name) < len(tmp_name) else tmp_name

        if prefix_name != longest_name:
            logger.info("Remove parameter prefix name: {}, continue to load.".format(prefix_name))
            for _, param in net.parameters_and_names():
                new_param_name = prefix_name + param.name
                if param.name in param_not_load and new_param_name in parameter_dict:
                    new_param = parameter_dict[new_param_name]
                    _update_param(param, new_param)
                    param_not_load.remove(param.name)
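The prefix inference can be illustrated with plain strings: when every checkpoint name carries an extra `backbone.` prefix, the shortest leftover text in front of a matching suffix becomes the prefix to prepend (a sketch of the matching step only; the names are made up):

param_not_load = ["conv1.weight", "fc.bias"]   # names in the net, not yet loaded
parameter_dict = {"backbone.conv1.weight": ..., "backbone.fc.bias": ...}

prefix_name = max(param_not_load, key=len)     # start from the longest unmatched name
for net_param_name in param_not_load:
    for dict_name in parameter_dict:
        if dict_name.endswith(net_param_name):
            tmp_name = dict_name[:-len(net_param_name)]
            prefix_name = min(prefix_name, tmp_name, key=len)  # keep the shorter candidate
print(prefix_name)  # 'backbone.'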
Example #20
def _get_image_summary(tag: str, np_value, summary_image, input_format='NCHW'):
    """
    Package the image summary.

    Args:
        tag (str): The summary tag.
        np_value (numpy.ndarray): The summary data.
        summary_image (Summary.Image): The image summary message to fill.
        input_format (str): Data format of `np_value`. Default: 'NCHW'.

    Returns:
        Summary.Image, the filled image summary message.
    """
    logger.debug("Set(%r) the image summary value", tag)
    if np_value.ndim != 4:
        logger.error("The value is not Image, tag = %r, Value = %r", tag,
                     np_value)

    # convert the tensor format
    tensor = _convert_image_format(np_value, input_format)

    # convert the tensor dtype
    # Do not assume that user passes in values in [0, 255], use data type to detect
    scale_factor = 1
    if tensor.dtype == np.uint8:
        scale_factor = 1
    elif np.max(tensor) <= 1 and np.min(tensor) >= 0:
        scale_factor = 255
    tensor = tensor.astype(np.float32)
    tensor = (tensor * scale_factor).astype(np.uint8)

    # create the image summary
    height, width, channel, image_string = _make_image(tensor)
    summary_image.height = height
    summary_image.width = width
    summary_image.colorspace = channel
    summary_image.encoded_image = image_string
    return summary_image
Example #21
def _fill_scalar_summary(tag: str, np_value, summary):
    """
    Package the scalar summary.

    Args:
        tag (str): The summary tag.
        np_value (Object): The scalar object.
        summary (Summary.Value): The summary value message to fill.

    Returns:
        bool, True if the scalar summary is filled successfully, False otherwise.
    """
    logger.debug(f"Set({tag}) the scalar summary value")
    if np_value.size == 1:
        # is scalar
        summary.scalar_value = np_value.item()
        return True
    if np_value.size > 1:
        logger.warning(
            f"The tensor is not a single scalar, tag = {tag}, ndim = {np_value.ndim}, shape = {np_value.shape}")
        summary.scalar_value = next(np_value.flat).item()
        return True
    logger.error(f"There no values inside tensor, tag = {tag}, size = {np_value.size}")
    return False
Example #22
def make_directory(path: str):
    """Make directory."""
    if path is None or not isinstance(path, str) or path.strip() == "":
        logger.error("The path(%r) is invalid type.", path)
        raise TypeError("Input path is invalid type")

    # convert the relative paths
    path = os.path.realpath(path)
    logger.debug("The abs path is %r", path)

    # check whether the path exists and is writable
    if os.path.exists(path):
        real_path = path
    else:
        # All exceptions need to be caught because directory creation may be restricted (e.g., permissions)
        logger.debug("The directory(%s) doesn't exist, will create it", path)
        try:
            os.makedirs(path, exist_ok=True)
            real_path = path
        except PermissionError as e:
            logger.error("No write permission on the directory(%r), error = %r", path, e)
            raise TypeError("No write permission on the directory.")
    return real_path
Example #23
    def send_res(self, res, keep_format=True):
        """
        Send the result to the remote end.

        Args:
            res: The result to send.
            keep_format (bool): If True, escape whitespace so the formatting survives transport.
        """
        logger.debug(f"[OUT] {str(res)}")
        if keep_format:
            res_str = str(res).replace('\n', '[LF]').replace('\r', '[CR]').replace(' ', '[SP]')
        else:
            res_str = str(res).replace('\n', '').replace('\r', '').replace(' ', '')
        tag = '[~]' # The same as client kTAG

        # No longer written via print(tag + res_str, flush=True)
        try:
            self.fout.write(tag + res_str + "\n")
            self.fout.flush()
        except BrokenPipeError as err:
            logger.info(f"[TRACE] Write {str(err)}")
            self.exit()
        finally:
            pass
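The whitespace escaping used for `keep_format=True` is reversible on the receiving side, provided the payload contains no literal '[LF]', '[CR]', or '[SP]' markers; a round-trip sketch:

res = "line1\nline2 with spaces\r"
encoded = str(res).replace('\n', '[LF]').replace('\r', '[CR]').replace(' ', '[SP]')
print(encoded)  # line1[LF]line2[SP]with[SP]spaces[CR]
decoded = encoded.replace('[LF]', '\n').replace('[CR]', '\r').replace('[SP]', ' ')
assert decoded == res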
Example #24
def _make_directory(path):
    """Make directory."""
    real_path = None
    if path is None or not isinstance(path, str) or path.strip() == "":
        raise ValueError(f"Input path `{path}` is invalid type")

    # convert the relative paths
    path = os.path.realpath(path)
    logger.debug("The absolute path is %r", path)

    # check whether the path is already existed and has written permissions
    if os.path.exists(path):
        real_path = path
    else:
        # All exceptions need to be caught because directory creation may be restricted (e.g., permissions)
        logger.debug("The directory(%s) doesn't exist, will create it", path)
        try:
            os.makedirs(path)
            real_path = path
        except PermissionError as e:
            logger.error(f"No write permission on the directory `{path}`, error = {e}")
            raise ValueError(f"No write permission on the directory `{path}`.")
    return real_path
Example #25
def resolve_symbol(namespace, symbol):
    """
    Resolve a symbol.

    Note:
        The function cannot be retrieved when it is a closure, so the fn is saved in the namespace.

    Args:
        namespace (Object): Symbol's namespace.
        symbol (str): The symbol to resolve.

    Returns:
        Object, the resolution result of the symbol.
    """
    # All exceptions need to be caught in this function
    try:
        resolve_ = namespace[symbol]

        # list and dict are not hashable and cannot be keys for the map; just return the result
        if isinstance(resolve_, (list, dict)):
            return resolve_

        # dataclass may not be hashable
        if getattr(resolve_, "__hash__") is None:
            return resolve_

        # If need trope the obj
        if resolve_ in convert_object_map:
            resolve_ = convert_object_map.get(resolve_)
            logger.debug("convert resolve = %r", resolve_)
            if resolve_ == NO_IMPLEMENT:
                raise NotImplementedError("not implemented for ", str(symbol))
    except Exception as e:
        if isinstance(e, NotImplementedError):
            raise e
        resolve_ = None
        logger.debug("resolve exception occurred, value = %r", e)
        logger.debug("resolve type is invalid, namespace = %s, symbol = %s",
                     namespace.__str__(), symbol)
    if isinstance(resolve_, _MindSporeFunction):
        logger.debug("resolve class _MindSporeFunction, resolve fn instead.")
        resolve_ = resolve_.fn
    return resolve_
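The `__hash__ is None` guard above can be seen with a dataclass: with `eq=True` (the default) and `frozen=False`, Python sets `__hash__` to None, so such objects cannot serve as map keys. A stdlib sketch:

from dataclasses import dataclass

@dataclass  # eq=True by default, so __hash__ becomes None
class Point:
    x: int
    y: int

assert getattr(Point(1, 2), "__hash__") is None  # unhashable: cannot be a dict key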
Example #26
    def init_timeline(self, all_reduce_info, framework_info, aicpu_info,
                      min_cycle_counter, source_path):
        """
        Init timeline metadata, adding all collected info.

        Args:
            all_reduce_info (list[list]): The metadata of AllReduce operator.
            framework_info (dict): The framework metadata.
            aicpu_info (dict): The metadata of AI CPU operator.
            min_cycle_counter (float): The minimum cycle counter of the timeline.
            source_path (str): The path of the profiling source data.
        """
        if min_cycle_counter == float('inf'):
            min_cycle_counter = 0

        logger.info('Initiating timeline...')
        timeline_list = self._load_timeline_data()
        cpu_timeline_generator = CpuTimelineGenerator(self._profiling_dir,
                                                      self._device_id)
        cpu_timeline_list = cpu_timeline_generator.get_timeline_data()
        if cpu_timeline_list:
            self._clock_synchronize_to_host(timeline_list, source_path)
            timeline_list.extend(cpu_timeline_list)
        timeline_list.sort(key=lambda x: float(x[2]))
        self._timeline_summary['op_exe_times'] = len(timeline_list)

        # Add AllReduce info to timeline temp list and sort by start time.
        if all_reduce_info:
            logger.debug(
                'AllReduce info found. Start adding info into timeline...')
            timeline_list.extend(all_reduce_info)
            timeline_list.sort(key=lambda x: float(x[2]))

        # Add AI CPU data into timeline temp list and sort by start time.
        aicpu_data = aicpu_info.get('info')
        if aicpu_data:
            timeline_list.extend(aicpu_data)
            timeline_list.sort(key=lambda x: float(x[2]))
            self._timeline_summary['op_exe_times'] += aicpu_info.get(
                'op_exe_times', 0)
            self._timeline_summary['num_of_streams'] += aicpu_info.get(
                'num_of_streams', 0)
            self._timeline_summary['num_of_ops'] += aicpu_info.get(
                'num_of_ops', 0)
            self._timeline_summary['total_time'] += aicpu_info.get(
                'total_time', 0)

        # Init a dict for counting the num of streams.
        stream_count_dict = {}
        for timeline in timeline_list:
            self._parse_timeline_data(timeline, min_cycle_counter)
            # Updating the collection of streams.
            if len(timeline) == 4:
                self._update_num_of_streams(timeline, stream_count_dict)

        # Get framework metadata.
        framework_obj_list = framework_info.get('object')
        # The length of list is the number of operators.
        self._timeline_summary['num_of_ops'] += len(framework_obj_list)
        self._add_framework_info(framework_obj_list)
        logger.info('Finished adding info into timeline...')

        # Update timeline summary info
        self._timeline_summary['num_of_streams'] += len(
            stream_count_dict.keys())
Example #27
    def compile(self,
                obj,
                *args,
                phase='predict',
                params=None,
                do_convert=True):
        """
        Compiles graph.

        Args:
            obj (Function/Cell): The function or cell instance to be compiled.
            args (tuple): Function or cell input arguments.
            phase (str): The name of compile phase. Default: 'predict'.
            params (OrderedDict): The parameters dictionary used for init data graph. Default: None.
            do_convert (bool): When set to True, convert ME graph to GE graph after compiling graph.

        Returns:
            Str, the full phase of the cell.
            Bool, False if the graph has been compiled before, else True.
        """
        obj.check_names()
        args_names, args_list = _generate_pip_args(obj, *args)
        dic = dict(zip(args_names, args_list))
        key = generate_key(phase, dic)
        self.phase_prefix = str(key[1])
        if phase == 'export':
            phase = phase + '.' + str(obj.create_time)
        else:
            phase = self.phase_prefix + phase + '.' + str(obj.create_time)
        enable_debug_runtime = context.get_context("enable_debug_runtime")
        enable_ge = context.get_context("enable_ge")

        use_vm = not enable_ge or (enable_debug_runtime
                                   and context.get_context("mode")
                                   == context.PYNATIVE_MODE)

        if phase in self.compile_cache.keys():
            logger.debug("%r graph has existed.", phase)
            return phase, False

        result = self._executor.compile(obj, args_list, phase, use_vm)
        self.compile_cache[phase] = phase
        if not result:
            raise RuntimeError("Executor compile failed.")
        graph = self._executor.get_func_graph(phase)

        if graph is None:
            logger.error("%r graph compile failed.", phase)
        if not do_convert:
            return phase, True
        if not enable_debug_runtime or enable_ge:
            if _get_parallel_mode() in ["auto_parallel", "semi_auto_parallel"]:
                obj.parameter_layout_dict = self._executor.get_parameter_layout(
                    phase)
                obj.load_parameter_slice(params)

        # the following GE init process is not needed when use vm or ms backend
        if enable_ge:
            # decide whether to sink based on whether the inputs is virtual or not
            if args_list and isinstance(args_list[0],
                                        Tensor) and args_list[0].virtual_flag:
                _set_dataset_mode_config('sink')
            else:
                _set_dataset_mode_config('normal')

            self._build_data_graph(obj, params, phase)

            if "export" not in phase:
                init_phase = "init_subgraph" + "." + str(obj.create_time)
                _exec_init_graph(obj, init_phase)
        elif not enable_ge and "export" in phase:
            self._build_data_graph(obj, params, phase)

        return phase, True
Example #28
def _fill_histogram_summary(tag: str, np_value: np.ndarray,
                            summary_histogram) -> None:
    """
    Package the histogram summary.

    Args:
        tag (str): The summary tag.
        np_value (np.ndarray): Summary data.
        summary_histogram (summary_pb2.Summary.Histogram): Summary histogram data.
    """
    logger.debug("Set(%r) the histogram summary value", tag)
    # Default bucket for tensor with no valid data.
    default_bucket_left = -0.5
    default_bucket_width = 1.0

    if np_value.size == 0:
        bucket = summary_histogram.buckets.add()
        bucket.left = default_bucket_left
        bucket.width = default_bucket_width
        bucket.count = 0

        summary_histogram.nan_count = 0
        summary_histogram.pos_inf_count = 0
        summary_histogram.neg_inf_count = 0

        summary_histogram.max = 0
        summary_histogram.min = 0
        summary_histogram.sum = 0

        summary_histogram.count = 0

        return

    summary_histogram.nan_count = np.count_nonzero(np.isnan(np_value))
    summary_histogram.pos_inf_count = np.count_nonzero(np.isposinf(np_value))
    summary_histogram.neg_inf_count = np.count_nonzero(np.isneginf(np_value))
    summary_histogram.count = np_value.size

    masked_value = np.ma.masked_invalid(np_value)
    tensor_max = masked_value.max()
    tensor_min = masked_value.min()
    tensor_sum = masked_value.sum()

    # No valid value in tensor.
    if tensor_max is np.ma.masked:
        bucket = summary_histogram.buckets.add()
        bucket.left = default_bucket_left
        bucket.width = default_bucket_width
        bucket.count = 0

        summary_histogram.max = np.nan
        summary_histogram.min = np.nan
        summary_histogram.sum = 0

        return

    bin_number = _calc_histogram_bins(masked_value.count())
    counts, edges = np.histogram(np_value,
                                 bins=bin_number,
                                 range=(tensor_min, tensor_max))

    for ind, count in enumerate(counts):
        bucket = summary_histogram.buckets.add()
        bucket.left = edges[ind]
        bucket.width = edges[ind + 1] - edges[ind]
        bucket.count = count

    summary_histogram.max = tensor_max
    summary_histogram.min = tensor_min
    summary_histogram.sum = tensor_sum
Example #29
def load_param_into_net(net, parameter_dict, strict_load=False):
    """
    Loads parameters into network.

    Args:
        net (Cell): Cell network.
        parameter_dict (dict): Parameter dictionary.
        strict_load (bool): Whether to load the parameters strictly. If False, parameters in
                           `parameter_dict` whose names match net parameters by suffix will also be
                           loaded. Default: False.

    Raises:
        TypeError: Argument is not a Cell, or parameter_dict is not a Parameter dictionary.

    Examples:
        >>> net = Net()
        >>> ckpt_file_name = "./checkpoint/LeNet5-1_32.ckpt"
        >>> param_dict = load_checkpoint(ckpt_file_name, filter_prefix="conv1")
        >>> param_not_load = load_param_into_net(net, param_dict)
        >>> print(param_not_load)
        ['conv1.weight']
    """
    if not isinstance(net, nn.Cell):
        logger.error("Failed to combine the net and the parameters.")
        msg = ("Argument net should be a Cell, but got {}.".format(type(net)))
        raise TypeError(msg)

    if not isinstance(parameter_dict, dict):
        logger.error("Failed to combine the net and the parameters.")
        msg = ("Argument parameter_dict should be a dict, but got {}.".format(
            type(parameter_dict)))
        raise TypeError(msg)

    strict_load = Validator.check_bool(strict_load)
    logger.info("Execute the process of loading parameters into net.")
    net.init_parameters_data()
    param_not_load = []
    for _, param in net.parameters_and_names():
        if param.name in parameter_dict:
            new_param = parameter_dict[param.name]
            if not isinstance(new_param, Parameter):
                logger.error("Failed to combine the net and the parameters.")
                msg = (
                    "Argument parameter_dict element should be a Parameter, but got {}."
                    .format(type(new_param)))
                raise TypeError(msg)
            _update_param(param, new_param)
        else:
            param_not_load.append(param.name)

    if param_not_load and not strict_load:
        _load_dismatch_prefix_params(net, parameter_dict, param_not_load)

    logger.debug("Params not matched(in net but not in parameter_dict):")
    for param_name in param_not_load:
        logger.debug("%s", param_name)

    logger.info("Loading parameters into net is finished.")
    if param_not_load:
        logger.warning("{} parameters in the net are not loaded.".format(
            len(param_not_load)))
    return param_not_load
Example #30
    def compile(self,
                obj,
                *args,
                phase='predict',
                do_convert=True,
                auto_parallel_mode=False):
        """
        Compiles graph.

        Args:
            obj (Function/Cell): The function or cell instance to be compiled.
            args (tuple): Function or cell input arguments.
            phase (str): The name of compile phase. Default: 'predict'.
            do_convert (bool): When set to True, convert ME graph to GE graph after compiling graph.
            auto_parallel_mode (bool): When set to True, use auto parallel mode to compile the graph.

        Returns:
            Str, the full phase of the cell.
            Bool, False if the graph has been compiled before, else True.
        """
        from mindspore import nn
        from mindspore.ops.composite import GradOperation

        class InputsToAttrCell(nn.Cell):
            """The cell that converts non-tensor inputs to attr."""
            def __init__(self, net, args_names, non_tensor_inputs):
                super(InputsToAttrCell, self).__init__()
                self.net = net
                self.args_names = args_names
                self.non_tensor_inputs = non_tensor_inputs
                self.inputs_to_attr = True

            def construct(self, *tensor_inputs):
                real_inputs = ()
                index = 0
                for i in args_names:
                    if i in self.non_tensor_inputs.keys():
                        real_inputs += (self.non_tensor_inputs[i], )
                    else:
                        real_inputs += (tensor_inputs[index], )
                        index += 1
                return self.net(*real_inputs)

        args_names, args_list = _generate_pip_args(obj, *args)
        if not hasattr(obj, "inputs_to_attr"):
            dic = dict(zip(args_names, args_list))
            key = generate_key(phase, dic)
            obj.phase_prefix = str(key[1])
            if 'export' in phase:
                phase = phase + '.' + obj.phase_prefix + '.' + str(
                    obj.create_time)
            else:
                phase = obj.phase_prefix + phase + '.' + str(obj.create_time)

            if phase in self.compile_cache.keys():
                logger.debug("%r graph has existed.", phase)
                return phase, False

        if getattr(obj, "support_non_tensor_inputs", None):
            for i in obj.__dict__.values():
                if isinstance(i, GradOperation):
                    raise ValueError(
                        "Setting 'support_non_tensor_inputs' to True is not supported "
                        "for a grad net; only forward nets are supported.")
            attrs = {}
            inputs = []
            for key, value in dic.items():
                if not isinstance(value, (Tensor, MetaTensor)):
                    attrs[key] = value
                else:
                    inputs.append(value)
            if attrs:
                inputs_to_attr_cell = InputsToAttrCell(obj, args_names, attrs)
                return self.compile(inputs_to_attr_cell, *inputs, phase=phase)

        obj.check_names()
        _check_full_batch()
        self._set_dataset_mode(args_list)

        is_sink_mode = args and isinstance(args[0],
                                           Tensor) and args[0].virtual_flag
        if auto_parallel_mode and _need_to_full(
        ) and not is_sink_mode and obj.auto_parallel_compile_and_run():
            args_full = _to_full_tensor(args, _get_device_num(),
                                        _get_global_rank())
            _, args_list = _generate_pip_args(obj, *args_full)

        enable_debug_runtime = context.get_context("enable_debug_runtime")
        enable_ge = context.get_context("enable_ge")
        use_vm = not enable_ge or (enable_debug_runtime
                                   and context.get_context("mode")
                                   == context.PYNATIVE_MODE)
        result = self._executor.compile(obj, args_list, phase, use_vm)
        self.compile_cache[phase] = phase
        if not result:
            raise RuntimeError("Executor compile failed.")
        graph = self._executor.get_func_graph(phase)

        if graph is None:
            logger.error("%r graph compile failed.", phase)
        if not do_convert:
            return phase, True

        if auto_parallel_mode:
            obj.parameter_layout_dict = self._executor.get_parameter_layout(
                phase)
        replace = obj.init_parameters_data(
            auto_parallel_mode=auto_parallel_mode)
        if not enable_debug_runtime or enable_ge:
            if auto_parallel_mode:
                obj.load_parameter_slice(None)

        self._updata_param_node_default_input(phase, replace)

        # set parallel inputs in sink mode
        if auto_parallel_mode and is_sink_mode:
            obj.set_parallel_input_with_inputs(*args)

        # the following GE init process is not needed when use vm or ms backend
        if enable_ge:
            self._build_data_graph(obj, phase)

            if "export" not in phase:
                init_phase = "init_subgraph" + "." + str(obj.create_time)
                _exec_init_graph(obj, init_phase)
        elif not enable_ge and "export" in phase:
            self._build_data_graph(obj, phase)
        elif BROADCAST_PHASE not in phase and _get_parameter_broadcast():
            auto_split_param_names = []
            if auto_parallel_mode:
                auto_split_param_names = self._get_auto_split_param_names(
                    obj.parameter_layout_dict)

            broadcast_params_dict = obj.parameters_broadcast_dict()
            if auto_split_param_names and broadcast_params_dict:
                broadcast_params_dict = OrderedDict()
                for param_name, param in obj.parameters_broadcast_dict().items(
                ):
                    if param_name not in auto_split_param_names:
                        broadcast_params_dict[param_name] = param
            broadcast_phase = "_broadcast_subgraph"
            self._build_broadcast_graph(broadcast_params_dict, broadcast_phase)

        return phase, True