def _validate(index_dict):
     if "meta" not in index_dict:
         raise IndexReaderException("meta section is not present")
     if len(index_dict["meta"]) == 0:
         raise IndexReaderException("meta section is empty")
     if "tensor_payload" not in index_dict:
         raise IndexReaderException("tensor_payload section is not present")
 def _validate(index_dict):
     if "meta" not in index_dict:
         raise IndexReaderException("meta section is not present")
     if len(index_dict["meta"]) == 0:
         raise IndexReaderException("meta section is empty")
     if "tensor_payload" not in index_dict and "shape_payload" not in index_dict:
         raise IndexReaderException(
             "neither tensor_payload nor shape_payload sections are present"
         )
    def _update_tensors_from_json(
            self, index_tensors_dict, step, response: bytes, path, worker
    ) -> Dict[str, Dict[int, Dict[str, Dict[str, TensorLocation]]]]:
        """Merge the tensor locations listed in one index file into a nested dict.

        ``response`` is parsed as JSON and checked with
        ``IndexReader._validate``. Every entry of its "tensor_payload"
        section becomes a ``TensorLocation`` stored at
        ``index_tensors_dict[tensor_name][step][worker]["tensor_location"]``.
        An entry that already exists for the same tensor name, step and
        worker is replaced.

        Resulting structure (four levels: tensor name, step, worker, kind):
        {
            'dense/bias:0': {
                0: {
                    <worker>: {
                        'tensor_location': <TensorLocation object>
                    },
                    ...
                },
                2: { ... },
                ...
            },
            'conv2d/kernel:0': { ... },
            ...
        }

        :param index_tensors_dict: dict updated in place and also returned
        :param step: key used for the second nesting level
        :param response: JSON text of the index file
        :param path: prefix joined with the "event_file_name" of the meta section
        :param worker: key used for the third nesting level; also passed to
            each ``TensorLocation``
        :return: the same ``index_tensors_dict`` object, after the update
        :raises IndexReaderException: if ``response`` is not valid JSON;
            ``IndexReader._validate`` may raise as well
        """
        try:
            index_dict = json.loads(response)
        except ValueError as err:
            # Keep the parser error attached as the explicit cause.
            raise IndexReaderException("Empty/Corrupt Index File") from err
        IndexReader._validate(index_dict)

        index_meta = index_dict["meta"]
        mode = ModeKeys[index_meta["mode"].strip()]
        mode_step = index_meta["mode_step"]
        event_file_name = os.path.join(path, index_meta["event_file_name"])

        for tensor in index_dict["tensor_payload"]:
            tensor_name = tensor["tensorname"]
            tensor_location = TensorLocation(
                tensor_name, mode, mode_step, event_file_name,
                tensor["start_idx"], tensor["length"], worker)
            # Create the tensor-name and step levels on demand, then set
            # (or replace) this worker's entry.
            per_step = index_tensors_dict.setdefault(tensor_name, {})
            per_step.setdefault(step, {})[worker] = {
                "tensor_location": tensor_location
            }
        return index_tensors_dict
Example #4
0
def parse_worker_name_from_file(filename: str) -> str:
    """
    Extracts the worker name from the path of an index or event file.

    Index / Event files can currently have two formats:
        1. (path_prefix)/(step_prefix)_worker_0.json
        2. (path_prefix)/(step_prefix)__replica-0_task-1_device-GPU-0.json
    The double underscore after step prefix is used to indicate
    a distributed TF job worker in MirroredStrategy that needs to be deserialized.

    Only the part of the path that follows the step prefix is searched for
    the double underscore, so a "__" inside the directory prefix does not
    cause a plain worker name to be deserialized.

    :param filename: path ending in .json, .csv or .tfevents
    :return: worker name; passed through deserialize_tf_device for format 2
    :raises IndexReaderException: if filename matches neither format
    """
    # worker_2 = /tmp/ts-logs/index/000000001/000000001230_worker_2.json
    worker_name_regex = re.compile(r".+\/\d+_(.+)\.(json|csv|tfevents)$")
    worker_name_regex_match = worker_name_regex.match(filename)
    if worker_name_regex_match is None:
        raise IndexReaderException(f"Invalid File Found: {filename}")
    worker_name = worker_name_regex_match.group(1)
    # Start one character before the captured name, i.e. at the underscore
    # that terminates the step prefix, so that "<step>__<device>" is seen
    # while the directory part of the path is ignored.
    after_step_prefix = filename[worker_name_regex_match.start(1) - 1:]
    if "__" in after_step_prefix:
        # /replica:0/task:0/device:GPU:0 = replica-0_task-0_device-GPU-0.json
        worker_name = deserialize_tf_device(worker_name)
    return worker_name
    def _update_tensors_from_json(
        self, index_tensors_dict, step, response: bytes, path, worker
    ) -> Dict[str, Dict[int, Dict[str, Dict[str, object]]]]:
        """Merge the locations and shapes listed in one index file into a nested dict.

        ``response`` is parsed as JSON and checked with
        ``IndexReader._validate``. Entries of the "tensor_payload" section
        become ``TensorLocation`` objects and entries of the "shape_payload"
        section become ``TensorShape`` objects. Each object is stored at
        ``index_tensors_dict[tensor_name][step][worker][kind]`` where kind is
        "tensor_location" or "tensor_shape". A tensor listed in both sections
        ends up with both keys; an existing value under the same key is
        replaced.

        Resulting structure (four levels: tensor name, step, worker, kind):
        {
            'dense/bias:0': {
                0: {
                    <worker>: {
                        'tensor_location': <TensorLocation object>,
                        'tensor_shape': <TensorShape object>
                    },
                    ...
                },
                2: { ... },
                ...
            },
            'conv2d/kernel:0': { ... },
            ...
        }

        :param index_tensors_dict: dict updated in place and also returned
        :param step: key used for the second nesting level
        :param response: JSON text of the index file
        :param path: prefix joined with the "event_file_name" of the meta
            section (only read when "tensor_payload" is non-empty)
        :param worker: key used for the third nesting level; also passed to
            each ``TensorLocation``
        :return: the same ``index_tensors_dict`` object, after the update
        :raises IndexReaderException: if ``response`` is not valid JSON;
            ``IndexReader._validate`` may raise as well
        """
        try:
            index_dict = json.loads(response)
        except ValueError as err:
            # Keep the parser error attached as the explicit cause.
            raise IndexReaderException("Empty/Corrupt Index File") from err
        IndexReader._validate(index_dict)

        index_meta = index_dict["meta"]
        mode = ModeKeys[index_meta["mode"].strip()]
        mode_step = index_meta["mode_step"]

        # (tensor_name, kind, object) triples. They are collected first and
        # merged afterwards, so a malformed payload entry raises before
        # index_tensors_dict has been touched.
        entries = []

        tensor_payload = index_dict.get("tensor_payload")
        if tensor_payload:
            event_file_name = os.path.join(path, index_meta["event_file_name"])
            for tensor in tensor_payload:
                tensor_name = tensor["tensorname"]
                start_idx = tensor["start_idx"]
                length = tensor["length"]
                tensor_location = TensorLocation(
                    tensor_name, mode, mode_step, event_file_name, start_idx, length, worker
                )
                entries.append((tensor_name, "tensor_location", tensor_location))

        shape_payload = index_dict.get("shape_payload")
        if shape_payload:
            for tensor in shape_payload:
                tensor_name = tensor["tensorname"]
                original_name = tensor["originalname"]
                shape = tensor["shape"]
                tensor_shape = TensorShape(tensor_name, mode, mode_step, shape, original_name)
                entries.append((tensor_name, "tensor_shape", tensor_shape))

        for tensor_name, kind, obj in entries:
            per_worker = index_tensors_dict.setdefault(tensor_name, {}).setdefault(step, {})
            # Merge into the worker's dict instead of replacing it, so a
            # shape entry does not discard the location stored for the same
            # tensor (and vice versa).
            per_worker.setdefault(worker, {})[kind] = obj
        return index_tensors_dict