def _validate(index_dict): if "meta" not in index_dict: raise IndexReaderException("meta section is not present") if len(index_dict["meta"]) == 0: raise IndexReaderException("meta section is empty") if "tensor_payload" not in index_dict: raise IndexReaderException("tensor_payload section is not present")
def _validate(index_dict): if "meta" not in index_dict: raise IndexReaderException("meta section is not present") if len(index_dict["meta"]) == 0: raise IndexReaderException("meta section is empty") if "tensor_payload" not in index_dict and "shape_payload" not in index_dict: raise IndexReaderException( "neither tensor_payload nor shape_payload sections are present" )
def _update_tensors_from_json(
    self, index_tensors_dict, step, response: bytes, path, worker
) -> Dict[str, Dict[int, Dict[str, TensorLocation]]]:
    """Record the tensor locations listed in one index file.

    ``response`` is decoded as JSON and validated, then every entry of its
    ``tensor_payload`` section is stored in ``index_tensors_dict`` keyed by
    tensor name, then ``step``, then ``worker``.

    Example of the resulting structure:
    {
        'dense/bias:0': {
            0: {
                <worker>: {'tensor_location': <TensorLocation object>},
                ...
            },
            2: { ... },
            ...
        },
        'conv2d/kernel:0': { ... },
        ...
    }

    An entry already stored for the same tensor name, step and worker is
    replaced. The event file path of each location is ``path`` joined with
    the ``event_file_name`` found in the ``meta`` section.

    :return: the same ``index_tensors_dict`` object, updated in place
    :raises IndexReaderException: when ``response`` cannot be decoded as
        JSON, or when validation of the decoded content fails
    """
    try:
        parsed = json.loads(response)
    except ValueError:
        raise IndexReaderException("Empty/Corrupt Index File")
    IndexReader._validate(parsed)

    meta = parsed["meta"]
    mode = ModeKeys[meta["mode"].strip()]
    mode_step = meta["mode_step"]
    event_file_name = os.path.join(path, meta["event_file_name"])

    for entry in parsed["tensor_payload"]:
        name = entry["tensorname"]
        offset = entry["start_idx"]
        size = entry["length"]
        location = TensorLocation(
            name, mode, mode_step, event_file_name, offset, size, worker
        )
        # Create the per-tensor and per-step levels on first use, then store
        # (or replace) this worker's entry.
        per_step = index_tensors_dict.setdefault(name, {})
        per_worker = per_step.setdefault(step, {})
        per_worker[worker] = {"tensor_location": location}
    return index_tensors_dict
def parse_worker_name_from_file(filename: str) -> str:
    """
    Extract the worker name from an index or event file path.

    Index / Event files can currently have two formats:
        1. (path_prefix)/(step_prefix)_worker_0.json
        2. (path_prefix)/(step_prefix)__replica-0_task-1_device-GPU-0.json

    The double underscore after the step prefix is used to indicate a
    distributed TF job worker in MirroredStrategy whose name needs to be
    deserialized. Only the file's base name is inspected for this marker,
    so a ``__`` elsewhere in the path does not trigger deserialization.

    :param filename: str, path ending in ``.json``, ``.csv`` or ``.tfevents``
    :return: worker_name: str
    :raises IndexReaderException: if ``filename`` does not match the
        expected ``<path>/<digits>_<worker>.<ext>`` layout
    """
    # worker_2 = /tmp/ts-logs/index/000000001/000000001230_worker_2.json
    match = re.match(r".+\/\d+_(.+)\.(json|csv|tfevents)$", filename)
    if match is None:
        raise IndexReaderException(f"Invalid File Found: {filename}")
    worker_name = match.group(1)
    # The pattern consumes exactly one underscore after the step prefix, so
    # a captured name that still begins with "_" is the double-underscore
    # (serialized TF device) form.
    if worker_name.startswith("_"):
        # /replica:0/task:0/device:GPU:0 = replica-0_task-0_device-GPU-0.json
        worker_name = deserialize_tf_device(worker_name)
    return worker_name
def _update_tensors_from_json(
    self, index_tensors_dict, step, response: bytes, path, worker
) -> Dict[str, Dict[int, Dict[str, TensorLocation]]]:
    """Merge the contents of one index file into ``index_tensors_dict``.

    ``response`` is decoded as JSON and validated. Entries of its
    ``tensor_payload`` section become ``TensorLocation`` objects and entries
    of its ``shape_payload`` section become ``TensorShape`` objects. Each is
    stored keyed by tensor name, then ``step``, then ``worker``.

    Example of the resulting structure:
    {
        'dense/bias:0': {
            0: {
                <worker>: {
                    'tensor_location': <TensorLocation object>,
                    'tensor_shape': <TensorShape object>
                },
                ...
            },
            2: { ... },
            ...
        },
        'conv2d/kernel:0': { ... },
        ...
    }

    A location and a shape recorded for the same tensor name, step and
    worker are kept side by side; storing one does not discard the other.
    A missing or empty payload section is skipped.

    :return: the same ``index_tensors_dict`` object, updated in place
    :raises IndexReaderException: when ``response`` cannot be decoded as
        JSON, or when validation of the decoded content fails
    """
    try:
        index_dict = json.loads(response)
    except ValueError as err:
        raise IndexReaderException("Empty/Corrupt Index File") from err
    IndexReader._validate(index_dict)

    index_meta = index_dict["meta"]
    mode = ModeKeys[index_meta["mode"].strip()]
    mode_step = index_meta["mode_step"]

    # Collect everything first so that a malformed payload entry raises
    # before index_tensors_dict has been touched.
    pending = []

    tensor_payload = index_dict.get("tensor_payload")
    if tensor_payload:
        event_file_name = os.path.join(path, index_meta["event_file_name"])
        for tensor in tensor_payload:
            tensor_name = tensor["tensorname"]
            start_idx = tensor["start_idx"]
            length = tensor["length"]
            tensor_location = TensorLocation(
                tensor_name, mode, mode_step, event_file_name, start_idx, length, worker
            )
            pending.append((tensor_name, "tensor_location", tensor_location))

    shape_payload = index_dict.get("shape_payload")
    if shape_payload:
        for tensor in shape_payload:
            tensor_name = tensor["tensorname"]
            original_name = tensor["originalname"]
            shape = tensor["shape"]
            tensor_shape = TensorShape(tensor_name, mode, mode_step, shape, original_name)
            pending.append((tensor_name, "tensor_shape", tensor_shape))

    for tensor_name, key, obj in pending:
        per_worker = index_tensors_dict.setdefault(tensor_name, {}).setdefault(step, {})
        # Merge into the worker's entry instead of replacing it, so a shape
        # does not wipe out a location stored for the same tensor.
        per_worker.setdefault(worker, {})[key] = obj
    return index_tensors_dict