import os
import re

from torch._C._profiler import _EventType, _ExtraFields_TorchOp, _ProfilerEvent


def source_code_location(event: _ProfilerEvent):
    # Walk up the event tree until an event name carries a Python source
    # location such as "train.py(42): forward".
    while event is not None:
        match = re.search(r"\.py\(.*\)", event.name())
        if match is None:
            event = event.parent
            continue
        return event.name()
    return "No source code location found"
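
# Illustration only: a minimal sketch of how source_code_location walks
# parent pointers. FakeEvent is a hypothetical stand-in for _ProfilerEvent,
# and the event name format shown is an assumption for this demo.
class FakeEvent:
    def __init__(self, name, parent=None):
        self._name = name
        self.parent = parent

    def name(self):
        return self._name


py_frame = FakeEvent("train.py(42): train_step")
op = FakeEvent("aten::mm", parent=FakeEvent("aten::linear", parent=py_frame))
assert source_code_location(op) == "train.py(42): train_step"
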
def match(self, event: _ProfilerEvent):
    # Match a biased aten::conv2d whose Conv2d module is immediately
    # followed by a BatchNorm2d module.
    if event.name() != "aten::conv2d":
        return False
    if len(input_dtypes(event)) < 3 or input_dtypes(event)[2] == "":
        return False
    # This means bias=True
    event = self.go_up_until(
        event, lambda e: e.name().startswith("nn.Module: Conv2d"))
    if not event:
        return False
    event = self.next_of(event)
    if not event:
        return False
    return event.name().startswith("nn.Module: BatchNorm2d")
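
# Hedged illustration of the module structure this pattern flags; the layer
# sizes are arbitrary. BatchNorm re-centers the conv output with its own
# statistics, so the conv bias is wasted work and can be dropped.
import torch.nn as nn

flagged = nn.Sequential(nn.Conv2d(3, 16, 3, bias=True), nn.BatchNorm2d(16))
suggested = nn.Sequential(nn.Conv2d(3, 16, 3, bias=False), nn.BatchNorm2d(16))
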
def match(self, event: _ProfilerEvent):
    # Match indexing loops: the same run of indexing ops, starting with
    # aten::select, repeated at least ten times among the event's siblings.
    if event.name() != "aten::select":
        return False
    if event.id in self.visited:
        return False
    repeat_count = 1
    _, next = self.siblings_of(event)
    if len(next) <= 1:
        return False

    # Custom event list matching
    def same_ops(list1, list2):
        if len(list1) != len(list2):
            return False
        for op1, op2 in zip(list1, list2):
            if op1.name() != op2.name():
                return False
        return True

    # Record the ops between two aten::select
    next_select_idx = index_of_first_match(
        next, lambda e: e.name() == "aten::select")
    if next_select_idx is None:
        return False
    indexing_ops = [event] + next[:next_select_idx]
    next = next[len(indexing_ops) - 1:]
    for i in range(0, len(next), len(indexing_ops)):
        if same_ops(indexing_ops, next[i:i + len(indexing_ops)]):
            repeat_count += 1
            self.visited.add(next[i].id)
        else:
            break
    return repeat_count >= 10
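
# Hedged illustration of the kind of loop this pattern detects: per-element
# indexing in Python typically shows up in the profile as repeated, identical
# runs of aten::select (and companion ops). Shapes here are arbitrary.
import torch

x = torch.zeros(100, 10)
for i in range(100):          # flagged: the same indexing run, 100 times
    x[i] = torch.ones(10)

# Vectorized equivalent: one broadcast assignment, no Python-side loop.
x[:] = torch.ones(10)
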
def match(self, event: _ProfilerEvent):
    def is_dataloader_function(name: str, function_name: str):
        return name.startswith(
            os.path.join("torch", "utils", "data", "dataloader.py")
        ) and name.endswith(function_name)

    # DataLoader.__iter__ -> _get_iterator without the worker-count sanity
    # check indicates num_workers=0, i.e. synchronous data loading.
    if not is_dataloader_function(event.name(), "__iter__"):
        return False
    if not event.children:
        return False
    event = event.children[0]
    if not is_dataloader_function(event.name(), "_get_iterator"):
        return False
    if not event.children:
        return False
    event = event.children[0]
    return not is_dataloader_function(event.name(),
                                      "check_worker_number_rationality")
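
# Hedged illustration: the pattern fires when batches are loaded on the main
# process. The dataset and worker count below are arbitrary examples.
import torch
from torch.utils.data import DataLoader, TensorDataset

dataset = TensorDataset(torch.randn(1024, 16))

flagged = DataLoader(dataset, batch_size=32)  # num_workers defaults to 0
suggested = DataLoader(dataset, batch_size=32, num_workers=4)  # prefetch in workers
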
def match(self, event: _ProfilerEvent):
    # Match aten::mm ops recorded while TF32 for cuBLAS was disabled.
    # If we saw this pattern once, we don't need to match it again
    if event.tag != _EventType.TorchOp:
        return False
    assert isinstance(event.extra_fields, _ExtraFields_TorchOp)
    if event.name() == "aten::mm":
        if event.extra_fields.allow_tf32_cublas is False:
            return True
    return False
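
# Hedged illustration of the usual remedy: enabling TF32 for float32 matmuls
# on Ampere-or-newer GPUs via the torch.backends flags.
import torch

torch.backends.cuda.matmul.allow_tf32 = True  # matmuls (aten::mm and friends)
torch.backends.cudnn.allow_tf32 = True        # convolutions
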
def match(self, event: _ProfilerEvent):
    # An aten::zero_ that is not part of aten::zeros means zero_grad()
    # zeroed existing gradient buffers in place instead of setting them
    # to None.
    if not event.name().endswith(": zero_grad"):
        return False
    if not event.children:
        return False
    for sub_event in eventTreeDFS(event.children):
        if (sub_event.name() == "aten::zero_"
                and sub_event.parent.name() != "aten::zeros"):
            return True
    # TODO: We should also check if the optimizer's numerical behavior will change.
    return False
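
# Hedged illustration: zero_grad(set_to_none=False) launches an in-place
# aten::zero_ per parameter, which is what this pattern detects.
import torch

model = torch.nn.Linear(8, 8)
opt = torch.optim.SGD(model.parameters(), lr=0.1)

opt.zero_grad(set_to_none=False)  # flagged: zeroes gradient buffers in place
opt.zero_grad(set_to_none=True)   # suggested: frees the gradients instead
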
def match(self, event: _ProfilerEvent):
    # Half-precision matmuls hit the Tensor Core fast path only when the
    # trailing dimensions are multiples of 8.
    def multiple_of(shapes, multiple):
        return all(dim % multiple == 0 for shape in shapes
                   for dim in shape[-2:])

    if event.name() not in ("aten::mm", "aten::bmm", "aten::addmm"):
        return False
    if not input_dtypes(event):
        return False
    arg_dtype = input_dtypes(event)[0]
    # TODO: Have a better way to check dtype_size
    if (arg_dtype.endswith("c10::BFloat16")
            or arg_dtype.endswith("c10::Half")) and not multiple_of(
                input_shapes(event), 8):
        return True
    return False
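
# Hedged illustration, assuming a CUDA device: half-precision matmuls whose
# trailing dimensions are not multiples of 8 may miss the Tensor Core path.
# Zero-padding leaves the valid block of the product unchanged.
import torch
import torch.nn.functional as F

a = torch.randn(1023, 1023, dtype=torch.float16, device="cuda")
b = torch.randn(1023, 1023, dtype=torch.float16, device="cuda")

flagged = a @ b  # 1023 is not a multiple of 8

# Pad the last two dimensions up to 1024, multiply, then crop the result.
fast = (F.pad(a, (0, 1, 0, 1)) @ F.pad(b, (0, 1, 0, 1)))[:1023, :1023]
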
def match(self, event: _ProfilerEvent):
    # The "_single_tensor_" suffix marks the non-foreach optimizer step.
    for optimizer in self.optimizers_with_foreach:
        if event.name().endswith(f"_single_tensor_{optimizer}"):
            return True
    return False
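
# Hedged illustration: the foreach implementation fuses the per-parameter
# update loop into multi-tensor kernels. foreach is a real optimizer kwarg.
import torch

model = torch.nn.Linear(8, 8)

flagged = torch.optim.Adam(model.parameters(), foreach=False)   # single-tensor
suggested = torch.optim.Adam(model.parameters(), foreach=True)  # multi-tensor
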
def match(self, event: _ProfilerEvent):
    return re.search(self.name, event.name()) is not None
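
# Hedged, self-contained sketch of the same re.search check applied to plain
# strings; the pattern string and event names below are made up for the demo.
import re

pattern_name = r"aten::conv2d"
for name in ["aten::conv2d", "aten::mm", "nn.Module: Conv2d"]:
    print(name, "->", re.search(pattern_name, name) is not None)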