Example #1
    def _collect_torch_profiling_data_if_profiler_enabled(self):
        if self.autograd_profiler_enabled is False:
            return
        if is_pt_1_8():
            records = torch.autograd._disable_profiler_legacy()
        else:
            records = torch.autograd._disable_profiler()
        self.autograd_profiler_enabled = False
        if is_pt_1_7():
            function_events = torch.autograd.profiler.EventList(
                torch.autograd.profiler.parse_event_records(records), use_cuda=self.use_cuda
            )
        elif is_pt_1_8():
            function_events = torch.autograd.profiler.EventList(
                torch.autograd.profiler.parse_legacy_records(records), use_cuda=self.use_cuda
            )
        else:
            function_events = torch.autograd.profiler.EventList(
                torch.autograd.profiler.parse_cpu_trace(records), use_cuda=self.use_cuda
            )

        for index, event in enumerate(function_events):
            if is_pt_1_8():
                cpu_time = event.time_range.start + self.start_profiler_time_us
                duration = event.time_range.elapsed_us() / float(CONVERT_TO_MICROSECS)
            else:
                # cpu_interval times are reported in microseconds
                cpu_time = event.cpu_interval.start + self.start_profiler_time_us
                duration = event.cpu_interval.elapsed_us() / float(CONVERT_TO_MICROSECS)
            # timestamp is expected in seconds for record_trace_events
            timestamp = cpu_time / float(CONVERT_TO_MICROSECS)
            self.record_trace_events(
                training_phase="cpu_functions",
                op_name=event.name,
                phase="X",
                timestamp=timestamp,
                duration=duration,
                tid=event.thread,
                step_num=self.step,
                device="cpu",
            )
            for k in event.kernels:
                self.record_trace_events(
                    training_phase="gpu_functions-dev:" + str(k.device),
                    op_name=k.name,
                    phase="X",
                    # timestamp is expected in seconds for record_trace_events
                    timestamp=(k.interval.start + self.start_profiler_time_us) / float(CONVERT_TO_MICROSECS),
                    duration=k.interval.elapsed_us() / float(CONVERT_TO_MICROSECS),
                    tid=k.device,
                    step_num=self.step,
                    event_name=event.name,
                    device=k.device,
                    start_cpu_thread=event.thread,
                    cpu_thread_start_time=cpu_time,
                )
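The method above adds start_profiler_time_us to each event's start and divides by CONVERT_TO_MICROSECS to hand record_trace_events a timestamp and duration in seconds. A minimal standalone sketch of that conversion, assuming CONVERT_TO_MICROSECS is 1_000_000 and using made-up numbers:

CONVERT_TO_MICROSECS = 1_000_000  # assumption: microseconds per second

start_profiler_time_us = 1_600_000_000 * CONVERT_TO_MICROSECS  # wall-clock time when profiling was enabled, in microseconds
event_start_us = 2_500   # event start relative to profiler start, in microseconds
event_elapsed_us = 750   # event duration, in microseconds

timestamp = (event_start_us + start_profiler_time_us) / float(CONVERT_TO_MICROSECS)  # absolute start, in seconds
duration = event_elapsed_us / float(CONVERT_TO_MICROSECS)                             # duration, in seconds
print(timestamp, duration)
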
Example #2
def test_pytorch_profiler_rnn(pytorch_profiler_config_parser, out_dir):
    train_model(out_dir)
    lt = LocalAlgorithmMetricsReader(out_dir)
    lt.refresh_event_file_list()
    events = lt.get_events(0, time.time() * 1000000)
    print(f"Number of events {len(events)}")
    if is_pt_1_5():
        assert len(events) <= 64
    elif is_pt_1_6() or is_pt_1_7():
        assert len(events) <= 85
    shutil.rmtree(out_dir, ignore_errors=True)
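train_model and LocalAlgorithmMetricsReader come from the test's surrounding module and are not shown here. As a rough sketch only, a train_model helper along these lines would exercise the hook on a small RNN; the model and loop details below are illustrative assumptions, not the test's actual code:

import torch
import torch.nn as nn
import torch.optim as optim
from smdebug.pytorch import Hook


def train_model(out_dir, steps=5):
    device = torch.device("cpu")
    model = nn.RNN(input_size=8, hidden_size=16, batch_first=True).to(device)
    hook = Hook(out_dir=out_dir)
    hook.register_hook(model)  # same registration call as in the other examples
    optimizer = optim.SGD(model.parameters(), lr=0.001)
    for _ in range(steps):
        optimizer.zero_grad()
        inputs = torch.randn(4, 10, 8, device=device)  # (batch, seq_len, input_size)
        output, _ = model(inputs)
        loss = output.mean()
        loss.backward()
        optimizer.step()
    hook.close()
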
Example #3
def test_pytorch_profiler(pytorch_profiler_config_parser, out_dir):
    device = torch.device("cpu")
    model = Net().to(device)
    hook = Hook(out_dir=out_dir)
    hook.register_hook(model)
    optimizer = optim.SGD(model.parameters(), lr=0.001)
    train(model, device, optimizer, hook)
    hook.close()
    lt = LocalAlgorithmMetricsReader(out_dir)
    lt.refresh_event_file_list()
    events = lt.get_events(0, time.time() * 1000000)
    print(f"Number of events {len(events)}")
    if is_pt_1_5():
        assert len(events) == 386
    elif is_pt_1_6():
        assert len(events) == 672
    elif is_pt_1_7():
        assert 220 <= len(events)
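Net and train are likewise defined in the test module rather than shown here. A hedged sketch, assuming Net is a small feed-forward network and train runs a handful of CPU steps; both are illustrative stand-ins:

import torch
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(16, 32)
        self.fc2 = nn.Linear(32, 10)

    def forward(self, x):
        return self.fc2(F.relu(self.fc1(x)))


def train(model, device, optimizer, hook, steps=10):
    model.train()
    for _ in range(steps):
        data = torch.randn(8, 16, device=device)
        target = torch.randint(0, 10, (8,), device=device)
        optimizer.zero_grad()
        loss = F.cross_entropy(model(data), target)
        loss.backward()
        optimizer.step()
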
Example #4
    def forward_pre_hook(self, module, inputs):
        # Write the gradients of the past step if the writer is still available.
        if self.writer is not None:
            self._close_writers()
        self._close_tb_writer()

        if not self.prepared_collections:
            # at this point we need all collections to be ready;
            # this may not be the case at hook creation, because user code
            # running after the hook is created may add more collections
            self._prepare_collections()
            self.prepared_collections = True

        self._increment_step()

        ## preparing for step metrics
        # The last operation can be a forward pass (an eval loop is running, or forward is
        # called multiple times on the module, e.g. for an RNN), or a backward pass (the
        # train backward loop just finished and we are at forward again).

        # we will log all outstanding forward and backward events
        self.log_outstanding_timeline_metrics()

        self.step_event = self._TraceEventData(
            phase="Step:" + str(self.mode),
            op_name="Step:" + str(self.mode),
            start_time=time.time(),
            # the end time of step_event is updated every time a forward or backward event fires after this
            dur=0,
            pid=os.getpid(),
            step_num=str(self.mode_steps[self.mode]),
        )
        self.parent_forward_event = self._TraceEventData(
            phase="Forward",
            op_name=module._module_name,
            start_time=time.time(),
            # the end time of parent_forward_event is updated every time a forward event fires after this
            dur=0,
            pid=os.getpid(),
            step_num=str(self.mode_steps[self.mode]),
        )

        self.profiler_config_parser.load_config()
        self.profiler_config_parser.handle_step_start_python_profiling(self.mode, self.step)

        if (
            self.autograd_profiler_enabled
            and not self.profiler_config_parser.config.detailed_profiling_config.disabled
        ):
            self._collect_torch_profiling_data_if_profiler_enabled()

        # should we re-enable profiling for this step?
        if (
            self.profiler_config_parser.should_save_metrics(MetricsCategory.DETAILED_PROFILING, self.step)
            and not self.autograd_profiler_enabled
        ):
            self.autograd_profiler_enabled = True
            if is_pt_1_5():
                torch.autograd._enable_profiler(torch.autograd.ProfilerConfig(self.profiler, False))
                self.start_profiler_time_us = time.time() * CONVERT_TO_MICROSECS
            elif is_pt_1_6():
                torch.autograd._enable_profiler(torch.autograd.ProfilerConfig(self.profiler, False, False))
                self.start_profiler_time_us = time.time() * CONVERT_TO_MICROSECS
            elif is_pt_1_7():
                torch.autograd._enable_profiler(
                    torch.autograd.ProfilerConfig(self.profiler, False, False, False)
                )
                self.start_profiler_time_us = time.time() * CONVERT_TO_MICROSECS
            elif is_pt_1_8():
                torch.autograd._enable_profiler_legacy(
                    torch.autograd.ProfilerConfig(self.profiler, False, False, False, False)
                )
                self.start_profiler_time_us = time.time() * CONVERT_TO_MICROSECS
            else:
                self.logger.warn(
                    f"The detailed profiling using autograd profiler is not supported for torch version "
                    f"{torch.__version__}"
                )
                self.autograd_profiler_enabled = False

        if self.is_smdataparallel_profiling:
            # Stop smdataparallel profiling at end step
            stop_smdataparallel_profiler(smdataparallel, self.profiler_config_parser.config.local_path)
        self.is_smdataparallel_profiling = False
        if self.profiler_config_parser.should_save_metrics(MetricsCategory.SMDATAPARALLEL_PROFILING, self.step):
            start_smdataparallel_profiler(smdataparallel, self.profiler_config_parser.config.local_path)
            self.is_smdataparallel_profiling = True

        if self._get_collections_to_save_for_step():
            self._initialize_writers()
            self._log_params(module)

        if self.last_saved_step is not None and not self.exported_collections:
            self.export_collections()
            self.exported_collections = True

        self.first_forward_submodule_name = None
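
forward_pre_hook above is attached to the traced module through PyTorch's standard pre-forward hook mechanism, so it runs once before every call to the module's forward. A minimal standalone illustration of that mechanism using a plain function (not smdebug's actual registration code):

import torch
import torch.nn as nn


def log_forward_pre_hook(module, inputs):
    # PyTorch calls this right before module.forward runs; inputs is a tuple of positional args
    print(f"forward about to run on {module.__class__.__name__}, input shape {inputs[0].shape}")


model = nn.Linear(4, 2)
handle = model.register_forward_pre_hook(log_forward_pre_hook)
model(torch.randn(3, 4))  # triggers the pre-hook, then the actual forward pass
handle.remove()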