def _backward(self, loss):
    self.optimizer.zero_grad()
    loss.backward()

    if self.should_clip_gradients:
        clip_gradients(self.model, self.current_iteration, self.writer, self.config)

    # Every 100 iterations, log histograms of parameter values and their
    # gradients, grouped by name. `secs` is an iterable of parameter-name
    # substrings (e.g. module groups) assumed to be defined elsewhere;
    # `torch` must be imported at module level.
    if self.current_iteration % 100 == 0 and hasattr(self, "tb_writer"):
        data = {key: [] for key in secs}
        grad = {key: [] for key in secs}
        for name, param in self.model.named_parameters():
            # Skip parameters without gradients and zero-dim (scalar) tensors.
            if param.grad is not None and param.data.shape != torch.Size([]):
                for sec in secs:
                    if sec in name:
                        data[sec].append(param.data.flatten())
                        grad[sec].append(param.grad.flatten())
        for sec in secs:
            if len(data[sec]) != 0 and len(grad[sec]) != 0:
                self.tb_writer.add_histogram(
                    sec + "_data_dis",
                    torch.cat(data[sec], dim=0),
                    global_step=self.current_iteration,
                )
                self.tb_writer.add_histogram(
                    sec + "_grad_dis",
                    torch.cat(grad[sec], dim=0),
                    global_step=self.current_iteration,
                )

    self.optimizer.step()
    self.profile("Backward time")
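# The histogram logging above depends on a `secs` list and a `tb_writer`
# attribute that are not shown in this snippet. Below is a minimal,
# self-contained sketch of the same per-group histogram idea; the group
# substrings, the toy model, and the SummaryWriter setup are illustrative
# assumptions, not names from the original codebase.
import torch
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter

secs = ["encoder", "decoder"]  # hypothetical parameter-name substrings
model = nn.Sequential()
model.add_module("encoder", nn.Linear(8, 8))
model.add_module("decoder", nn.Linear(8, 2))
writer = SummaryWriter(log_dir="runs/debug")

loss = model(torch.randn(4, 8)).sum()
loss.backward()

step = 0
data = {key: [] for key in secs}
grad = {key: [] for key in secs}
for name, param in model.named_parameters():
    if param.grad is not None and param.data.shape != torch.Size([]):
        for sec in secs:
            if sec in name:  # e.g. "encoder" matches "encoder.weight"
                data[sec].append(param.data.flatten())
                grad[sec].append(param.grad.flatten())
for sec in secs:
    if data[sec] and grad[sec]:
        writer.add_histogram(sec + "_data_dis", torch.cat(data[sec]), global_step=step)
        writer.add_histogram(sec + "_grad_dis", torch.cat(grad[sec]), global_step=step)
writer.close()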
def _backward(self, loss):
    self.optimizer.zero_grad()
    loss.backward()

    if self.should_clip_gradients:
        clip_gradients(self.model, self.current_iteration, self.writer, self.config)

    self.optimizer.step()
    self.profile("Backward time")
def _backward(self, loss):
    self.optimizer.zero_grad()
    # Debugging aids (disabled): retain and inspect gradients on specific
    # attention-transform weights before and after backward().
    # self.model.context_feature_embeddings_list[0][0].image_attention_model.module.transform.module.lc.weight.retain_grad()
    # self.model.image_feature_embeddings_list[0][0].image_attention_model.module.transform.module.lc.weight.retain_grad()
    loss.backward()
    # print(self.model.context_feature_embeddings_list[0][0].image_attention_model.module.transform.module.lc.weight.sum())
    # print(self.model.image_feature_embeddings_list[0][0].image_attention_model.module.transform.module.lc.weight.sum())
    # print(self.model.context_feature_embeddings_list[0][0].image_attention_model.module.transform.module.lc.weight.grad.sum())
    # print(self.model.image_feature_embeddings_list[0][0].image_attention_model.module.transform.module.lc.weight.grad.sum())

    if self.should_clip_gradients:
        clip_gradients(self.model, self.current_iteration, self.writer, self.config)

    self.optimizer.step()
    self.profile("Backward time")
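# The commented-out lines above follow a standard PyTorch gradient-inspection
# pattern: call retain_grad() on a tensor of interest before backward(), then
# read its .grad afterwards. retain_grad() matters for non-leaf tensors, whose
# gradients are otherwise discarded; leaf parameters keep .grad by default.
# The sketch below shows the pattern in isolation; the names are hypothetical,
# not taken from the model above.
import torch

layer = torch.nn.Linear(8, 2)
x = torch.randn(4, 8, requires_grad=True)
hidden = layer(x)      # non-leaf tensor: its .grad is discarded by default
hidden.retain_grad()   # ask autograd to keep its gradient after backward()

loss = hidden.sum()
loss.backward()

print(layer.weight.grad.sum())  # leaf parameter: gradient kept automatically
print(hidden.grad.sum())        # available only because of retain_grad()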