def train(self):
    """Run the pre-train, train and post-train hooks for the current step.

    The client's work directory is resolved through
    ``JobPath(self.job_id).client_work_dir`` from ``self.step`` and
    ``self.client.address`` and is created when it does not exist yet.
    All three hooks are invoked while ``WorkDirContext`` is active for
    that directory.
    """
    step_dir = JobPath(self.job_id).client_work_dir(
        self.step, self.client.address
    )
    os.makedirs(step_dir, exist_ok=True)

    # Hooks run in a fixed order inside the work-directory context.
    with WorkDirContext(step_dir):
        self._pre_train()
        self._train()
        self._post_train()
def train(self):
    """Train for the job's current round and publish the results.

    Steps, in order:

    1. Call ``self._pre_train()``.
    2. Resolve (and create if missing) the client's work directory for
       ``self.job.cur_round`` and ``self.client.address``.
    3. Call ``self._train()`` and ``self._post_train()`` while
       ``WorkDirContext`` is active for that directory.
    4. Send the model's ``state_dict()`` and ``self.reports`` through
       ``StandaloneSend``.
    5. Reset ``self.__model_params_path`` to ``None`` for the next round.
    """
    # The pre-training hook deliberately runs before the round's work
    # directory is created or entered.
    self._pre_train()

    round_dir = JobPath(self.job_id).client_work_dir(
        self.job.cur_round, self.client.address
    )
    os.makedirs(round_dir, exist_ok=True)

    with WorkDirContext(round_dir):
        self._train()
        self._post_train()

    # NOTE(review): the original layout was lost; the statements below are
    # assumed to run after leaving the work-directory context -- confirm.
    # Once the round's training is finished, save the model's current
    # training state.
    StandaloneSend.send_partial_params(
        self.client.address,
        self.job_id,
        self.job.cur_round,
        self.model.state_dict(),
    )
    StandaloneSend.send(
        self.client.address,
        self.job_id,
        self.job.cur_round,
        "report",
        self.reports,
    )

    # Reset self.__model_params_path in preparation for the next round.
    self.__model_params_path = None
def validate(self):
    """Run ``self._validate()`` inside the client's work directory.

    The directory is resolved through
    ``JobPath(self.job_id).client_work_dir`` from ``self.job.cur_round``
    and ``self.client.address`` and is created when it does not exist yet.
    """
    round_dir = JobPath(self.job_id).client_work_dir(
        self.job.cur_round, self.client.address
    )
    os.makedirs(round_dir, exist_ok=True)

    with WorkDirContext(round_dir):
        self._validate()