def acquire_job_list():
    """Serialize every job found under ``JOB_PATH`` to a JSON string.

    Each job returned by ``JobManager.get_job_list`` is encoded with
    ``json.dumps`` using ``JobEncoder``.

    :return: a ``(list_of_json_strings, 200)`` tuple; the second element is
        presumably the HTTP status code of the response -- confirm against
        the route registration.
    """
    serialized_jobs = [
        json.dumps(entry, cls=JobEncoder)
        for entry in JobManager.get_job_list(JOB_PATH)
    ]
    return serialized_jobs, 200
def aggregate(self):
    """
    Run one aggregation pass over every job listed under ``self.job_path``.

    For each job, ``self.load_model_pars`` is asked for the job's model
    parameters and federated step. These are compared with the step
    recorded in ``self.fed_step`` (treated as 0 when the job has no entry).
    When the step differs and parameters were returned, ``self._exec`` is
    called, the new step is recorded in ``self.fed_step`` and the job id is
    appended to ``WAITING_BROADCAST_AGGREGATED_JOB_ID_LIST``, which is
    cleared at the start of every pass.

    :return: None
    """
    job_list = JobManager.get_job_list(self.job_path)
    # Start each pass with an empty queue of job ids awaiting broadcast.
    WAITING_BROADCAST_AGGREGATED_JOB_ID_LIST.clear()
    for job in job_list:
        # The per-job directory is "<base_model_path>/models_<job_id>"; the
        # currently recorded step (or None) is handed to the loader.
        job_model_pars, fed_step = self.load_model_pars(
            os.path.join(self.base_model_path,
                         "models_{}".format(job.get_job_id())),
            self.fed_step.get(job.get_job_id()))
        # print("fed_step: {}, self.fed_step: {}, job_model_pars: {}".format(fed_step, self.fed_step.get(job.get_job_id()), job_model_pars))
        # A job that has never been recorded counts as step 0.
        job_fed_step = 0 if self.fed_step.get(
            job.get_job_id()) is None else self.fed_step.get(
            job.get_job_id())
        # Only act when the loader reports a step different from the recorded
        # one and actually returned parameters.
        if job_fed_step != fed_step and job_model_pars is not None:
            self.logger.info("Aggregating......")
            self._exec(job_model_pars, self.base_model_path,
                       job.get_job_id(), fed_step)
            self.fed_step[job.get_job_id()] = fed_step
            WAITING_BROADCAST_AGGREGATED_JOB_ID_LIST.append(
                job.get_job_id())
            # NOTE(review): the two checks below are assumed to sit inside
            # the aggregation branch (the subscript on self.fed_step needs
            # the entry written just above) -- confirm the nesting.
            # Once the recorded step reaches the job's epoch count, the
            # final-parameter save is triggered.
            if job.get_epoch() <= self.fed_step[job.get_job_id()]:
                self._save_final_model_pars(
                    job.get_job_id(),
                    os.path.join(self.base_model_path,
                                 "models_{}".format(job.get_job_id()),
                                 "tmp_aggregate_pars"),
                    self.fed_step[job.get_job_id()])
            # In cluster work mode the queued job ids are passed to
            # self._broadcast together with the connected trainer list.
            if self.work_mode == WorkModeStrategy.WORKMODE_CLUSTER:
                self._broadcast(WAITING_BROADCAST_AGGREGATED_JOB_ID_LIST,
                                CONNECTED_TRAINER_LIST,
                                self.base_model_path)
def register_trainer(ip, port, client_id):
    """Register a trainer host and create its per-job model directories.

    The trainer is identified by the string ``ip + ":" + port``. If that
    host is not yet in ``CONNECTED_TRAINER_LIST``, a directory
    ``<BASE_MODEL_PATH>/models_<job_id>/models_<client_id>`` is ensured for
    every job returned by ``JobManager.get_job_list(JOB_PATH)`` and the host
    is appended to the list.

    :param ip: trainer address, as a string.
    :param port: trainer port, as a string (it is concatenated to ``ip``).
    :param client_id: identifier used to name the per-client directory.
    :return: ``('register_success', 200)`` for a new host, or
        ``('already connected', 201)`` when the host was already listed.
    """
    trainer_host = ip + ":" + port
    if trainer_host in CONNECTED_TRAINER_LIST:
        return 'already connected', 201

    # NOTE(review): client_id flows straight into a filesystem path; if it
    # comes from a request URL it should be validated by the caller.
    for job in JobManager.get_job_list(JOB_PATH):
        job_model_client_dir = os.path.join(
            BASE_MODEL_PATH,
            "models_{}".format(job.get_job_id()),
            "models_{}".format(client_id))
        # exist_ok=True replaces a separate exists() check, which could race
        # with a concurrent registration creating the same directory.
        os.makedirs(job_model_client_dir, exist_ok=True)

    CONNECTED_TRAINER_LIST.append(trainer_host)
    return 'register_success', 200
super(Net, self).__init__() self.conv1 = nn.Conv2d(1, 20, 5, 1) self.conv2 = nn.Conv2d(20, 50, 5, 1) self.fc1 = nn.Linear(4 * 4 * 50, 500) self.fc2 = nn.Linear(500, 10) self.softmax = nn.Softmax(dim=1) def forward(self, x): x = F.relu(self.conv1(x)) x = F.max_pool2d(x, 2, 2) x = F.relu(self.conv2(x)) x = F.max_pool2d(x, 2, 2) x = x.view(-1, 4 * 4 * 50) x = F.relu(self.fc1(x)) x = self.fc2(x) return x if __name__ == "__main__": model = Net() job_manager = JobManager() job = job_manager.generate_job( work_mode=strategy.WorkModeStrategy.WORKMODE_CLUSTER, fed_strategy=strategy.FederateStrategy.FED_DISTILLATION, epoch=3, model=Net, distillation_alpha=0.5, l2_dist=True) job_manager.submit_job(job, model)
self.conv2 = nn.Conv2d(6, 16, 5, 1) self.fc1 = nn.Linear(16 * 5 * 5, 120) self.fc2 = nn.Linear(120, 84) self.fc3 = nn.Linear(84, 10) self.softmax = nn.Softmax(dim=1) def forward(self, x): x = F.relu(self.conv1(x)) x = F.max_pool2d(x, 2, 2) x = F.relu(self.conv2(x)) x = F.max_pool2d(x, 2, 2) x = x.view(-1, 16 * 5 * 5) x = F.relu(self.fc1(x)) x = F.relu(self.fc2(x)) x = self.fc3(x) # need to return logits return x if __name__ == "__main__": model = Net() job_manager = JobManager() job = job_manager.generate_job( work_mode=strategy.WorkModeStrategy.WORKMODE_STANDALONE, fed_strategy=strategy.FederateStrategy.FED_AVG, epoch=50, model=Net) job_manager.submit_job(job, model)