Ejemplo n.º 1
0
def acquire_job_list():
    """
    Serialize every job found under ``JOB_PATH`` to a JSON string.

    Jobs are fetched with ``JobManager.get_job_list`` and each one is encoded
    with ``json.dumps`` using ``JobEncoder``.

    :return: a tuple ``(job_str_list, 200)`` where ``job_str_list`` is the
        list of JSON strings, one per job, in the order the jobs were
        returned.
    """
    serialized_jobs = [
        json.dumps(job, cls=JobEncoder)
        for job in JobManager.get_job_list(JOB_PATH)
    ]
    return serialized_jobs, 200
Ejemplo n.º 2
0
    def aggregate(self):
        """
        Aggregate freshly loaded model parameters for every known job.

        For each job returned by ``JobManager.get_job_list(self.job_path)``
        the parameters are loaded with ``self.load_model_pars``. A job is
        processed only when the loaded step differs from the step recorded in
        ``self.fed_step`` (a missing record counts as 0) and parameters were
        actually loaded. Processing runs ``self._exec``, records the new step
        and appends the job id to
        ``WAITING_BROADCAST_AGGREGATED_JOB_ID_LIST`` (cleared at the start of
        each call). When the job's epoch count is reached the final
        parameters are saved, and in cluster work mode the pending list is
        broadcast right after each processed job.

        :return: None
        """
        jobs = JobManager.get_job_list(self.job_path)
        WAITING_BROADCAST_AGGREGATED_JOB_ID_LIST.clear()

        for job in jobs:
            job_id = job.get_job_id()
            job_model_dir = os.path.join(self.base_model_path,
                                         "models_{}".format(job_id))
            job_model_pars, fed_step = self.load_model_pars(
                job_model_dir, self.fed_step.get(job_id))

            # A job with no recorded step yet is treated as being at step 0.
            recorded_step = self.fed_step.get(job_id)
            if recorded_step is None:
                recorded_step = 0

            step_changed = recorded_step != fed_step
            if not step_changed or job_model_pars is None:
                continue

            self.logger.info("Aggregating......")
            self._exec(job_model_pars, self.base_model_path, job_id, fed_step)
            self.fed_step[job_id] = fed_step
            WAITING_BROADCAST_AGGREGATED_JOB_ID_LIST.append(job_id)

            # Once the recorded step reaches the job's epoch count, persist
            # the final parameters from the temporary aggregate directory.
            if job.get_epoch() <= self.fed_step[job_id]:
                self._save_final_model_pars(
                    job_id,
                    os.path.join(job_model_dir, "tmp_aggregate_pars"),
                    self.fed_step[job_id])

            if self.work_mode == WorkModeStrategy.WORKMODE_CLUSTER:
                self._broadcast(WAITING_BROADCAST_AGGREGATED_JOB_ID_LIST,
                                CONNECTED_TRAINER_LIST,
                                self.base_model_path)
Ejemplo n.º 3
0
def register_trainer(ip, port, client_id):
    """
    Register a trainer and prepare its per-job model directories.

    The trainer is identified by ``ip + ":" + port``. If that host is not yet
    in ``CONNECTED_TRAINER_LIST``, a directory
    ``<BASE_MODEL_PATH>/models_<job_id>/models_<client_id>`` is ensured for
    every job returned by ``JobManager.get_job_list(JOB_PATH)`` and the host
    is appended to the list.

    :param ip: trainer address; must be a string (it is concatenated).
    :param port: trainer port; must be a string (it is concatenated).
    :param client_id: identifier used to name the client model directory.
    :return: ``('register_success', 200)`` when the trainer was added,
        ``('already connected', 201)`` when it was already registered.
    """
    trainer_host = ip + ":" + port
    if trainer_host in CONNECTED_TRAINER_LIST:
        return 'already connected', 201

    for job in JobManager.get_job_list(JOB_PATH):
        job_model_client_dir = os.path.join(
            BASE_MODEL_PATH, "models_{}".format(job.get_job_id()),
            "models_{}".format(client_id))
        # exist_ok=True replaces the former exists()-then-makedirs() pair,
        # which could raise if the directory appeared between the check and
        # the creation (e.g. concurrent registrations).
        os.makedirs(job_model_client_dir, exist_ok=True)

    CONNECTED_TRAINER_LIST.append(trainer_host)
    return 'register_success', 200
Ejemplo n.º 4
0
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 20, 5, 1)
        self.conv2 = nn.Conv2d(20, 50, 5, 1)
        self.fc1 = nn.Linear(4 * 4 * 50, 500)
        self.fc2 = nn.Linear(500, 10)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """
        Run the forward pass and return the output of ``fc2``.

        Each convolution is followed by ReLU and a 2x2 max-pool with stride 2;
        the result is flattened to rows of ``4 * 4 * 50`` values, passed
        through ``fc1`` with ReLU and finally through ``fc2``. No softmax is
        applied here, so the returned values are the raw ``fc2`` outputs.

        :param x: input tensor accepted by ``self.conv1``.
        :return: tensor produced by ``self.fc2``.
        """
        features = x
        for conv in (self.conv1, self.conv2):
            features = F.max_pool2d(F.relu(conv(features)), 2, 2)

        flattened = features.view(-1, 4 * 4 * 50)
        hidden = F.relu(self.fc1(flattened))
        return self.fc2(hidden)


if __name__ == "__main__":
    # Script entry point: build the model, generate a job for it and submit
    # the job through a JobManager.
    model = Net()
    job_manager = JobManager()

    # Options forwarded to generate_job. Note that the ``model`` option is
    # the Net class itself, whereas submit_job receives the instance.
    job_options = {
        "work_mode": strategy.WorkModeStrategy.WORKMODE_CLUSTER,
        "fed_strategy": strategy.FederateStrategy.FED_DISTILLATION,
        "epoch": 3,
        "model": Net,
        "distillation_alpha": 0.5,
        "l2_dist": True,
    }
    job = job_manager.generate_job(**job_options)
    job_manager.submit_job(job, model)
Ejemplo n.º 5
0
        self.conv2 = nn.Conv2d(6, 16, 5, 1)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """
        Run the forward pass and return the output of ``fc3``.

        Each convolution is followed by ReLU and a 2x2 max-pool with stride 2;
        the result is flattened to rows of ``16 * 5 * 5`` values and passed
        through ``fc1`` and ``fc2`` (each with ReLU) before ``fc3``.

        :param x: input tensor accepted by ``self.conv1``.
        :return: tensor produced by ``self.fc3`` (logits; no softmax applied).
        """
        for conv in (self.conv1, self.conv2):
            x = F.max_pool2d(F.relu(conv(x)), 2, 2)

        x = x.view(-1, 16 * 5 * 5)
        for dense in (self.fc1, self.fc2):
            x = F.relu(dense(x))

        # The caller needs logits, so fc3's output is returned as-is.
        return self.fc3(x)


if __name__ == "__main__":
    # Script entry point: build the model, generate a job for it and submit
    # the job through a JobManager.
    model = Net()
    job_manager = JobManager()

    # Options forwarded to generate_job. Note that the ``model`` option is
    # the Net class itself, whereas submit_job receives the instance.
    job_options = {
        "work_mode": strategy.WorkModeStrategy.WORKMODE_STANDALONE,
        "fed_strategy": strategy.FederateStrategy.FED_AVG,
        "epoch": 50,
        "model": Net,
    }
    job = job_manager.generate_job(**job_options)
    job_manager.submit_job(job, model)