def __init__(self, gpu, rank, world_size, dataset, model, batch_size, lr,
             client_epoch, seed, exp_id):
    super().__init__()
    # Seed every RNG source so runs are reproducible across workers.
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
    self.iid = False
    logger.add(f"logs/fed/{world_size}_{dataset}_{batch_size}_{lr}"
               f"_{exp_id}_{rank}.log")
    self.rank = rank
    self.lr = lr
    self.client_epoch = client_epoch
    self.device = f"cuda:{gpu}" if torch.cuda.is_available() else "cpu"
    self.model = load_model(
        model, class_num=100 if dataset == "cifar100" else 10).to(self.device)
    self.optimizer = torch.optim.SGD(self.model.parameters(),
                                     lr=self.lr,
                                     momentum=0.)
    self.loss_func = nn.CrossEntropyLoss()
    # Rank 0 is the parameter server, so worker `rank` takes shard
    # `rank - 1` out of `world_size - 1` training partitions.
    self.train_loader, _ = partition_dataset(dataset, world_size - 1,
                                             rank - 1, batch_size, seed,
                                             self.iid)
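
# The constructor above relies on `partition_dataset(dataset, n_parts, part_id,
# batch_size, seed, iid)` to hand each worker its shard. Below is a minimal
# sketch of such a helper for CIFAR-10, assuming a label-sorted shard scheme
# for the non-IID case; the repo's actual splitting logic may differ.
import numpy as np
from torch.utils.data import DataLoader, Subset
from torchvision import datasets, transforms

def partition_dataset_sketch(n_parts, part_id, batch_size, seed, iid):
    tfm = transforms.ToTensor()
    train = datasets.CIFAR10("data", train=True, download=True, transform=tfm)
    test = datasets.CIFAR10("data", train=False, download=True, transform=tfm)
    rng = np.random.default_rng(seed)
    if iid:
        idx = rng.permutation(len(train))          # random, balanced shards
    else:
        idx = np.argsort(np.array(train.targets))  # group indices by label
    shard = np.array_split(idx, n_parts)[part_id]
    train_loader = DataLoader(Subset(train, shard.tolist()),
                              batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test, batch_size=batch_size)
    return train_loader, test_loader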
def __init__(self, gpu, world_size, dataset, batch_size, lr, model,
             max_epoch, client_epoch, seed, exp_id):
    super().__init__()
    # Seed every RNG source so runs are reproducible.
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
    self.iid = False
    logger.add(f"logs/fed/{world_size}_{dataset}_{batch_size}_{lr}"
               f"_{model}_{max_epoch}_{client_epoch}_PS{exp_id}.log")
    if wandb_enable:
        wandb.init(
            project="Async_FedAvg",
            name=f"vanilla_{world_size}_{batch_size}_{max_epoch}_{client_epoch}"
                 f"_{lr}_{dataset}_{model}_{'iid' if self.iid else 'noniid'}",
            config={
                "method": "vanilla",
                "world size": world_size,
                "dataset": dataset,
                "iid": self.iid,
                "model": model,
                "batch size": batch_size,
                "learning rate": lr,
                "momentum": 0.,
                "lambda": 0.,
                "global epoch": max_epoch,
                "client epoch": client_epoch,
                "seed": seed,
            })
    self.max_epoch = max_epoch
    self.client_epoch = client_epoch
    self.curr_epoch = -1
    self.lr = lr
    self.world_size = world_size
    self.device = f"cuda:{gpu}" if torch.cuda.is_available() else "cpu"
    self.model = load_model(
        model, class_num=100 if dataset == "cifar100" else 10).to(self.device)
    # One aggregation slot per client (ranks 1 .. world_size - 1).
    self.aggregation = [DataAggregation(r) for r in range(1, world_size)]
    self.embedding_list = []  # linear.bias
    self.client_counter = 0
    self.fetch_time = 0.
    self.sync_time = 0.
    self.optimizer = torch.optim.SGD(self.model.parameters(),
                                     lr=self.lr,
                                     momentum=0.)
    # The server only evaluates, so it keeps just the test split.
    _, self.test_loader = partition_dataset(dataset, world_size - 1, 0,
                                            batch_size, seed, self.iid)
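
# The server above keeps one `DataAggregation` slot per client and averages
# what the clients return. A minimal FedAvg-style aggregation sketch, assuming
# each slot exposes the client's returned state_dict as `.state` (the real
# DataAggregation interface is defined elsewhere in this repo):
import torch

def fedavg_sketch(client_states):
    """Uniformly average a list of state_dicts into one global state_dict."""
    avg = {}
    for key in client_states[0]:
        stacked = torch.stack([s[key].float() for s in client_states])
        # Cast back so integer buffers (e.g. num_batches_tracked) reload cleanly.
        avg[key] = stacked.mean(dim=0).to(client_states[0][key].dtype)
    return avg

# Hypothetical usage inside the aggregation step:
#   self.model.load_state_dict(fedavg_sketch([a.state for a in self.aggregation]))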
def __init__(self, gpu, rank, world_size, dataset, model, batch_size, lr,
             seed, exp_id):
    super().__init__()
    # Seed every RNG source so runs are reproducible across workers.
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
    self.iid = False
    logger.add(f"logs/asyncfed/{world_size}_{dataset}_{batch_size}_{lr}"
               f"_{exp_id}_{rank}.log")
    self.rank = rank
    self.device = f"cuda:{gpu}" if torch.cuda.is_available() else "cpu"
    self.lr = lr
    if dataset == "cifar100":
        class_num = 100
    elif dataset == "emnist":
        class_num = 62
    else:
        class_num = 10
    self.model_name = model
    self.model = load_model(model, class_num).to(self.device)
    self.optimizer = torch.optim.SGD(self.model.parameters(),
                                     lr=self.lr,
                                     momentum=0.)
    # The transformer emits log-probabilities, so pair it with NLLLoss;
    # the other models output raw logits.
    if model == "transformer":
        self.loss_func = nn.NLLLoss()
    else:
        self.loss_func = nn.CrossEntropyLoss()
    self.sync_future_all = None
    self.train_loader, self.test_loader = partition_dataset(
        dataset, world_size - 1, rank - 1, batch_size, seed, iid=self.iid)
    self.latest_model = {}
    self.prefetch_model = {}
    self.extra_work = 0.
    self.extra_work_iter = ExtraWorkLoader(self.train_loader)
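
# `ExtraWorkLoader` lets an async worker keep drawing batches for extra local
# work past its epoch boundary. A minimal sketch, assuming it simply cycles
# the underlying DataLoader by restarting the iterator on exhaustion; the
# repo's class may carry additional bookkeeping:
class ExtraWorkLoaderSketch:
    def __init__(self, loader):
        self.loader = loader
        self.it = iter(loader)

    def retrieve(self):
        try:
            return next(self.it)
        except StopIteration:
            # Epoch exhausted: rebuild the iterator (reshuffles) and continue.
            self.it = iter(self.loader)
            return next(self.it)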
def __init__(self, gpu, world_size, dataset, batch_size, lr, mom, lambd,
             model, max_epoch, client_epoch, seed, exp_id, early_stop_round,
             early_stop_metric):
    super().__init__()
    # Seed every RNG source so runs are reproducible.
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
    self.iid = False
    logger.add(f"logs/asyncfed/{world_size}_{dataset}_{batch_size}_{lr}"
               f"_{mom}_{lambd}_{model}_{max_epoch}_{client_epoch}_PS{exp_id}.log")
    if wandb_enable:
        wandb.init(
            project="Async_FedAvg",
            name=f"async_{world_size}_{batch_size}_{max_epoch}_{client_epoch}"
                 f"_{lr}_{lambd}_{mom}_{dataset}_{model}"
                 f"_{'iid' if self.iid else 'noniid'}",
            config={
                "method": "async",
                "world size": world_size,
                "dataset": dataset,
                "iid": self.iid,
                "model": model,
                "batch size": batch_size,
                "learning rate": lr,
                "momentum": mom,
                "lambda": lambd,
                "global epoch": max_epoch,
                "client epoch": client_epoch,
                "seed": seed,
                "mom_method": "normal",
            })
    # Async clients report every local epoch, so the global budget is
    # counted in client epochs.
    self.max_epoch = max_epoch * client_epoch
    self.client_epoch = client_epoch
    self.world_size = world_size
    self.mom = mom
    self.device = f"cuda:{gpu}" if torch.cuda.is_available() else "cpu"
    self.model = load_model(
        model, class_num=100 if dataset == "cifar100" else 10).to(self.device)
    self.lr = lr
    self.lambd = lambd
    self.aggregation = [DataAggregation(r) for r in range(1, world_size)]
    self.embedding_list = []
    # Per-client trackers for dynamic extra-work assignment.
    self.dyn_task = np.zeros(self.world_size - 1)
    self.dyn_timer = np.zeros(self.world_size - 1)
    self.client_counter = 0
    self.wtminus1 = {}    # previous global weights w_{t-1}
    self.mom_buffer = {}  # server-side momentum buffer
    self.gminus1 = {}     # previous pseudo-gradient g_{t-1}
    self.broadcast_fut_all = None
    self.cluster_is_ready = True
    self.optimizer = torch.optim.SGD(self.model.parameters(),
                                     lr=self.lr,
                                     momentum=0.)
    _, self.test_loader = partition_dataset(dataset, world_size - 1, 0,
                                            batch_size, seed, iid=self.iid)
    self.acc_list = []
    self.early_stop_round = early_stop_round
    self.early_stop_metric = early_stop_metric
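
# The state above (`wtminus1`, `mom_buffer`, `gminus1`, `mom`, `lambd`)
# suggests an asynchronous mixing update with server-side momentum. Below is a
# sketch of the common FedAsync-style rule
#     w_t = w_{t-1} + momentum-smoothed( lambda * (w_client - w_{t-1}) ),
# which reduces to w_t = (1 - lambda) * w_{t-1} + lambda * w_client when
# mom == 0. The repo's actual update lives elsewhere and may differ.
import torch

def async_update_sketch(server_state, client_state, mom_buffer, lambd, mom):
    new_state = {}
    for key, w in server_state.items():
        # Staleness-weighted step toward the (possibly stale) client weights.
        delta = lambd * (client_state[key].float() - w.float())
        buf = mom * mom_buffer.get(key, torch.zeros_like(delta)) + delta
        mom_buffer[key] = buf
        new_state[key] = (w.float() + buf).to(w.dtype)
    return new_state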