def do_training(config: TorchFederatedLearnerCIFAR100Config):
    """Run a single federated CIFAR-100 training job for *config*.

    Derives a human-readable run name from the optimizer hyperparameters,
    logs it, registers an Experiment in the "federated-learning" workspace
    under the module-level ``project_name``, and trains to completion.
    """
    run_name = f"{config.SERVER_OPT}: {config.SERVER_LEARNING_RATE} - {config.CLIENT_OPT_STRATEGY} - {config.CLIENT_OPT}: {config.CLIENT_LEARNING_RATE}"
    logging.info(run_name)

    # Runtime knobs: keep optimizer state off disk and models out of RAM.
    tech_conf = TorchFederatedLearnerTechnicalConfig(
        STORE_OPT_ON_DISK=False,
        STORE_MODEL_IN_RAM=False,
    )

    exp = Experiment(workspace="federated-learning", project_name=project_name)
    exp.set_name(run_name)

    TorchFederatedLearnerCIFAR100(exp, config, tech_conf).train()
CLIENT_OPT=common.get_name(client_opt),
CLIENT_OPT_ARGS=common.get_args(client_opt),
# CLIENT_OPT_L2=1e-4,
CLIENT_OPT_STRATEGY=client_opt_strategy,
SERVER_OPT=common.get_name(server_opt),
SERVER_OPT_ARGS=common.get_args(server_opt),
SERVER_LEARNING_RATE=server_lr,
IS_IID_DATA=is_iid,
BATCH_SIZE=B,
CLIENT_FRACTION=C,
N_CLIENTS=NC,
N_EPOCH_PER_CLIENT=E,
MAX_ROUNDS=max_rounds,
MODEL=model,
SCAFFOLD=True)
# Runtime knobs for the EMNIST run: evaluate every round, test only the
# final round; model/optimizer storage toggles left at their defaults.
config_technical = TorchFederatedLearnerTechnicalConfig(
    BREAK_ROUND=300,
    EVAL_ROUND=1,
    TEST_LAST=1,
    # STORE_OPT_ON_DISK=False,
    # STORE_MODEL_IN_RAM=False,
)
# Run name encodes the server/client optimizer setup for the tracker UI.
name = f"{config.SERVER_OPT}: {config.SERVER_LEARNING_RATE} - {config.CLIENT_OPT_STRATEGY} - {config.CLIENT_OPT}: {config.CLIENT_LEARNING_RATE}"
experiment = Experiment(workspace="federated-learning-emnist-m", project_name=project_name)
try:
    common.do_training_emnist(experiment, name, config, config_technical)
# NOTE(review): exception name is misspelled ("ToLargeLearningRateExcpetion")
# but matches the project-wide definition; best-effort skip of runs whose
# learning rate diverges.
except ToLargeLearningRateExcpetion:
    pass
# Single CIFAR-100 federated run: plain SGD clients, optimizer state
# re-initialized on every round ("reinit" strategy).
client_opt = "SGD"
client_opt_strategy = "reinit"
# image_norm = "tflike"
# TODO: the config class raised no error for incorrectly named parameters.
config = TorchFederatedLearnerCIFAR100Config(
    BREAK_ROUND=1500,
    CLIENT_LEARNING_RATE=client_lr,
    CLIENT_OPT=client_opt,
    # CLIENT_OPT_ARGS=common.get_args(client_opt),
    CLIENT_OPT_L2=1e-4,
    CLIENT_OPT_STRATEGY=client_opt_strategy,
    SERVER_OPT=server_opt,
    SERVER_OPT_ARGS=common.get_args(server_opt),
    SERVER_LEARNING_RATE=server_lr,
    IS_IID_DATA=is_iid,
    BATCH_SIZE=B,
    CLIENT_FRACTION=C,
    N_CLIENTS=NC,
    N_EPOCH_PER_CLIENT=E,
    MAX_ROUNDS=max_rounds,
    IMAGE_NORM="recordwisefull",
    NORM="group",
    INIT="tffed",
    AUG="basicf")
# Evaluate only every 100 rounds to keep the run cheap.
config_technical = TorchFederatedLearnerTechnicalConfig(BREAK_ROUND=300, EVAL_ROUND=100)
name = f"{config.SERVER_OPT}: {config.SERVER_LEARNING_RATE} - {config.CLIENT_OPT_STRATEGY} - {config.CLIENT_OPT}: {config.CLIENT_LEARNING_RATE}"
experiment = Experiment(workspace="federated-learning", project_name=project_name)
common.do_training(experiment, name, config, config_technical)
# Hyperparameters for a small 10-client / 40-local-epoch CIFAR-100 run.
# Trailing comments preserve the full-scale values this was scaled down from.
server_lr = 0.01
client_lr = 0.01
server_opt = "SGD"
client_opt = "SGD"
client_opt_strategy = "reinit"
project_name = f"10c40e-s-{server_opt}-c-{client_opt}"
max_rounds = 30  # 1500
C = 0.5  # 10 / 500
NC = 10  # 500
E = 40
B = 20
is_iid = False
# All technical knobs left at their defaults for this run.
config_technical = TorchFederatedLearnerTechnicalConfig()
config = TorchFederatedLearnerCIFAR100Config(
    BREAK_ROUND=300,
    CLIENT_LEARNING_RATE=client_lr,
    CLIENT_OPT=client_opt,
    # CLIENT_OPT_ARGS=common.get_args(client_opt),
    CLIENT_OPT_L2=1e-4,
    CLIENT_OPT_STRATEGY=client_opt_strategy,
    SERVER_OPT=server_opt,
    # SERVER_OPT_ARGS=common.get_args(server_opt),
    SERVER_LEARNING_RATE=server_lr,
    IS_IID_DATA=is_iid,
    BATCH_SIZE=B,
    CLIENT_FRACTION=C,
    N_CLIENTS=NC,
# Quick smoke-style CIFAR-100 setup: BREAK_ROUND=3 stops training early.
server_lr = 1.0
client_lr = 0.1
server_opt = "SGD"
client_opt = "SGD"
client_opt_strategy = "reinit"
max_rounds = 30
n_clients_per_round = 10
NC = 10
# Client fraction is derived so exactly n_clients_per_round participate.
C = n_clients_per_round / NC
E = 10
B = 20
is_iid = False
project_name = f"{NC}c{E}e{max_rounds}r{n_clients_per_round}f-{server_opt}-{client_opt_strategy[0]}-{client_opt}"
config_technical = TorchFederatedLearnerTechnicalConfig(
    BREAK_ROUND=3,
    STORE_OPT_ON_DISK=False,
    STORE_MODEL_IN_RAM=False)
config = TorchFederatedLearnerCIFAR100Config(
    CLIENT_LEARNING_RATE=client_lr,
    CLIENT_OPT=common.get_name(client_opt),
    CLIENT_OPT_ARGS=common.get_args(client_opt),
    CLIENT_OPT_L2=1e-4,
    CLIENT_OPT_STRATEGY=client_opt_strategy,
    SERVER_OPT=common.get_name(server_opt),
    SERVER_OPT_ARGS=common.get_args(server_opt),
    SERVER_LEARNING_RATE=server_lr,
    IS_IID_DATA=is_iid,
    BATCH_SIZE=B,
    CLIENT_FRACTION=C,
    N_CLIENTS=NC,
    N_EPOCH_PER_CLIENT=E,
CLIENT_OPT=common.get_name(client_opt),
CLIENT_OPT_ARGS=common.get_args(client_opt),
# CLIENT_OPT_L2=1e-4,
CLIENT_OPT_STRATEGY=client_opt_strategy,
SERVER_OPT=common.get_name(server_opt),
SERVER_OPT_ARGS=common.get_args(server_opt),
SERVER_LEARNING_RATE=server_lr,
IS_IID_DATA=is_iid,
BATCH_SIZE=B,
CLIENT_FRACTION=C,
N_CLIENTS=NC,
N_EPOCH_PER_CLIENT=E,
MAX_ROUNDS=max_rounds,
MODEL=model,
)
# EMNIST-s runtime knobs: evaluate every 10 rounds, test the final 20;
# unlike the sibling runs, optimizer state IS stored on disk and models
# ARE kept in RAM here.
config_technical = TorchFederatedLearnerTechnicalConfig(
    BREAK_ROUND=300,
    EVAL_ROUND=10,
    TEST_LAST=20,
    STORE_OPT_ON_DISK=True,
    STORE_MODEL_IN_RAM=True,
)
name = f"{config.SERVER_OPT}: {config.SERVER_LEARNING_RATE} - {config.CLIENT_OPT_STRATEGY} - {config.CLIENT_OPT}: {config.CLIENT_LEARNING_RATE}"
experiment = Experiment(workspace="federated-learning-emnist-s", project_name=project_name)
try:
    common.do_training_emnist(experiment, name, config, config_technical)
# NOTE(review): exception name is misspelled but matches the project-wide
# definition; runs with a diverging learning rate are skipped best-effort.
except ToLargeLearningRateExcpetion:
    pass
# Sweep over (server_opt, client_opt, server_lr, client_lr, strategy)
# combinations; each tuple position corresponds to an entry of the
# outer-scope `param_names` (order must match — TODO confirm against caller).
config_changes = [
    ("SGD", "SGD", 1, 0.1, "nothing"),
    ("Yogi", "SGD", 0.1, 0.01, "nothing"),
    ("Yogi", "Yogi", 0.1, 0.0001, "avg"),
    ("Yogi", "Yogi", 0.1, 0.0001, "reinit"),
    ("Yogi", "Yogi", 0.1, 0.0001, "nothing"),
]
for values in config_changes:
    # Base config shared by every combination in the sweep.
    config = TorchFederatedLearnerCIFAR100Config(
        BREAK_ROUND=1500,
        CLIENT_OPT_L2=1e-4,
        IS_IID_DATA=is_iid,
        BATCH_SIZE=B,
        CLIENT_FRACTION=C,
        N_CLIENTS=NC,
        N_EPOCH_PER_CLIENT=E,
        MAX_ROUNDS=max_rounds,
        IMAGE_NORM="recordwisefull",
        NORM="group",
        INIT="tffed",
        AUG="flipf",
    )
    # Overlay this combination's values onto the base config by name.
    for k, v in zip(param_names, values):
        setattr(config, k, v)
    # Checkpoint every 5 rounds.
    config_technical = TorchFederatedLearnerTechnicalConfig(
        SAVE_CHP_INTERVALL=5)
    name = f"{config.SERVER_OPT}: {config.SERVER_LEARNING_RATE} - {config.CLIENT_OPT_STRATEGY} - {config.CLIENT_OPT}: {config.CLIENT_LEARNING_RATE}"
    experiment = Experiment(workspace="federated-learning", project_name=project_name)
    common.do_training(experiment, name, config, config_technical)
("SGD", "SGD", 1, 0.1, "reinit"),
("Yogi", "SGD", 0.1, 0.01, "reinit"),
("Yogi", "Yogi", 0.1, 0.0001, "avg"),
("Yogi", "Yogi", 0.1, 0.0001, "reinit"),
("Yogi", "Yogi", 0.1, 0.0001, "nothing"),
]
for values in config_changes:
    # Project name encodes server opt, strategy and client opt of this combo.
    project_name = f"{NC}c{E}e-{values[0]}-{values[4]}-{values[1]}"
    # Base config; BREAK_ROUND=5 keeps each exploratory run short.
    config = TorchFederatedLearnerCIFAR100Config(
        BREAK_ROUND=5,
        CLIENT_OPT_L2=1e-4,
        IS_IID_DATA=is_iid,
        BATCH_SIZE=B,
        CLIENT_FRACTION=C,
        N_CLIENTS=NC,
        N_EPOCH_PER_CLIENT=E,
        MAX_ROUNDS=max_rounds,
        IMAGE_NORM="recordwisefull",
        NORM="group",
        INIT="tffed",
        AUG="flipf",
    )
    # Overlay the swept values onto the base config by parameter name
    # (order of `param_names` must match the tuple layout above).
    for k, v in zip(param_names, values):
        setattr(config, k, v)
    config_technical = TorchFederatedLearnerTechnicalConfig(
        SAVE_CHP_INTERVALL=5,
        STORE_OPT_ON_DISK=False,
        STORE_MODEL_IN_RAM=False)
    # Learning-rate exploration for this combination.
    explore_lr(project_name, TorchFederatedLearnerCIFAR100, config, config_technical)
CLIENT_LEARNING_RATE=client_lr,
CLIENT_OPT=common.get_name(client_opt),
CLIENT_OPT_ARGS=common.get_args(client_opt),
CLIENT_OPT_L2=1e-4,
# CLIENT_OPT_STRATEGY=client_opt_strategy,
SERVER_OPT=common.get_name(server_opt),
SERVER_OPT_ARGS=common.get_args(server_opt),
SERVER_LEARNING_RATE=server_lr,
IS_IID_DATA=is_iid,
BATCH_SIZE=B,
CLIENT_FRACTION=C,
N_CLIENTS=NC,
N_EPOCH_PER_CLIENT=E,
MAX_ROUNDS=max_rounds,
IMAGE_NORM="recordwisefull",
NORM="group",
INIT="tffed",
AUG="basicf",
)
# Overlay swept values: with a single swept parameter, `values` is the
# bare value itself rather than a tuple, hence the special case.
if len(param_names) == 1:
    setattr(config, param_names[0], values)
else:
    for k, v in zip(param_names, values):
        setattr(config, k, v)
# Checkpoint every 5 rounds; BREAK_ROUND=3 keeps hpopt trials short.
config_technical = TorchFederatedLearnerTechnicalConfig(
    SAVE_CHP_INTERVALL=5,
    BREAK_ROUND=3)
name = f"{config.SERVER_OPT}: {config.SERVER_LEARNING_RATE} - {config.CLIENT_OPT_STRATEGY} - {config.CLIENT_OPT}: {config.CLIENT_LEARNING_RATE}"
experiment = Experiment(workspace="federated-learning-hpopt", project_name=project_name)
common.do_training(experiment, name, config, config_technical)
# EMNIST run logged to the "federated-learning-scaffold" workspace.
# NOTE(review): SCAFFOLD=True is commented out below even though the
# workspace name says scaffold — confirm whether this run is meant as the
# non-SCAFFOLD baseline.
E = 1
project_name = f"{model}{NC}c{E}e{max_rounds}r{n_clients_per_round}f-{server_opt}-{client_opt_strategy[0]}-{client_opt}"
config = TorchFederatedLearnerEMNISTConfig(
    CLIENT_LEARNING_RATE=client_lr,
    CLIENT_OPT=common.get_name(client_opt),
    CLIENT_OPT_ARGS=common.get_args(client_opt),
    CLIENT_OPT_L2=1e-4,
    CLIENT_OPT_STRATEGY=client_opt_strategy,
    SERVER_OPT=common.get_name(server_opt),
    SERVER_OPT_ARGS=common.get_args(server_opt),
    SERVER_LEARNING_RATE=server_lr,
    IS_IID_DATA=is_iid,
    BATCH_SIZE=B,
    CLIENT_FRACTION=C,
    N_CLIENTS=NC,
    N_EPOCH_PER_CLIENT=E,
    MAX_ROUNDS=max_rounds,
    MODEL=model,
    # SCAFFOLD=True,
)
# Evaluate every 10 rounds, test the final 20; optimizer state not persisted.
config_technical = TorchFederatedLearnerTechnicalConfig(
    BREAK_ROUND=300,
    EVAL_ROUND=10,
    TEST_LAST=20,
    STORE_OPT_ON_DISK=False)
name = f"{config.SERVER_OPT}: {config.SERVER_LEARNING_RATE} - {config.CLIENT_OPT_STRATEGY} - {config.CLIENT_OPT}: {config.CLIENT_LEARNING_RATE}"
experiment = Experiment(workspace="federated-learning-scaffold", project_name=project_name)
try:
    common.do_training_emnist(experiment, name, config, config_technical)
# NOTE(review): exception name is misspelled but matches the project-wide
# definition; runs with a diverging learning rate are skipped best-effort.
except ToLargeLearningRateExcpetion:
    pass
# Sweep the L2 regularization strength on a log scale, applying the same
# value as client-side weight decay (CLIENT_OPT_L2) and as the server
# optimizer's weight_decay.
# FIX(review): the original list was [1e-3, 1e-2, 1e-1, 1e-1, 1e1] — 1e-1
# appeared twice (the identical run would execute twice) while 1e0 was
# skipped; 1e0 restores the intended log-scale progression.
for l2 in [1e-3, 1e-2, 1e-1, 1e0, 1e1]:
    # Server optimizer args with this sweep step's weight decay applied.
    s_opt_args = common.get_args(server_opt)
    s_opt_args["weight_decay"] = l2
    config = TorchFederatedLearnerCIFAR100Config(
        BREAK_ROUND=300,
        CLIENT_LEARNING_RATE=client_lr,
        CLIENT_OPT=client_opt,
        CLIENT_OPT_ARGS=common.get_args(client_opt),
        CLIENT_OPT_L2=l2,
        CLIENT_OPT_STRATEGY=client_opt_strategy,
        SERVER_OPT=server_opt,
        SERVER_OPT_ARGS=s_opt_args,
        SERVER_LEARNING_RATE=server_lr,
        IS_IID_DATA=is_iid,
        BATCH_SIZE=B,
        CLIENT_FRACTION=C,
        N_CLIENTS=NC,
        N_EPOCH_PER_CLIENT=E,
        MAX_ROUNDS=max_rounds,
        DL_N_WORKER=0,  # load data in the main process
        NORM="group",
        # IMAGE_NORM=image_norm,
        INIT="tffed",
    )
    # No histogram sampling for this sweep.
    config_technical = TorchFederatedLearnerTechnicalConfig(HIST_SAMPLE=0)
    name = f"{config.SERVER_OPT}: {config.SERVER_LEARNING_RATE} - {config.CLIENT_OPT_STRATEGY} - {config.CLIENT_OPT}: {config.CLIENT_LEARNING_RATE}"
    experiment = Experiment(workspace="federated-learning", project_name=project_name)
    common.do_training(experiment, name, config, config_technical)