# Standard and third-party imports assumed by the snippets below; the project-specific helpers
# (multiple_run_descent, ResultsOfSeveralDescents, pickle_saver, pickle_loader, file_exist, ...)
# are defined elsewhere in the repository.
import copy
import logging
import sys
import time

import numpy as np
import torch
from pympler import asizeof
from tqdm import tqdm


def run_one_scenario(cost_models, list_algos, filename: str, batch_size: int = 1, stochastic: bool = True,
                     nb_epoch: int = 250, step_size=None, compression: CompressionModel = None,
                     use_averaging: bool = False, fraction_sampled_workers: int = 1) -> None:
    all_descent = {}
    stochasticity = 'sto' if stochastic else "full"
    if stochastic:
        experiments_settings = "{0}-b{1}".format(stochasticity, batch_size)
    else:
        experiments_settings = stochasticity

    for type_params in tqdm(list_algos):
        multiple_sg_descent = multiple_run_descent(type_params, cost_models=cost_models,
                                                   compression_model=compression,
                                                   use_averaging=use_averaging,
                                                   stochastic=stochastic,
                                                   nb_epoch=nb_epoch,
                                                   step_formula=step_size,
                                                   batch_size=batch_size,
                                                   logs_file=filename,
                                                   fraction_sampled_workers=fraction_sampled_workers)
        all_descent[type_params.name()] = multiple_sg_descent

    res = ResultsOfSeveralDescents(all_descent, len(cost_models))
    pickle_saver(res, "{0}/descent-{1}".format(filename, experiments_settings))
def prepare_noniid_dataset(data, pivot_label: str, data_path: str, pickle_path: str, nb_cluster: int,
                           double_check: bool = False):
    # The TSNE representation is independent of the number of devices.
    tsne_file = "{0}-tsne".format(data_path)
    if not file_exist("{0}.pkl".format(tsne_file)):
        # Running TSNE to obtain a 2D representation of the data.
        logging.debug("The TSNE representation ({0}) doesn't exist.".format(tsne_file))
        embedded_data = tsne(data)
        pickle_saver(embedded_data, tsne_file)

    tsne_cluster_file = "{0}/tsne-cluster".format(pickle_path)
    if not file_exist("{0}.pkl".format(tsne_cluster_file)):
        # Finding clusters in the TSNE representation.
        logging.debug("Finding non-iid clusters in the TSNE representation: {0}.pkl".format(tsne_file))
        embedded_data = pickle_loader("{0}".format(tsne_file))
        logging.debug("Saving found clusters: {0}.pkl".format(tsne_cluster_file))
        predicted_cluster = find_cluster(embedded_data, nb_cluster)
        pickle_saver(predicted_cluster, "{0}".format(tsne_cluster_file))

    predicted_cluster = pickle_loader("{0}".format(tsne_cluster_file))

    # With the found clusters, splitting the data.
    X, Y = clustering_data(data, predicted_cluster, pivot_label, nb_cluster)
    if double_check:
        logging.debug("Checking data clusterization, wait until completion before seeing the plots.")
        # Checking that splitting the data by cluster is valid.
        check_data_clusterisation(X, Y, nb_cluster)
    return X, Y
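# Hedged usage sketch (not part of the original file): illustrates the expected call pattern of
# prepare_noniid_dataset. The dataset name "quantum", the paths and the pivot column "labels"
# are illustrative assumptions, not values taken from the repository.
raw_data = pickle_loader("dataset/quantum/quantum-processed")   # hypothetical cached dataframe
X_by_cluster, Y_by_cluster = prepare_noniid_dataset(
    data=raw_data,
    pivot_label="labels",                     # hypothetical name of the label column
    data_path="dataset/quantum/quantum",      # prefix used to cache the TSNE embedding
    pickle_path="pickle/quantum/non-iid",     # folder used to cache the found clusters
    nb_cluster=20,                            # one cluster per device
    double_check=True)                        # plot the clusters to verify the split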
def run_one_scenario(cost_models, list_algos, logs_file: str, experiments_settings: str, batch_size: int = 1,
                     stochastic: bool = True, nb_epoch: int = 250, step_size=None,
                     compression: CompressionModel = None, use_averaging: bool = False,
                     fraction_sampled_workers: int = 1, modify_run=None) -> None:
    pickle_file = "{0}/descent-{1}".format(logs_file, experiments_settings)

    # If this is not a partial re-run, remove any previous result and run all algorithms.
    if modify_run is None:
        if file_exist(pickle_file + ".pkl"):
            remove_file(pickle_file + ".pkl")
        algos = list_algos
    else:
        algos = [list_algos[i] for i in modify_run]

    for type_params in tqdm(algos):
        multiple_sg_descent = multiple_run_descent(type_params, cost_models=cost_models,
                                                   compression_model=compression,
                                                   use_averaging=use_averaging,
                                                   stochastic=stochastic,
                                                   nb_epoch=nb_epoch,
                                                   step_formula=step_size,
                                                   batch_size=batch_size,
                                                   logs_file=logs_file,
                                                   fraction_sampled_workers=fraction_sampled_workers)

        if logs_file:
            with open("{0}/logs.txt".format(logs_file), "a+") as logs:
                logs.write("{0} size of the multiple SG descent: {1:.2e} bits\n"
                           .format(type_params.name(), asizeof.asizeof(multiple_sg_descent)))

        # Append the new descent to the results already saved on disk (or create the results object).
        if file_exist(pickle_file + ".pkl"):
            res = pickle_loader(pickle_file)
        else:
            res = ResultsOfSeveralDescents(len(cost_models))
        res.add_descent(multiple_sg_descent, type_params.name(), deep_learning_run=False)

        pickle_saver(res, pickle_file)

        del res
        del multiple_sg_descent
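# Hedged usage sketch (not part of the original file): shows how this version of run_one_scenario
# might be invoked. The algorithm list and the quantization operator are grounded in the snippets
# below; the paths, settings string and the pre-built `cost_models` are illustrative assumptions.
run_one_scenario(cost_models=cost_models,                   # assumed to be built beforehand
                 list_algos=[VanillaSGD(), Diana(), Artemis()],
                 logs_file="pickle/quantum/non-iid",
                 experiments_settings="sto-b1",
                 batch_size=1,
                 stochastic=True,
                 nb_epoch=250,
                 step_size=None,                            # use the default step-size formula
                 compression=SQuantization(1, norm=2),      # assumed quantization operator
                 use_averaging=True,
                 modify_run=None)                           # or e.g. [1] to re-run only the second algorithm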
images = images.to(device)
labels = labels.to(device)

# Forward pass
outputs = model(images)
loss = criterion(outputs, labels)

# Backward and optimize
optimizer.zero_grad()
loss.backward()
optimizer.step()

test_loss_val, test_acc_val = accuracy_and_loss(model, test_loader, criterion, device)
run.update_run(loss.item(), test_loss_val, test_acc_val)
pickle_saver(run, "run_cifar10")

if (i + 1) % 100 == 0:
    with open("log_cifar10.txt", 'a') as f:
        print("Epoch [{}/{}], Step [{}/{}] Loss: {:.4f}"
              .format(epoch + 1, num_epochs, i + 1, total_step, loss.item()), file=f)

# Decay learning rate
if (epoch + 1) % 20 == 0:
    curr_lr /= 3
    update_lr(optimizer, curr_lr)

# Test the model
model.eval()
with torch.no_grad():
    correct = 0
def run_experiments_in_deeplearning(dataset: str):
    with open("log.txt", 'a') as f:
        print("==== NEW RUN ====", file=f)

    fraction_sampled_workers = 1
    batch_size = 128
    nb_devices = 20
    algos = "mcm-vs-existing"
    iid = "non-iid"

    data_path, pickle_path, algos_pickle_path, picture_path = create_path_and_folders(
        nb_devices, dataset, iid, algos, fraction_sampled_workers)

    default_up_compression = SQuantization(quantization_levels[dataset], norm=norm_quantization[dataset])
    default_down_compression = SQuantization(quantization_levels[dataset], norm=norm_quantization[dataset])

    exp_name = "{0}_m{1}_lr{2}_sup{3}_sdwn{4}_b{5}_wd{6}".format(
        models[dataset].__name__, momentums[dataset], optimal_steps_size[dataset],
        default_up_compression.level, default_down_compression.level, batch_size, weight_decay[dataset])

    all_descent = {}
    for type_params in [VanillaSGD(), Diana(), Artemis(), MCM()]:
        print(type_params)
        torch.cuda.empty_cache()
        params = type_params.define(cost_models=None,
                                    n_dimensions=None,
                                    nb_epoch=200,
                                    nb_devices=nb_devices,
                                    batch_size=batch_size,
                                    fraction_sampled_workers=1,
                                    up_compression_model=default_up_compression,
                                    down_compression_model=default_down_compression)

        params = cast_to_DL(params, dataset, models[dataset], optimal_steps_size[dataset], weight_decay[dataset])
        params.log_file = "log.txt"
        params.momentum = momentums[dataset]
        params.print()

        with open(params.log_file, 'a') as f:
            print(type_params, file=f)
            print("Optimal step size: ", params.optimal_step_size, file=f)

        multiple_sg_descent = run_tuned_exp(params)
        all_descent[type_params.name()] = multiple_sg_descent

    res = ResultsOfSeveralDescents(all_descent, nb_devices)
    # res.add_descent(multiple_sg_descent, type_params.name())

    pickle_saver(res, "{0}/{1}".format(algos_pickle_path, exp_name))

    res = pickle_loader("{0}/{1}".format(algos_pickle_path, exp_name))

    # Plotting without averaging
    plot_error_dist(res.get_loss(np.array(0), in_log=True), res.names, res.nb_devices_for_the_run,
                    batch_size=batch_size, all_error=res.get_std(np.array(0), in_log=True),
                    x_legend="Number of passes on data", ylegends="train_loss",
                    picture_name="{0}/{1}_train_losses".format(picture_path, exp_name))

    plot_error_dist(res.get_loss(np.array(0), in_log=True), res.names, res.nb_devices_for_the_run,
                    batch_size=batch_size, x_points=res.X_number_of_bits, ylegends="train_loss",
                    all_error=res.get_std(np.array(0), in_log=True), x_legend="Communicated bits",
                    picture_name="{0}/{1}_train_losses_bits".format(picture_path, exp_name))

    plot_error_dist(res.get_test_accuracies(), res.names, res.nb_devices_for_the_run, ylegends="accuracy",
                    all_error=res.get_test_accuracies_std(), x_legend="Number of passes on data",
                    batch_size=batch_size,
                    picture_name="{0}/{1}_test_accuracies".format(picture_path, exp_name))

    plot_error_dist(res.get_test_losses(in_log=True), res.names, res.nb_devices_for_the_run,
                    batch_size=batch_size, all_error=res.get_test_losses_std(in_log=True),
                    x_legend="Number of passes on data", ylegends="test_loss",
                    picture_name="{0}/{1}_test_losses".format(picture_path, exp_name))
def run_for_different_scenarios(cost_models, list_algos, values, labels, experiments_settings: str, logs_file: str,
                                batch_size: int = 1, stochastic: bool = True, nb_epoch: int = 250,
                                step_formula=None, compression: CompressionModel = None,
                                scenario: str = "step") -> None:
    assert scenario in ["step", "compression", "alpha"], \
        "There are three possible scenarios: to analyze by step size, by compression operators, or by value of alpha."

    nb_devices_for_the_run = len(cost_models)

    all_kind_of_compression_res = []
    all_descent_various_gamma = {}
    descent_by_algo_and_step_size = {}

    # Corresponds to the descent with optimal gamma for each algorithm.
    optimal_descents = {}

    for param_algo in tqdm(list_algos):
        losses_by_algo, losses_avg_by_algo, norm_ef_by_algo, dist_model_by_algo = [], [], [], []
        h_i_to_optimal_grad_by_algo, var_models_by_algo = [], []
        descent_by_step_size = {}

        for (value, label) in zip(values, labels):
            if scenario == "step":
                multiple_sg_descent = multiple_run_descent(param_algo, cost_models=cost_models, use_averaging=True,
                                                           stochastic=stochastic, batch_size=batch_size,
                                                           step_formula=value, nb_epoch=nb_epoch,
                                                           compression_model=compression, logs_file=logs_file)

            if scenario in ["compression", "alpha"]:
                multiple_sg_descent = multiple_run_descent(param_algo, cost_models=cost_models, use_averaging=True,
                                                           stochastic=stochastic, batch_size=batch_size,
                                                           step_formula=step_formula, compression_model=value,
                                                           nb_epoch=nb_epoch, logs_file=logs_file)

            descent_by_step_size[label] = multiple_sg_descent

            losses_by_label, losses_avg_by_label, norm_ef_by_label, dist_model_by_label = [], [], [], []
            h_i_to_optimal_grad_by_label, var_models_by_label = [], []

            # Picking the minimum values for each of the runs.
            for seq_losses, seq_losses_avg, seq_norm_ef, seq_dist_model, seq_h_i_optimal, seq_var_models in \
                    zip(multiple_sg_descent.train_losses, multiple_sg_descent.averaged_train_losses,
                        multiple_sg_descent.norm_error_feedback, multiple_sg_descent.dist_to_model,
                        multiple_sg_descent.h_i_to_optimal_grad, multiple_sg_descent.var_models):
                losses_by_label.append(min(seq_losses))
                losses_avg_by_label.append(min(seq_losses_avg))
                norm_ef_by_label.append(seq_norm_ef[-1])
                dist_model_by_label.append(seq_dist_model[-1])
                var_models_by_label.append(seq_var_models[-1])
                h_i_to_optimal_grad_by_label.append(seq_h_i_optimal[-1])

            losses_by_algo.append(losses_by_label)
            losses_avg_by_algo.append(losses_avg_by_label)
            norm_ef_by_algo.append(norm_ef_by_label)
            dist_model_by_algo.append(dist_model_by_label)
            var_models_by_algo.append(var_models_by_label)
            h_i_to_optimal_grad_by_algo.append(h_i_to_optimal_grad_by_label)

        res_by_algo_and_step_size = ResultsOfSeveralDescents(nb_devices_for_the_run)
        res_by_algo_and_step_size.add_dict_of_descent(descent_by_step_size)
        descent_by_algo_and_step_size[param_algo.name()] = res_by_algo_and_step_size

        # Find the optimal descent for the algorithm:
        min_loss_desc = 10e12
        opt_desc = None
        for desc in descent_by_step_size.values():
            last_losses = [desc.train_losses[j][-1] for j in range(len(desc.train_losses))]
            if min_loss_desc > min(last_losses):
                min_loss_desc = min(last_losses)
                opt_desc = desc

        # Adding the optimal descent to the dict of optimal descents.
        optimal_descents[param_algo.name()] = opt_desc

        artificial_multiple_descent = AverageOfSeveralIdenticalRun()
        artificial_multiple_descent.append_list(losses_by_algo, losses_avg_by_algo, norm_ef_by_algo,
                                                dist_model_by_algo, h_i_to_optimal_grad_by_algo, var_models_by_algo)
        all_descent_various_gamma[param_algo.name()] = artificial_multiple_descent
    all_kind_of_compression_res.append(all_descent_various_gamma)

    res_various_gamma = ResultsOfSeveralDescents(nb_devices_for_the_run)
    res_various_gamma.add_dict_of_descent(all_descent_various_gamma, deep_learning_run=False)
    pickle_saver(res_various_gamma, "{0}/{1}-{2}".format(logs_file, scenario, experiments_settings))

    res_opt_gamma = ResultsOfSeveralDescents(nb_devices_for_the_run)
    res_opt_gamma.add_dict_of_descent(optimal_descents, deep_learning_run=False)
    pickle_saver(res_opt_gamma, "{0}/{1}-optimal-{2}".format(logs_file, scenario, experiments_settings))

    pickle_saver(descent_by_algo_and_step_size,
                 "{0}/{1}-descent_by_algo-{2}".format(logs_file, scenario, experiments_settings))
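# Hedged usage sketch (not part of the original file): compares the algorithms across several
# quantization levels ("compression" scenario). The levels, labels, norm and paths are
# illustrative assumptions; `cost_models` is assumed to be built beforehand with the
# repository's cost-model helpers.
quantization_operators = [SQuantization(s, norm=2) for s in [1, 2, 4, 8]]
run_for_different_scenarios(cost_models=cost_models,
                            list_algos=[VanillaSGD(), Diana(), Artemis()],
                            values=quantization_operators,
                            labels=["s=1", "s=2", "s=4", "s=8"],
                            experiments_settings="sto-b1",
                            logs_file="pickle/quantum/non-iid",
                            batch_size=1,
                            stochastic=True,
                            nb_epoch=250,
                            step_formula=None,              # a fixed step-size formula could be supplied here
                            scenario="compression")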
def run_experiments_in_deeplearning(dataset: str, plot_only: bool = False) -> None:
    """Run and plot experiments for a given dataset using an appropriate neural network.

    :param dataset: Name of the dataset.
    :param plot_only: True if the goal is not to rerun all experiments but only to regenerate figures.
    """
    fraction_sampled_workers = 1
    batch_size = batch_sizes[dataset]
    nb_devices = 20
    algos = sys.argv[2]
    iid = sys.argv[3]
    stochastic = True

    create_folder_if_not_existing(algos)
    log_file = algos + "/log_" + dataset + "_" + iid + ".txt"
    with open(log_file, 'a') as f:
        print("==== NEW RUN ====", file=f)
    with open(log_file, 'a') as f:
        print("stochastic -> {0}, iid -> {1}, batch_size -> {2}, norm -> {3}, s -> {4}, momentum -> {5}, model -> {6}"
              .format(stochastic, iid, batch_size, norm_quantization[dataset], quantization_levels[dataset],
                      momentums[dataset], models[dataset].__name__), file=f)

    data_path, pickle_path, algos_pickle_path, picture_path = create_path_and_folders(
        nb_devices, dataset, iid, algos, fraction_sampled_workers)

    default_up_compression = SQuantization(quantization_levels[dataset], norm=norm_quantization[dataset])
    default_down_compression = SQuantization(quantization_levels[dataset], norm=norm_quantization[dataset])

    loaders = create_loaders(dataset, iid, nb_devices, batch_size, stochastic)
    _, train_loader_workers_full, _ = loaders
    dim = next(iter(train_loader_workers_full[0]))[0].shape[1]
    if optimal_steps_size[dataset] is None:
        L = compute_L(train_loader_workers_full)
        optimal_steps_size[dataset] = 1 / L
    print("Step size:", optimal_steps_size[dataset])

    exp_name = name_of_the_experiments(dataset, stochastic)
    pickle_file = "{0}/{1}".format(algos_pickle_path, exp_name)

    list_algos = choose_algo(algos, stochastic, fraction_sampled_workers)

    if not plot_only:
        if file_exist(pickle_file + ".pkl"):
            remove_file(pickle_file + ".pkl")

        for type_params in list_algos:
            print(type_params)
            torch.cuda.empty_cache()
            params = type_params.define(cost_models=None,
                                        n_dimensions=dim,
                                        nb_epoch=300,
                                        nb_devices=nb_devices,
                                        stochastic=stochastic,
                                        batch_size=batch_size,
                                        fraction_sampled_workers=fraction_sampled_workers,
                                        up_compression_model=default_up_compression,
                                        down_compression_model=default_down_compression)

            params = cast_to_DL(params, dataset, models[dataset], optimal_steps_size[dataset],
                                weight_decay[dataset], iid)
            params.log_file = log_file
            params.momentum = momentums[dataset]
            params.criterion = criterion[dataset]
            params.print()

            with open(params.log_file, 'a') as f:
                print(type_params, file=f)
                print("Optimal step size: ", params.optimal_step_size, file=f)

            multiple_descent = AverageOfSeveralIdenticalRun()
            seed_everything(seed=42)

            start = time.time()
            for i in range(NB_RUN):
                print('Run {:3d}/{:3d}:'.format(i + 1, NB_RUN))
                fixed_params = copy.deepcopy(params)
                try:
                    training = Train(loaders, fixed_params)
                    multiple_descent.append_from_DL(training.run_training())
                except ValueError as err:
                    print(err)
                    continue

            with open(log_file, 'a') as f:
                print("Time of the run: {:.2f}s".format(time.time() - start), file=f)
            with open(params.log_file, 'a') as f:
                print("{0} size of the multiple SG descent: {1:.2e} bits\n"
                      .format(type_params.name(), asizeof.asizeof(multiple_descent)), file=f)

            if file_exist(pickle_file + ".pkl"):
                res = pickle_loader(pickle_file)
            else:
                res = ResultsOfSeveralDescents(nb_devices)
            res.add_descent(multiple_descent, type_params.name(), deep_learning_run=True)

            pickle_saver(res, pickle_file)

    # obj_min_cvx = pickle_loader("{0}/obj_min".format(pickle_path))
    obj_min = 0  # pickle_loader("{0}/obj_min".format(pickle_path))

    res = pickle_loader(pickle_file)

    # obj_min = min(res.get_loss(np.array(0), in_log=False)[0])
    # print("Obj min in convex:", obj_min_cvx)
    print("Obj min in dl:", obj_min)

    # Plotting
    plot_error_dist(res.get_loss(np.array(obj_min)), res.names, all_error=res.get_std(np.array(obj_min)),
                    x_legend="Number of passes on data", ylegends="train_loss",
                    picture_name="{0}/{1}_train_losses".format(picture_path, exp_name))

    plot_error_dist(res.get_loss(np.array(obj_min)), res.names, x_points=res.X_number_of_bits,
                    ylegends="train_loss", all_error=res.get_std(np.array(obj_min)),
                    x_legend="Communicated bits",
                    picture_name="{0}/{1}_train_losses_bits".format(picture_path, exp_name))

    plot_error_dist(res.get_test_accuracies(), res.names, ylegends="accuracy",
                    all_error=res.get_test_accuracies_std(), x_legend="Number of passes on data",
                    picture_name="{0}/{1}_test_accuracies".format(picture_path, exp_name))

    plot_error_dist(res.get_test_losses(in_log=True), res.names,
                    all_error=res.get_test_losses_std(in_log=True), x_legend="Number of passes on data",
                    ylegends="test_loss",
                    picture_name="{0}/{1}_test_losses".format(picture_path, exp_name))
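# Hedged usage sketch (not part of the original file): run_experiments_in_deeplearning reads the
# algorithm group from sys.argv[2] and the iid/non-iid flag from sys.argv[3], so it is assumed here
# that sys.argv[1] carries the dataset name, e.g. a command line such as
#   python run_deep_learning.py cifar10 mcm-vs-existing non-iid
# The script name and argument values are illustrative only.
if __name__ == "__main__":
    run_experiments_in_deeplearning(dataset=sys.argv[1], plot_only=False)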