def train_single(u, g, flops, seed, train_data, eval_data, tangle_name,
                 malicious_node, poison_type):
    # Suppress tf warnings
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

    random.seed(1 + seed)
    np.random.seed(12 + seed)
    tf.compat.v1.set_random_seed(123 + seed)

    client = build_client(u, g, flops, train_data, eval_data)
    tangle = Tangle.fromfile(tangle_name)

    if malicious_node:
        node = Node(client, tangle, poison_type)
    else:
        node = Node(client, tangle)

    args = parse_args()
    tx, metrics, comp = node.process_next_batch(args.num_epochs,
                                                args.batch_size,
                                                args.num_tips,
                                                args.sample_size,
                                                args.reference_avg_top)

    sys_metrics = {
        BYTES_WRITTEN_KEY: 0,
        BYTES_READ_KEY: 0,
        LOCAL_COMPUTATIONS_KEY: 0
    }
    sys_metrics[BYTES_READ_KEY] += node.client.model.size
    sys_metrics[BYTES_WRITTEN_KEY] += node.client.model.size
    sys_metrics[LOCAL_COMPUTATIONS_KEY] = comp

    return tx, metrics, u, sys_metrics
def save_metric_csv(my_round, micro_acc, stack_list):
    args = parse_args()
    tot_center = [0., 0., 0., 0.]
    for center in stack_list:
        # These are macro averages: we can't compute a micro average here
        # because there are no per-sample weights.
        device_accuracy = np.average(
            [center[k]["accuracy"] for k in center.keys()])
        device_loss = np.average([center[k]["loss"] for k in center.keys()])
        device_microf1 = np.average(
            [center[k]["microf1"] for k in center.keys()])
        device_macrof1 = np.average(
            [center[k]["macrof1"] for k in center.keys()])
        tot_center[0] += device_accuracy
        tot_center[1] += device_loss
        tot_center[2] += device_microf1
        tot_center[3] += device_macrof1

    if len(stack_list) > 1:
        avg_center = [v / len(stack_list) for v in tot_center]
    else:
        avg_center = tot_center

    with open(args.metric_file, 'a+') as tsvfile:
        writer = csv.writer(tsvfile, delimiter='\t', lineterminator='\n')
        writer.writerow([
            my_round, micro_acc, avg_center[0], avg_center[1], avg_center[2],
            avg_center[3]
        ])
def save_metric_csv(my_round, eval_to_use, stack_list, cluster):
    f_metric = 'metrics_'
    args = parse_args()
    if args.metric_file != '':
        f_metric = args.metric_file
    else:
        f_metric += "{}-{}-K{}.tsv".format(args.dataset, args.model,
                                           args.num_clusters)

    tot_micro_acc = 0.
    tot_micro_loss = 0.
    tot_micro_f1 = 0.
    tot_macro_f1 = 0.

    # Per-cluster weights: only clusters with more than one sample contribute.
    w_s = [c[0] for c in cluster if c[0] > 1]
    tot_ws = [c[0] for c in cluster if c[0] > 1]

    for center, w in zip(stack_list, w_s):
        device_micro_accuracy = np.average(
            [center[k]["accuracy"] for k in center.keys()])
        device_micro_loss = np.average(
            [center[k]["loss"] for k in center.keys()])
        device_microf1 = np.average([center[k]["f1"] for k in center.keys()])
        device_macrof1 = np.average(
            [center[k]["f1"] for k in center.keys()]) / len(w_s)

        tot_micro_acc += (device_micro_accuracy * w) / np.sum(tot_ws)
        tot_micro_loss += (device_micro_loss * w) / np.sum(tot_ws)
        tot_micro_f1 += (device_microf1 * w) / np.sum(tot_ws)
        tot_macro_f1 += device_macrof1

    with open(f_metric, 'a+') as tsvfile:
        writer = csv.writer(tsvfile, delimiter='\t', lineterminator='\n')
        writer.writerow([
            my_round, eval_to_use, tot_micro_acc, tot_micro_loss, tot_micro_f1,
            tot_macro_f1
        ])
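# The two save_metric_csv variants differ in how they aggregate: the first can
# only take an unweighted (macro) mean because no per-sample counts are
# available, while the second weights each cluster by its sample count. A
# minimal, self-contained sketch of the two aggregations on hypothetical
# per-client accuracies and sample counts (illustration only, not part of the
# original pipeline):
import numpy as np

accuracies = np.array([0.90, 0.60, 0.80])   # hypothetical per-client accuracy
num_samples = np.array([1000, 50, 200])     # hypothetical per-client samples

# Macro average: every client counts equally (first variant).
macro_acc = accuracies.mean()

# Sample-weighted average: clients with more data count more (what the second
# variant approximates with its per-cluster weights).
weighted_acc = np.average(accuracies, weights=num_samples)

print("macro={:.4f}  weighted={:.4f}".format(macro_acc, weighted_acc))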
def main():
    args, device = parse_args()

    # Set seeds
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    train(args, device)
def get_nsp_score_batch(nsp_predictor, predictions):
    """
    Get NSP scores of a batch.
    """
    import argparse
    from collections import namedtuple

    from readers.nsp_reader import NSPReader
    from utils.args import parse_args
    from tasks.next_sentence_prediction import NextSentencePrediction

    parser = argparse.ArgumentParser()
    NextSentencePrediction.add_cmdline_args(parser)
    parser.add_argument("--num_samples", type=int, default=None)
    parser.add_argument("--config_path", type=str, required=True)
    parser.add_argument("--mem_efficient", type=str2bool, default=False)

    args = parse_args(parser, allow_unknown=True)
    args.load(args.config_path)
    if not args.mem_efficient:
        if args.num_samples:
            args.batch_size *= args.num_samples
        if args.latent_type_size:
            args.batch_size *= args.latent_type_size
    args.tokenized_input = True
    reader = NSPReader(args)

    def __reader__():
        headers = ["src", "tgt", "data_id"]
        Example = namedtuple("Example", headers)

        for i, info in enumerate(predictions):
            context = post_process_context(info["context_token_ids"],
                                           reader,
                                           merge=False)
            context_tokenized_input = " [SEP] ".join(
                " ".join(utt) for utt in context)
            _, response = post_process_response(info["response_token_ids"],
                                                reader,
                                                merge=False)
            response_tokenized_input = " ".join(response)
            example = Example(src=context_tokenized_input,
                              tgt=response_tokenized_input,
                              data_id=i)
            record = reader._convert_example_to_record(example, is_infer=True)
            yield record
        return

    generator = reader.data_generator(
        reader=__reader__,
        is_infer=True,
        phase="test",
    )

    steps = 0
    for data in generator():
        outputs = nsp_predictor(data)
        for probs, data_id in zip(outputs[0], outputs[-1]):
            data_id = data_id[0]
            info = predictions[data_id]
            info["nsp_score"] = float(probs[1])
    return
def setup_args():
    parser = argparse.ArgumentParser()
    DialogReader.add_cmdline_args(parser)
    parser.add_argument("--input_file", type=str, required=True)
    parser.add_argument("--output_file", type=str, required=True)
    args = parse_args(parser)
    return args
def __init__(self, env):
    self.args = parse_args()
    self.env = env

    # Set the random seed if provided (affects client sampling and batching)
    random.seed(1 + self.args.seed)
    np.random.seed(12 + self.args.seed)
    tf.set_random_seed(123 + self.args.seed)

    self.model_path = '%s/%s.py' % (self.args.dataset, self.args.model)
    if not os.path.exists(self.model_path):
        print('Please specify a valid dataset and a valid model.')
    self.model_path = '%s.%s' % (self.args.dataset, self.args.model)
    print('############################## %s ##############################'
          % self.model_path)

    self.mod = importlib.import_module(self.model_path)
    self.ClientModel = getattr(self.mod, 'ClientModel')

    self.tup = MAIN_PARAMS[self.args.dataset][self.args.t]
    self.num_rounds = (self.args.num_rounds
                       if self.args.num_rounds != -1 else self.tup[0])
    self.eval_every = (self.args.eval_every
                       if self.args.eval_every != -1 else self.tup[1])

    # Suppress tf warnings
    tf.logging.set_verbosity(tf.logging.WARN)

    # Create 2 models
    self.model_params = MODEL_PARAMS[self.model_path]
    if self.args.lr != -1:
        self.model_params_list = list(self.model_params)
        self.model_params_list[0] = self.args.lr
        self.model_params = tuple(self.model_params_list)

    # Create client model, and share params with server model
    tf.reset_default_graph()
    self.client_model = self.ClientModel(self.args.seed, *self.model_params)

    # Create clients
    self.clients = setup_clients(self.args.aggregation, self.args.e, self.env,
                                 self.args.dataset, self.client_model)

    # Create server
    self.server = Server(self.client_model, len(self.clients))
    self.client_ids, self.client_groups, self.client_num_samples = \
        self.server.get_clients_info(self.clients)
    print('Clients in Total: %d' % len(self.clients))

    self.replica = self.args.replica
    self.segment = self.args.segment
    self.client_num = len(self.clients)
    self.main_proc = env.process(self.main_process())
def main():
    args = parse_args()

    train_data_dir = os.path.join('..', 'data', args.dataset, 'data', 'train')
    test_data_dir = os.path.join('..', 'data', args.dataset, 'data', 'test')
    users, groups, train_data, test_data = read_data(train_data_dir,
                                                     test_data_dir)

    trainer = MlheadTrainer(args, users, groups, train_data, test_data)
    trainer.train(args)
    trainer.finish(args)
def bi_cluster(g_soln, clients, updated_w):
    '''
    Takes a collection of clients and fits them. The feature input is the
    delta of each client's weight update; clients are divided into two groups
    based on cosine similarity, using sklearn's AgglomerativeClustering.
    '''
    args = parse_args()
    model_path = '%s.%s' % (args.dataset, args.model)
    model_params = MODEL_PARAMS[model_path]
    pos = len(model_params) - 1
    op_name = model_params[pos]

    def get_x(model_params, g_soln):
        for var, m_p, g_p in zip(all_vars, model_params, g_soln):
            if var.op.name == op_name:
                # print("shape of layer:", m_p.shape)
                z = m_p - g_p
                return np.array(z).flatten()

    # Need to check whether g_soln is correct
    c = clients[0]
    with c.model.graph.as_default():
        all_vars = tf.trainable_variables()
    '''
    for var, g_value, c_value in zip(allv, g_soln, z):
        if var.op.name == op_name:
            y = np.array(g_value).flatten()
            y_s = np.sort(y)
            z = np.array(c_value).flatten()
            z_s = np.sort(z)
            print("Global\n", y_s[:20])
            print("Client\n", z_s[:20])
    '''
    X = [
        get_x(updated_w[idx][1], g_soln)
        for idx, client in enumerate(clients)
    ]
    clustering = AgglomerativeClustering(n_clusters=2,
                                         affinity='cosine',
                                         linkage='average').fit(X)

    c1 = list()
    c2 = list()
    X_c1 = list()
    X_c2 = list()
    labels = clustering.labels_
    for counter, c in enumerate(clients):
        if labels[counter] == 1:
            c1.append(c)
            X_c1.append(X[counter])
        else:
            c2.append(c)
            X_c2.append(X[counter])
    return c1, c2, X_c1, X_c2
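# bi_cluster splits the flattened weight deltas in two with agglomerative
# clustering under cosine distance. A minimal, self-contained sketch of that
# step on synthetic deltas (the data below is made up; note that scikit-learn
# >= 1.4 renames the affinity argument to metric):
import numpy as np
from sklearn.cluster import AgglomerativeClustering

# Two clients pull the weights one way, two pull the other way.
X_demo = np.array([
    [1.0, 0.9, 1.1],
    [0.9, 1.0, 1.0],
    [-1.0, -1.1, -0.9],
    [-0.9, -1.0, -1.1],
])

demo_clustering = AgglomerativeClustering(n_clusters=2,
                                          affinity='cosine',
                                          linkage='average').fit(X_demo)
print(demo_clustering.labels_)  # e.g. [0 0 1 1]: one label per client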
def setup_args():
    """
    Setup arguments.
    """
    parser = argparse.ArgumentParser()
    models.add_cmdline_args(parser)
    DialogGeneration.add_cmdline_args(parser)
    args = parse_args(parser)
    args.load(args.config_path, "Model")
    args.run_infer = True  # only build infer program
    print(json.dumps(args, indent=2))
    return args
def setup_args():
    """
    Setup arguments.
    """
    parser = argparse.ArgumentParser()
    models.add_cmdline_args(parser)
    tasks.add_cmdline_args(parser)
    parser.add_argument("--nsp_inference_model_path", type=str, required=True)
    args = parse_args(parser)
    args.load(args.config_path, "Model")
    args.run_infer = True  # only build infer program
    print(json.dumps(args, indent=2))
    return args
def main():
    # hyper max-cross simi:
    '''
    Three parts to run this algorithm:
    1. call Fed to solve the problem
    2. cluster the clients into two groups c1, c2
    3. if already solved, terminate; else recursively call cluster_fed with c1 and c2
    '''
    args = parse_args()
    tf.reset_default_graph()
    init_soln = get_init_param(args)
    clients = None
    cluster_fed(init_soln, clients, args)
    print("Test on result")
def main():
    args = parse_args()
    sym = get_resnet101_test(args)

    data_path = "/mnt/dataset/car"
    label_path = Path(data_path, "annotations", "%s.csv" % args.imageset)
    assert label_path.exists(), "label_path not exists %s " % label_path

    result_path = Path('%s-%s.csv' %
                       ("results", "%s" % (time.strftime("%Y-%m-%d-%H-%M"))))
    print("create results file: %s" % result_path)

    with label_path.open('r') as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            img_path = Path(data_path, "images", args.imageset, row['name'])
            assert img_path.exists(), "img_path not exists %s " % img_path
            args.image = img_path.as_posix()
            demo_net(sym, ["__BG__", "car"], args, result_path)
def create_clients(aggregation, e, env, users, groups, train_data, test_data,
                   model):
    args = parse_args()
    if len(groups) == 0:
        groups = [[] for _ in users]
    a = [i for i in range(len(users))]
    if args.algorithm == 'fedavg':
        clients = [
            Client(aggregation, e, env, j, len(users), u, g, train_data[u],
                   test_data[u], model)
            for j, u, g in zip(a, users, groups)
        ]
    else:
        clients = [
            Client(aggregation, e, env, j, len(users), u, g, train_data[u],
                   test_data[u], model)
            for j, u, g in zip(a, users, groups)
        ]
        # clients = [Client(env, j, args.clients_per_round, u, g, train_data[u],
        #                   test_data[u], model)
        #            for j, u, g in zip(a, users, groups)]
        # clients = clients[:args.clients_per_round]
    return clients
def __init__(self,
             aggregation,
             e,
             env,
             idx,
             clients_num,
             client_id,
             group=None,
             train_data={'x': [], 'y': []},
             eval_data={'x': [], 'y': []},
             model=None):
    self._model = model
    self.aggregation = aggregation
    self.e = e
    self.id = client_id  # integer
    self.idx = int(idx)
    self.clients_num = clients_num
    self.group = group
    self.train_data = train_data
    self.eval_data = eval_data

    # Initialize this client's predicted bandwidth to every other node
    self.pridict_bandwidth = []
    for i in range(self.clients_num):
        if i == self.idx:
            self.pridict_bandwidth.append([1])
        else:
            self.pridict_bandwidth.append([init_pridict_bandwidth])

    self.updates = []
    self.model_para = model.get_params()
    self.train_time = []
    self.transfer_time = []
    self.updates_flat = []
    self.exit_bw = simpy.Container(env, init=CAPACITY, capacity=CAPACITY)
    self.record_time = {0: 0}
    # self.training_time = [env.now]
    # self.seg_transfer_time = [0] * clients_num
    self.send_que = simpy.Container(env, init=0, capacity=1000)
    self.sigal = False
    # self.max_seg_transfer_time = 0
    self.round_signal = False
    self.training_time = 0
    self.metrics = {}
    self.local_update = model.get_params()

    for i in range(clients_num):
        a = []
        for j in range(clients_num):
            a.append(-1)
        self.transfer_time.append(a)

    self.args = parse_args()
    self.test_signal = False
    self.model_shape = np.array(self.flat_updates(self.model_para)).shape
    self.m = np.zeros(self.model_shape)
    self.v = np.zeros(self.model_shape)
def setup_args(): """Setup arguments.""" parser = argparse.ArgumentParser() group = parser.add_argument_group("Model") group.add_argument("--init_from_ckpt", type=str, default="") group.add_argument("--vocab_size", type=int, default=8001) group.add_argument("--latent_type_size", type=int, default=20) group.add_argument("--num_layers", type=int, default=24) group = parser.add_argument_group("Task") group.add_argument("--is_cn", type=str2bool, default=False) args, _ = parser.parse_known_args() NSPReader.add_cmdline_args(parser) args = parse_args(parser) args.batch_size *= args.latent_type_size #print(json.dumps(args, indent=2)) return args
def setup_args():
    """
    Setup arguments.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--is_distributed", type=str2bool, default=False)
    parser.add_argument("--save_path", type=str, default="output")
    parser.add_argument("--infer_file", type=str, required=True)
    parser.add_argument("--output_name", type=str, required=True)
    parser.add_argument("--skip_steps", type=int, default=1)

    models.add_cmdline_args(parser)
    tasks.add_cmdline_args(parser)

    args = parse_args(parser)
    args.load(args.config_path, "Model")
    args.run_infer = True  # only build infer program
    print(json.dumps(args, indent=2))
    return args
def setup_args():
    """
    Setup arguments.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--is_distributed", type=str2bool, default=False)
    parser.add_argument("--save_path", type=str, default="output")
    parser.add_argument("--train_file", type=str, required=True)
    parser.add_argument("--valid_file", type=str, required=True)
    parser.add_argument("--num_epochs", type=int, default=20)
    parser.add_argument("--log_steps", type=int, default=100)
    parser.add_argument("--validation_steps", type=int, default=1000)
    parser.add_argument("--save_steps", type=int, default=5000)

    models.add_cmdline_args(parser)
    tasks.add_cmdline_args(parser)

    args = parse_args(parser)
    args.load(args.config_path, "Model")
    print(json.dumps(args, indent=2))
    return args
def main():
    args = parse_args()
    # client_id = sys.argv[1]
    # tangle_name = sys.argv[2]
    client_id = 'f0044_12'
    tangle_name = 120

    train_data_dir = os.path.join('leaf', 'data', args.dataset, 'data',
                                  'train_sm')
    test_data_dir = os.path.join('leaf', 'data', args.dataset, 'data',
                                 'test_sm')

    print("Loading data...")
    users, groups, train_data, test_data = read_data(train_data_dir,
                                                     test_data_dir)
    print("Loading data... complete")

    # train_single also expects malicious_node and poison_type;
    # run as a benign node here.
    print(
        train_single(client_id, None, 1, 0, train_data[client_id],
                     test_data[client_id], tangle_name, False, None))
def test_single(u, g, flops, seed, train_data, eval_data, tangle_name,
                set_to_use):
    # Suppress tf warnings
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

    random.seed(1 + seed)
    np.random.seed(12 + seed)
    tf.compat.v1.set_random_seed(123 + seed)

    client = build_client(u, g, flops, train_data, eval_data)
    tangle = Tangle.fromfile(tangle_name)
    node = Node(client, tangle)

    args = parse_args()
    reference_txs, reference, reference_poison_score = \
        node.obtain_reference_params(avg_top=args.reference_avg_top)
    node.client.model.set_params(reference)

    metrics = node.client.test(set_to_use)
    metrics['consensus_round'] = np.average(
        [tangle.transactions[tx].tag for tx in reference_txs])
    metrics['consensus_poisoning'] = reference_poison_score

    metrics['norm'] = 0
    parents = [tangle.transactions[tx].parents for tx in reference_txs]
    parents = set.union(*parents)
    if len(parents) == 2:
        p1, p2 = parents
        pw1 = tangle.transactions[p1].load_weights()
        pw2 = tangle.transactions[p2].load_weights()
        partial_norms = [
            np.linalg.norm(np.array(weights)[0] - np.array(weights)[1])
            for weights in zip(pw1, pw2)
        ]
        metrics['norm'] = np.linalg.norm(partial_norms)

    return u, metrics
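# test_single's 'norm' metric is the norm of the per-layer norms of the
# difference between the two parents' weights, which equals the L2 norm of the
# full flattened difference. A small self-contained check of that identity on
# hypothetical layer weights (not taken from the source):
import numpy as np

pw1_demo = [np.ones((2, 3)), np.zeros(4)]
pw2_demo = [np.full((2, 3), 0.5), np.ones(4)]

partial_norms_demo = [np.linalg.norm(a - b) for a, b in zip(pw1_demo, pw2_demo)]
norm_of_norms = np.linalg.norm(partial_norms_demo)

flat_norm = np.linalg.norm(
    np.concatenate([(a - b).ravel() for a, b in zip(pw1_demo, pw2_demo)]))

assert np.isclose(norm_of_norms, flat_norm)
print(norm_of_norms)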
def build_client(u, g, flops, train_data, eval_data):
    args = parse_args()

    model_path = '%s.%s' % (args.dataset, args.model)
    mod = importlib.import_module(model_path)
    ClientModel = getattr(mod, 'ClientModel')

    tup = MAIN_PARAMS[args.dataset][args.t]
    num_rounds = args.num_rounds if args.num_rounds != -1 else tup[0]
    eval_every = args.eval_every if args.eval_every != -1 else tup[1]
    clients_per_round = (args.clients_per_round
                         if args.clients_per_round != -1 else tup[2])

    model_params = MODEL_PARAMS[model_path]
    if args.lr != -1:
        model_params_list = list(model_params)
        model_params_list[0] = args.lr
        model_params = tuple(model_params_list)

    # Create client model, and share params with server model
    tf.reset_default_graph()
    client_model = ClientModel(1234, *model_params)
    client_model.flops = flops

    return Client(u, g, train_data, eval_data, client_model)
def print_stats(env, num_round, server, clients, num_samples, args, writer):
    args = parse_args()
    if args.algorithm == 'fedavg':
        train_stat_metrics = server.test_model(clients, set_to_use='train')
        print_metrics(train_stat_metrics, num_samples, prefix='train_')
        writer(num_round, train_stat_metrics, 'train')

        test_stat_metrics = server.test_model(clients, set_to_use='test')
        print_metrics(test_stat_metrics, num_samples, prefix='test_')
        writer(num_round, test_stat_metrics, 'test')
    else:
        train_stat_metrics = {}
        for client in clients:
            # client.model1 = server.model
            c_metrics = client.test(num_round, 'train')
            train_stat_metrics[client.id] = c_metrics
        print_metrics(train_stat_metrics, num_samples, prefix='train_')
        writer(num_round, train_stat_metrics, 'train')

        test_stat_metrics = {}
        for client in clients:
            # client.model1 = server.model
            c_metrics = client.test(num_round, 'test')
            test_stat_metrics[client.id] = c_metrics
        print_metrics(test_stat_metrics, num_samples, prefix='test_')
        writer(num_round, test_stat_metrics, 'test')
def main(): args = parse_args() print("ARGS") print(args) # Set the random seed if provided (affects client sampling, and batching) random.seed(1 + args.seed) np.random.seed(12 + args.seed) tf.set_random_seed(123 + args.seed) print("one-shot with {} groups".format(args.num_groups)) model_path = '%s/%s.py' % (args.dataset, args.model) if not os.path.exists(model_path): print('Please specify a valid dataset and a valid model.') model_path = '%s.%s' % (args.dataset, args.model) print('############################## %s ##############################' % model_path) mod = importlib.import_module(model_path) ClientModel = getattr(mod, 'ClientModel') tup = MAIN_PARAMS[args.dataset][args.t] num_rounds = args.num_rounds if args.num_rounds != -1 else tup[0] eval_every = args.eval_every if args.eval_every != -1 else tup[1] clients_per_round = args.clients_per_round if args.clients_per_round != -1 else tup[ 2] # Suppress tf warnings tf.logging.set_verbosity(tf.logging.WARN) # Create 2 models model_params = MODEL_PARAMS[model_path] if args.lr != -1: model_params_list = list(model_params) model_params_list[0] = args.lr model_params = tuple(model_params_list) # Create client model, and share params with server model tf.reset_default_graph() # client_model = ClientModel(args.seed, *model_params) ## IFCA client_models = [] for g_i in range(args.num_groups): client_model = ClientModel(args.seed + g_i, *model_params) client_models.append(client_model) client_model = client_models[0] ## IFCA end # Create server server = Server(client_models) # Create clients clients = setup_clients(args.dataset, client_model, args.use_val_set) client_ids, client_groups, client_num_samples = server.get_clients_info( clients) print('Clients in Total: %d' % len(clients)) if args.resume: print("---resume all models from {} model zero..".format(args.resume)) if os.path.exists(args.resume): ckpt = pickle.load(open(args.resume, "rb")) # inject the first model weights, but keep the last 4 wieghts (dense layer w b w b) for g_i in range(args.num_groups): for i, weight in enumerate(server.models[g_i]): if i < len(server.models[g_i]) - 2: server.models[g_i][i] = copy.deepcopy( ckpt['params'][0][i]) else: continue else: print("--- {} not found!".format(args.resume)) if args.checkpoint: print("---resume checkpoint from {}...".format(args.checkpoint)) if os.path.exists(args.checkpoint): ckpt = pickle.load(open(args.checkpoint, "rb")) # import ipdb; ipdb.set_trace() # inject the first model weights, but keep the last 4 wieghts (dense layer w b w b) for g_i in range(args.num_groups): for i, weight in enumerate(server.models[g_i]): server.models[g_i][i] = copy.deepcopy( ckpt['params'][g_i][i]) else: print("--- {} not found!".format(args.checkpoint)) print("Run one shot clustering ...") server.one_shot_clustering(clients, args.seed) stats = [] print('--- Random Initialization ---') stat_writer_fn = get_stat_writer_function(client_ids, client_groups, client_num_samples, args) sys_writer_fn = get_sys_writer_function(args) current_stats = print_stats(0, server, clients, client_num_samples, args, stat_writer_fn, args.use_val_set) current_stats['round'] = -1 stats.append(current_stats) # import ipdb; ipdb.set_trace() # Simulate training for i in range(num_rounds): t0 = time.time() # checking if norms of each weight same # for g_i in range(args.num_groups): # print("DEBUG m{} 0 {:.3f} 2 {:.3f} -5 {:.3f} -4 {:.3f} -2 {:.3f}".format(g_i, np.linalg.norm(server.models[g_i][0]), np.linalg.norm(server.models[g_i][2]), np.linalg.norm(server.models[g_i][-5]), 
np.linalg.norm(server.models[g_i][-4]), np.linalg.norm(server.models[g_i][-2]))) # Select clients to train this round server.select_clients(i, online(clients), num_clients=clients_per_round) c_ids, c_groups, c_num_samples = server.get_clients_info( server.selected_clients) # Simulate server model training on selected clients' data sys_metrics = server.train_model(num_epochs=args.num_epochs, batch_size=args.batch_size, minibatch=args.minibatch) # sys_writer_fn(i + 1, c_ids, sys_metrics, c_groups, c_num_samples) t1 = time.time() # Update server model server.update_model() t2 = time.time() print( '--- Round %d of %d: Trained %d Clients took t %.3f u %.3f sec ---' % (i + 1, num_rounds, clients_per_round, t1 - t0, t2 - t1)) # import ipdb; ipdb.set_trace() # Test model if (i + 1) % eval_every == 0 or (i + 1) == num_rounds: current_stats = print_stats(i + 1, server, clients, client_num_samples, args, stat_writer_fn, args.use_val_set) current_stats['round'] = i stats.append(current_stats) # import ipdb; ipdb.set_trace() # Save server model # ckpt_path = os.path.join('checkpoints', args.dataset) # if not os.path.exists(ckpt_path): # os.makedirs(ckpt_path) # save_path = server.save_model(os.path.join(ckpt_path, '{}.ckpt'.format(args.model))) # print('Model saved in path: %s' % save_path) ckpt = {"params": server.models, "stats": stats} best_accuracy = np.max([st['test']['accuracy'] for st in stats]) print("Best test accuracy : {}".format(best_accuracy)) print("saving results to", args.save) os.makedirs(os.path.dirname(args.save), exist_ok=True) pickle.dump(ckpt, open(args.save, "wb")) # import ipdb; ipdb.set_trace() # Close models server.close_model()
def main():
    eventlet.monkey_patch()
    args = parse_args()

    config_name = args.config_file
    while config_name[-4:] == '.cfg':
        config_name = config_name[:-4]

    # read config from file
    cfg = Config('{}.cfg'.format(config_name))

    # Set the random seed if provided (affects client sampling and batching)
    random.seed(1 + cfg.seed)
    np.random.seed(12 + cfg.seed)
    tf.compat.v1.set_random_seed(123 + cfg.seed)

    model_path = '%s/%s.py' % (cfg.dataset, cfg.model)
    if not os.path.exists(model_path):
        logger.error('Please specify a valid dataset and a valid model.')
        assert False
    model_path = '%s.%s' % (cfg.dataset, cfg.model)
    logger.info('############################## %s ##############################'
                % model_path)
    mod = importlib.import_module(model_path)
    ClientModel = getattr(mod, 'ClientModel')

    '''
    tup = MAIN_PARAMS[args.dataset][args.t]
    num_rounds = args.num_rounds if args.num_rounds != -1 else tup[0]
    eval_every = args.eval_every if args.eval_every != -1 else tup[1]
    clients_per_round = args.clients_per_round if args.clients_per_round != -1 else tup[2]
    '''
    num_rounds = cfg.num_rounds
    eval_every = cfg.eval_every
    clients_per_round = cfg.clients_per_round

    # Suppress tf warnings
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

    # Create 2 models
    model_params = MODEL_PARAMS[model_path]
    if cfg.lr != -1:
        model_params_list = list(model_params)
        model_params_list[0] = cfg.lr
        model_params = tuple(model_params_list)

    # Create client model, and share params with server model
    tf.reset_default_graph()
    client_model = ClientModel(cfg.seed, *model_params, cfg.gpu_fraction)

    # Create clients
    clients = setup_clients(cfg, client_model)
    # print(sorted([c.num_train_samples for c in clients]))

    # Create server
    server = Server(client_model, clients, cfg=cfg)
    client_ids, client_groups, client_num_samples = server.get_clients_info(clients)
    logger.info('Clients in Total: %d' % (len(clients)))

    # Initial status
    logger.info('===================== Random Initialization =====================')
    stat_writer_fn = get_stat_writer_function(client_ids, client_groups,
                                              client_num_samples, args)
    sys_writer_fn = get_sys_writer_function(args)
    # print_stats(0, server, clients, client_num_samples, args, stat_writer_fn)

    # Simulate training
    if num_rounds == -1:
        import sys
        num_rounds = sys.maxsize

    def timeout_handler(signum, frame):
        raise Exception

    def exit_handler(signum, frame):
        os._exit(0)

    for i in range(num_rounds):
        round_start_time = time.time()
        logger.info('===================== Round {} of {} ====================='.format(
            i + 1, num_rounds))

        # 1. selection stage
        logger.info('--------------------- selection stage ---------------------')
        # 1.1 select clients
        server.select_clients(i, online(clients), num_clients=clients_per_round)
        c_ids, c_groups, c_num_samples = server.get_clients_info(server.selected_clients)
        logger.info("selected client_ids: {}".format(c_ids))

        # 1.2 decide deadline for each client
        deadline = np.random.normal(cfg.round_ddl[0], cfg.round_ddl[1])
        while deadline <= 0:
            deadline = np.random.normal(cfg.round_ddl[0], cfg.round_ddl[1])
        deadline = int(deadline)
        logger.info('selected deadline: {}'.format(deadline))

        # 2. configuration stage
        logger.info('--------------------- configuration stage ---------------------')
        # 2.1 train (no parallel implementation)
        sys_metrics = server.train_model(num_epochs=cfg.num_epochs,
                                         batch_size=cfg.batch_size,
                                         minibatch=cfg.minibatch,
                                         deadline=deadline)
        sys_writer_fn(i + 1, c_ids, sys_metrics, c_groups, c_num_samples)

        # 3. update stage
        logger.info('--------------------- report stage ---------------------')
        # 3.1 update global model
        server.update_model(cfg.update_frac)

        # 3.2 total simulation time for this round
        logger.info("simulating round {} used {} seconds".format(
            i + 1, time.time() - round_start_time))

        # 4. Test model (if necessary)
        if eval_every == -1:
            continue
        if (i + 1) % eval_every == 0 or (i + 1) == num_rounds:
            logger.info('--------------------- test result ---------------------')
            test_clients = random.sample(clients, 50)
            sc_ids, sc_groups, sc_num_samples = server.get_clients_info(test_clients)
            logger.info('number of clients for test: {} of {} '.format(
                len(test_clients), len(clients)))
            another_stat_writer_fn = get_stat_writer_function(
                sc_ids, sc_groups, sc_num_samples, args)
            # print_stats(i + 1, server, test_clients, client_num_samples, args, stat_writer_fn)
            print_stats(i + 1, server, test_clients, sc_num_samples, args,
                        another_stat_writer_fn)

    # Save server model
    ckpt_path = os.path.join('checkpoints', cfg.dataset)
    if not os.path.exists(ckpt_path):
        os.makedirs(ckpt_path)
    save_path = server.save_model(os.path.join(ckpt_path, '{}.ckpt'.format(cfg.model)))
    logger.info('Model saved in path: %s' % save_path)

    # Close models
    server.close_model()
def main():
    args = parse_args()
    print(args)

    # Set the random seed if provided (affects client sampling and batching)
    random.seed(1 + args.seed)
    np.random.seed(12 + args.seed)
    torch.manual_seed(123 + args.seed)

    model_path = '%s/%s.py' % (args.dataset, args.model)
    if not os.path.exists(model_path):
        print('Please specify a valid dataset and a valid model.')
    model_path = '%s.%s' % (args.dataset, args.model)
    print('############################## %s ##############################'
          % model_path)
    mod = importlib.import_module(model_path)
    ClientModel = getattr(mod, 'ClientModel')

    tup = MAIN_PARAMS[args.dataset][args.t]
    num_rounds = args.num_rounds if args.num_rounds != -1 else tup[0]
    eval_every = args.eval_every if args.eval_every != -1 else tup[1]
    clients_per_round = (args.clients_per_round
                         if args.clients_per_round != -1 else tup[2])

    # Suppress logging
    logging.getLogger().setLevel(logging.DEBUG)

    # Create 2 models
    model_params = MODEL_PARAMS[model_path]
    if args.lr != -1:
        model_params_list = list(model_params)
        model_params_list[0] = args.lr
        model_params = tuple(model_params_list)

    # Create client model, and share params with server model
    client_model = ClientModel(args.seed, *model_params)

    # Create server
    server = Server(client_model)

    # Create clients
    clients = setup_clients(args.dataset, client_model, args.use_val_set)
    client_ids, client_groups, client_num_samples = server.get_clients_info(
        clients)
    print('Clients in Total: %d' % len(clients))

    # Initial status
    print('--- Random Initialization ---')
    stat_writer_fn = get_stat_writer_function(client_ids, client_groups,
                                              client_num_samples, args)
    sys_writer_fn = get_sys_writer_function(args)
    # print_stats(0, server, clients, client_num_samples, args, stat_writer_fn, args.use_val_set)

    # Simulate training
    for i in range(num_rounds):
        print('--- Round %d of %d: Training %d Clients ---'
              % (i + 1, num_rounds, clients_per_round))

        # Select clients to train this round
        server.select_clients(i, online(clients),
                              num_clients=clients_per_round)
        c_ids, c_groups, c_num_samples = server.get_clients_info(
            server.selected_clients)

        # Simulate server model training on selected clients' data
        sys_metrics = server.train_model(num_epochs=args.num_epochs,
                                         batch_size=args.batch_size,
                                         minibatch=args.minibatch)
        sys_writer_fn(i + 1, c_ids, sys_metrics, c_groups, c_num_samples)

        # Update server model
        server.update_model()

        # TODO
        # server.compress_model()

        # Test model
        if (i + 1) % eval_every == 0 or (i + 1) == num_rounds:
            print_stats(i + 1, server, clients, client_num_samples, args,
                        stat_writer_fn, args.use_val_set)

    # Save server model
    ckpt_path = os.path.join('checkpoints', args.dataset)
    if not os.path.exists(ckpt_path):
        os.makedirs(ckpt_path)
    save_path = server.save_model(
        os.path.join(ckpt_path, '{}.pth'.format(args.model)))
    print('Model saved in path: %s' % save_path)

    # Close models
    server.close_model()
    print("Training finished")
    # Tail of weight_sensitivity_analysis(); num_repeats, std and orig_out are
    # defined earlier in the function.
    layer_outputs = []
    for layer in model.parameters():
        curr_layer_outputs = []
        for i in range(num_repeats):
            gaussian_noise = torch.normal(mean=torch.zeros_like(layer), std=std)
            with torch.no_grad():
                layer += gaussian_noise
                curr_output = model(test_points)
                layer -= gaussian_noise
            # import pdb; pdb.set_trace()
            abs_diff = torch.abs(orig_out - curr_output)
            mean_abs_diff = torch.mean(abs_diff).data
            curr_layer_outputs.append(mean_abs_diff)
        # print(curr_layer_outputs)
        layer_outputs.append(np.mean(curr_layer_outputs))
    print(np.array(layer_outputs) / std)
    return layer_outputs


if __name__ == '__main__':
    args, device = parse_args()
    model = args.model(hidden_size=4)
    test_points = torch.tensor([1., 2., 3.]).reshape(-1, 1)
    import pdb
    pdb.set_trace()
    for _ in range(10):
        weight_sensitivity_analysis(model, test_points)
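# The fragment above estimates per-layer sensitivity by adding Gaussian noise
# to one parameter tensor and measuring how much the output moves; dividing by
# the noise std gives a rough finite-difference sensitivity. A minimal,
# self-contained sketch of the same measurement on a toy linear model (the
# model and std here are illustrative, not from the source):
import torch

torch.manual_seed(0)
toy_model = torch.nn.Linear(1, 1)
x = torch.tensor([[1.0], [2.0], [3.0]])
std = 1e-3

with torch.no_grad():
    base = toy_model(x)
    noise = torch.randn_like(toy_model.weight) * std
    toy_model.weight += noise        # perturb the layer ...
    perturbed = toy_model(x)
    toy_model.weight -= noise        # ... and restore it

sensitivity = (perturbed - base).abs().mean() / std
print(float(sensitivity))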
        # Tail of train(): end of the per-epoch loop, followed by the final
        # checkpoint and the entry point.
        mean_sum_loss = sum_loss / forward_times_per_epoch
        if scheduler_align is not None:
            scheduler_align.step(mean_sum_loss)

        if (epoch + 1) % arg.save_interval == 0:
            torch.save(
                align.state_dict(),
                arg.save_folder + 'align_' + arg.dataset + '_' +
                str(epoch + 1) + '.pth')

        print('\nepoch: {:0>4d} | loss: {:.10f}'.format(
            epoch,
            mean_sum_loss,
        ))

    torch.save(
        align.state_dict(),
        arg.save_folder + 'align_' + arg.dataset + '_' + str(epoch + 1) +
        '.pth')
    print('Training done!')


if __name__ == '__main__':
    arg = parse_args()
    if not os.path.exists(arg.save_folder):
        os.mkdir(arg.save_folder)
    if not os.path.exists(arg.resume_folder):
        os.mkdir(arg.resume_folder)
    train(arg)
import sys
import datetime
import os

from utils.config import load_config, load_configs, global_conf
from utils.log import Logger
from utils.args import parse_args
from utils.dataset import datasetUtils
from utils.task import taskUtils
from utils.data import load_seg_data_paths, load_cla_data_paths_and_labels_specific

args = parse_args()
log_name = datetime.datetime.strftime(datetime.datetime.now(),
                                      "%Y-%m-%d-%H-%M-%S") + ".log"
sys.stdout = Logger(filename=os.path.join("logs", log_name),
                    write_log=global_conf["write_log"])


def run_task(task):
    task.start()
    task.run()
    task.end()


def main():
    # Check if the args are valid.
    assert args.task_type in (
        "seg", "cla", "two_phases"
    ), "Only support segmentation and classification currently."
    assert args.mode in ("deep", "active", "fed", "lefal",
                         "tefal"), "Mode not supported."
plt.plot(epochs, val_loss_values, 'b', label='Validation loss')  # 'b' means a solid blue line
plt.title('Training and validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

np.random.seed(123)
tf.set_random_seed(456)

# arguments
args, configs = parse_args()

# data load
is_cycled = configs['translation']['is_cycled']
feats_dict = load_search_data()

print("FORMING SEQ2SEQ MODEL...")
features = args.feature  # e.g. ['a', 't']
assert len(features) == 2, 'Wrong number of features'

# print("PREP FOR TRAINING...")
# filename = '_'.join(args.feature) + "_attention_seq2seq_" + \
#     str("bi_directional" if configs['translation']['is_bidirectional']
#         else '') + \
#     "_bimodal.h5"
def main():
    args = parse_args()
    num_rounds = args.num_rounds
    eval_every = args.eval_every
    clients_per_round = args.clients_per_round
    ctx = mx.gpu(args.ctx) if args.ctx >= 0 else mx.cpu()

    log_dir = os.path.join(args.log_dir, args.dataset, str(args.log_rank))
    os.makedirs(log_dir, exist_ok=True)
    log_fn = "output.%i" % args.log_rank
    log_file = os.path.join(log_dir, log_fn)
    log_fp = open(log_file, "w+")

    # Set the random seed; affects client sampling and batching
    random.seed(1 + args.seed)
    np.random.seed(12 + args.seed)
    mx.random.seed(123 + args.seed)

    # Import the client model
    client_path = "%s/client_model.py" % args.dataset
    if not os.path.exists(client_path):
        print("Please specify a valid dataset.", file=log_fp, flush=True)
        return
    client_path = "%s.client_model" % args.dataset
    mod = importlib.import_module(client_path)
    ClientModel = getattr(mod, "ClientModel")

    # Learning rate, num_classes, and so on
    param_key = "%s.%s" % (args.dataset, args.model)
    model_params = MODEL_PARAMS[param_key]
    if args.lr != -1:
        model_params_list = list(model_params)
        model_params_list[0] = args.lr
        model_params = tuple(model_params_list)
    num_classes = model_params[1]

    # Create the shared client model
    client_model = ClientModel(args.seed, args.dataset, args.model, ctx,
                               *model_params)

    # Create server
    server = Server(client_model, args.dataset, args.model, num_classes, ctx)

    # Create clients
    clients = setup_clients(client_model, args)
    _ = server.get_clients_info(clients)
    client_ids, client_groups, client_num_samples = _
    print("Total number of clients: %d" % len(clients),
          file=log_fp, flush=True)

    # Display initial status
    print("--- Random Initialization ---", file=log_fp, flush=True)
    stat_writer_fn = get_stat_writer_function(client_ids, client_groups,
                                              client_num_samples, args)
    sys_writer_fn = get_sys_writer_function(args)
    print_stats(0, server, clients, client_num_samples, stat_writer_fn,
                args.use_val_set, log_fp)

    # Training simulation
    for r in range(1, num_rounds + 1):
        print("--- Round %d of %d: Training %d clients ---"
              % (r, num_rounds, clients_per_round), file=log_fp, flush=True)

        # Select clients
        server.select_clients(r, online(clients), clients_per_round)
        _ = server.get_clients_info(server.selected_clients)
        c_ids, c_groups, c_num_samples = _

        # Simulate server model training on selected clients' data
        sys_metrics = server.train_model(r, args.num_epochs, args.batch_size)
        sys_writer_fn(r, c_ids, sys_metrics, c_groups, c_num_samples)

        # Update server model
        server.update_model()

        # Test model
        if r % eval_every == 0 or r == num_rounds:
            print_stats(r, server, clients, client_num_samples, stat_writer_fn,
                        args.use_val_set, log_fp)

    # Save the top server model
    server.save_model(log_dir)
    log_fp.close()