def learn_query_plan(self):
    h_s = {}
    h_T = {}
    e_V = {}
    candidates = {}
    for ts in self.hypothesis.G:
        # print("Searching best path for", ts)
        g = self.hypothesis.G[ts]
        h_s[ts] = Search(g).final_plan
        print("Best path for ts", ts, "is", h_s[ts].path, "with cost", h_s[ts].cost)
    for fold in range(1, 1 + self.K):
        (G_t, G_v) = partition_data(self.hypothesis.G, fold, self.K)
        h_T[fold] = get_min_error_path(G_t, h_s)
        print("For fold", fold, "best plan", h_T[fold].path)
        error_fold = 0
        for ts in G_v:
            path1 = h_s[ts]
            path2 = QueryPlan(map_input_graph(G_v[ts]), h_T[fold].path)
            error_fold += (path2.cost - path1.cost) * (path2.cost - path1.cost)
        e_V[fold] = math.sqrt(error_fold)
        candidates[fold] = (h_T[fold], e_V[fold])
    self.final_plan = min_error(candidates.values())
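# --- Hedged illustration (not part of the original project) ---
# learn_query_plan above relies on a project helper named min_error; as a
# minimal sketch under the convention built above (candidates[fold] is a
# (plan, error) tuple), it could simply return the plan whose cross-validation
# error is smallest. Treat this as an assumed, hypothetical implementation.
def min_error(candidates):
    """Return the plan from (plan, error) pairs with the smallest error."""
    best_plan, _best_error = min(candidates, key=lambda pair: pair[1])
    return best_plan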
def get_moon_indices(dataset='CIFAR10', parties=10, concentration=0.5, seed=0):
    with util.add_path('../master_moon'):
        import utils as moon_utils
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
    random.seed(seed)
    res = moon_utils.partition_data(dataset.lower(), "data", None, "noniid",
                                    parties, beta=concentration)
    labels = res[1]
    classes = np.unique(labels)
    idx_map = res[4]
    idxs = [np.array(idx_map[i]) for i in idx_map]
    counts = np.zeros((parties, len(classes)), dtype=int)
    for i, idx in enumerate(idxs):
        counts[i] = np.bincount(labels[idx], minlength=len(classes))
    dists = np.array([counts[i] / s for i, s in enumerate(counts.sum(axis=1))])
    return idxs, counts, dists
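# --- Hedged usage sketch ---
# Example call for the helper above: a Dirichlet(beta=0.5) non-IID split of
# CIFAR-10 across 10 parties, followed by a printout of per-party class
# histograms. This assumes the MOON utilities imported inside
# get_moon_indices are available under '../master_moon' and that the dataset
# has already been downloaded.
if __name__ == "__main__":
    idxs, counts, dists = get_moon_indices(dataset='CIFAR10', parties=10,
                                           concentration=0.5, seed=0)
    for party, row in enumerate(counts):
        print("party {}: {} samples, class counts {}".format(party, row.sum(), row.tolist()))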
logger.info("Set TF configuration for {} gpus".format(K.tensorflow_backend._get_available_gpus())) logger.info('Begin training active learning policy..') # load random initialised policy policy = getPolicy(k_num, state_dim) policy.save(policyname) # Memory (two lists) to store states and actions states = [] actions = [] for tau in range(0, args.episodes): # partition data logger.info(" * Start episode {}".format(str(tau))) logger.info("[Ep {}] Split data to train, validation and unlabeled".format(str(tau))) x_la, y_la, x_un, y_un = utils.partition_data(data, labels, args.label_data_size, shuffle=True) # Split initial train, validation set x_trn, y_trn, x_val, y_val = utils.partition_data(x_la, y_la, args.initial_training_size, shuffle=True) x_pool = list(x_un) y_pool = list(y_un) logger.info("[Episode {}] Load Policy from path {}".format(str(tau), policyname)) policy = load_model(policyname) # Initilize classifier model = getConv2DClassifier(input_shape=(28, 28, 1), num_classes=NUM_CLASSES, learning_rate=args.classifier_learning_rate, embedding_size=EMBEDDING_SIZE,
dataset = pickle.load(handle)

np.random.seed(10)
n_Clus = 4
dim = len(dataset['data_x'][0]) * n_Clus
l = int(dim)
Ptp01, Ptp05 = [], []
S_x = int(l * 0.7)  # 610
group_member = 40   # 2, 5, 10, 16, 20, 40
W = np.random.random([dim, l])
U, S, V = np.linalg.svd(W)
W = U[:, :l]
param = {'encode': 'soft', 'normalize': 3, 'n_Clus': n_Clus}

# Assign data to groups
data = np.array(dataset['data_x'])
data = zscore(np.array(dataset['data_x']), axis=0)  # N*d
groups = partition_data(data, group_member, partitioning='random')

# Compute group representations
group_vec, VLAD_Codebook = vlad_group_representation(data, groups, param)
group_vec = np.array(group_vec).T
group_vec = hashing(group_vec, W, S_x)

# The embedding for H0 queries
n_q0 = len(dataset['H0_id'])
H0_data = zscore(np.array(dataset['H0_x']).T, axis=1)  # LFW
# H0_data = zscore(np.array(dataset['H0_x']), axis=0)  # CFP
H0_data = [VLADEncoding(np.expand_dims(H0_data[i, :], axis=0), VLAD_Codebook,
                        encode=param['encode'], normalize=param['normalize'])
           for i in range(n_q0)]
Q0 = np.array(H0_data).T
Q0 = hashing(np.array(H0_data).T, W, S_x)
H0_claimed_id = np.random.randint(0, len(groups['ind']), size=n_q0).astype(int)
D00 = np.linalg.norm(Q0 - group_vec[:, H0_claimed_id], axis=0)

# The embedding for H1 queries
def main_func(args):
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
    # device = torch.device(args.device if use_cuda else "cpu")
    device = 'cuda' if use_cuda else 'cpu'
    """
    # hack to make stuff work on GD's machines
    if torch.cuda.device_count() > 2:
        device = 'cuda:4' if use_cuda else 'cpu'
        # device = 'cuda:2' if use_cuda else 'cpu'
        # device = 'cuda' if use_cuda else 'cpu'
    else:
        device = 'cuda' if use_cuda else 'cpu'
    """
    logger.info("Running Attack of the tails with args: {}".format(args))
    logger.info(device)
    logger.info('==> Building model..')

    torch.manual_seed(args.seed)
    criterion = nn.CrossEntropyLoss()

    # add random seed for the experiment for reproducibility
    seed_experiment(seed=args.rand_seed)

    import copy

    # the hyper-params are inspired by the paper "Can you really backdoor FL?"
    # (https://arxiv.org/pdf/1911.07963.pdf)
    partition_strategy = "homo"
    # partition_strategy = "hetero-dir"
    net_dataidx_map = partition_data(args.dataset, './data', partition_strategy,
                                     args.num_nets, 0.5, args)

    # rounds of FL to conduct; some hyper-params here:
    local_training_period = args.local_train_period  # 5 #1
    adversarial_local_training_period = 5

    # load poisoned dataset:
    poisoned_train_loader, vanilla_test_loader, targetted_task_test_loader, \
        num_dps_poisoned_dataset, clean_train_loader = load_poisoned_dataset(args=args)

    # READ_CKPT = False
    if READ_CKPT:
        if args.model == "lenet":
            net_avg = Net(num_classes=10).to(device)
            with open("./checkpoint/emnist_lenet_10epoch.pt", "rb") as ckpt_file:
                ckpt_state_dict = torch.load(ckpt_file, map_location=device)
        elif args.model in ("vgg9", "vgg11", "vgg13", "vgg16"):
            net_avg = get_vgg_model(args.model).to(device)
            # net_avg = VGG(args.model.upper()).to(device)
            # load model here
            # with open("./checkpoint/trained_checkpoint_vanilla.pt", "rb") as ckpt_file:
            with open("./checkpoint/Cifar10_{}_10epoch.pt".format(args.model.upper()),
                      "rb") as ckpt_file:
                ckpt_state_dict = torch.load(ckpt_file, map_location=device)
        net_avg.load_state_dict(ckpt_state_dict)
        logger.info("Loading checkpoint file successfully ...")
    else:
        if args.model == "lenet":
            net_avg = Net(num_classes=10).to(device)
        elif args.model in ("vgg9", "vgg11", "vgg13", "vgg16"):
            net_avg = get_vgg_model(args.model).to(device)

    logger.info("Test the model performance on the entire task before FL process ... ")
") test(net_avg, device, vanilla_test_loader, test_batch_size=args.test_batch_size, criterion=criterion, mode="raw-task", dataset=args.dataset) test(net_avg, device, targetted_task_test_loader, test_batch_size=args.test_batch_size, criterion=criterion, mode="targetted-task", dataset=args.dataset, poison_type=args.poison_type) # let's remain a copy of the global model for measuring the norm distance: vanilla_model = copy.deepcopy(net_avg) if args.fl_mode == "fixed-freq": arguments = { # "poisoned_emnist_dataset":poisoned_emnist_dataset, "vanilla_model": vanilla_model, "net_avg": net_avg, "net_dataidx_map": net_dataidx_map, "num_nets": args.num_nets, "dataset": args.dataset, "model": args.model, "part_nets_per_round": args.part_nets_per_round, "fl_round": args.fl_round, "local_training_period": args.local_train_period, # 5 #1 "adversarial_local_training_period": args.adversarial_local_training_period, "args_lr": args.lr, "args_gamma": args.gamma, "attacking_fl_rounds": [i for i in range(1, args.fl_round + 1) if (i - 1) % 10 == 0], "num_dps_poisoned_dataset": num_dps_poisoned_dataset, "poisoned_emnist_train_loader": poisoned_train_loader, # XXX: why is poisoned_train_loader assigned to poisoned_emnist.., it can be for another dataset as well. "clean_train_loader": clean_train_loader, "vanilla_emnist_test_loader": vanilla_test_loader, # XXX: why is vanilla loader assigned to vanilla_emnist.., it can be for another dataset as well. "targetted_task_test_loader": targetted_task_test_loader, "batch_size": args.batch_size, "test_batch_size": args.test_batch_size, "log_interval": args.log_interval, "defense_technique": args.defense_method, "attack_method": args.attack_method, "eps": args.eps, "norm_bound": args.norm_bound, "poison_type": args.poison_type, "device": device, "model_replacement": args.model_replacement, "project_frequency": args.project_frequency, "adv_lr": args.adv_lr, "prox_attack": args.prox_attack, "attack_case": args.attack_case, "stddev": args.stddev, "attacker_pool_size": args.attacker_pool_size } frequency_fl_trainer = FrequencyFederatedLearningTrainer( arguments=arguments) frequency_fl_trainer.run_modified() elif args.fl_mode == "fixed-pool": arguments = { # "poisoned_emnist_dataset":poisoned_emnist_dataset, "vanilla_model": vanilla_model, "net_avg": net_avg, "net_dataidx_map": net_dataidx_map, "num_nets": args.num_nets, "dataset": args.dataset, "model": args.model, "part_nets_per_round": args.part_nets_per_round, "attacker_pool_size": args.attacker_pool_size, "fl_round": args.fl_round, "local_training_period": args.local_train_period, "adversarial_local_training_period": args.adversarial_local_training_period, "args_lr": args.lr, "args_gamma": args.gamma, "num_dps_poisoned_dataset": num_dps_poisoned_dataset, "poisoned_emnist_train_loader": poisoned_train_loader, "clean_train_loader": clean_train_loader, "vanilla_emnist_test_loader": vanilla_test_loader, "targetted_task_test_loader": targetted_task_test_loader, "batch_size": args.batch_size, "test_batch_size": args.test_batch_size, "log_interval": args.log_interval, "defense_technique": args.defense_method, "attack_method": args.attack_method, "eps": args.eps, "norm_bound": args.norm_bound, "poison_type": args.poison_type, "device": device, "model_replacement": args.model_replacement, "project_frequency": args.project_frequency, "adv_lr": args.adv_lr, "prox_attack": args.prox_attack, "attack_case": args.attack_case, "stddev": args.stddev } fixed_pool_fl_trainer = FixedPoolFederatedLearningTrainer( arguments=arguments) return 
fixed_pool_fl_trainer.run()
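# --- Hedged illustration (not the project's partition_data) ---
# A minimal sketch of what the "homo" partition strategy used in main_func is
# assumed to produce: net_dataidx_map as a dict mapping each client id to an
# approximately equal-sized random slice of dataset indices.
# homo_partition_sketch is a hypothetical name introduced here for illustration.
import numpy as np

def homo_partition_sketch(n_samples, n_nets, seed=0):
    rng = np.random.default_rng(seed)
    shuffled = rng.permutation(n_samples)
    splits = np.array_split(shuffled, n_nets)
    return {client_id: splits[client_id] for client_id in range(n_nets)}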
classifiername = "{}/{}_classifier.h5".format(args.output, DATASET_NAME) logger.info("Train classifier on dataset {}".format(DATASET_NAME)) logger.info(" * INPUT directory: {}".format(TEXT_DATA_DIR)) logger.info(" * OUTPUT classfier {}".format(classifiername)) # first, build index mapping words in the embeddings set # to their embedding vector embeddings_index = utils.load_embeddings(GLOVE_DIR) # second, prepare text samples and their labels data, labels, word_index = utils.load_data(TEXT_DATA_DIR, MAX_NB_WORDS, MAX_SEQUENCE_LENGTH) # test_data, test_labels, _ = utils.load_data(TEST_DIR, MAX_NB_WORDS, MAX_SEQUENCE_LENGTH) # data set for inisialize the model test_data, test_labels, train_data, train_labels = utils.partition_data( data, labels, 100, shuffle=True) dev_data, dev_labels, train_data, train_labels = utils.partition_data( train_data, train_labels, 5, shuffle=True) logger.info("Dataset size: train = {}, test = {}, dev = {}".format( len(train_data), len(test_data), len(dev_data))) embedding_matrix, num_words = utils.construct_embedding_table( embeddings_index, word_index, MAX_NB_WORDS, EMBEDDING_DIM) logger.info('Set TF configuration') config = tf.ConfigProto() config.gpu_options.allow_growth = True config.gpu_options.visible_device_list = "0" config.log_device_placement = False set_session(tf.Session(config=config)) logger.info('Begin train classifier..')