def get_loaders(self, db_name, encoders, batch_size, num_workers):
    db_info = get_db_info(db_name)
    max_nodes_per_graph = None
    _ = get_db_container(db_name)
    train_data, val_data, test_data = get_train_val_test_datasets(
        dataset_name=db_name,
        train_test_split='use_full_train',
        encoders=encoders)
    train_loader = get_dataloader(dataset=train_data,
                                  batch_size=batch_size,
                                  sampler_class_name='SequentialSampler',
                                  num_workers=num_workers,
                                  max_nodes_per_graph=max_nodes_per_graph)
    val_loader = get_dataloader(dataset=val_data,
                                batch_size=batch_size,
                                sampler_class_name='SequentialSampler',
                                num_workers=num_workers,
                                max_nodes_per_graph=max_nodes_per_graph)
    test_loader = get_dataloader(dataset=test_data,
                                 batch_size=batch_size,
                                 sampler_class_name='SequentialSampler',
                                 num_workers=num_workers,
                                 max_nodes_per_graph=max_nodes_per_graph)
    loaders = {'train': train_loader, 'val': val_loader, 'test': test_loader}
    return db_info, loaders
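# Hypothetical usage of get_loaders(); `experiment` stands for an instance of
# the surrounding class, and the db_name / encoder names mirror those used in
# the test code further down (placeholders for your own configuration).
db_info, loaders = experiment.get_loaders(
    db_name='acquirevaluedshopperschallenge',
    encoders=dict(CATEGORICAL='CategoricalOrdinalEnc',
                  SCALAR='ScalarRobustScalerEnc',
                  DATETIME='DatetimeScalarEnc',
                  LATLONG='LatLongScalarEnc',
                  TEXT='TextSummaryScalarEnc'),
    batch_size=256,
    num_workers=0)
for split, loader in loaders.items():
    print(split, len(loader.dataset))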
def train(params):
    model = FCNwithGloRe(params)
    trainer = Trainer(params)
    image_size = params["common"]["image_size"][0]
    train_data_path = params["common"]["train_data_path"]
    val_data_path = params["common"]["val_data_path"]
    train_batch_size = params["common"]["train_batch_size"]
    val_batch_size = params["common"]["val_batch_size"]
    num_class = params["common"]["num_class"]
    train_dataloader = get_dataloader(train_data_path, train_batch_size,
                                      num_class, image_size, is_train=True)
    val_dataloader = get_dataloader(val_data_path, val_batch_size,
                                    num_class, is_train=False)

    dt_now = datetime.datetime.now()
    result_dir = (f"./result/{dt_now.year}{dt_now.month:0>2}{dt_now.day:0>2}"
                  f"-{dt_now.hour:0>2}{dt_now.minute:0>2}/")
    os.makedirs(result_dir, exist_ok=True)
    with open(f"{result_dir}/params.yaml", "w") as f:
        f.write(yaml.dump(params, default_flow_style=False))

    trainer.train(model, result_dir,
                  train_dataloader=train_dataloader,
                  val_dataloader=val_dataloader)
def run_DA_model(source_name, target_name, args):
    """Function for running experiments using the domain adaptation model."""
    # Get the DataLoaders
    train_loader = get_dataloader([source_name, target_name], True, args)
    test_source_loader = get_dataloader(source_name, False, args)
    test_target_loader = get_dataloader(target_name, False, args)

    # Train the model
    results_df = train_DA(train_loader, test_source_loader,
                          test_target_loader, args)

    save_name = (f"{source_name}-{target_name}_epoch={args.epochs}"
                 f"_lr={args.lr}_lambda={args.lambd}")
    save_results('da', save_name, results_df)
def run_LWF_model(source_name, target_name, args):
    """Function for running experiments using the learning without forgetting model."""
    # Get the DataLoaders
    train_source_loader = get_dataloader(source_name, True, args)
    train_target_loader = get_dataloader(target_name, True, args)
    test_source_loader = get_dataloader(source_name, False, args)
    test_target_loader = get_dataloader(target_name, False, args)

    # Train the model
    results_df = train_LWF(train_source_loader, train_target_loader,
                           test_target_loader, test_source_loader, args)

    save_name = (f"{source_name}-{target_name}_epoch={args.epochs}"
                 f"_lr={args.lr}_lambda={args.lambd}")
    save_results('lwf', save_name, results_df)
def main(args, **model_kwargs):
    device = torch.device(args.device)
    args.device = device

    if args.dataset == 'abilene_tm':
        args.nNodes = 12
        args.day_size = 288
    elif args.dataset == 'geant_tm':
        args.nNodes = 22
        args.day_size = 96
    elif args.dataset == 'brain_tm':
        args.nNodes = 9
        args.day_size = 1440
    elif 'sinet' in args.dataset:
        args.nNodes = 74
        args.day_size = 288
    else:
        raise ValueError('Dataset not found!')

    train_loader, val_loader, test_loader, graphs = utils.get_dataloader(args)

    X = test_loader.dataset.X.cpu().numpy()
    lamda_ratios = []
    for i in range(0, X.shape[0], args.seq_len_y):
        max_max = np.max(X[i:i + args.seq_len_y])
        sum_max = np.sum(np.max(X[i:i + args.seq_len_y], axis=0))
        if max_max != 0:
            _r = sum_max / max_max
        else:
            _r = 0
        lamda_ratios.append(_r)
        print(i, ': ', _r)

    lamda_ratios = np.array(lamda_ratios)
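# A small, self-contained illustration of the ratio computed above: for one
# window, the sum of per-column maxima divided by the single global maximum.
# The toy values below are made up purely for illustration.
import numpy as np

window = np.array([[1.0, 2.0],
                   [3.0, 0.5]])
max_max = np.max(window)                  # 3.0
sum_max = np.sum(np.max(window, axis=0))  # 3.0 + 2.0 = 5.0
ratio = sum_max / max_max if max_max != 0 else 0
print(ratio)                              # 1.666...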
def gen_adv_examples(model, attack, arguments, total=TOTAL):
    model.eval()
    fb_model = foolbox.models.PyTorchModel(model, (-1, 1), 10,
                                           cuda=arguments['--cuda'])
    attack_instance = attack(fb_model)

    # Retrieve the validation set for this experiment
    exp_name = arguments['<exp_name>']
    dataset_id = get_dataset_id(exp_name)
    _, valloader, _, _ = utils.get_dataloader(dataset_id, 1)

    ad_labels = []
    true_labels = []
    adv_examples = []
    for data, label in valloader:
        if len(adv_examples) == total:
            break
        # import pdb; pdb.set_trace()
        label = label.type(torch.LongTensor)
        adversarial = attack_instance(data.numpy()[0], label=label.numpy()[0])
        if adversarial is not None:
            adv_examples.append(adversarial)
            adv_ex = Variable(torch.Tensor(adversarial))
            if arguments['--cuda']:
                adv_ex = adv_ex.cuda()
            # Take the argmax over the model's output scores so we compare
            # predicted class indices against the true labels below
            ad_label = model(adv_ex).argmax(dim=-1)
            ad_labels.append(ad_label.data.cpu().numpy())
            true_labels.append(label.numpy())

    print("Adv Fail Rate: {}".format(
        np.mean(np.array(ad_labels) == np.array(true_labels))))
    return np.array(adv_examples), np.array(ad_labels), np.array(true_labels)
def train(self, model_save_path, data=None):
    """
    Fit the model on the question data using a batch-hard triplet loss and
    save it to the folder specified by `model_save_path`.

    Example shapes of the underlying data:
        questions = ['Q1', 'Q2', 'Q3', ...]
        labels = [1, 2, 3, 1, 4, 8, 9, 10]
        generated_ques = {'Q1': ['GQ1-1', 'GQ1-2', ...], 'Q2': ['GQ2-1', ...]}
        bs: 32
        n: 4
        model_save_path: './models/model_first'

    data --> a dict:
        {
            'question_to_label': mapping from question to label,
            'bs': batch size for training,
            'n': number of classes to sample in a batch (bs % n == 0),
        }
    model_save_path: path of the folder to save the model; the folder should
    be empty or not yet created.

    If data is None, the pre-saved data is used.
    """
    if data is None:
        data = {
            'question_to_label': self.question_to_label,
            'bs': 32,
            'n': 4,
        }
    data['model'] = self.model
    train_dataloader = get_dataloader(**data)
    train_loss = losses.BatchHardTripletLoss(sentence_embedder=self.model)
    self.model.fit(
        train_objectives=[(train_dataloader, train_loss)],
        epochs=1,
        evaluator=None,
        output_path=model_save_path,
    )
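# Hypothetical call to train(); `matcher` stands for an instance of the
# surrounding class, and the questions/labels are illustrative only. In
# practice each label needs several paraphrased questions so the batch
# sampler can draw bs // n examples per class.
example_data = {
    'question_to_label': {'Q1': 1, 'Q2': 2, 'Q3': 3, 'Q4': 4},
    'bs': 32,
    'n': 4,
}
matcher.train(model_save_path='./models/model_first', data=example_data)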
def predict(model, eval_dataset, args):
    """
    Get predicted scores (un-normalized) for examples in dataset.

    Args:
    - model: BertModelForLangID
    - eval_dataset: BertDatasetForTesting
    - args

    Returns: predicted scores, tensor of shape (nb examples, nb classes)
    """
    assert type(eval_dataset) == BertDatasetForTesting
    dataloader = get_dataloader(eval_dataset, args.eval_batch_size,
                                args.local_rank)
    scores = []
    model.eval()
    for step, batch in enumerate(tqdm(dataloader, desc="Prediction")):
        # Unpack batch
        batch = tuple(t.to(args.device) for t in batch)
        input_ids = batch[0]
        input_mask = batch[1]
        segment_ids = batch[2]
        with torch.no_grad():
            lid_scores = model(input_ids=input_ids, input_mask=input_mask,
                               segment_ids=segment_ids)
        scores.append(lid_scores)
    scores_tensor = torch.cat(scores, dim=0)
    return scores_tensor
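# The scores returned by predict() are un-normalized; a minimal sketch of how
# a caller might turn them into probabilities and class predictions, assuming
# model, eval_dataset, and args are already set up as above.
scores = predict(model, eval_dataset, args)   # (nb examples, nb classes)
probs = torch.softmax(scores, dim=-1)         # per-example probabilities
pred_ids = torch.argmax(scores, dim=-1)       # predicted class indices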
def main():
    args = update_parser_args(task="downstream")
    config = get_config(args)
    downstream_dataset = CustomDataset(config.data)
    train_ds, test_ds = downstream_dataset.get_dataset(config.dataset_name)
    num_classes = len(train_ds.classes)

    if args.frac:  # use fraction of train labels
        samples = list(range(0, len(train_ds)))
        random.shuffle(samples)
        # might want to rethink when classes are imbalanced
        samples = samples[:int(args.frac * len(train_ds))]
        # https://stackoverflow.com/a/58703467/5536853
        # https://discuss.pytorch.org/t/how-to-get-a-part-of-datasets/82161
        train_ds = torch.utils.data.Subset(train_ds, samples)

    train_loader, test_loader = get_dataloader(args, train_ds, test_ds)
    model = Downstream_NN(args, config, num_classes=num_classes)
    optimizer = eval("torch.optim." + args.opt)(model.parameters(),
                                                lr=args.lr,
                                                weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=len(train_loader), eta_min=0, last_epoch=-1)

    with torch.cuda.device(args.gpu_index):
        downstream_task = Downstream_Eval(kind=args.downstream,
                                          model=model,
                                          optimizer=optimizer,
                                          scheduler=scheduler,
                                          args=args,
                                          config=config)
        downstream_task.train(train_loader, test_loader)
def train(p, dataset, model, criterion, optimizer):
    loader = get_dataloader(p, dataset)
    model.train()
    epoch_loss = 0
    for step, (images, labels) in enumerate(loader):
        torch.cuda.empty_cache()
        optimizer.zero_grad()
        images = images.to(DEVICE)
        labels = labels.to(DEVICE)
        with torch.cuda.amp.autocast():
            embds = model(images)
            loss = criterion(labels, embds, margin=0.8)
        epoch_loss += loss.item()
        # loss.backward()
        scaler.scale(loss).backward()
        # optimizer.step()
        scaler.step(optimizer)
        scaler.update()
        # print(f"Loss: {loss.item():.3f}, ", end=" ")
    return epoch_loss / len(loader)
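# `scaler` is used above but defined outside this snippet; a minimal sketch of
# how it is typically created for mixed-precision training (an assumption, not
# shown in the original code).
scaler = torch.cuda.amp.GradScaler()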
def main():
    # test(fc_model, test_loader)
    batch_size = 1  # please note: for DeepFool, batch_size MUST be 1
    train_loader, test_loader = get_dataloader(batch_size)

    # Base model, used for classification, also used for crafting adversarial samples
    fc_model = FC_model()
    # Defender: input goes through this before going to the base model
    defender = AE()
    load_model(fc_model, './pretrained_models/fc_model.pth')
    load_model(defender, './pretrained_models/autoencoder_pretrained.pth')
    fc_model.to(device)
    defender.to(device)
    criterion = nn.CrossEntropyLoss()

    # craft adversarial examples for epsilon values in [0, 1] at a step size of 0.05
    '''
    acc_list = []
    for i in range(21):
        acc_list.append(adv_attack(fc_model, defender, test_loader, criterion, i * 0.05))
    print(acc_list)
    '''
    # defender = None

    # FGSM attack
    adv_attack(fc_model, defender, test_loader, criterion, attack_type="fgsm")
    # DeepFool attack
    adv_attack(fc_model, defender, test_loader, criterion, attack_type="deepfool")
    # universal attack
    adv_attack(fc_model, defender, test_loader, criterion, attack_type="universal")
def main():
    train_loader, test_loader = get_dataloader(256)
    # Train the base model and save the parameters
    train_base_model(fc_model, train_loader, test_loader,
                     save_path='./pretrained_models/fc_model.pth')
def attack():
    dataloader = get_dataloader(args)
    target_cls = network_initialization(args)
    attack_module = globals()[args.attack_name.lower()]
    attack_func = getattr(attack_module, args.attack_name)
    attacker = attack_func(target_cls, args)

    save_path = os.path.join("Adv_examples", args.dataset.lower())
    attacker.inference(data_loader=dataloader,
                       save_path=save_path,
                       file_name=args.attack_name + ".pt")
def main():
    batch_size = 200
    train_loader, test_loader = get_dataloader(batch_size)
    model = FC_model()
    load_model(model=model, model_path='./pretrained_models/fc_model.pth')
    criterion = nn.CrossEntropyLoss()
    model.to(device)
    createDatasetForAE(model, train_loader, test_loader, criterion)
def main(args, **model_kwargs):
    device = torch.device(args.device)
    args.device = device

    if args.dataset == 'abilene_tm':
        args.nNodes = 12
        args.day_size = 288
    elif args.dataset == 'geant_tm':
        args.nNodes = 22
        args.day_size = 96
    elif args.dataset == 'brain_tm':
        args.nNodes = 9
    elif 'sinet' in args.dataset:
        args.nNodes = 73
        args.day_size = 288
    else:
        raise ValueError('Dataset not found!')

    test_loader = utils.get_dataloader(args)
    args.test_size, args.nSeries = test_loader.dataset.gt_data_set.shape

    in_dim = 1
    args.in_dim = in_dim

    model = models.get_model(args)
    logger = utils.Logger(args)
    engine = utils.Trainer.from_args(model, test_loader.dataset.scaler, args)
    utils.print_args(args)

    if not args.test:
        test_met_df, x_gt, y_gt, y_real, yhat = engine.test(
            test_loader, engine.model, args.out_seq_len)
        test_met_df.round(6).to_csv(
            os.path.join(logger.log_dir, 'test_metrics.csv'))
        print('Prediction Accuracy:')
        print(utils.summary(logger.log_dir))
        np.save(os.path.join(logger.log_dir, 'x_gt'), x_gt)
        np.save(os.path.join(logger.log_dir, 'y_gt'), y_gt)
        np.save(os.path.join(logger.log_dir, 'y_real'), y_real)
        np.save(os.path.join(logger.log_dir, 'yhat'), yhat)
    else:
        x_gt = np.load(os.path.join(logger.log_dir, 'x_gt.npy'))
        y_gt = np.load(os.path.join(logger.log_dir, 'y_gt.npy'))
        y_real = np.load(os.path.join(logger.log_dir, 'y_real.npy'))
        yhat = np.load(os.path.join(logger.log_dir, 'yhat.npy'))

    if args.plot:
        logger.plot(x_gt, y_real, yhat)

    # run TE
    if args.run_te:
        run_te(x_gt, y_gt, yhat, args)
def generate_feature_vectors(model: nn.Module, train=True):
    dataloader = utils.get_dataloader(args.data_path, IMG_SIZE,
                                      args.batch_size, train)
    feature_vectors, labels = [], []
    for i, data in enumerate(dataloader):
        images, label = data
        feature_vector = model(images.to(device))
        if args.generate_from == 'd':
            feature_vector = feature_vector[1]
        feature_vectors.append(feature_vector.detach().cpu().numpy())
        labels.append(label.detach().cpu().numpy())
    return np.vstack(feature_vectors), np.hstack(labels)
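# Hypothetical downstream use of the extracted features: fit a simple linear
# probe with scikit-learn (an assumption for illustration, not part of the
# original script).
from sklearn.linear_model import LogisticRegression

train_feats, train_labels = generate_feature_vectors(model, train=True)
test_feats, test_labels = generate_feature_vectors(model, train=False)
clf = LogisticRegression(max_iter=1000).fit(train_feats, train_labels)
print("linear probe accuracy:", clf.score(test_feats, test_labels))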
def test_memorize_minibatch(self):
    for db_name in self.db_names:
        db_info = get_db_info(db_name)
        train_data, val_data, _ = get_train_val_test_datasets(
            dataset_name=db_name,
            train_test_split='use_full_train',
            encoders=dict(CATEGORICAL='CategoricalOrdinalEnc',
                          SCALAR='ScalarRobustScalerEnc',
                          DATETIME='DatetimeScalarEnc',
                          LATLONG='LatLongScalarEnc',
                          TEXT='TextSummaryScalarEnc'),
        )
        train_loader = get_dataloader(dataset=train_data,
                                      batch_size=256,
                                      sampler_class_name='SequentialSampler',
                                      num_workers=0,
                                      max_nodes_per_graph=False)
        writer = DummyWriter()
        model = GCN(writer,
                    db_info=db_info,
                    hidden_dim=256,
                    n_init_layers=3,
                    activation_class_name='SELU',
                    activation_class_kwargs={},
                    loss_class_kwargs={},
                    loss_class_name='CrossEntropyLoss',
                    p_dropout=0.0,
                    drop_whole_embeddings=True,
                    n_layers=3,
                    readout_class_name='AvgPooling',
                    readout_kwargs={})
        if torch.cuda.is_available():
            model.cuda()
            model.device = torch.device('cuda:0')
        else:
            model.device = torch.device('cpu')
        model.train()
        optimizer = AdamW(model.parameters(), lr=0.001, weight_decay=0.0)
        bdgl, features, label = next(iter(train_loader))
        recursive_to((bdgl, features, label), model.device)
        for _ in tqdm(range(200)):
            optimizer.zero_grad()
            output = model(bdgl, features)
            loss = model.loss_fxn(output, label)
            if loss < 1e-4:
                break
            loss.backward()
            optimizer.step()
        else:
            tqdm.write(f'Loss: {loss}')
            self.fail("Didn't memorize minibatch")
def setUp(self):
    self.db_info = get_db_info(self.db_name)
    batch_size = 1
    num_workers = 0
    max_nodes_per_graph = 100000
    _ = get_db_container(self.db_name)
    train_data, val_data, test_data = get_train_val_test_datasets(
        dataset_name=self.db_name,
        train_test_split='use_full_train',
        encoders=dict(CATEGORICAL='CategoricalOrdinalEnc',
                      SCALAR='ScalarRobustScalerEnc',
                      DATETIME='DatetimeScalarEnc',
                      LATLONG='LatLongScalarEnc',
                      TEXT='TextSummaryScalarEnc'),
    )
    train_loader = get_dataloader(dataset=train_data,
                                  batch_size=batch_size,
                                  sampler_class_name='SequentialSampler',
                                  num_workers=num_workers,
                                  max_nodes_per_graph=max_nodes_per_graph)
    val_loader = get_dataloader(dataset=val_data,
                                batch_size=batch_size,
                                sampler_class_name='SequentialSampler',
                                num_workers=num_workers,
                                max_nodes_per_graph=max_nodes_per_graph)
    test_loader = get_dataloader(dataset=test_data,
                                 batch_size=batch_size,
                                 sampler_class_name='SequentialSampler',
                                 num_workers=num_workers,
                                 max_nodes_per_graph=max_nodes_per_graph)
    self.loaders = {
        'train': train_loader,
        'val': val_loader,
        'test': test_loader
    }
def run_adv_detection(adv_examples, samples=None,
                      f_pred=utils.posterior_uncertainty,
                      f_acq='f_identity', **kwargs):
    model = kwargs['model']
    if samples is not None:
        model.posterior_samples = utils.prepare_torch_dicts(samples, model)
        model.posterior_weights = [1 for _ in range(len(model.posterior_samples))]
    _, _, testloader, _ = utils.get_dataloader('mnist-40000', 200)
    test_inputs = trim_dataloader(testloader, None, adv_examples.shape[0])
    normality_base_rate, auroc, n_aupr, ab_aupr = (
        utils.show_ood_detection_results_softmax(test_inputs, adv_examples,
                                                 f_pred, kwargs, f_acq))
    print("Anomaly Detection Results:\n"
          "Base Rate: {:.2f}, AUROC: {:.2f}, AUPR+: {:.2f}, AUPR-: {:.2f}".format(
              normality_base_rate, auroc, n_aupr, ab_aupr))
    return normality_base_rate, auroc, n_aupr, ab_aupr
def main(args):
    # test dataloader
    test_dataloader = utils.get_dataloader(args.test_data, args.batch_size)
    test_size = len(test_dataloader.dataset)

    # device
    device = torch.device(args.device)

    # initialize model
    ckpt = torch.load(args.ckpt)
    model_args = ckpt['model_args']
    model_state = ckpt['model_state']
    model = Model(**model_args)
    model.load_state_dict(model_state)

    # loss function
    loss_function = nn.MSELoss()

    # evaluate model
    model.eval()
    model = model.to(device)

    # test
    with tqdm.tqdm(total=test_size, unit=" [TEST] itr") as test_progress_bar:
        test_loss = []
        for i, (x_test, y_test) in enumerate(test_dataloader):
            with torch.no_grad():
                # send data and label to device
                x = torch.Tensor(x_test).to(device)
                y = torch.Tensor(y_test).to(device)

                # predict
                pred = model.forward(x)

                # calculate loss
                test_loss.append(loss_function(pred, y).item())

                test_progress_bar.update(len(x))

    # report average loss
    test_loss_avg = sum(test_loss) / len(test_loss)
    print(f"Average Test Loss: {test_loss_avg}")
def icth(self):
    repeat_times = self.args.repeat_times
    seeds = range(self.args.seed, self.args.seed + repeat_times)
    for seed in seeds:
        seed_everything(seed)
        self.model = get_model(self.args)
        self.optimizer = self.args.optimizer(self.model.parameters(),
                                             lr=self.args.learning_rate)
        self.train_dataloader = get_dataloader(self.args, train=True)
        self.model = self.model.to(DEVICE)
        for epochs in range(1, self.args.epochs + 1):
            self.train_an_epoch()
        with torch.no_grad():
            self.test()
        self.model = self.model.to('cpu')
        torch.cuda.empty_cache()
def main():
    args = update_parser_args_linear_eval()
    config = get_config(args)
    LE_dataset = CustomDataset(config.data)
    train_ds, test_ds = LE_dataset.get_dataset(config.dataset_name)
    train_loader, test_loader = get_dataloader(args, train_ds, test_ds)
    model = get_fine_tune_model(args, config)
    optimizer = eval("torch.optim." + args.opt)(model.parameters(),
                                                lr=args.lr,
                                                weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=len(train_loader), eta_min=0, last_epoch=-1)

    with torch.cuda.device(args.gpu_index):
        fine_tune = Downstream_Eval(kind="fine_tune",
                                    model=model,
                                    optimizer=optimizer,
                                    scheduler=scheduler,
                                    args=args,
                                    config=config)
        fine_tune.train(train_loader, test_loader)
def uncertainty_hist(model, adv_ex, samples):
    model.posterior_samples = utils.prepare_torch_dicts(samples, model)
    model.posterior_weights = [1 for _ in range(len(model.posterior_samples))]
    adv_uncert = utils.posterior_uncertainty(model, adv_ex).data.cpu().numpy()

    _, _, testloader, _ = utils.get_dataloader('mnist-40000', 200)
    test_uncert = []
    for data, label in testloader:
        data = Variable(data)
        if arguments['--cuda']:
            data = data.cuda()
        test_uncert.append(
            utils.posterior_uncertainty(model, data).data.cpu().numpy())
    test_uncert = np.concatenate(test_uncert, 0)

    import matplotlib.pyplot as plt
    test_density, test_edges = np.histogram(test_uncert, 50, range=(0.0, 0.05))
    test_bin = [(test_edges[i] + test_edges[i + 1]) / 2.0
                for i in range(len(test_edges) - 1)]
    adv_density, adv_edges = np.histogram(adv_uncert, 50, range=(0.0, 0.05))
    adv_bin = [(adv_edges[i] + adv_edges[i + 1]) / 2.0
               for i in range(len(adv_edges) - 1)]

    fig = plt.gcf()
    fig.set_size_inches(12, 6)
    l1, = plt.plot(test_bin, test_density, label='Test Set', lw=4)
    l2, = plt.plot(adv_bin, adv_density, label='Adversarial', lw=4)
    plt.legend(handles=[l1, l2])
    plt.tick_params(
        axis='y',          # changes apply to the y-axis
        which='both',      # both major and minor ticks are affected
        left='off',        # ticks along the left edge are off
        right='off',       # ticks along the right edge are off
        labelleft='off')   # labels along the left edge are off
    plt.title('Uncertainty Histogram of Test Data vs. Adv. Examples', size=16)
    plt.xlabel('Uncertainty', size=14)
    plt.ylabel('Frequency', size=14)
    plt.ylim(0.0, 100)
    plt.tight_layout()
    plt.show()
def _write_reals_and_fakes(parent_dir, gen, params, num=1000, device=None):
    dataloader = get_dataloader(params, normalize=False)
    real_dir = f"{parent_dir}/reals"
    fake_dir = f"{parent_dir}/fakes"

    # Delete the directory if it already exists
    if os.path.exists(parent_dir):
        shutil.rmtree(parent_dir)

    # Create all the needed directories
    if not os.path.exists(parent_dir):
        os.mkdir(parent_dir)
    if not os.path.exists(real_dir):
        os.mkdir(real_dir)
    if not os.path.exists(fake_dir):
        os.mkdir(fake_dir)

    # Write the reals to the directory (capped at `num`)
    print(f"Writing center-cropped reals to {real_dir}...")
    total = 0
    for data in tqdm(dataloader, total=int(num / params["bsize"])):
        for img in data[0]:
            if total >= num:
                break
            save_image(img, f"{real_dir}/{total}.png")
            total += 1
        if total >= num:
            break
    num = min(num, total)

    # Write the fakes to `parent_dir`
    print(f"Writing generated fakes to {fake_dir}...")
    for index in tqdm(range(num)):
        z = torch.randn(1, params["nz"], 1, 1, device=device)
        fake = gen(z)[0]
        save_image(fake, f"{fake_dir}/{index}.png")

    return real_dir, fake_dir
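# The two directories returned above are presumably consumed by a
# sample-quality metric such as FID, e.g. via the pytorch-fid package
# (an assumption, not part of the original code):
#   python -m pytorch_fid <real_dir> <fake_dir>
# Hypothetical call, assuming `gen`, `params`, and `device` are in scope as in
# the training script; "./eval_images" is an illustrative path.
real_dir, fake_dir = _write_reals_and_fakes("./eval_images", gen, params,
                                            num=1000, device=device)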
def dump_activations(ds_name, train_kwargs, train_data, encoders, results_dir,
                     model, module_acts_to_dump, num_workers):
    # We dump activations for every datapoint here, even ones that weren't in
    # model's train, val, or test
    train_dp_ids, test_dp_ids = get_train_test_dp_ids(ds_name)
    dp_ids = (np.concatenate([train_dp_ids, test_dp_ids])
              if test_dp_ids is not None else train_dp_ids)
    if ds_name in ['acquirevaluedshopperschallenge', 'homecreditdefaultrisk',
                   'kddcup2014']:
        dataset = DatabaseDataset(ds_name, dp_ids, encoders)
    else:
        dataset = TabularDataset(ds_name, dp_ids, encoders)
    dataset.encode(train_data.feature_encoders)
    loader = get_dataloader(
        dataset=dataset,
        batch_size=train_kwargs['batch_size'],
        sampler_class_name='SequentialSampler',
        num_workers=num_workers,
        max_nodes_per_graph=train_kwargs['max_nodes_per_graph'])

    model.eval()
    acts = []

    def save_acts(module, input, output):
        acts.append(input[0].detach().cpu().numpy())

    module = eval(f'model.{module_acts_to_dump}')
    module.register_forward_hook(save_acts)

    with torch.autograd.no_grad():
        for batch_idx, (input, label) in enumerate(tqdm(loader)):
            recursive_to((input, label), model.device)
            model(input)

    acts = np.concatenate(acts, axis=0)
    np.save(os.path.join(results_dir, f'{module_acts_to_dump}.activations'),
            acts)
    return acts
def test(num_classes):
    net = torch.load('./net_params.pkl').eval()
    loss_func = torch.nn.CrossEntropyLoss()
    test_cf = meter.ConfusionMeter(num_classes)
    test_av = meter.AverageValueMeter()
    test_cf.reset()
    test_av.reset()
    with torch.no_grad():
        for step, (x, y) in enumerate(
                get_dataloader(data_dir=args.data_dir, mode="test",
                               batch_size=args.batch_size)):
            x, y = x.to(DEVICE), y.to(DEVICE)
            output = net(x)
            test_av.add(loss_func(output, y).item())
            test_cf.add(output.cpu().detach(), y.cpu().detach())
    accuracy = sum([test_cf.value()[i][i]
                    for i in range(num_classes)]) / test_cf.value().sum()
    print("\ntest Loss={:.4f} Accuracy={:.4f}\n".format(
        test_av.value()[0], accuracy))
def main(args):
    path = os.path.join('saved_models', args.name)

    print('Starting testing')
    print(f'Command: {sys.argv}')
    for arg, value in sorted(vars(args).items()):
        print(f'Argument {arg}: {value}')

    device = torch.device(
        'cuda' if torch.cuda.is_available() and not args.no_gpu else 'cpu')
    print(f'Using device: {device}')

    # load model
    model = models.get_model(args.model).to(device)
    criterion = torch.nn.CrossEntropyLoss()

    # dataloaders
    print('Loading dataloaders')
    testloader = utils.get_dataloader(args.directory, args.dataset,
                                      args.batch_size, args.num_workers, False)

    if os.path.exists(os.path.join(path, 'model.pt')):
        ckpt = torch.load(os.path.join(path, 'model.pt'))
        model.load_state_dict(ckpt['state_dict'])
        start_epoch = ckpt['epoch'] + 1
        print(f'Loading pre-trained model from epoch {start_epoch}')
    else:
        sys.exit('Saved model not found')

    # test
    start_time = time.time()
    test_acc, test_loss = utils.test(model, testloader, criterion, device)
    test_time = time.time() - start_time
    print(f'Test | Accuracy: {test_acc:.2f}%, Loss: {test_loss:.4f}, '
          f'Time: {test_time:.2f}s')
# "ngf": 64, # Size of feature maps in the generator. The depth will be multiples of this. # "ndf": 64, # Size of features maps in the discriminator. The depth will be multiples of this. "nepochs": 20, # Number of training epochs. "lr": 0.0002, # Learning rate for optimizers "beta1": 0.5, # Beta1 hyperparam for Adam optimizer "beta2": 0.999, # Beta2 hyperparam for Adam optimizer "rel_avg_gan": True, # Use a relativistic average GAN instead of a standard GAN "save_epoch": 2, } # Save step. # Use GPU is available else use CPU. device = torch.device("cuda:0" if (torch.cuda.is_available()) else "cpu") print(device, " will be used.\n") # Configure dataloader dataloader = get_dataloader(params) # Plot the training images. sample_batch = next(iter(dataloader)) plt.figure(figsize=(8, 8)) plt.axis("off") plt.title("Training Images") plt.imshow( np.transpose( vutils.make_grid( sample_batch[0].to(device)[:64], padding=2, normalize=True, ).cpu(), (1, 2, 0), ) )
def fl_train(args, model, fog_graph, nodes, X_trains, y_trains,
             device, epoch, loss_fn='nll'):
    # federated learning with model averaging
    if loss_fn == 'nll':
        loss_fn_ = F.nll_loss
    elif loss_fn == 'hinge':
        loss_fn_ = multiClassHingeLoss()

    model.train()

    worker_data = {}
    worker_targets = {}
    worker_num_samples = {}
    worker_models = {}
    worker_optims = {}
    worker_losses = {}

    # send data, model to workers
    # setup optimizer for each worker
    workers = [_ for _ in nodes.keys() if 'L0' in _]
    for w, x, y in zip(workers, X_trains, y_trains):
        worker_data[w] = x.send(nodes[w])
        worker_targets[w] = y.send(nodes[w])
        worker_num_samples[w] = x.shape[0]

    for w in workers:
        worker_models[w] = model.copy().send(nodes[w])
        node_model = worker_models[w].get()
        worker_optims[w] = optim.SGD(params=worker_models[w].parameters(),
                                     lr=args.lr)

        data = worker_data[w].get()
        target = worker_targets[w].get()
        dataloader = get_dataloader(data, target, args.batch_size)

        for data, target in dataloader:
            data, target = data.to(device), target.to(device)
            worker_optims[w].zero_grad()
            output = node_model(data)
            loss = loss_fn_(output, target)
            loss.backward()
            worker_optims[w].step()
        worker_models[w] = node_model.send(nodes[w])
        worker_losses[w] = loss.item()

    agg = 'L1_W0'
    worker_models[agg] = model.copy().send(nodes[agg])
    children = fog_graph[agg]

    for child in children:
        worker_models[child].move(nodes[agg])

    with torch.no_grad():
        weighted_models = [
            get_model_weights(worker_models[_],
                              worker_num_samples[_] / args.num_train)
            for _ in children
        ]
        model_sum = weighted_models[0]
        for m in weighted_models[1:]:
            model_sum = add_model_weights(model_sum, m)
        worker_models[agg].load_state_dict(model_sum)

    master = get_model_weights(worker_models[agg].get())
    grad = model_gradient(model.state_dict(), master, args.lr)
    model.load_state_dict(master)

    if epoch % args.log_interval == 0:
        loss = np.array([_ for dump, _ in worker_losses.items()])
        print('Train Epoch: {} \tLoss: {:.6f} +- {:.6f} \tGrad: {}'.format(
            epoch, loss.mean(), loss.std(), dict(grad).values()))

    return grad
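# get_model_weights / add_model_weights are used above but not defined in this
# snippet. A minimal sketch, assuming they scale and sum state_dicts for model
# averaging over plain (local) models; these are plausible stand-ins for
# illustration, not the original implementations.
def get_model_weights(model, weight=1.0):
    # Scale every parameter tensor in the model's state_dict by `weight`.
    return {k: v * weight for k, v in model.state_dict().items()}


def add_model_weights(weights_a, weights_b):
    # Element-wise sum of two (already weighted) state_dicts.
    return {k: weights_a[k] + weights_b[k] for k in weights_a}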
def fog_train(args, model, fog_graph, nodes, X_trains, y_trains,
              device, epoch, loss_fn, consensus, rounds, radius, d2d,
              factor=10, alpha_store={}, prev_grad=0,
              shuffle_worker_data=False):
    # fog learning with model averaging
    if loss_fn == 'nll':
        loss_fn_ = F.nll_loss
    elif loss_fn == 'hinge':
        loss_fn_ = multiClassHingeLoss()

    log = []
    log_head = []
    if args.var_theta:
        if args.true_eps:
            log_head.append('est')
        log_head += ['div', 'true_grad']
        if args.dynamic_alpha:
            log_head += ['D', 'mu', args.delta_or_psi, 'eta', 'grad',
                         'omega', 'N', 'L', 'phi']
        log_head += ['rounds', 'agg', 'rho', 'sig', 'cls_n']
        log_head.append('rounded')
    log.append(log_head)

    model.train()

    worker_data = {}
    worker_targets = {}
    worker_num_samples = {}
    worker_models = {}
    worker_optims = {}
    worker_losses = {}

    # send data, model to workers
    # setup optimizer for each worker
    if shuffle_worker_data:
        data = list(zip(X_trains, y_trains))
        shuffle(data)
        X_trains, y_trains = zip(*data)

    workers = [_ for _ in nodes.keys() if 'L0' in _]
    for w, x, y in zip(workers, X_trains, y_trains):
        worker_data[w] = x.send(nodes[w])
        worker_targets[w] = y.send(nodes[w])
        worker_num_samples[w] = x.shape[0]

    for w in workers:
        worker_models[w] = model.copy().send(nodes[w])
        node_model = worker_models[w].get()
        worker_optims[w] = optim.SGD(
            params=node_model.parameters(),
            lr=args.lr * np.exp(-0.01 * epoch) if args.nesterov else args.lr,
            weight_decay=args.decay if loss_fn == 'hinge' else 0,
        )

        data = worker_data[w].get()
        target = worker_targets[w].get()
        dataloader = get_dataloader(data, target, args.batch_size)

        for data, target in dataloader:
            data, target = data.to(device), target.to(device)
            worker_optims[w].zero_grad()
            output = node_model(data)
            loss = loss_fn_(output, target)
            loss.backward()
            worker_optims[w].step()
        worker_models[w] = node_model.send(nodes[w])
        worker_losses[w] = loss.item()

    num_rounds = []
    num_div = []
    var_radius = type(radius) == list

    for l in range(1, len(args.num_clusters) + 1):
        aggregators = [_ for _ in nodes.keys() if 'L{}'.format(l) in _]
        N = len(aggregators)
        cluster_rounds = []
        cluster_div = []
        for a in aggregators:
            agg_log = []
            worker_models[a] = model.copy().send(nodes[a])
            worker_num_samples[a] = 1
            children = fog_graph[a]

            for child in children:
                worker_models[child].move(nodes[a])

            if consensus == 'averaging' or flip(1 - d2d):
                model_sum = averaging_consensus(children, worker_models,
                                                worker_num_samples)
                worker_models[a].load_state_dict(model_sum)
            elif consensus == 'laplacian':
                num_nodes_in_cluster = len(children)
                V = consensus_matrix(
                    num_nodes_in_cluster,
                    radius if not var_radius else radius[l - 1],
                    factor, args.topology)
                eps = get_cluster_eps(children, worker_models,
                                      worker_num_samples, nodes, fog_graph)
                if args.true_eps:
                    est_eps = eps
                    agg_log.append(est_eps)
                    eps = get_true_cluster_eps(children, worker_models,
                                               worker_num_samples, nodes,
                                               fog_graph)
                agg_log.append(eps)
                cluster_div.append(eps)
                if args.var_theta:
                    Z = V - (1 / num_nodes_in_cluster)
                    eig, dump = np.linalg.eig(Z)
                    lamda = eig.max()
                    true_grad = estimate_true_gradient(prev_grad, args.omega)
                    agg_log.append(true_grad)
                    if args.dynamic_alpha:
                        if true_grad:
                            phi = sum(args.num_clusters)
                            L = len(args.num_clusters) + 1
                            num_params = get_num_params(model)
                            if args.delta_or_psi == 'delta':
                                alpha, alpha_log = get_alpha_closed_form(
                                    args, true_grad, phi, N, L, num_params, l)
                            elif args.delta_or_psi == 'psi':
                                alpha, alpha_log = get_alpha_using_psi(
                                    args, phi, N, L, num_params, l)
                            agg_log += alpha_log
                            rounds = estimate_rounds(
                                alpha, num_nodes_in_cluster, eps, lamda)
                        else:
                            rounds = 50
                            agg_log += [''] * 9
                            alpha = 'N/A'
                    else:
                        alpha_store = get_alpha(num_nodes_in_cluster, eps, a,
                                                alpha_store, args.alpha,
                                                args.dynamic_alpha)
                        alpha = alpha_store[a]
                        rounds = estimate_rounds(alpha, num_nodes_in_cluster,
                                                 eps, lamda)
                    agg_log += [rounds, a, lamda, alpha, num_nodes_in_cluster]
                    try:
                        rounds = int(np.ceil(rounds))
                    except TypeError:
                        rounds = 50
                    if rounds > 50:
                        rounds = 50
                    elif rounds < 1:
                        rounds = 1
                cluster_rounds.append(rounds)
                agg_log.append(rounds)
                model_sum = laplacian_consensus(children, worker_models,
                                                worker_num_samples,
                                                V.to(device), rounds)
                agg_model = worker_models[a].get()
                agg_model.load_state_dict(model_sum)
                worker_models[a] = agg_model.send(nodes[a])
            else:
                raise Exception
            log.append(agg_log)
        num_rounds.append(cluster_rounds)
        num_div.append(cluster_div)

    table = AsciiTable(log)
    print(table.table)

    assert len(aggregators) == 1
    master = get_model_weights(worker_models[aggregators[0]].get(),
                               1 / args.num_train)
    grad = model_gradient(model.state_dict(), master, args.lr)
    model.load_state_dict(master)

    if epoch % args.log_interval == 0:
        loss = np.array([_ for dump, _ in worker_losses.items()])
        print('Train Epoch: {}({}) \tLoss: {:.6f} +- {:.6f} \tGrad: {}'.format(
            epoch, len(dataloader), loss.mean(), loss.std(),
            dict(grad).values()))

    return grad, num_rounds, num_div, alpha_store
# -*- coding: utf-8 -*-
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.nn import CrossEntropyLoss
import os

from net import CliqueNet
from utils import get_args, get_dataloader

if __name__ == "__main__":
    args = get_args()
    train_loader, test_loader = get_dataloader(args)
    use_cuda = args.use_cuda
    num_classes = 10
    dropout_prob = 0.1

    # hyper-parameters
    # A, B, C, D, E, r = 32, 32, 32, 32, 10, args.r  # a classic CapsNet
    model = CliqueNet(3, num_classes, 4, 36, attention=True,
                      compression=True, dropout_prob=dropout_prob)
    criterion = CrossEntropyLoss()
    # closs = CrossEntropyLoss()

    with torch.cuda.device(args.gpu):
        # print(args.gpu, type(args.gpu))
        if args.pretrained:
            model.load_state_dict(torch.load(args.pretrained))
        if use_cuda: