def run_infer(weights_folder_path, cfg):
    cfg.pretrained = False
    # For a local test, modify the following path to the actual path.
    cfg.data_folder = cfg.data_dir + "test/"
    to_device_transform = ToDeviced(
        keys=("input", "target", "mask", "is_annotated"), device=cfg.device
    )

    all_path = []
    for path in glob.iglob(os.path.join(weights_folder_path, "*.pth")):
        all_path.append(path)

    nets = []
    for path in all_path:
        state_dict = torch.load(path)["model"]
        new_state_dict = {}
        for k, v in state_dict.items():
            new_state_dict[k.replace("module.", "")] = v
        net = RanzcrNet(cfg).eval().to(cfg.device)
        net.load_state_dict(new_state_dict)
        # The decoder and segmentation head are only needed during training.
        del net.decoder
        del net.segmentation_head
        nets.append(net)

    test_df = pd.read_csv(cfg.test_df)
    test_dataset = get_test_dataset(test_df, cfg)
    test_dataloader = get_test_dataloader(test_dataset, cfg)

    with torch.no_grad():
        fold_preds = [[] for _ in range(len(nets))]
        for batch in tqdm(test_dataloader):
            batch = to_device_transform(batch)
            for i, net in enumerate(nets):
                if cfg.mixed_precision:
                    with autocast():
                        logits = net(batch)["logits"].cpu().numpy()
                else:
                    logits = net(batch)["logits"].cpu().numpy()
                fold_preds[i] += [logits]
        fold_preds = [np.concatenate(p) for p in fold_preds]

    # Average sigmoid probabilities across folds.
    preds = np.stack(fold_preds)
    preds = expit(preds)
    preds = np.mean(preds, axis=0)

    sub_df = test_df.copy()
    sub_df[cfg.label_cols] = preds
    submission = pd.read_csv(cfg.test_df)
    submission.loc[sub_df.index, cfg.label_cols] = sub_df[cfg.label_cols]
    submission.to_csv("submission.csv", index=False)
def fedavg(node_K_list, fed_train_time=params.fed_train_time):
    # return get_net(), 0  # for testing
    test_dataloader = get_test_dataloader()
    test_acc = 0
    server = Server()
    clients = []
    l = len(node_K_list)
    # Add every leaf node to the training pool.
    for i in range(l):
        # clients.append(node_K_list[i].provider)
        add_tree_to_list(node_K_list[i], clients)
    num_client = len(clients)
    print('number of clients to train:', num_client)

    # Federated training rounds.
    for i in range(fed_train_time):
        print('training round', i)
        # Local training on each client.
        for j in range(num_client):
            # print('client index starting training:', j)
            clients[j].train()
        # Clients upload their weights to the server.
        for j in range(num_client):
            cli_w = clients[j].get_net_w()
            server.add_w(cli_w)
        # The server averages the collected weights.
        print('server averaging')
        server.avg()
        # The server broadcasts the model (and learning rate) to the clients.
        ser_w = server.get_net_w()
        # ser_lr = server.get_lr()
        # print('learning rate this round:', ser_lr)
        for j in range(num_client):
            clients[j].update_net_w(ser_w)
            # clients[j].lr = ser_lr
        test_acc, outputs = server.test(test_dataloader)

    # Save pab outputs.
    outputs_pab_dir = params.dataset_division_testno + '/fed_pab' + str(params.no_papa_pab) + '.npy'
    save_outputs(outputs, outputs_pab_dir)
    # Return the test accuracy (used as v).
    return server.net, test_acc
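# Hedged sketch (not from this source): the Server class is not shown above,
# but the server-side FedAvg step it performs via add_w()/avg() is presumably
# a uniform element-wise mean over the collected client state dicts. The
# function name and structure below are illustrative assumptions.
import torch

def average_state_dicts(state_dicts):
    """Uniform FedAvg: element-wise mean of a list of model state_dicts."""
    avg = {}
    for key in state_dicts[0]:
        # Cast to float so integer buffers (e.g. num_batches_tracked) average cleanly.
        avg[key] = torch.stack([sd[key].float() for sd in state_dicts]).mean(dim=0)
    return avg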
                    help='number of workers for dataloader')
parser.add_argument('-b', type=int, default=16,
                    help='batch size for dataloader')
parser.add_argument('-s', type=bool, default=True,
                    help='whether to shuffle the dataset')
args = parser.parse_args()

net = get_network(args)

tumor_test_loader = get_test_dataloader(
    # settings.CIFAR100_PATH,
    num_workers=args.w,
    batch_size=args.b,
    shuffle=args.s)

# load_state_dict takes (state_dict, strict); args.gpu does not belong here.
net.load_state_dict(torch.load(args.weights))
print(net)
net.eval()

# correct_1 = 0.0
correct = 0.0
total = 0.0

# Compute the total loss over the test set.
for n_iter, (images, labels) in enumerate(tumor_test_loader):
    images = Variable(images).cuda()
    labels = Variable(labels).cuda()
    output = net(images)
net = get_network(args, num_class)

# data preprocessing:
train_loader = get_train_dataloader(args.dataset,
                                    settings.DATASET_PATH[args.dataset],
                                    settings.TRAIN_MEAN[args.dataset],
                                    settings.TRAIN_STD[args.dataset],
                                    num_workers=4,
                                    batch_size=args.batch,
                                    shuffle=True)
test_loader = get_test_dataloader(args.dataset,
                                  settings.DATASET_PATH[args.dataset],
                                  settings.TEST_MEAN[args.dataset],
                                  settings.TEST_STD[args.dataset],
                                  num_workers=4,
                                  batch_size=args.batch,
                                  shuffle=True)

loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)
train_scheduler = optim.lr_scheduler.MultiStepLR(
    optimizer, milestones=settings.MILESTONES, gamma=0.2)  # learning rate decay
iter_per_epoch = len(train_loader)
warmup_scheduler = WarmUpLR(optimizer, iter_per_epoch * args.warm)
description="a cli that supports train/tl/attack") parser.add_argument("--dataset", required=True, help="the dataset, supporting cifar10 and cifar100") parser.add_argument("--model_path", required=True, help="the path of the model") parser.add_argument("--save_path", default="", help="the path to save result") args = parser.parse_args() if args.dataset == "cifar10": model = resnet.resnet50(num_classes=10) test_loader = get_test_dataloader("cifar10", batch_size=BATCH_SIZE) elif args.dataset == "cifar100": model = resnet.resnet50() test_loader = get_test_dataloader("cifar100", batch_size=BATCH_SIZE) else: raise NotImplemented("only support cifar10 or cifar100") model.load_state_dict(torch.load(args.model_path, map_location=DEVICE)) model.to(DEVICE) for attacker in (FGSMAttack, PGDAttack): results = test_attack(model, test_loader, attacker) if len(args.save_path) != 0: with open(os.path.join(args.save_path, f"{attacker.__name__}.json"), "w",
def main(args):
    ###
    CHECKPOINT_PATH = 'checkpoint'
    EPOCH = 75
    MILESTONES = [50]
    TIME_NOW = datetime.now().isoformat()
    LOG_DIR = 'runs'
    DATASET = 'cifar-100'
    SAVE_EPOCH = 15
    ###

    classes = [i for i in range(100)]
    training_batches = [
        classes[i:i + args.step_classes]
        for i in range(0, len(classes), args.step_classes)
    ]
    net = get_network(args, use_gpu=True)
    checkpoint_path = os.path.join(CHECKPOINT_PATH, DATASET,
                                   str(args.step_classes),
                                   str(args.buffer_size), args.net,
                                   str(TIME_NOW))
    old_data_batch = []
    incremental_accuracy = []
    criterion = nn.CrossEntropyLoss()
    replay_dataloader = None
    replay_dataset = get_buffer_dataset(buffer_size=args.buffer_size)

    for idx, training_batch in enumerate(training_batches):
        print('Training batch: {}'.format(training_batch))

        # data preprocessing:
        training_loader = get_training_dataloader(include_list=training_batch,
                                                  num_workers=args.w,
                                                  batch_size=args.b,
                                                  shuffle=args.s)
        test_loader = get_test_dataloader(include_list=training_batch + old_data_batch,
                                          num_workers=args.w,
                                          batch_size=args.b,
                                          shuffle=args.s)
        new_test_loader = get_test_dataloader(include_list=training_batch,
                                              num_workers=args.w,
                                              batch_size=args.b,
                                              shuffle=args.s)
        if idx > 0:
            old_test_loader = get_test_dataloader(include_list=old_data_batch,
                                                  num_workers=args.w,
                                                  batch_size=args.b,
                                                  shuffle=args.s)
        if idx > 0:
            EPOCH = 30  # Monica

        if idx > len(training_batches) // 3:
            lr = 0.01
        else:
            lr = 0.1

        new_data_optimizer = optim.SGD(net.parameters(), lr=0.1,
                                       momentum=0.9, weight_decay=5e-4)
        # optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=5e-4)
        train_scheduler = optim.lr_scheduler.MultiStepLR(new_data_optimizer,
                                                         milestones=MILESTONES,
                                                         gamma=0.1)
        iter_per_epoch = float(len(training_loader))

        # create checkpoint folder to save model
        if not os.path.exists(checkpoint_path):
            Path(checkpoint_path).mkdir(parents=True, exist_ok=True)
        ckp_path = os.path.join(checkpoint_path, '{net}-{idx}-{epoch}-{type}.pth')

        with tqdm(total=EPOCH) as pbar:
            for epoch in range(1, EPOCH):
                if epoch == EPOCH // 3 and idx > 0:
                    lr *= .1
                net.train()
                avg_learning_ratio = 0
                if idx > 0:
                    # old_dataloader = replay_manager.get_dataloader(batch_size=args.b)
                    # old_dataiter = iter(old_dataloader)
                    replay_dataloader = DataLoader(dataset=replay_dataset,
                                                   shuffle=True,
                                                   batch_size=args.b)
                    old_dataiter = iter(replay_dataloader)

                for batch_index, (images, labels) in enumerate(training_loader):
                    if idx > 0:
                        try:
                            old_images, old_labels = next(old_dataiter)
                        except StopIteration:
                            old_dataiter = iter(replay_dataloader)
                            old_images, old_labels = next(old_dataiter)
                        from PIL import Image
                        # im = Image.fromarray(old_images[0].mul_(255).permute(1, 2, 0).to('cpu', torch.uint8).numpy())
                        # im.save('sample_old.png')
                        old_images_gpu = old_images.cuda()
                        old_labels_gpu = old_labels.cuda()

                        # Measure the gradient magnitude on the replayed (old) data.
                        net.zero_grad()
                        old_outputs = net(old_images_gpu)
                        old_data_loss = criterion(old_outputs, old_labels_gpu)
                        old_data_loss.backward()
                        old_data_gradient_magnitudes = []
                        # old_gradient_data = []
                        for f in net.parameters():
                            old_data_gradient_magnitudes.append(f.grad.norm(2).item() ** 2)
                            # old_gradient_data.append(f.grad.data)
                        old_magnitude = np.sum(np.asarray(old_data_gradient_magnitudes))

                    # Measure the gradient magnitude on the new data.
                    new_labels_gpu = labels.cuda()
                    new_images_gpu = images.cuda()
                    net.zero_grad()
                    outputs = net(new_images_gpu)
                    new_data_loss = criterion(outputs, new_labels_gpu)
                    new_data_loss.backward()
                    new_data_gradient_magnitudes = []
                    # new_gradient_data = []
                    for f in net.parameters():
                        new_data_gradient_magnitudes.append(f.grad.norm(2).item() ** 2)
                        # new_gradient_data.append(f.grad.data)
                    new_magnitude = np.sum(np.asarray(new_data_gradient_magnitudes))

                    if idx > 0:
                        # Decide what to learn from based on the old/new gradient ratio.
                        learning_ratio = old_magnitude / new_magnitude
                        avg_learning_ratio += learning_ratio
                        if learning_ratio < .01:
                            net.zero_grad()
                            outputs = net(new_images_gpu)
                            new_data_loss = criterion(outputs, new_labels_gpu)
                            new_data_loss.backward()
                            for f in net.parameters():
                                f.data.sub_(lr * f.grad.data)
                            # print('Learning weighted new -- {}'.format(learning_ratio))
                        elif learning_ratio < .1:
                            combined_images = torch.cat([images, old_images], axis=0)
                            combined_labels = torch.cat([labels, old_labels], axis=0)
                            combined_images = combined_images.cuda()
                            combined_labels = combined_labels.cuda()
                            net.zero_grad()
                            outputs = net(combined_images)
                            combined_data_loss = criterion(outputs, combined_labels)
                            combined_data_loss.backward()
                            for f in net.parameters():
                                f.data.sub_(lr * f.grad.data)
                            # print('Learning combined! -- {}'.format(learning_ratio))
                        else:
                            net.zero_grad()
                            old_outputs = net(old_images_gpu)
                            old_data_loss = criterion(old_outputs, old_labels_gpu)
                            old_data_loss.backward()
                            for f in net.parameters():
                                f.data.sub_(0.1 * f.grad.data)
                            # print('Learning old! -- {}'.format(learning_ratio))
                    else:
                        new_data_optimizer.step()
                        train_scheduler.step(epoch)

                    if (epoch == 1 or epoch == EPOCH - 1) and batch_index == 0:
                        print('New Batch Magnitude is {} at epoch {}'.format(new_magnitude, epoch))
                        draw_magnitudes(new_data_gradient_magnitudes,
                                        '_'.join(str(i) for i in training_batch),
                                        checkpoint_path,
                                        '{}_{}'.format(idx, epoch))
                        if idx > 0:
                            print('Old Batch Magnitude is {} at epoch {}'.format(old_magnitude, epoch))
                            draw_magnitudes(old_data_gradient_magnitudes,
                                            'old Class', checkpoint_path,
                                            'old_{}_{}'.format(idx, epoch))

                print('Learning magnitude ratio {}'.format(avg_learning_ratio / iter_per_epoch))
                if idx > 0:
                    print('Training Epoch: {epoch} \tNew Loss: {:0.4f}\t Old Loss: {:0.4f}'.format(
                        new_data_loss.item() / images.size(0),
                        old_data_loss.item() / old_images.size(0),
                        epoch=epoch))

                loss_value, acc = evaluate(net, new_test_loader, criterion)
                print('New Test set: Average loss: {:.4f}, Accuracy: {:.4f}'.format(loss_value, acc))
                if idx > 0:
                    loss_value, acc = evaluate(net, old_test_loader, criterion)
                    print('Old Test set: Average loss: {:.4f}, Accuracy: {:.4f}'.format(loss_value, acc))
                loss_value, acc = evaluate(net, test_loader, criterion)
                print('Complete Test set: Average loss: {:.4f}, Accuracy: {:.4f}'.format(loss_value, acc))
                if epoch == EPOCH - 1:
                    incremental_accuracy.append(acc.float())

                if not epoch % SAVE_EPOCH:
                    torch.save(net.state_dict(),
                               ckp_path.format(net=args.net, idx=idx, epoch=epoch, type='regular'))
                pbar.update(1)

        torch.save(net.state_dict(),
                   ckp_path.format(net=args.net, idx=idx, epoch=epoch, type='end'))

        # Populate Replay Buffer
        replay_dataset.append_data(training_batch)
        old_data_batch += training_batch
        replay_dataloader = DataLoader(dataset=replay_dataset, batch_size=args.b)
        loss_value, acc = evaluate(net, replay_dataloader, criterion)
        print('Replay Train set: Average loss: {:.4f}, Accuracy: {:.4f}'.format(loss_value, acc))

    print(incremental_accuracy)
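# The squared-gradient-magnitude measurement above is repeated verbatim for the
# old and new batches; a hedged helper capturing the same computation (assuming
# backward() has populated .grad on the parameters of interest):
def grad_sq_magnitude(model):
    """Sum of squared L2 norms of all parameter gradients."""
    return sum(p.grad.norm(2).item() ** 2
               for p in model.parameters() if p.grad is not None)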
                    type=str, required=True,
                    help='the weights file you want to test')
parser.add_argument('-gpu', type=bool, default=True, help='use gpu or not')
parser.add_argument('-b', type=int, default=16, help='batch size for dataloader')
args = parser.parse_args()

net = get_network(args)

cifar100_test_loader = get_test_dataloader(
    settings.CIFAR100_TRAIN_MEAN,
    settings.CIFAR100_TRAIN_STD,
    # settings.CIFAR100_PATH,
    num_workers=4,
    batch_size=args.b,
)

net.load_state_dict(torch.load(args.weights))
print(net)
net.eval()

correct_1 = 0.0
correct_5 = 0.0
total = 0

with torch.no_grad():
    for n_iter, (image, label) in enumerate(cifar100_test_loader):
        print("iteration: {}\ttotal {} iterations".format(
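# get_test_dataloader is imported from the repo's utilities and is not shown
# here. A minimal sketch consistent with the call sites above (positional
# mean/std, keyword num_workers/batch_size/shuffle) for CIFAR-100; the root
# path and defaults are illustrative assumptions:
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

def get_test_dataloader(mean, std, batch_size=16, num_workers=2, shuffle=False):
    """Build a normalized CIFAR-100 test loader."""
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ])
    cifar100_test = torchvision.datasets.CIFAR100(
        root='./data', train=False, download=True, transform=transform_test)
    return DataLoader(cifar100_test, shuffle=shuffle,
                      num_workers=num_workers, batch_size=batch_size)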
                    type=float)
parser.add_argument("--phases", default=WARM_PHASES,
                    help="epochs for warming up the model", type=int)
parser.add_argument("--old", help="old model path")
parser.add_argument("--attacker",
                    help="attacker; FGSMAttack and PGDAttack are supported")
parser.add_argument("--epsilon", help="epsilon", type=float)
args = parser.parse_args()

if args.pattern in ("train", "attack"):
    train_loader = get_training_dataloader(args.dataset, args.batch, args.num_worker)
    test_loader = get_test_dataloader(args.dataset, args.batch, args.num_worker)
    if args.dataset == "cifar100":
        model = resnet.resnet50()
    elif args.dataset == "cifar10":
        model = resnet.resnet50(num_classes=10)
    else:
        raise Exception(f"unsupported dataset: {args.dataset}")
    trainer = Trainer(model, train_loader, test_loader, args.device, args.lr,
                      args.momentum, args.epochs, args.batch,
                      DEFAULT_PARALLELISM, MILESTONES, args.gamma, args.phases)
    if args.pattern == "train":
        trainer.train(args.save_path)
    else:
        if args.dataset == "cifar10":
import numpy
from dataset import TumorTrain, TumorTest
import matplotlib.pyplot as plt
from utils import get_training_dataloader, get_test_dataloader

# train
tumor_train_dataset = TumorTrain()
print(len(tumor_train_dataset))
tumor_training_loader = get_training_dataloader(
    num_workers=4,
    batch_size=30,
    shuffle=True
)
for i_batch, (sample_image, sample_label) in enumerate(tumor_training_loader):
    print(i_batch, sample_image.size(), sample_label.size())

# test
tumor_test_dataset = TumorTest()
print(len(tumor_test_dataset))
tumor_test_loader = get_test_dataloader(
    num_workers=4,
    batch_size=30,
    shuffle=True
)
for i_batch, (sample_image, sample_label) in enumerate(tumor_test_loader):
    print(i_batch, sample_image.size(), sample_label.size())

# for i in range(len(tumor_train_dataset)):
#     sample, label = tumor_train_dataset[i]
#     print(sample.size(), label.size())
def main():
    global best_acc
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    print('==> Preparing dataset %s' % args.dataset)
    if args.dataset == 'cifar100':
        training_loader = get_training_dataloader(settings.CIFAR100_TRAIN_MEAN,
                                                  settings.CIFAR100_TRAIN_STD,
                                                  num_workers=4,
                                                  batch_size=args.train_batch,
                                                  shuffle=True)
        test_loader = get_test_dataloader(settings.CIFAR100_TRAIN_MEAN,
                                          settings.CIFAR100_TRAIN_STD,
                                          num_workers=4,
                                          batch_size=args.test_batch,
                                          shuffle=False)
        num_classes = 100
    else:
        training_loader = get_training_dataloader_10(
            settings.CIFAR10_TRAIN_MEAN,
            settings.CIFAR10_TRAIN_STD,
            num_workers=4,
            batch_size=args.train_batch,
            shuffle=True)
        test_loader = get_test_dataloader_10(settings.CIFAR10_TRAIN_MEAN,
                                             settings.CIFAR10_TRAIN_STD,
                                             num_workers=4,
                                             batch_size=args.test_batch,
                                             shuffle=False)
        num_classes = 10

    # data preprocessing:
    print("==> creating model '{}'".format(args.arch))
    model = get_network(args, num_classes=num_classes)
    model = torch.nn.DataParallel(model).cuda()
    cudnn.benchmark = True
    print('    Total params: %.2fM' % (sum(p.numel() for p in model.parameters()) / 1000000.0))

    criterion1 = am_softmax.AMSoftmax()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)

    title = 'cifar-10-' + args.arch
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(args.resume), 'Error: no checkpoint directory found!'
        args.checkpoint = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title, resume=True)
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names([
            'Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.', 'Valid Acc.'
        ])

    train_scheduler = optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=args.schedule, gamma=0.2)  # learning rate decay
    iter_per_epoch = len(training_loader)
    warmup_scheduler = WarmUpLR(optimizer, iter_per_epoch * args.warm)

    for epoch in range(start_epoch, args.epochs):
        if epoch > args.warm:
            train_scheduler.step(epoch)

        train_loss, train_acc = train(training_loader, model, warmup_scheduler,
                                      criterion, criterion1, optimizer, epoch, use_cuda)
        test_loss, test_acc = eval_training(test_loader, model, criterion, epoch, use_cuda)

        logger.append([
            optimizer.param_groups[0]['lr'], train_loss, test_loss, train_acc, test_acc
        ])

        # save model
        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'acc': test_acc,
                'best_acc': best_acc,
                'optimizer': optimizer.state_dict(),
            },
            is_best,
            checkpoint=args.checkpoint)

    logger.close()
    # logger.plot()
    # savefig(os.path.join(args.checkpoint, 'log.eps'))

    print('Best acc:')
    print(best_acc)
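# WarmUpLR is imported from the repo's utilities and, per the call sites above,
# is stepped once per training iteration for the first args.warm epochs. A
# minimal sketch consistent with that usage (linear warmup from ~0 up to the
# base learning rate); treat this as an assumption, not the repo's exact code:
from torch.optim.lr_scheduler import _LRScheduler

class WarmUpLR(_LRScheduler):
    """Linearly scale the learning rate over the first total_iters steps."""

    def __init__(self, optimizer, total_iters, last_epoch=-1):
        self.total_iters = total_iters
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        # last_epoch counts scheduler steps; the epsilon avoids division by zero.
        return [base_lr * self.last_epoch / (self.total_iters + 1e-8)
                for base_lr in self.base_lrs]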
# training set
trainpath = '/home/steadysjtu/classification/train_2/'
# test set
testpath = '/home/steadysjtu/classification/test_2/'
# cell sub-image path
logpath = '/home/steadysjtu/classification/efficientnet.txt'

# data preprocessing:
# preprocessing reference: https://www.cnblogs.com/wanghui-garcia/p/11448460.html
cell_training_loader = get_training_dataloader(path=trainpath,
                                               mean=cell_train_mean,
                                               std=cell_train_std,
                                               num_workers=4,
                                               batch_size=args.b,
                                               shuffle=True)
cell_test_loader = get_test_dataloader(path=testpath,
                                       mean=cell_train_mean,
                                       std=cell_train_std,
                                       num_workers=4,
                                       batch_size=args.b,
                                       shuffle=True)
cell_train_test_loader = get_test_dataloader(path=trainpath,
                                             mean=cell_train_mean,
                                             std=cell_train_std,
                                             num_workers=4,
                                             batch_size=args.b,
                                             shuffle=True)

loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)
train_scheduler = optim.lr_scheduler.MultiStepLR(
    optimizer, milestones=settings.MILESTONES, gamma=0.2)
correct_1 = 0.0
correct_5 = 0.0
total = 0
video = []
class_id = []
for video_file in tqdm(os.listdir('./data/new_test_pic/')):  # modify as needed
    test_path = './data/new_test_pic/' + video_file + '/'  # modify as needed
    video.append(video_file)
    if len(os.listdir(test_path)) == 0:
        id = 1
        class_id.append('snow_leopard_' + str(id + 1))
    else:
        cifar100_test_loader = get_test_dataloader(
            test_path,
            num_workers=args.w,
            batch_size=args.b,
        )
        list_id = []
        for n_iter, (image1, image2, image3, image4, image5, image6) in enumerate(cifar100_test_loader):
            image1 = image1.cuda()
            image2 = image2.cuda()
            image3 = image3.cuda()
            image4 = image4.cuda()
            image5 = image5.cuda()
            image6 = image6.cuda()
            output1 = net(image1)
            output2 = net(image2)
                    help='initial learning rate')
args = parser.parse_args()

net = get_network(args, use_gpu=args.gpu)  # build the network via the helper in utils.py

############### load the training and test data ###############
imagenet_training_loader = get_training_dataloader(
    global_settings.IMAGENET_TRAIN_MEAN,
    global_settings.IMAGENET_TRAIN_STD,
    num_workers=args.w,
    batch_size=args.b,
    shuffle=args.s)
imagenet_test_loader = get_test_dataloader(
    global_settings.IMAGENET_TRAIN_MEAN,
    global_settings.IMAGENET_TRAIN_STD,  # the original passed the mean twice; the std belongs here
    num_workers=args.w,
    batch_size=args.b,
    shuffle=args.s)

############### choose the loss function and optimizer ###############
loss_function = nn.CrossEntropyLoss()
# optimizer = optim.Adam(net.parameters())  # Adam
############### SGD with a decaying learning rate; the schedule below is unnecessary with Adam ###############
optimizer = optim.SGD(
    net.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4
)  # weight decay regularizes the cost function, limiting free parameters to avoid overfitting
train_scheduler = optim.lr_scheduler.MultiStepLR(
    optimizer, milestones=global_settings.MILESTONES, gamma=0.2)  # SGD learning rate decay
                    help='warm up training phase')
parser.add_argument('-lr', type=float, default=0.001, help='initial learning rate')
args = parser.parse_args()

net = get_network(args, use_gpu=args.gpu)

# data preprocessing:
tumor_training_loader = get_training_dataloader(num_workers=args.w,
                                                batch_size=args.b,
                                                shuffle=args.s)
tumor_test_loader = get_test_dataloader(num_workers=args.w,
                                        batch_size=args.b,
                                        shuffle=args.s)

loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)
train_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                       mode='min',
                                                       factor=np.sqrt(0.1))
# train_scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=settings.MILESTONES, gamma=0.2)  # learning rate decay
iter_per_epoch = len(tumor_training_loader)
# warmup_scheduler = WarmUpLR(optimizer, iter_per_epoch * args.warm)
checkpoint_path = os.path.join(settings.CHECKPOINT_PATH, args.net, settings.TIME_NOW)
# torch.save(model_B.model.state_dict(), os.path.join(save_path, f'resnet18-{epoch}.pth'))

if __name__ == '__main__':
    model_path = './checkpoint/resnet18/resnet18-160-best.pth'
    # save_path = './checkpoint/resnet18_copy_t'
    cifar100_training_loader = get_training_dataloader(
        settings.CIFAR100_TRAIN_MEAN,
        settings.CIFAR100_TRAIN_STD,
        num_workers=1,
        batch_size=64,
        shuffle=False)
    cifar100_test_loader = get_test_dataloader(settings.CIFAR100_TRAIN_MEAN,
                                               settings.CIFAR100_TRAIN_STD,
                                               num_workers=1,
                                               batch_size=30,
                                               shuffle=False)

    from models.resnet import resnet18

    model_A = resnet18().cuda()
    model_A.load_state_dict(torch.load(model_path))
    model_A = ModelWrapper(model_A)
    model_B = resnet18().cuda()
    model_B.load_state_dict(torch.load(model_path))
    model_B = ModelWrapper(model_B)

    d_a = Discriminator().cuda()
    d_b = Discriminator().cuda()
# net.stage4.apply(weights_init)
# net = net.to(device)

# data preprocessing:
cifar10_training_loader = get_training_dataloader(
    CIFAR10_TRAIN_MEAN,
    CIFAR10_TRAIN_STD,
    num_workers=args.w,
    batch_size=args.b,
    shuffle=args.s
)
cifar10_test_loader = get_test_dataloader(
    CIFAR10_TRAIN_MEAN,
    CIFAR10_TRAIN_STD,
    num_workers=args.w,
    batch_size=args.b,
    shuffle=args.s
)

loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)
train_scheduler = optim.lr_scheduler.MultiStepLR(
    optimizer, milestones=MILESTONES, gamma=0.2)  # learning rate decay
iter_per_epoch = len(cifar10_training_loader)
warmup_scheduler = WarmUpLR(optimizer, iter_per_epoch * args.warm)

best_acc = 0.0
a = time.time()
for epoch in range(1, EPOCH):
    if epoch > args.warm:
        train_scheduler.step(epoch)
# data preprocessing:
training_loader = get_training_dataloader(
    args.dataset,
    dataset_mean,
    dataset_std,  # assumed fix: the original passed dataset_mean twice; the std presumably belongs here
    num_workers=args.w,
    batch_size=args.b,
    shuffle=args.s,
    randSub=args.randSub,
    randSubPerc=args.randSubPerc
)
test_loader = get_test_dataloader(
    args.dataset,
    dataset_mean,
    dataset_std,  # assumed fix, as above
    num_workers=args.w,
    batch_size=args.b,
    shuffle=args.s
)

net = get_network(args, model_args, use_gpu=args.gpu)
loss_function = nn.CrossEntropyLoss(reduction='none')
checkpoint_path = os.path.join(settings.CHECKPOINT_PATH, args.dataset, args.net, args.logDir)
optimizer = optim.Adam(net.parameters(), lr=1e-4)
start_epoch = 1
end_epoch = settings.EPOCH

# use tensorboard
if not os.path.exists(settings.LOG_DIR):
    im.save(filename)

torch.manual_seed(42)
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

model = vgg19_bn()
model.load_state_dict(torch.load("checkpoint/vgg_baseline.pth"))
model.to(device)
model.eval()

cifar100_test_loader = get_test_dataloader(
    settings.CIFAR100_TRAIN_MEAN,
    settings.CIFAR100_TRAIN_STD,
    # settings.CIFAR100_PATH,
    num_workers=2,
    batch_size=16,
    shuffle=True
)

adversary = GradientSignAttack(
    model,
    loss_fn=nn.CrossEntropyLoss(reduction="sum"),
    eps=0.3,
    clip_min=0.0,
    clip_max=1.0,
    targeted=False)

correct_1 = 0.0
correct_5 = 0.0
attack_correct_1 = 0.0
attack_correct_5 = 0.0
total = 0

for n_iter, (image, label) in enumerate(cifar100_test_loader):
    print("iteration: {}\ttotal {} iterations".format(n_iter + 1, len(cifar100_test_loader)))
checkpoint = torch.load('checkpoint/net.pkl')
net.load_state_dict(checkpoint['net'])
print("checkpoint load success")
print(checkpoint['epoch'])

# data preprocessing:
cifar100_training_loader = get_training_dataloader(
    global_settings.CIFAR100_TRAIN_MEAN,
    global_settings.CIFAR100_TRAIN_STD,
    num_workers=args.w,
    batch_size=args.b,
    shuffle=args.s)
cifar100_test_loader = get_test_dataloader(
    global_settings.CIFAR100_TRAIN_MEAN,
    global_settings.CIFAR100_TRAIN_STD,
    num_workers=args.w,
    batch_size=args.b,
    shuffle=args.s)

loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.9,
                      dampening=0, weight_decay=5e-4)
train_scheduler = optim.lr_scheduler.MultiStepLR(
    optimizer, milestones=global_settings.MILESTONES, gamma=0.2)
iter_per_epoch = len(cifar100_training_loader)
warmup_scheduler = WarmUpLR(optimizer, iter_per_epoch * 1)
checkpoint_path = 'checkpoint/net.pkl'
def main():
    # release gpu memory
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    torch.cuda.empty_cache()
    # torch.cuda.ipc_collect()

    # get the list of csv training files
    # training_csv_files = glob(os.path.join(FLAGS.train_data_path, "*.csv"))

    # build training, validation, and test data loaders
    print(' Preparing the data!')
    # Fix the random seed for identical experiments
    # train_loader, test_loader, test_length = \
    #     get_dataloaders(training_csv_files[0: FLAGS.num_classes], FLAGS.test_data_path, FLAGS.num_data_per_class)

    # build test and train loaders for cifar100
    train_loader = get_training_dataloader(
        settings.CIFAR100_TRAIN_MEAN,
        settings.CIFAR100_TRAIN_STD,
        batch_size=FLAGS.batch_size,
    )
    test_loader = get_test_dataloader(
        settings.CIFAR100_TRAIN_MEAN,
        settings.CIFAR100_TRAIN_STD,
        batch_size=FLAGS.batch_size,
    )
    test_length = len(test_loader.dataset)

    kwargs = {
        "kernels_path_7": FLAGS.kernels_path_7,
        "kernels_path_3": FLAGS.kernels_path_3,
        "num_kernels_7": FLAGS.num_kernels_7,
        "num_kernels_3": FLAGS.num_kernels_3,
        "num_classes": FLAGS.num_classes
    }
    loss_function = nn.CrossEntropyLoss()

    for conv_model in ["Conv2d", "Conv2dRF"]:
        for resnet_arch in ["resnet18", "resnet34", "resnet50"]:
            name = resnet_arch + '_' + conv_model
            kwargs["conv_model"] = conv_model
            model = getattr(resnet_mod, resnet_arch)(**kwargs)
            optimizer = torch.optim.Adam(model.parameters(), lr=FLAGS.learning_rate)
            if torch.cuda.device_count() > 1:
                model = nn.DataParallel(model)

            print(' Started training!')
            # Table for output of validation; allocated once per architecture so
            # every run's row survives until the final np.save below.
            val_output = np.zeros((FLAGS.num_runs, 102))
            for run_id in range(FLAGS.num_runs):
                torch.cuda.empty_cache()
                # model.weights_init()
                if torch.cuda.device_count() > 1:
                    model.module.weights_init()
                else:
                    model.weights_init()
                model.to(device)

                for epoch in range(FLAGS.num_epochs):
                    model.train()
                    for batch_idx, (images_, labels_) in tqdm(enumerate(train_loader)):
                        # print("batch_{}: {}".format(batch_idx, torch.cuda.memory_cached()/1e6))
                        train_function(model, optimizer, loss_function, device, images_, labels_)
                        if (batch_idx + 1) % FLAGS.log_interval == 0 or (batch_idx + 1) == len(train_loader):
                            # test_function(
                            #     model, test_loader, device, test_length, FLAGS.batch_size,
                            #     FLAGS.train_data_path,
                            #     os.path.join(FLAGS.save_path, "submissions",
                            #                  name+'_r={}_e={}_idx={}.csv'.format(run_id, epoch+1, batch_idx+1)))
                            model.train()  # reset back to train mode

                val_oa, val_aa, val_pca = val_full(model, device, test_loader, 100)
                val_output[run_id, 0] = val_oa
                val_output[run_id, 1] = val_aa
                val_output[run_id, 2:] = val_pca

                # saving the model
                torch.save({'model_state_dict': model.state_dict()},
                           os.path.join(FLAGS.save_path, "models",
                                        "{}.pt".format(name + '_r={}'.format(run_id + 1))))

            np.save(os.path.join(FLAGS.save_path, "validation_{}.npy".format(name)), val_output)
    mean = (0.4914, 0.4822, 0.4465)
    std = (0.2023, 0.1994, 0.2010)
else:
    print("invalid task!!")

cifar100_training_loader = get_training_dataloader(mean,
                                                   std,
                                                   num_workers=4,
                                                   batch_size=args.b,
                                                   shuffle=True,
                                                   alpha=args.alpha,
                                                   task=args.task,
                                                   da=args.da)
cifar100_test_loader = get_test_dataloader(settings.CIFAR100_TRAIN_MEAN,
                                           settings.CIFAR100_TRAIN_STD,
                                           num_workers=4,
                                           batch_size=args.b,
                                           shuffle=False,
                                           task=args.task)
# test training acc
cifar100_train_test_loader = get_test_dataloader(
    settings.CIFAR100_TRAIN_MEAN,
    settings.CIFAR100_TRAIN_STD,
    num_workers=4,
    batch_size=args.b,
    shuffle=False,
    task=args.task,
    train=True)

settings.MILESTONES = [120, 150, 180]

if args.loss == "ce":
    if args.grad:
def test_model(conv: int, fcl: int, model_path: str, args, gpu: bool = False):
    import resource

    # load dataset
    cifar100_test_loader = get_test_dataloader(
        settings.CIFAR100_TRAIN_MEAN,
        settings.CIFAR100_TRAIN_STD,
        # settings.CIFAR100_PATH,
        num_workers=4,
        batch_size=args['b'],
    )

    print(f' #2 Running with option GPU={gpu}')
    weights = load_weights_from_server(model_path, gpu=gpu)
    net, arch_name = construct_vgg_variant(conv_variant=conv,
                                           fcl_variant=fcl,
                                           batch_norm=True,
                                           progress=True,
                                           pretrained=False)
    if gpu:
        net.cuda()
    # net.load_state_dict(torch.load(args.weights))
    net.load_state_dict(weights)

    correct_1 = 0.0
    correct_5 = 0.0
    total = 0

    start = time.time()
    with torch.no_grad():
        for n_iter, (image, label) in enumerate(cifar100_test_loader):
            print("iteration: {}\ttotal {} iterations".format(
                n_iter + 1, len(cifar100_test_loader)))
            if gpu:
                image = image.cuda()
                label = label.cuda()
            output = net(image)
            _, pred = output.topk(5, 1, largest=True, sorted=True)
            label = label.view(label.size(0), -1).expand_as(pred)
            correct = pred.eq(label).float()
            # compute top 5
            correct_5 += correct[:, :5].sum()
            # compute top 1
            correct_1 += correct[:, :1].sum()
            # break
    end = time.time()

    # peak = p.memory_info().peak_wset
    # print(peak)
    top1 = 1 - correct_1 / len(cifar100_test_loader.dataset)
    top5 = 1 - correct_5 / len(cifar100_test_loader.dataset)

    if gpu:
        print('GPU INFO.....')
        print(torch.cuda.memory_summary(), end='')
    # else:
    #     print('CPU INFO.....')
    #     print(torch.cuda.memory_summary(device=torch.device('cpu')), end='')

    print()
    print("Top 1 err: ", top1)
    print("Top 5 err: ", top5)
    print("Parameter numbers: {}".format(sum(p.numel() for p in net.parameters())))

    duration = end - start
    print(f'Duration for inference is {duration} seconds')
    max_memory = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
    print(f'Max memory is {max_memory}')
    scoped_memory_track()

    # Data to save:
    # - top1 err
    # - top5 err
    # - duration
    # - memory used
    # Note: despite the key names, top1/top5 here hold error rates, not accuracies.
    inference_result = {
        'top1_acc': float(top1),
        'top5_acc': float(top5),
        'duration_s': duration,
        'used_memory': 1  # placeholder
    }
    return inference_result
def main_worker(gpu, ngpus_per_node, args):
    global best_acc1
    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)

    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    else:
        print("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch]()

    if args.distributed:
        # For multiprocessing distributed, the DistributedDataParallel
        # constructor should always set the single device scope; otherwise
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            if args.gpu is not None:
                # best_acc1 may be from a checkpoint from a different GPU
                best_acc1 = best_acc1.to(args.gpu)
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    # traindir = os.path.join(args.data, 'train')
    # valdir = os.path.join(args.data, 'val')
    # normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
    #                                  std=[0.229, 0.224, 0.225])
    # train_dataset = datasets.ImageFolder(
    #     traindir,
    #     transforms.Compose([
    #         transforms.RandomResizedCrop(224),
    #         transforms.RandomHorizontalFlip(),
    #         transforms.ToTensor(),
    #         normalize,
    #     ]))

    if args.distributed:
        # NOTE: train_dataset is only built in the commented-out block above,
        # so the distributed branch needs it restored before use.
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    else:
        train_sampler = None

    # train_loader = torch.utils.data.DataLoader(
    #     train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
    #     num_workers=args.workers, pin_memory=True, sampler=train_sampler)
    # val_loader = torch.utils.data.DataLoader(
    #     datasets.ImageFolder(valdir, transforms.Compose([
    #         transforms.Resize(256),
    #         transforms.CenterCrop(224),
    #         transforms.ToTensor(),
    #         normalize,
    #     ])),
    #     batch_size=args.batch_size, shuffle=False,
    #     num_workers=args.workers, pin_memory=True)
    train_loader = utils.get_training_dataloader(mean=[0.485, 0.456, 0.406],
                                                 std=[0.229, 0.224, 0.225])
    val_loader = utils.get_test_dataloader(mean=[0.485, 0.456, 0.406],
                                           std=[0.229, 0.224, 0.225])

    if args.evaluate:
        validate(val_loader, model, criterion, args)
        return

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch, args)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, args)

        # evaluate on validation set
        acc1 = validate(val_loader, model, criterion, args)

        # remember best acc@1 and save checkpoint
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)

        if not args.multiprocessing_distributed or (
                args.multiprocessing_distributed and args.rank % ngpus_per_node == 0):
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_acc1': best_acc1,
                    'optimizer': optimizer.state_dict(),
                }, is_best)
                    help='save path')
parser.add_argument('-dataset', type=str, default='cifar10')
args = parser.parse_args()

# data preprocessing:
if args.dataset == 'cifar10':
    training_loader = get_training_dataloader(settings.CIFAR10_TRAIN_MEAN,
                                              settings.CIFAR10_TRAIN_STD,
                                              num_workers=args.w,
                                              batch_size=args.b,
                                              shuffle=args.s,
                                              dataset=args.dataset)
    test_loader = get_test_dataloader(settings.CIFAR10_TRAIN_MEAN,
                                      settings.CIFAR10_TRAIN_STD,
                                      num_workers=args.w,
                                      batch_size=args.b,
                                      shuffle=args.s,
                                      dataset=args.dataset)
    num_classes = 10

args.num_classes = num_classes
net = get_network(args, use_gpu=args.gpu)

loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)
train_scheduler = optim.lr_scheduler.MultiStepLR(
    optimizer, milestones=settings.MILESTONES, gamma=0.2)  # learning rate decay
iter_per_epoch = len(training_loader)
                    help='initial learning rate')
args = parser.parse_args()

# net: backbone
# net = ChannelDistillResNet1834()
net = ChannelDistillResNet1850()
net = net.cuda()
# print("load success!!")

# data preprocessing:
training_loader = get_training_dataloader(num_workers=args.numworks,
                                          batch_size=args.batch_size,
                                          shuffle=args.shuffle)
test_loader = get_test_dataloader(num_workers=args.numworks,
                                  batch_size=args.batch_size,
                                  shuffle=args.shuffle)

loss_function = nn.CrossEntropyLoss()
loss_function1 = LabelSmoothCEloss()
optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)
train_scheduler = optim.lr_scheduler.MultiStepLR(
    optimizer, milestones=settings.MILESTONES, gamma=0.2)  # learning rate decay
iter_per_epoch = len(training_loader)
warmup_scheduler = WarmUpLR(optimizer, iter_per_epoch * args.warm)
checkpoint_path = os.path.join(settings.CHECKPOINT_PATH, args.net, settings.TIME_NOW)
    return correct.float() / len(cifar10_test_loader.dataset)

if __name__ == '__main__':
    training_loader = get_training_dataloader(args.data_root,
                                              (0.4914, 0.4822, 0.4465),
                                              (0.2023, 0.1994, 0.2010),
                                              num_workers=args.w,
                                              batch_size=args.b,
                                              shuffle=args.s)
    cifar10_test_loader = get_test_dataloader(args.data_root,
                                              (0.4914, 0.4822, 0.4465),
                                              (0.2023, 0.1994, 0.2010),
                                              num_workers=args.w,
                                              batch_size=args.b,
                                              shuffle=args.s)
    net = ResNet_disout(depth=56,
                        num_classes=10,
                        dist_prob=args.dist_prob,
                        block_size=args.block_size,
                        alpha=args.alpha,
                        nr_steps=len(training_loader) * args.epochs).cuda()
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)
    std = (0.2023, 0.1994, 0.2010)
else:
    print("invalid task!!")

cifar_training_loader = get_training_dataloader(mean,
                                                std,
                                                num_workers=4,
                                                batch_size=args.b,
                                                shuffle=True,
                                                alpha=0.0,
                                                task=args.task,
                                                da=True)
cifar_test_loader = get_test_dataloader(mean,
                                        std,
                                        num_workers=4,
                                        batch_size=args.b,
                                        shuffle=False,
                                        task=args.task)
# test training acc
cifar_train_test_loader = get_test_dataloader(mean,
                                              std,
                                              num_workers=4,
                                              batch_size=args.b,
                                              shuffle=False,
                                              task=args.task,
                                              train=True)

loss_function = nn.CrossEntropyLoss()
if args.optimizer == 'sgd':
    print("using sgd!")
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from config.config import get_cfg_defaults
from utils import get_test_dataloader
from nncls.models import build_network

if __name__ == '__main__':
    cfg = get_cfg_defaults()
    os.environ['CUDA_VISIBLE_DEVICES'] = cfg.TEST.GPU_ID
    net = build_network(cfg)

    cifar100_test_loader = get_test_dataloader(
        cfg.CIFAR100_TRAIN_MEAN,
        cfg.CIFAR100_TRAIN_STD,
        num_workers=4,
        batch_size=cfg.TEST.BATCH,
    )

    net.load_state_dict(torch.load(cfg.TEST.WEIGHT))
    print(net)
    net.eval()

    correct_1 = 0.0
    correct_5 = 0.0
    total = 0

    with torch.no_grad():
        for n_iter, (image, label) in enumerate(cifar100_test_loader):
            print("iteration: {}\ttotal {} iterations".format(
                n_iter + 1, len(cifar100_test_loader)))
                    help='resume training')
args = parser.parse_args()

net = get_network(args)

# data preprocessing:
cifar100_training_loader = get_training_dataloader(
    settings.CIFAR100_TRAIN_MEAN,
    settings.CIFAR100_TRAIN_STD,
    num_workers=4,
    batch_size=args.b,
    shuffle=True)
cifar100_test_loader = get_test_dataloader(settings.CIFAR100_TRAIN_MEAN,
                                           settings.CIFAR100_TRAIN_STD,
                                           num_workers=4,
                                           batch_size=args.b,
                                           shuffle=True)

loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)
train_scheduler = optim.lr_scheduler.MultiStepLR(
    optimizer, milestones=settings.MILESTONES, gamma=0.2)  # learning rate decay
iter_per_epoch = len(cifar100_training_loader)
warmup_scheduler = WarmUpLR(optimizer, iter_per_epoch * args.warm)

if args.resume:
"batch_size": args.b, "lr": args.lr, "path": args.domain }) #data preprocessing: cifar100_training_loader = get_training_dataloader(CIFAR100_TRAIN_MEAN, CIFAR100_TRAIN_STD, num_workers=16, batch_size=args.b, domain=args.domain, shuffle=True) cifar100_test_loader = get_test_dataloader(CIFAR100_TRAIN_MEAN, CIFAR100_TRAIN_STD, num_workers=16, batch_size=args.b, domain=args.domain, shuffle=True) loss_function = nn.CrossEntropyLoss() optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4) train_scheduler = optim.lr_scheduler.MultiStepLR( optimizer, milestones=settings.MILESTONES, gamma=0.1) #learning rate decay iter_per_epoch = len(cifar100_training_loader) checkpoint_path = os.path.join(settings.CHECKPOINT_PATH, args.net, settings.TIME_NOW)