def run(args):
    """Run test-set inference with the best checkpoint and report its metrics.

    Loads ``cfg.json`` from ``args.model_path``, restores ``best.ckpt``,
    builds the test DataLoader and delegates prediction to ``test_epoch``.

    Args:
        args: parsed CLI namespace; uses model_path, device_ids, fl,
            in_csv_path, out_csv_path and num_workers.
    """
    # NOTE(review): string concatenation assumes args.model_path ends with a
    # path separator — os.path.join would be safer; confirm caller convention.
    with open(args.model_path + 'cfg.json') as f:
        cfg = edict(json.load(f))

    device_ids = list(map(int, args.device_ids.split(',')))
    num_devices = torch.cuda.device_count()
    if num_devices < len(device_ids):
        raise Exception('#available gpu : {} < --device_ids : {}'.format(
            num_devices, len(device_ids)))
    # All tensors/checkpoints are mapped onto the first requested GPU.
    device = torch.device('cuda:{}'.format(device_ids[0]))

    ckpt_path = os.path.join(args.model_path, 'best.ckpt')
    ckpt = torch.load(ckpt_path, map_location=device)
    # args.fl arrives as the literal string 'True'/'False' from the CLI.
    if args.fl == 'True':
        # Federated checkpoints hold plain (non-DataParallel) state dicts.
        model = Classifier(cfg).to(device).eval()
        model.load_state_dict(ckpt['state_dict'])
    else:
        model = Classifier(cfg)
        model = DataParallel(model, device_ids=device_ids).to(device).eval()
        model.module.load_state_dict(ckpt['state_dict'])

    dataloader_test = DataLoader(ImageDataset(args.in_csv_path, cfg,
                                              mode='test'),
                                 batch_size=cfg.dev_batch_size,
                                 num_workers=args.num_workers,
                                 drop_last=False,
                                 shuffle=False)

    test_epoch(cfg, args, model, dataloader_test, args.out_csv_path)

    print('Save best is step :', ckpt['step'], 'AUC :', ckpt['auc_dev_best'])
def __init__(self,
             data_root=r'F:\datasets\vangogh2photo',
             lr=2e-4,
             lr_decay=5e-5,
             content_weight=1,
             style_weight=1e-2,
             use_cuda=True,
             show_result_every=100,
             max_iteration=160000):
    """Set up the style-transfer trainer: model, dataset, optimiser, bookkeeping.

    Args:
        data_root: root directory of the content/style image dataset.
        lr: initial learning rate for Adam.
        lr_decay: learning-rate decay factor (applied elsewhere in training).
        content_weight: weight of the content loss term.
        style_weight: weight of the style loss term.
        use_cuda: move the model to GPU when True.
        show_result_every: iteration interval for intermediate visualisation.
        max_iteration: total number of training iterations.
    """
    self.model = StyleTransformer()
    self.use_cuda = use_cuda
    if use_cuda:
        self.model.cuda()
    self.dataset = ImageDataset(
        root=data_root,
        transform=T.Compose([
            # T.RandomCrop(size=(128, 128)),
            T.ToTensor(),
            # T.Normalize([0.5], [0.5])
        ]))
    self.init_lr = lr
    self.lr_decay = lr_decay
    # Only decoder parameters are optimised; the encoder stays fixed.
    self.optim = optim.Adam(self.model.decoder.parameters(), lr=lr)
    self.batch_size = 8
    self.content_weight = content_weight
    self.style_weight = style_weight
    self.show_result_every = show_result_every
    self.max_iteration = max_iteration
    # Per-iteration loss histories for monitoring/plotting.
    self.content_loss = []
    self.style_loss = []
def profile_model(cfg, train_file, save_prefix, use_fl):
    """Profile one forward pass with THOP and extrapolate total training MACs.

    Profiles a single training batch on CPU, then multiplies the per-batch
    MACs by the total number of batches the training schedule will run
    (federated: steps * local epochs * communication rounds; centralised:
    steps * epochs). Results are printed and written to ``save_prefix``.

    Args:
        cfg: easydict config (train_batch_size, epoch, local_epoch, ...).
        train_file: CSV path for the training ImageDataset.
        save_prefix: output file path for the formatted results.
        use_fl: True when profiling the federated training schedule.
    """
    model = Classifier(cfg)
    dataloader_train = DataLoader(ImageDataset(train_file, cfg, mode='train'),
                                  batch_size=cfg.train_batch_size,
                                  num_workers=4,
                                  drop_last=True,
                                  shuffle=False)
    device = torch.device("cpu")
    # Custom pooling modules THOP cannot count on its own.
    custom_ops = {
        ExpPool: count_exp_pool,
        LinearPool: count_lin_pool,
        LogSumExpPool: count_log_sum_exp_pool,
        torch.nn.modules.activation.Sigmoid: count_sig,
    }
    # Profile exactly one batch — per-batch cost is constant across training.
    for data in dataloader_train:
        inputs = data[0].to(device)
        macs, params = profile(model, inputs=(inputs, ),
                               custom_ops=custom_ops)
        break
    steps = len(dataloader_train)
    if use_fl:
        # Federated schedule: every communication round replays local epochs.
        comm_rounds = cfg.epoch
        epochs = cfg.local_epoch
        total_batches = steps * epochs * comm_rounds
    else:
        epochs = cfg.epoch
        total_batches = steps * epochs
    # When comparing MACs /FLOPs, we want the number to be implementation-agnostic and as general as possible.
    # The THOP library therefore only considers the number of multiplications and ignore all other operations.
    total_macs = macs * total_batches
    # Conventional approximation: 1 MAC ~= 2 FLOPs (one multiply + one add).
    total_flops_approx = 2 * total_macs
    total_macs_formatted, _ = clever_format([total_macs, params], "%.5f")
    total_flops_approx_formatted, _ = clever_format(
        [total_flops_approx, params], "%.5f")
    print(f"Total MACs: {total_macs_formatted}")
    print(f"Approximate Total FLOPs: {total_flops_approx_formatted}")
    # Save results to file
    with open(save_prefix, "w") as f:
        f.write(f"Total MACs: {total_macs_formatted}\n")
        f.write(f"Approximate Total FLOPs: {total_flops_approx_formatted}")
# NOTE(review): the two statements below appear to be the tail of a training
# loop whose `def` lies outside this chunk — `model`, `train_data_loader`,
# `epoch` and `loss` are not defined here; confirm against the full file.
_eval(model, train_data_loader)
print(f"Loss epoch {epoch}: {loss}")


def _eval(model: torch.nn.Module,
          data_loader: DataLoader,
          steps: Optional[int] = 1):
    """Decode each batch with the model and print input / target / prediction.

    Args:
        model: autoregressive model passed to ``sample`` for generation.
        data_loader: yields batches with ``.x`` and ``.y`` tensor attributes.
        steps: number of sampling steps forwarded to ``sample``.
    """
    dataset = data_loader.dataset
    for batch_idx, data_batch in enumerate(data_loader):
        x, y = data_batch.x, data_batch.y
        # NOTE(review): relies on module-level `use_cuda`, `seq_len`, `OFFSET`.
        if use_cuda:
            x, y = x.cuda(), y.cuda()
        pred = sample(model, x, steps, seq_len)
        _pred = dataset.decode(pred)
        # Strip the conditioning prefix before display.
        _pred = [i[OFFSET:] for i in _pred]
        print(
            f" x: {dataset.decode(x)}, gt: {dataset.decode(y)} pred: {_pred}")


if __name__ == "__main__":
    dataset = ImageDataset()
    # train_dataset = build_dataset(block_size=seq_len)
    # train_data_loader = build_dataloader(train_dataset)
    # model_encdec = make_model(train_dataset.vocab_size, train_dataset.vocab_size, d_model=dmodel)
    # model_gpt = make_gpt(train_dataset.vocab_size, d_model=dmodel)
    # print(f'Model num params (EncDec): {num_parameters(model_encdec)}')
    # print(f'Model num params (GPT): {num_parameters(model_gpt)}')
    # if use_cuda:
    #     model_gpt.cuda()
    # train(model_gpt, train_data_loader)
    # _eval(model_gpt, train_data_loader)
print(f"Using model '{paramsfile}' at step: {ckpt['step']} " f"with AUC: {ckpt['auc_dev_best']}") return model.eval() with open(join(opt.model, 'cfg.json'), 'r') as fp: cfg = edict(json.load(fp)) classes = [ 'Cardiomegaly', 'Edema', 'Consolidation', 'Atelectasis', 'Pleural Effusion' ] num_tasks = len(cfg.num_classes) model = build_model(cfg, join(opt.model, 'best.ckpt')) dataset = ImageDataset(opt.dataset, cfg, mode='heatmap') dataloader = DataLoader(dataset, batch_size=opt.batch_size, shuffle=False, drop_last=False) gcam = GradCAM(model=model) layer = 'backbone.features.denseblock4.denselayer15' for images, paths, labels in dataloader: probs, ids = gcam.forward(images) processed_images = [[] for _ in range(len(images))] for i in range(ids.shape[1]): gcam.backward(ids[:, [i]]) regions = gcam.generate(target_layer=layer) for j in range(len(images)):
def run(args):
    """Supervised training loop: train each epoch, validate, checkpoint bests.

    Reads the JSON config, builds a DataParallel Classifier plus train/dev
    DataLoaders, then for each epoch runs ``train_epoch``/``test_epoch``,
    computes per-class dev AUC, logs to TensorBoard, and saves rotating
    ``best{k}.ckpt`` files (by cfg.best_target) and a resumable ``train.ckpt``.

    Args:
        args: parsed CLI namespace (cfg_path, save_path, device_ids, resume,
            pre_train, num_workers, verbose, logtofile).
    """
    with open(args.cfg_path) as f:
        cfg = edict(json.load(f))
        if args.verbose is True:
            print(json.dumps(cfg, indent=4))

    if not os.path.exists(args.save_path):
        os.mkdir(args.save_path)
    if args.logtofile is True:
        logging.basicConfig(filename=args.save_path + '/log.txt',
                            filemode="w", level=logging.INFO)
    else:
        logging.basicConfig(level=logging.INFO)

    # Snapshot the config next to the checkpoints (skip when resuming so the
    # original training config is preserved).
    if not args.resume:
        with open(os.path.join(args.save_path, 'cfg.json'), 'w') as f:
            json.dump(cfg, f, indent=1)

    device_ids = list(map(int, args.device_ids.split(',')))
    num_devices = torch.cuda.device_count()
    if num_devices < len(device_ids):
        raise Exception('#available gpu : {} < --device_ids : {}'.format(
            num_devices, len(device_ids)))
    device = torch.device('cuda:{}'.format(device_ids[0]))

    model = Classifier(cfg)
    if args.verbose is True:
        from torchsummary import summary
        if cfg.fix_ratio:
            h, w = cfg.long_side, cfg.long_side
        else:
            h, w = cfg.height, cfg.width
        summary(model.to(device), (3, h, w))
    model = DataParallel(model, device_ids=device_ids).to(device).train()
    if args.pre_train is not None:
        if os.path.exists(args.pre_train):
            ckpt = torch.load(args.pre_train, map_location=device)
            model.module.load_state_dict(ckpt)
    optimizer = get_optimizer(model.parameters(), cfg)

    src_folder = os.path.dirname(os.path.abspath(__file__)) + '/../'
    dst_folder = os.path.join(args.save_path, 'classification')
    # Source-tree snapshot into the run directory, currently disabled:
    # rc, size = subprocess.getstatusoutput('dir --max-depth=0 %s | cut -f1'
    #                                       % src_folder)
    # if rc != 0:
    #     print(size)
    #     raise Exception('Copy folder error : {}'.format(rc))
    # rc, err_msg = subprocess.getstatusoutput('cp -R %s %s' % (src_folder,
    #                                                           dst_folder))
    # if rc != 0:
    #     raise Exception('copy folder error : {}'.format(err_msg))

    # Keep copies of the exact CSV splits used for this run.
    copyfile(cfg.train_csv, os.path.join(args.save_path, 'train.csv'))
    copyfile(cfg.dev_csv, os.path.join(args.save_path, 'valid.csv'))

    dataloader_train = DataLoader(ImageDataset(cfg.train_csv, cfg,
                                               mode='train'),
                                  batch_size=cfg.train_batch_size,
                                  num_workers=args.num_workers,
                                  drop_last=True,
                                  shuffle=True)
    dataloader_dev = DataLoader(ImageDataset(cfg.dev_csv, cfg, mode='dev'),
                                batch_size=cfg.dev_batch_size,
                                num_workers=args.num_workers,
                                drop_last=False,
                                shuffle=False)
    dev_header = dataloader_dev.dataset._label_header

    summary_train = {'epoch': 0, 'step': 0}
    summary_dev = {'loss': float('inf'), 'acc': 0.0}
    summary_writer = SummaryWriter(args.save_path)
    epoch_start = 0
    best_dict = {
        "acc_dev_best": 0.0,
        "auc_dev_best": 0.0,
        "loss_dev_best": float('inf'),
        "fused_dev_best": 0.0,
        "best_idx": 1
    }

    if args.resume:
        # Restore weights and counters from the rolling train.ckpt.
        ckpt_path = os.path.join(args.save_path, 'train.ckpt')
        ckpt = torch.load(ckpt_path, map_location=device)
        model.module.load_state_dict(ckpt['state_dict'])
        summary_train = {'epoch': ckpt['epoch'], 'step': ckpt['step']}
        best_dict['acc_dev_best'] = ckpt['acc_dev_best']
        best_dict['loss_dev_best'] = ckpt['loss_dev_best']
        best_dict['auc_dev_best'] = ckpt['auc_dev_best']
        epoch_start = ckpt['epoch']

    for epoch in range(epoch_start, cfg.epoch):
        # Step-decay learning-rate schedule applied to every param group.
        lr = lr_schedule(cfg.lr, cfg.lr_factor, summary_train['epoch'],
                         cfg.lr_epochs)
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr

        summary_train, best_dict = train_epoch(
            summary_train, summary_dev, cfg, args, model, dataloader_train,
            dataloader_dev, optimizer, summary_writer, best_dict, dev_header)

        time_now = time.time()
        summary_dev, predlist, true_list = test_epoch(
            summary_dev, cfg, args, model, dataloader_dev)
        time_spent = time.time() - time_now

        # Per-class ROC AUC on the dev set.
        auclist = []
        for i in range(len(cfg.num_classes)):
            y_pred = predlist[i]
            y_true = true_list[i]
            fpr, tpr, thresholds = metrics.roc_curve(y_true, y_pred,
                                                     pos_label=1)
            auc = metrics.auc(fpr, tpr)
            auclist.append(auc)
        summary_dev['auc'] = np.array(auclist)

        loss_dev_str = ' '.join(
            map(lambda x: '{:.5f}'.format(x), summary_dev['loss']))
        acc_dev_str = ' '.join(
            map(lambda x: '{:.3f}'.format(x), summary_dev['acc']))
        auc_dev_str = ' '.join(
            map(lambda x: '{:.3f}'.format(x), summary_dev['auc']))

        logging.info('{}, Dev, Step : {}, Loss : {}, Acc : {}, Auc : {},'
                     'Mean auc: {:.3f} '
                     'Run Time : {:.2f} sec'.format(
                         time.strftime("%Y-%m-%d %H:%M:%S"),
                         summary_train['step'], loss_dev_str, acc_dev_str,
                         auc_dev_str, summary_dev['auc'].mean(), time_spent))

        for t in range(len(cfg.num_classes)):
            summary_writer.add_scalar('dev/loss_{}'.format(dev_header[t]),
                                      summary_dev['loss'][t],
                                      summary_train['step'])
            summary_writer.add_scalar('dev/acc_{}'.format(dev_header[t]),
                                      summary_dev['acc'][t],
                                      summary_train['step'])
            summary_writer.add_scalar('dev/auc_{}'.format(dev_header[t]),
                                      summary_dev['auc'][t],
                                      summary_train['step'])

        # Track running bests; only cfg.best_target triggers a best-checkpoint.
        save_best = False
        mean_acc = summary_dev['acc'][cfg.save_index].mean()
        if mean_acc >= best_dict['acc_dev_best']:
            best_dict['acc_dev_best'] = mean_acc
            if cfg.best_target == 'acc':
                save_best = True
        mean_auc = summary_dev['auc'][cfg.save_index].mean()
        if mean_auc >= best_dict['auc_dev_best']:
            best_dict['auc_dev_best'] = mean_auc
            if cfg.best_target == 'auc':
                save_best = True
        mean_loss = summary_dev['loss'][cfg.save_index].mean()
        if mean_loss <= best_dict['loss_dev_best']:
            best_dict['loss_dev_best'] = mean_loss
            if cfg.best_target == 'loss':
                save_best = True

        if save_best:
            # Rotating pool of best checkpoints: best1..best{save_top_k}.
            torch.save(
                {
                    'epoch': summary_train['epoch'],
                    'step': summary_train['step'],
                    'acc_dev_best': best_dict['acc_dev_best'],
                    'auc_dev_best': best_dict['auc_dev_best'],
                    'loss_dev_best': best_dict['loss_dev_best'],
                    'state_dict': model.module.state_dict()
                },
                os.path.join(args.save_path,
                             'best{}.ckpt'.format(best_dict['best_idx'])))
            best_dict['best_idx'] += 1
            if best_dict['best_idx'] > cfg.save_top_k:
                best_dict['best_idx'] = 1
            logging.info('{}, Best, Step : {}, Loss : {}, Acc : {},'
                         'Auc :{},Best Auc : {:.3f}'.format(
                             time.strftime("%Y-%m-%d %H:%M:%S"),
                             summary_train['step'], loss_dev_str,
                             acc_dev_str, auc_dev_str,
                             best_dict['auc_dev_best']))
        # Always refresh the resumable checkpoint at the end of the epoch.
        torch.save(
            {
                'epoch': summary_train['epoch'],
                'step': summary_train['step'],
                'acc_dev_best': best_dict['acc_dev_best'],
                'auc_dev_best': best_dict['auc_dev_best'],
                'loss_dev_best': best_dict['loss_dev_best'],
                'state_dict': model.module.state_dict()
            },
            os.path.join(args.save_path, 'train.ckpt'))
    summary_writer.close()
def run(args, val_h5_file):
    """Training loop variant backed by pre-extracted HDF5/NPY image chunks.

    Like the CSV-based trainer, but training images come from NPY chunk files
    (``args.train_chunks``) with labels from ``train_labels.h5``, and the dev
    set comes from the already-open ``val_h5_file``. Adds per-class q/k
    parameter tensors and a squared-hinge loss passed through to
    ``train_epoch``/``test_epoch``.

    Args:
        args: parsed CLI namespace (cfg_path, save_path, device_ids, resume,
            pre_train, num_workers, verbose, logtofile, train_chunks,
            chunk_count, q, k).
        val_h5_file: open h5py file with 'val', 'val_u_ones', 'val_u_zeros'
            and 'val_u_random' datasets.
    """
    with open(args.cfg_path) as f:
        cfg = edict(json.load(f))
        if args.verbose is True:
            print(json.dumps(cfg, indent=4))

    if not os.path.exists(args.save_path):
        os.mkdir(args.save_path)
    if args.logtofile is True:
        logging.basicConfig(filename=args.save_path + '/log.txt',
                            filemode="w", level=logging.INFO)
    else:
        logging.basicConfig(level=logging.INFO)

    if not args.resume:
        with open(os.path.join(args.save_path, 'cfg.json'), 'w') as f:
            json.dump(cfg, f, indent=1)

    device_ids = list(map(int, args.device_ids.split(',')))
    num_devices = torch.cuda.device_count()
    if num_devices < len(device_ids):
        raise Exception('#available gpu : {} < --device_ids : {}'.format(
            num_devices, len(device_ids)))
    device = torch.device('cuda:{}'.format(device_ids[0]))

    model = Classifier(cfg)
    if args.verbose is True:
        from torchsummary import summary
        if cfg.fix_ratio:
            h, w = cfg.long_side, cfg.long_side
        else:
            h, w = cfg.height, cfg.width
        summary(model.to(device), (3, h, w))
    model = DataParallel(model, device_ids=device_ids).to(device).train()
    if args.pre_train is not None:
        if os.path.exists(args.pre_train):
            ckpt = torch.load(args.pre_train, map_location=device)
            model.module.load_state_dict(ckpt)
    optimizer = get_optimizer(model.parameters(), cfg)

    # Source-tree / CSV snapshotting from the original trainer, disabled here:
    #src_folder = os.path.dirname(os.path.abspath(__file__)) + '/../'
    #dst_folder = os.path.join(args.save_path, 'classification')
    #rc, size = subprocess.getstatusoutput('du --max-depth=0 %s | cut -f1' % src_folder)
    #if rc != 0: raise Exception('Copy folder error : {}'.format(rc))
    #rc, err_msg = subprocess.getstatusoutput('cp -R %s %s' % (src_folder, dst_folder))
    #if rc != 0: raise Exception('copy folder error : {}'.format(err_msg))
    #copyfile(cfg.train_csv, os.path.join(args.save_path, 'train.csv'))
    #copyfile(cfg.dev_csv, os.path.join(args.save_path, 'dev.csv'))

    # Debug-sized training slices, disabled:
    # np_train_h5_file = np.array(train_h5_file['train'][:10000], dtype=np.uint8)
    # np_t_u_ones = np.array(train_h5_file['train_u_ones'][:10000], dtype=np.int8)
    # np_t_u_zeros = np.array(train_h5_file['train_u_zeros'][:10000], dtype=np.int8)
    # np_t_u_random = np.array(train_h5_file['train_u_random'][:10000], dtype=np.int8)

    # Materialise the validation arrays (images + three uncertain-label
    # policies: ones / zeros / random).
    np_val_h5_file = np.array(val_h5_file['val'], dtype=np.uint8)
    np_v_u_ones = np.array(val_h5_file['val_u_ones'], dtype=np.int8)
    np_v_u_zeros = np.array(val_h5_file['val_u_zeros'], dtype=np.int8)
    np_v_u_random = np.array(val_h5_file['val_u_random'], dtype=np.int8)

    train_labels = {}
    with h5py.File(f'{args.train_chunks}/train_labels.h5', 'r') as fp:
        train_labels['train_u_ones'] = np.array(fp['train_u_ones'],
                                                dtype=np.int8)
        train_labels['train_u_zeros'] = np.array(fp['train_u_zeros'],
                                                 dtype=np.int8)
        train_labels['train_u_random'] = np.array(fp['train_u_random'],
                                                  dtype=np.int8)

    # Concatenate the on-disk NPY chunks into one training array.
    np_train_samples = None
    for i in range(args.chunk_count):
        with open(f'{args.train_chunks}/chexpert_dset_chunk_{i+1}.npy',
                  'rb') as f:
            if np_train_samples is None:
                np_train_samples = np.load(f)
            else:
                np_train_samples = np.concatenate(
                    (np_train_samples, np.load(f)))

    dataloader_train = DataLoader(ImageDataset(
        [np_train_samples, train_labels], cfg, mode='train'),
                                  batch_size=cfg.train_batch_size,
                                  num_workers=args.num_workers,
                                  drop_last=True,
                                  shuffle=True)

    dataloader_dev = DataLoader(ImageDataset(
        [np_val_h5_file, np_v_u_zeros, np_v_u_ones, np_v_u_random],
        cfg, mode='val'),
                                batch_size=cfg.dev_batch_size,
                                num_workers=args.num_workers,
                                drop_last=False,
                                shuffle=False)

    #dev_header = dataloader_dev.dataset._label_header
    # Hard-coded CheXpert label order (dataset no longer exposes a header).
    dev_header = [
        'No_Finding', 'Enlarged_Cardiomediastinum', 'Cardiomegaly',
        'Lung_Opacity', 'Lung_Lesion', 'Edema', 'Consolidation', 'Pneumonia',
        'Atelectasis', 'Pneumothorax', 'Pleural_Effusion', 'Pleural_Other',
        'Fracture', 'Support_Devices'
    ]

    print(f'dataloaders are set. train count: {np_train_samples.shape[0]}')
    logging.info("[LOGGING TEST]: dataloaders are set...")

    summary_train = {'epoch': 0, 'step': 0}
    summary_dev = {'loss': float('inf'), 'acc': 0.0}
    summary_writer = SummaryWriter(args.save_path)
    epoch_start = 0
    best_dict = {
        "acc_dev_best": 0.0,
        "auc_dev_best": 0.0,
        "loss_dev_best": float('inf'),
        "fused_dev_best": 0.0,
        "best_idx": 1
    }

    if args.resume:
        ckpt_path = os.path.join(args.save_path, 'train.ckpt')
        ckpt = torch.load(ckpt_path, map_location=device)
        model.module.load_state_dict(ckpt['state_dict'])
        summary_train = {'epoch': ckpt['epoch'], 'step': ckpt['step']}
        best_dict['acc_dev_best'] = ckpt['acc_dev_best']
        best_dict['loss_dev_best'] = ckpt['loss_dev_best']
        best_dict['auc_dev_best'] = ckpt['auc_dev_best']
        epoch_start = ckpt['epoch']

    # One q/k value per class, broadcast from the CLI scalars.
    q_list = []
    k_list = []
    for i in range(len(cfg.num_classes)):
        q_list.append(args.q)
        k_list.append(args.k)
    k_list = torch.FloatTensor(k_list)
    q_list = torch.FloatTensor(q_list)

    loss_sq_hinge = MultiClassSquaredHingeLoss()
    print('Everything is set starting to train...')
    before = datetime.datetime.now()

    for epoch in range(epoch_start, cfg.epoch):
        lr = lr_schedule(cfg.lr, cfg.lr_factor, summary_train['epoch'],
                         cfg.lr_epochs)
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr

        summary_train, best_dict = train_epoch(
            summary_train, summary_dev, cfg, args, model, dataloader_train,
            dataloader_dev, optimizer, summary_writer, best_dict, dev_header,
            q_list, k_list, loss_sq_hinge)

        time_now = time.time()
        summary_dev, predlist, true_list = test_epoch(
            summary_dev, cfg, args, model, dataloader_dev, q_list, k_list,
            loss_sq_hinge)
        time_spent = time.time() - time_now

        # Per-class ROC AUC on the dev set.
        auclist = []
        for i in range(len(cfg.num_classes)):
            y_pred = predlist[i]
            y_true = true_list[i]
            fpr, tpr, thresholds = metrics.roc_curve(y_true, y_pred,
                                                     pos_label=1)
            auc = metrics.auc(fpr, tpr)
            auclist.append(auc)
        summary_dev['auc'] = np.array(auclist)

        loss_dev_str = ' '.join(
            map(lambda x: '{:.5f}'.format(x), summary_dev['loss']))
        acc_dev_str = ' '.join(
            map(lambda x: '{:.3f}'.format(x), summary_dev['acc']))
        auc_dev_str = ' '.join(
            map(lambda x: '{:.3f}'.format(x), summary_dev['auc']))

        logging.info('{}, Dev, Step : {}, Loss : {}, Acc : {}, Auc : {},'
                     'Mean auc: {:.3f} '
                     'Run Time : {:.2f} sec'.format(
                         time.strftime("%Y-%m-%d %H:%M:%S"),
                         summary_train['step'], loss_dev_str, acc_dev_str,
                         auc_dev_str, summary_dev['auc'].mean(), time_spent))

        for t in range(len(cfg.num_classes)):
            summary_writer.add_scalar('dev/loss_{}'.format(dev_header[t]),
                                      summary_dev['loss'][t],
                                      summary_train['step'])
            summary_writer.add_scalar('dev/acc_{}'.format(dev_header[t]),
                                      summary_dev['acc'][t],
                                      summary_train['step'])
            summary_writer.add_scalar('dev/auc_{}'.format(dev_header[t]),
                                      summary_dev['auc'][t],
                                      summary_train['step'])

        # Track running bests; only cfg.best_target triggers a best-checkpoint.
        save_best = False
        mean_acc = summary_dev['acc'][cfg.save_index].mean()
        if mean_acc >= best_dict['acc_dev_best']:
            best_dict['acc_dev_best'] = mean_acc
            if cfg.best_target == 'acc':
                save_best = True
        mean_auc = summary_dev['auc'][cfg.save_index].mean()
        if mean_auc >= best_dict['auc_dev_best']:
            best_dict['auc_dev_best'] = mean_auc
            if cfg.best_target == 'auc':
                save_best = True
        mean_loss = summary_dev['loss'][cfg.save_index].mean()
        if mean_loss <= best_dict['loss_dev_best']:
            best_dict['loss_dev_best'] = mean_loss
            if cfg.best_target == 'loss':
                save_best = True

        if save_best:
            torch.save(
                {
                    'epoch': summary_train['epoch'],
                    'step': summary_train['step'],
                    'acc_dev_best': best_dict['acc_dev_best'],
                    'auc_dev_best': best_dict['auc_dev_best'],
                    'loss_dev_best': best_dict['loss_dev_best'],
                    'state_dict': model.module.state_dict()
                },
                os.path.join(args.save_path,
                             'best{}.ckpt'.format(best_dict['best_idx'])))
            best_dict['best_idx'] += 1
            if best_dict['best_idx'] > cfg.save_top_k:
                best_dict['best_idx'] = 1
            logging.info('{}, Best, Step : {}, Loss : {}, Acc : {},'
                         'Auc :{},Best Auc : {:.3f}'.format(
                             time.strftime("%Y-%m-%d %H:%M:%S"),
                             summary_train['step'], loss_dev_str,
                             acc_dev_str, auc_dev_str,
                             best_dict['auc_dev_best']))
        # Always refresh the resumable checkpoint at the end of the epoch.
        torch.save(
            {
                'epoch': summary_train['epoch'],
                'step': summary_train['step'],
                'acc_dev_best': best_dict['acc_dev_best'],
                'auc_dev_best': best_dict['auc_dev_best'],
                'loss_dev_best': best_dict['loss_dev_best'],
                'state_dict': model.module.state_dict()
            },
            os.path.join(args.save_path, 'train.ckpt'))
        print_remaining_time(before, epoch + 1, cfg.epoch,
                             additional='[training]')
    summary_writer.close()
ret.fill(fill) ret[index, :] = data return ret if __name__ == "__main__": from model.faster_rcnn import FasterRcnn from data.dataset import ImageDataset from torch.utils.data import DataLoader import torch path = 'pretrained_model/checkpoints/vgg16-397923af.pth' faster_rcnn = FasterRcnn(path) dataset = ImageDataset(csv_file='../data/VOC_data_rescale_name2num.csv', image_root_dir='../data/resize/JPEGImages') dataloader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=0) sample = next(iter(dataloader)) x = sample["img_tensor"] gt_boxes = sample["img_gt_boxes"][0].detach().numpy() anchor_labels_1 = sample['anchor_labels'][0].detach().numpy() anchor_locs_1 = sample['anchor_locations'][0].detach().numpy() print(anchor_labels_1.shape) print(anchor_locs_1.shape) h = faster_rcnn.extractor(x) img_size = (x.size(2), x.size(3)) _, _, anchors, _ = faster_rcnn.rpn(h, img_size)
np_train_samples = np.load(f) else: np_train_samples = np.concatenate((np_train_samples, np.load(f))) # device = torch.device(f'cuda:{args.gpu}') # load best chexpert model from normal print('loading network: '+ args.saved_model_path) model = Classifier(cfg) #model = DataParallel(model, device_ids=args.gpu).to(device) model = DataParallel(model, device_ids=[args.gpu]).to(device) ckpt = torch.load(args.saved_model_path, map_location=device) model.module.load_state_dict(ckpt['state_dict']) model.cuda() # dataloader_train = DataLoader( ImageDataset([np_train_samples, train_labels], cfg, mode='train'), batch_size=cfg.train_batch_size, num_workers=args.num_workers, drop_last=False, shuffle=False) dataloader_dev_val = DataLoader( ImageDataset([np_dev_val_h5_file, np_dev_val_u_zeros, np_dev_val_u_ones, np_dev_val_u_random], cfg, mode='val'), batch_size=cfg.dev_batch_size, num_workers=args.num_workers, drop_last=False, shuffle=False) dataloader_dev = DataLoader( ImageDataset([np_dev_h5_file, np_dev_u_zeros, np_dev_u_ones, np_dev_u_random], cfg, mode='val'), batch_size=cfg.dev_batch_size, num_workers=args.num_workers, drop_last=False, shuffle=False) if args.mode == 'extract': extract_features(device, model, dataloader_train, args.batch_size, args.saved_path, "inference_train_val")
def denorm(img):
    """Clamp image tensor values into the displayable [0, 1] range."""
    return img.clip(min=0, max=1)


transform = torchvision.transforms.ToTensor()


@torch.no_grad()
def test_image(model, content_img_path, style_img_path):
    """Stylise one content image with one style image and display both.

    Args:
        model: style-transfer network taking (content, style) batches.
        content_img_path: path to the content image file.
        style_img_path: path to the style image file.
    """
    content = Image.open(content_img_path).convert('RGB')
    # BUG FIX: the original called
    # Image.open(os.path.join(style_img_path, style_img_path)), joining the
    # style path with itself (e.g. 'a/b.jpg/a/b.jpg') — an always-invalid
    # path. The style path argument is already the complete file path.
    style = Image.open(style_img_path).convert('RGB')
    content = transform(content)[None].cuda()
    style = transform(style)[None].cuda()
    result = model(content, style).cpu()
    imshow(content.cpu().squeeze())
    imshow(denorm(result.squeeze()))


@torch.no_grad()
def show(model, dataset, samples_num=4):
    """Stylise `samples_num` content/style pairs drawn from `dataset`."""
    content, style = dataset(samples_num)
    content, style = content.cuda(), style.cuda()
    result = model(content, style, return_loss=False)
    visualize(content, result)


# Module-level demo setup: restore a trained transformer and open the test set.
model = StyleTransformer()
model.load_state_dict(torch.load('trained_model/model2.pkl'))
model.cuda().eval()
dataset = ImageDataset(r'F:\datasets\Miyazaki Hayao2photo', transform, 'test')
def run_fl(args):
    """Federated training loop: local client training + server-side averaging.

    Builds one DataLoader per client (one CSV per client, keyed 'A', 'B', ...),
    then for each communication round trains every client locally from the
    current global weights, aggregates with FedAvg / weighted FedAvg / FedProx,
    evaluates the averaged model on the dev set, and checkpoints bests.

    Args:
        args: parsed CLI namespace (cfg_path, save_path, device_ids, resume,
            pre_train, num_workers, verbose, logtofile).
    """
    with open(args.cfg_path) as f:
        cfg = edict(json.load(f))
        if args.verbose is True:
            print(json.dumps(cfg, indent=4))

    if not os.path.exists(args.save_path):
        os.mkdir(args.save_path)
    if args.logtofile is True:
        logging.basicConfig(filename=args.save_path + '/log.txt',
                            filemode="w", level=logging.INFO)
    else:
        logging.basicConfig(level=logging.INFO)

    if not args.resume:
        with open(os.path.join(args.save_path, 'cfg.json'), 'w') as f:
            json.dump(cfg, f, indent=1)

    device_ids = list(map(int, args.device_ids.split(',')))
    num_devices = torch.cuda.device_count()
    if num_devices < len(device_ids):
        raise Exception('#available gpu : {} < --device_ids : {}'.format(
            num_devices, len(device_ids)))
    device = torch.device('cuda:{}'.format(device_ids[0]))

    # initialise global model
    model = Classifier(cfg).to(device).train()
    if args.verbose is True:
        from torchsummary import summary
        if cfg.fix_ratio:
            h, w = cfg.long_side, cfg.long_side
        else:
            h, w = cfg.height, cfg.width
        summary(model.to(device), (3, h, w))
    if args.pre_train is not None:
        if os.path.exists(args.pre_train):
            ckpt = torch.load(args.pre_train, map_location=device)
            model.load_state_dict(ckpt)

    # Snapshot the source tree into the run directory for provenance.
    src_folder = os.path.dirname(os.path.abspath(__file__)) + '/../'
    dst_folder = os.path.join(args.save_path, 'classification')
    rc, size = subprocess.getstatusoutput('du --max-depth=0 %s | cut -f1'
                                          % src_folder)
    if rc != 0:
        raise Exception('Copy folder error : {}'.format(rc))
    else:
        print('Successfully determined size of directory')
    rc, err_msg = subprocess.getstatusoutput('cp -R %s %s' % (src_folder,
                                                              dst_folder))
    if rc != 0:
        raise Exception('copy folder error : {}'.format(err_msg))
    else:
        print('Successfully copied folder')

    # copy train files
    # cfg.train_csv is a list of CSVs, one per client; clients are keyed by
    # consecutive uppercase letters ('A', 'B', ...).
    train_files = cfg.train_csv
    clients = {}
    for i, c in enumerate(string.ascii_uppercase):
        if i < len(train_files):
            clients[c] = {}
        else:
            break

    # initialise clients
    for i, client in enumerate(clients):
        copyfile(train_files[i],
                 os.path.join(args.save_path, f'train_{client}.csv'))
        clients[client]['dataloader_train'] =\
            DataLoader(
                ImageDataset(train_files[i], cfg, mode='train'),
                batch_size=cfg.train_batch_size,
                num_workers=args.num_workers,
                drop_last=True,
                shuffle=True
            )
        clients[client]['bytes_uploaded'] = 0.0
        clients[client]['epoch'] = 0

    copyfile(cfg.dev_csv, os.path.join(args.save_path, 'dev.csv'))
    dataloader_dev = DataLoader(ImageDataset(cfg.dev_csv, cfg, mode='dev'),
                                batch_size=cfg.dev_batch_size,
                                num_workers=args.num_workers,
                                drop_last=False,
                                shuffle=False)
    dev_header = dataloader_dev.dataset._label_header

    # w_global holds the server-side weights shared with clients each round.
    w_global = model.state_dict()

    summary_train = {'epoch': 0, 'step': 0}
    summary_dev = {'loss': float('inf'), 'acc': 0.0}
    summary_writer = SummaryWriter(args.save_path)
    comm_rounds = cfg.epoch
    best_dict = {
        "acc_dev_best": 0.0,
        "auc_dev_best": 0.0,
        "loss_dev_best": float('inf'),
        "fused_dev_best": 0.0,
        "best_idx": 1
    }

    # Communication rounds loop
    for cr in range(comm_rounds):
        logging.info('{}, Start communication round {} of FL - {} ...'.format(
            time.strftime("%Y-%m-%d %H:%M:%S"), cr + 1, cfg.fl_technique))

        w_locals = []
        for client in clients:
            logging.info(
                '{}, Start local training process for client {}, communication round: {} ...'
                .format(time.strftime("%Y-%m-%d %H:%M:%S"), client, cr + 1))

            # Load previous current global model as start point
            model = Classifier(cfg).to(device).train()
            model.load_state_dict(w_global)

            if cfg.fl_technique == "FedProx":
                # FedProx regularises local updates towards the global weights.
                global_weight_collector = get_global_weights(model, device)
            else:
                global_weight_collector = None

            optimizer = get_optimizer(model.parameters(), cfg)

            # local training loops
            for epoch in range(cfg.local_epoch):
                lr = lr_schedule(cfg.lr, cfg.lr_factor, epoch, cfg.lr_epochs)
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr

                summary_train, best_dict = train_epoch_fl(
                    summary_train, summary_dev, cfg, args, model,
                    clients[client]['dataloader_train'], dataloader_dev,
                    optimizer, summary_writer, best_dict, dev_header, epoch,
                    global_weight_collector)

                summary_train['step'] += 1

            # Account for simulated upload traffic of this client's update.
            bytes_to_upload = sys.getsizeof(model.state_dict())
            clients[client]['bytes_uploaded'] += bytes_to_upload
            logging.info(
                '{}, Completed local rounds for client {} in communication round {}. '
                'Uploading {} bytes to server, {} bytes in total sent from client'
                .format(time.strftime("%Y-%m-%d %H:%M:%S"), client, cr + 1,
                        bytes_to_upload, clients[client]['bytes_uploaded']))

            w_locals.append(model.state_dict())

        # Server-side aggregation of the collected client updates.
        if cfg.fl_technique == "FedAvg":
            w_global = fed_avg(w_locals)
        elif cfg.fl_technique == 'WFedAvg':
            w_global = weighted_fed_avg(w_locals, cfg.train_proportions)
        elif cfg.fl_technique == 'FedProx':
            # Use weighted FedAvg when using FedProx
            w_global = weighted_fed_avg(w_locals, cfg.train_proportions)

        # Test the performance of the averaged model
        avged_model = Classifier(cfg).to(device)
        avged_model.load_state_dict(w_global)

        time_now = time.time()
        summary_dev, predlist, true_list = test_epoch(
            summary_dev, cfg, args, avged_model, dataloader_dev)
        time_spent = time.time() - time_now

        # Per-class ROC AUC of the averaged model on the dev set.
        auclist = []
        for i in range(len(cfg.num_classes)):
            y_pred = predlist[i]
            y_true = true_list[i]
            fpr, tpr, thresholds = metrics.roc_curve(y_true, y_pred,
                                                     pos_label=1)
            auc = metrics.auc(fpr, tpr)
            auclist.append(auc)
        auc_summary = np.array(auclist)

        loss_dev_str = ' '.join(
            map(lambda x: '{:.5f}'.format(x), summary_dev['loss']))
        acc_dev_str = ' '.join(
            map(lambda x: '{:.3f}'.format(x), summary_dev['acc']))
        auc_dev_str = ' '.join(map(lambda x: '{:.3f}'.format(x), auc_summary))

        logging.info(
            '{}, Averaged Model -> Dev, Step : {}, Loss : {}, Acc : {}, Auc : {},'
            'Mean auc: {:.3f} '
            'Run Time : {:.2f} sec'.format(time.strftime("%Y-%m-%d %H:%M:%S"),
                                           summary_train['step'],
                                           loss_dev_str, acc_dev_str,
                                           auc_dev_str, auc_summary.mean(),
                                           time_spent))

        for t in range(len(cfg.num_classes)):
            summary_writer.add_scalar('dev/loss_{}'.format(dev_header[t]),
                                      summary_dev['loss'][t],
                                      summary_train['step'])
            summary_writer.add_scalar('dev/acc_{}'.format(dev_header[t]),
                                      summary_dev['acc'][t],
                                      summary_train['step'])
            summary_writer.add_scalar('dev/auc_{}'.format(dev_header[t]),
                                      auc_summary[t],
                                      summary_train['step'])

        # Track running bests; only cfg.best_target triggers a best-checkpoint.
        save_best = False
        mean_acc = summary_dev['acc'][cfg.save_index].mean()
        if mean_acc >= best_dict['acc_dev_best']:
            best_dict['acc_dev_best'] = mean_acc
            if cfg.best_target == 'acc':
                save_best = True
        mean_auc = auc_summary[cfg.save_index].mean()
        if mean_auc >= best_dict['auc_dev_best']:
            best_dict['auc_dev_best'] = mean_auc
            if cfg.best_target == 'auc':
                save_best = True
        mean_loss = summary_dev['loss'][cfg.save_index].mean()
        if mean_loss <= best_dict['loss_dev_best']:
            best_dict['loss_dev_best'] = mean_loss
            if cfg.best_target == 'loss':
                save_best = True

        if save_best:
            torch.save(
                {
                    'epoch': summary_train['epoch'],
                    'step': summary_train['step'],
                    'acc_dev_best': best_dict['acc_dev_best'],
                    'auc_dev_best': best_dict['auc_dev_best'],
                    'loss_dev_best': best_dict['loss_dev_best'],
                    'state_dict': avged_model.state_dict()
                },
                os.path.join(args.save_path,
                             'best{}.ckpt'.format(best_dict['best_idx'])))
            best_dict['best_idx'] += 1
            if best_dict['best_idx'] > cfg.save_top_k:
                best_dict['best_idx'] = 1
            logging.info('{}, Best, Step : {}, Loss : {}, Acc : {},'
                         'Auc :{},Best Auc : {:.3f}'.format(
                             time.strftime("%Y-%m-%d %H:%M:%S"),
                             summary_train['step'], loss_dev_str,
                             acc_dev_str, auc_dev_str,
                             best_dict['auc_dev_best']))
        # Always refresh the resumable checkpoint after each round.
        torch.save(
            {
                'epoch': cr,
                'step': summary_train['step'],
                'acc_dev_best': best_dict['acc_dev_best'],
                'auc_dev_best': best_dict['auc_dev_best'],
                'loss_dev_best': best_dict['loss_dev_best'],
                'state_dict': avged_model.state_dict()
            },
            os.path.join(args.save_path, 'train.ckpt'))
self.train_D() if self.mode == 'transition': self.fade_in_alpha += self.alpha_step self.record_number += self.current_batch_size if self.level >= 4 and ( self.record_number >= self.compute_swd_every or self.record_number == 0): self.record_number = 0 self.swd.append(self.sample_swd()) if self.passed_real_images_num >= self.switch_mode_number: if self.mode == 'stabilize': self.plot_stat_curve() torch.save(self.G.state_dict(), f'state_dict/G_{2 ** self.level}.pkl') self.update_state() self.train_G() if self.is_finished: break if __name__ == '__main__': g = Generator(**config.__dict__) d = Discriminator(**config.__dict__) dataset = ImageDataset(data_root=config.data_root, transform=T.Compose( [T.ToTensor(), T.Normalize([0.5], [0.5])]), max_resolution=config.resolution) pggan = PGGAN(g, d, dataset, **config.__dict__) pggan.train()
model = Classifier(cfg) if args.verbose: from torchsummary import summary h, w = (cfg.long_side, cfg.long_side) if cfg.fix_ratio \ else (cfg.height, cfg.width) summary(model.to(device), (3, h, w)) model = DataParallel(model, device_ids=device_ids).to(device) if args.pre_train is not None: if exists(args.pre_train): ckpt = torch.load(args.pre_train, map_location=device) model.module.load_state_dict(ckpt) optimizer = get_optimizer(model.parameters(), cfg) trainset = ImageDataset(cfg.train_csv, cfg, mode='train') testset = ImageDataset(cfg.dev_csv, cfg, mode='val') trainloader = DataLoader(trainset, batch_size=cfg.train_batch_size, num_workers=args.num_workers, drop_last=True, shuffle=True) testloader = DataLoader(testset, batch_size=cfg.dev_batch_size, num_workers=args.num_workers, drop_last=False, shuffle=False) dev_header = testloader.dataset._label_header # Initialize parameters to log training output summary_train = {'epoch': 0, 'step': 0} summary_dev = {'loss': float('inf'), 'acc': 0.0} summary_writer = SummaryWriter(args.save_path) epoch_start = 0
if 'step' in ckpt and 'auc_dev_best' in ckpt: print(f"Using model '{paramsfile}' at step: {ckpt['step']} " f"with AUC: {ckpt['auc_dev_best']}") return model makedirs(dirname(output_file), exist_ok=True) # We search for the next best json file and interpret it as # the configuration file. config_file = glob(join(opt.model, '*.json'))[0] with open(config_file, 'r') as fp: cfg = edict(json.load(fp)) if not opt.cpu: num_devices = torch.cuda.device_count() assert num_devices >= len(device_ids), f""" #available gpu : {num_devices} < --device_ids : {len(device_ids)}""" device = torch.device(f"cuda:{device_ids[0]}") else: num_devices = 0 device = torch.device('cpu') model = build_model(cfg, join(opt.model, opt.ckpt), device) dataset = ImageDataset(opt.dataset, cfg, mode='test') dataloader = DataLoader(dataset, batch_size=opt.batch_size, shuffle=False, drop_last=False, num_workers=opt.num_workers) test_epoch(cfg, opt, model, dataloader, output_file)