def evaluate(path, file):
    logger = utils.genlogger(os.path.join(path, 'stats.txt'))
    logger.info("Output Path: {}".format(path))
    logger.info("<---- Evaluation on Test Set ---->")
    obj = torch.load(os.path.join(path, file), lambda stg, loc: stg)
    test_label = obj['test_label']
    config = obj['config']
    Net = torch.load(config['Net'])
    model = getattr(M, config['model'])(Net, n_class=config['n_class'])
    model.load_param(obj['param'])
    model = model.to(device)
    tta_transform = utils.test_transform()
    test_dataloader = dataloader_test(
        config['data_h5'], test_label, tta_transform,
        T=config['time_step'], **config['dataloader_param']
    )
    _, f1_macro, f1_micro, acc, auc = utils.evaluate(
        model, test_dataloader, device, None, config['threshold'])
    logger.info("<---- test evaluation: ---->")
    logger.info(
        "f1_macro: {:.4f}\tf1_micro: {:.4f}\tacc: {:.4f}\tauc: {:.4f}".format(
            f1_macro, f1_micro, acc, auc))
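# Usage sketch (illustrative only): how evaluate() above might be invoked on a
# finished run directory. The directory name and checkpoint filename below are
# hypothetical examples, not paths shipped with this repo.
#
#     evaluate('experiments/2020-01-01_00-00-00', 'model_acc.th')
#
# evaluate() expects the checkpoint to contain 'param', 'config' and
# 'test_label', i.e. the dict written by torch.save() in the run() functions
# below.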
def run(config_file):
    config = get_config(config_file)
    cur_time = time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime())
    outdir = os.path.join(config['outputdir'], cur_time)
    os.makedirs(outdir)
    logger = utils.genlogger(os.path.join(outdir, 'log.txt'))
    logger.info("Output Path: {}".format(outdir))
    logger.info("<---- config details ---->")
    for key in config:
        logger.info("{}: {}".format(key, config[key]))
    logger.info("<---- end of config ---->")

    train_dev = pd.read_csv(config['train_dev'], sep=',')
    n_class = config['n_class']
    #train_set, dev_set, test_set = utils.train_dev_test_split(df, outdir)
    train_set, dev_set = utils.train_dev_split(train_dev, outdir)
    test_set = pd.read_csv(config['test'], sep=',').values
    num = 10 if args.debug else None  # in debug mode, keep only a few samples per split
    train_label = utils.one_hot(train_set, n_class, num)
    dev_label = utils.one_hot(dev_set, n_class, num)
    test_label = utils.one_hot(test_set, n_class, num)
    logger.info("train set: {} samples".format(len(train_label)))
    logger.info("dev set: {} samples".format(len(dev_label)))
    logger.info("test set: {} samples".format(len(test_label)))

    #Net = torchvision.models.resnet152(pretrained=False)
    #Net.load_state_dict(torch.load(config['Net']).state_dict())
    Net = torch.load(config['Net'])
    #mrcnn = torchvision.models.detection.maskrcnn_resnet50_fpn(
    #    pretrained=True
    #    #pretrained_backbone=False
    #)
    #mrcnn = torch.load(config['mrcnn'])
    model = getattr(M, config['model'])(Net, n_class=n_class, **config['model_param'])
    if config['pretrain']:
        obj = torch.load(config['pretrain_model'], lambda x, y: x)
        model.load_param(obj['param'])
    logger.info("model: {}".format(str(model)))

    origin_model = model
    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)
        logger.info("Use {} GPU(s)".format(torch.cuda.device_count()))
    model = model.to(device)

    # if config['model_param']['Net_grad']:
    #     optimizer = getattr(optim, config['optim'])([
    #             {'params': origin_model.get_Net_param(), 'lr': config['Net_lr']},
    #             {'params': origin_model.get_other_param()}
    #         ], lr=config['other_lr']
    #     )
    # else:
    #     optimizer = getattr(optim, config['optim'])(
    #         origin_model.get_other_param(),
    #         lr=config['other_lr']
    #     )
    optimizer = getattr(optim, config['optim'])(origin_model.parameters(), lr=config['other_lr'])
    lr_scheduler = getattr(optim.lr_scheduler, config['lr_scheduler'])(optimizer, **config['scheduler_param'])
    #criterion = getattr(l, 'FocalSymmetricLovaszHardLogLoss')()
    criterion = getattr(losses, config['Loss'])(**config['Loss_param'])

    #train_transform = utils.augmentation()
    train_transform = utils.train_transform()
    test_transform = utils.simple_transform()
    train_dataloader = oversample_dataloader(
        config['data_h5'], train_label, train_transform,
        T=config['time_step'], **config['dataloader_param']
    )
    dev_dataloader = dataloader_single(
        config['data_h5'], dev_label, test_transform,
        T=config['time_step'], **config['dataloader_param']
    )
    test_dataloader = dataloader_single(
        config['data_h5'], test_label, test_transform,
        T=config['time_step'], **config['dataloader_param']
    )

    best_dev_loss = np.inf  # kept for the (commented-out) loss-based checkpointing below
    dev_loss, f1_macro, f1_micro, acc, auc = utils.evaluate(
        model, dev_dataloader, device, criterion, config['threshold'])
    best_f1 = f1_macro + f1_micro
    logger.info(
        "dev_loss: {:.4f}\tf1_macro: {:.4f}\tf1_micro: {:.4f}\tacc: {:.4f}\tauc: {:.4f}".format(
            dev_loss, f1_macro, f1_micro, acc, auc))

    for epoch in range(1, config['n_epoch'] + 1):
        logger.info("<---- Epoch: {} start ---->".format(epoch))
        #if (epoch >= 10 and config['model_param']['Net_grad']):
        #    optimizer.param_groups[0]['lr'] = optimizer.param_groups[1]['lr'] / 1000
        train_loss = one_epoch(model, optimizer, criterion, train_dataloader, True, config['grad_clip'])
        dev_loss, f1_macro, f1_micro, acc, auc = utils.evaluate(
            model, dev_dataloader, device, criterion, config['threshold'])
        logger.info("train_loss: {:.4f}\tdev_loss: {:.4f}".format(
            train_loss, dev_loss))
        logger.info(
            "DEV: f1_macro: {:.4f}\tf1_micro: {:.4f}\tacc: {:.4f}\tauc: {:.4f}"
            .format(f1_macro, f1_micro, acc, auc))
        if epoch % config['saveinterval'] == 0:
            model_path = os.path.join(outdir, 'model_{}.th'.format(epoch))
            torch.save({
                "param": origin_model.get_param(),
                "train_label": train_label,
                "dev_label": dev_label,
                "test_label": test_label,
                "config": config
            }, model_path)
        # if best_dev_loss > dev_loss:
        #     model_path = os.path.join(outdir, 'model.th')
        #     torch.save({
        #         "param": origin_model.get_param(),
        #         "train_label": train_label,
        #         "dev_label": dev_label,
        #         "test_label": test_label,
        #         "config": config
        #     }, model_path)
        #     best_dev_loss = dev_loss
        if best_f1 < f1_macro + f1_micro:
            # new best dev F1: checkpoint and report the test metrics
            model_path = os.path.join(outdir, 'model_acc.th')
            torch.save({
                "param": origin_model.get_param(),
                "train_label": train_label,
                "dev_label": dev_label,
                "test_label": test_label,
                "config": config
            }, model_path)
            best_f1 = f1_macro + f1_micro
            _, f1_macro, f1_micro, acc, auc = utils.evaluate(
                model, test_dataloader, device, None, config['threshold'])
            logger.info(
                "TEST: f1_macro: {:.4f}\tf1_micro: {:.4f}\tacc: {:.4f}\tauc: {:.4f}"
                .format(f1_macro, f1_micro, acc, auc))
        schedarg = dev_loss if lr_scheduler.__class__.__name__ == 'ReduceLROnPlateau' else None
        lr_scheduler.step(schedarg)

    _, f1_macro, f1_micro, acc, auc = utils.evaluate(
        model, test_dataloader, device, None, config['threshold'])
    logger.info("<---- test evaluation: ---->")
    logger.info(
        "f1_macro: {:.4f}\tf1_micro: {:.4f}\tacc: {:.4f}\tauc: {:.4f}".format(
            f1_macro, f1_micro, acc, auc))
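# Sketch of the configuration run() expects. The keys are taken from the
# accesses in the code above; every value is an illustrative assumption, and
# the on-disk format depends on what get_config() actually parses:
#
#     outputdir: experiments/
#     train_dev: data/train_dev.csv
#     test: data/test.csv
#     data_h5: data/images_256.h5
#     n_class: 10
#     time_step: 4
#     Net: resnet_nofc.th            # backbone module loaded with torch.load()
#     model: CRNN                    # attribute looked up on module M (assumed name)
#     model_param: {}
#     pretrain: false
#     pretrain_model: null
#     optim: Adam
#     other_lr: 0.001
#     lr_scheduler: ReduceLROnPlateau
#     scheduler_param: {patience: 3}
#     Loss: BCEWithLogitsLoss        # attribute looked up on the losses module
#     Loss_param: {}
#     dataloader_param: {batch_size: 32, num_workers: 4}
#     threshold: 0.5
#     grad_clip: 5.0
#     n_epoch: 30
#     saveinterval: 5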
def run(config_file):
    config = get_config(config_file)
    cur_time = time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime())
    outdir = os.path.join(config['outputdir'], cur_time)
    os.makedirs(outdir)
    logger = utils.genlogger(os.path.join(outdir, 'log.txt'))
    logger.info("Output Path: {}".format(outdir))
    logger.info("<---- config details ---->")
    for key in config:
        logger.info("{}: {}".format(key, config[key]))
    logger.info("<---- end of config ---->")

    # this variant trains on the full train+dev split; evaluation is disabled
    n_class = config['n_class']
    train_set = pd.read_csv(config['train_dev'], sep=',').values
    #test_set = pd.read_csv(config['test'], sep=',').values
    num = 10 if args.debug else None
    train_label = utils.one_hot(train_set, n_class, num)
    #test_label = utils.one_hot(test_set, n_class, num)
    logger.info("train set: {} samples".format(len(train_label)))
    #logger.info("test set: {} samples".format(len(test_label)))

    Net = torch.load(config['Net'])
    model = getattr(M, config['model'])(Net, n_class=n_class, **config['model_param'])
    if config['pretrain']:
        obj = torch.load(config['pretrain_model'], lambda x, y: x)
        model.load_param(obj['param'])
        logger.info('load from {}'.format(config['pretrain_model']))
    logger.info("model: {}".format(str(model)))

    origin_model = model
    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)
        logger.info("Use {} GPU(s)".format(torch.cuda.device_count()))
    model = model.to(device)

    optimizer = getattr(optim, config['optim'])(origin_model.parameters(), lr=config['other_lr'])
    criterion = getattr(losses, config['Loss'])(**config['Loss_param'])

    train_transform = utils.train_transform()
    #test_transform = utils.simple_transform()
    train_dataloader = oversample_dataloader(
        config['data_h5'], train_label, train_transform,
        T=config['time_step'], **config['dataloader_param']
    )
    # test_dataloader = dataloader_single(
    #     config['data_h5'], test_label, test_transform,
    #     T=config['time_step'], **config['dataloader_param']
    # )
    # test_loss, f1_macro, f1_micro, acc, auc = utils.evaluate(
    #     model, test_dataloader, device,
    #     criterion, config['threshold']
    # )
    # best_f1 = f1_macro + f1_micro
    # logger.info("test_loss: {:.4f}\tf1_macro: {:.4f}\tf1_micro: {:.4f}\tacc: {:.4f}\tauc: {:.4f}"
    #             .format(test_loss, f1_macro, f1_micro, acc, auc))

    for epoch in range(1, config['n_epoch'] + 1):
        logger.info("<---- Epoch: {} start ---->".format(epoch))
        #if (epoch >= 10 and config['model_param']['Net_grad']):
        #    optimizer.param_groups[0]['lr'] = optimizer.param_groups[1]['lr'] / 1000
        train_loss = one_epoch(model, optimizer, criterion, train_dataloader, True, config['grad_clip'])
        # test_loss, f1_macro, f1_micro, acc, auc = utils.evaluate(
        #     model, test_dataloader, device,
        #     criterion, config['threshold']
        # )
        # logger.info("train_loss: {:.4f}\tdev_loss: {:.4f}".format(train_loss, test_loss))
        # logger.info("TEST: f1_macro: {:.4f}\tf1_micro: {:.4f}\tacc: {:.4f}\tauc: {:.4f}".format(f1_macro, f1_micro, acc, auc))
        if epoch % config['saveinterval'] == 0:
            model_path = os.path.join(outdir, 'model_{}.th'.format(epoch))
            torch.save({
                "param": origin_model.get_param(),
                #"train_label": train_label,
                #"test_label": test_label,
                "config": config
            }, model_path)

    # save the final model after the last epoch
    model_path = os.path.join(outdir, 'model.th')
    torch.save({
        "param": origin_model.get_param(),
        #"train_label": train_label,
        "config": config
    }, model_path)
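# one_epoch() is called by every run() variant in this file but defined
# elsewhere. Below is a minimal sketch of what it is assumed to do, inferred
# only from the call sites (model, optimizer, criterion, dataloader, train
# flag, optional gradient clip); the real implementation may unpack batches
# and model outputs differently.
def _one_epoch_sketch(model, optimizer, criterion, dataloader, train, grad_clip=None):
    # switch dropout/batch-norm behaviour between training and evaluation
    model.train() if train else model.eval()
    total_loss, n_batch = 0.0, 0
    with torch.set_grad_enabled(train):
        for feature, target in dataloader:
            feature, target = feature.to(device), target.to(device)
            # the real model may return a tuple; only the loss-relevant part would be used
            output = model(feature)
            loss = criterion(output, target)
            if train:
                optimizer.zero_grad()
                loss.backward()
                if grad_clip is not None:
                    torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
                optimizer.step()
            total_loss += loss.item()
            n_batch += 1
    # average loss over the epoch
    return total_loss / max(n_batch, 1)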
def run(config_file):
    config = get_config(config_file)
    cur_time = time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime())
    outdir = os.path.join(config['outputdir'], cur_time)
    os.makedirs(outdir)
    logger = utils.genlogger(os.path.join(outdir, 'log.txt'))
    logger.info("Output Path: {}".format(outdir))
    logger.info("<---- config details ---->")
    for key in config:
        logger.info("{}: {}".format(key, config[key]))
    logger.info("<---- end of config ---->")

    train_dev = pd.read_csv(config['train_dev'], sep=',')
    n_class = config['n_class']
    #train_set, dev_set, test_set = utils.train_dev_test_split(df, outdir)
    train_set, dev_set = utils.train_dev_split(train_dev, outdir)
    test_set = pd.read_csv(config['test'], sep=',').values
    num = 5 if args.debug else None
    train_label = utils.one_hot(train_set, n_class, num)
    dev_label = utils.one_hot(dev_set, n_class, num)
    test_label = utils.one_hot(test_set, n_class, num)
    logger.info("train set: {} samples".format(len(train_label)))
    logger.info("dev set: {} samples".format(len(dev_label)))
    logger.info("test set: {} samples".format(len(test_label)))

    Net = torchvision.models.densenet201(pretrained=False)
    Net.load_state_dict(torch.load(config['Net']).state_dict())
    model = getattr(M, config['model'])(
        Net, n_class=n_class, **config['model_param']
    )
    logger.info("model: {}".format(str(model.other)))

    origin_model = model
    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)
        logger.info("Use {} GPU(s)".format(torch.cuda.device_count()))
    model = model.to(device)

    if config['model_param']['Net_grad']:
        optimizer = getattr(optim, config['optim'])([
                {'params': origin_model.get_Net_param(), 'lr': config['Net_lr']},
                {'params': origin_model.get_other_param()}
            ], lr=config['other_lr']
        )
    else:
        optimizer = getattr(optim, config['optim'])(
            origin_model.get_other_param(),
            lr=config['other_lr']
        )
    lr_scheduler = getattr(optim.lr_scheduler, config['lr_scheduler'])(
        optimizer, **config['scheduler_param']
    )
    criterion = getattr(torch.nn, config['Loss'])()

    train_transform = utils.train_transform()
    test_transform = utils.test_transform()
    train_dataloader = dataloader_multiple(
        config['data_h5'], train_label, train_transform,
        T=config['time_step'], **config['dataloader_param']
    )
    dev_dataloader = dataloader_multiple(
        config['data_h5'], dev_label, test_transform,
        T=config['time_step'], **config['dataloader_param']
    )
    test_dataloader = dataloader_multiple(
        config['data_h5'], test_label, test_transform,
        T=config['time_step'], **config['dataloader_param']
    )

    best_dev_loss = np.inf
    dev_loss = one_epoch(
        model, optimizer, criterion, dev_dataloader, False)
    f1_macro, f1_micro, acc = utils.evaluate(
        model, dev_dataloader, device, config['threshold'])
    best_f1 = f1_macro + f1_micro
    logger.info(
        "dev_loss: {:.4f}\tf1_macro: {:.4f}\tf1_micro: {:.4f}\tacc: {:.4f}".format(
            dev_loss, f1_macro, f1_micro, acc))

    for epoch in range(1, config['n_epoch'] + 1):
        logger.info("<---- Epoch: {} start ---->".format(epoch))
        train_loss = one_epoch(
            model, optimizer, criterion,
            train_dataloader, True, config['grad_clip']
        )
        dev_loss = one_epoch(
            model, optimizer, criterion, dev_dataloader, False
        )
        logger.info("train_loss: {:.4f}\tdev_loss: {:.4f}".format(train_loss, dev_loss))
        f1_macro, f1_micro, acc = utils.evaluate(
            model, dev_dataloader, device, config['threshold'])
        logger.info("f1_macro: {:.4f}\tf1_micro: {:.4f}\tacc: {:.4f}".format(f1_macro, f1_micro, acc))
        if epoch % config['saveinterval'] == 0:
            model_path = os.path.join(outdir, 'model_{}.th'.format(epoch))
            torch.save({
                "param": origin_model.get_param(),
                "train_label": train_label,
                "dev_label": dev_label,
                "test_label": test_label,
                "config": config
            }, model_path)
        if best_dev_loss > dev_loss:
            model_path = os.path.join(outdir, 'model.th')
            torch.save({
                "param": origin_model.get_param(),
                "train_label": train_label,
                "dev_label": dev_label,
                "test_label": test_label,
                "config": config
            }, model_path)
            best_dev_loss = dev_loss
        if best_f1 < f1_macro + f1_micro:
            model_path = os.path.join(outdir, 'model_acc.th')
            torch.save({
                "param": origin_model.get_param(),
                "train_label": train_label,
                "dev_label": dev_label,
                "test_label": test_label,
                "config": config
            }, model_path)
            best_f1 = f1_macro + f1_micro
        schedarg = dev_loss if lr_scheduler.__class__.__name__ == 'ReduceLROnPlateau' else None
        lr_scheduler.step(schedarg)

    f1_macro, f1_micro, acc = utils.evaluate(
        model, test_dataloader, device, config['threshold'])
    logger.info("f1_macro: {:.4f}\tf1_micro: {:.4f}\tacc: {:.4f}".format(f1_macro, f1_micro, acc))
def evaluate():
    obj = torch.load(args.model, lambda x, y: x)
    params = obj['param']
    config = obj['config']
    out_dir = args.path
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    logger = utils.genlogger(os.path.join(out_dir, 'log.txt'))
    out_pred = os.path.join(out_dir, args.pred)
    out_disc = os.path.join(out_dir, args.disc)
    n_class = config['n_class']
    Net = torch.load('../code/resnet_nofc.th')
    model = getattr(M, config['model'])(
        Net, n_class=n_class, **config['model_param']
    )
    model.load_param(params)
    origin_model = model
    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)
    transform = utils.simple_transform()
    threshold = config['threshold']
    model = model.to(device)
    model = model.eval()

    disc, con = {}, {}
    #images = glob('test_fold/*.jpg')
    f = h5py.File('../test/test_256.h5', 'r')
    with torch.set_grad_enabled(False):
        for key in tqdm(f.keys()):
            input = None
            features = f[key][()]
            for feature in features:
                feature = transform(torch.as_tensor(
                    np.rollaxis(feature, 2, 0)
                )).unsqueeze(0)  # 1 x C x H x W
                input = torch.cat((input, feature)) \
                    if input is not None else feature
            input = input.to(device)
            # average over the stacked inputs -> per-class probabilities (n_class, e.g. 10)
            prob = model(input)[0].cpu().numpy().mean(0)
            # fall back to the single most confident class if nothing clears the threshold
            pred = prob >= (threshold if np.max(prob) > threshold else np.max(prob))
            disc[key] = ";".join(np.argwhere(pred == 1).reshape(-1).astype(str))
            con[key] = ";".join(np.around(prob, decimals=4).reshape(-1).astype(str))

    # with torch.set_grad_enabled(False):
    #     features = None
    #     names = []
    #     for image in images:
    #         name = image.split('/')[-1]
    #         image = np.array(Image.open(image).resize((256, 256))).astype(np.float32)
    #         feature = transform(torch.as_tensor(
    #             np.rollaxis(image, 2, 0)
    #         )).unsqueeze(0).to(device)  # 1 x C x H x W
    #         prob = model(feature)[0].cpu().numpy().mean(0)  # 10
    #         pred = prob >= (threshold if np.max(prob) > threshold else np.max(prob))
    #         disc[name] = ";".join(np.argwhere(pred == 1).reshape(-1).astype(str))
    #         con[name] = ";".join(np.around(prob, decimals=4).reshape(-1).astype(str))

    disc_df = pd.DataFrame(disc.items(), columns=['id', 'label']).set_index('id')
    con_df = pd.DataFrame(con.items(), columns=['id', 'pred']).set_index('id')
    disc_df.to_csv(out_disc, sep=',')
    con_df.to_csv(out_pred, sep=',')
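# The two CSVs written above look roughly like this (ids and values are
# illustrative, not taken from the dataset):
#
#   out_disc -- discrete predictions, class indices joined by ';':
#       id,label
#       sample_001,0;3;7
#
#   out_pred -- per-class probabilities rounded to 4 decimals, joined by ';':
#       id,pred
#       sample_001,0.9123;0.0210;0.4512;0.0005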
def run(config_file):
    config = get_config(config_file)
    cur_time = time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime())
    outdir = os.path.join(config['outputdir'], cur_time)
    os.makedirs(outdir)
    logger = utils.genlogger(os.path.join(outdir, 'log.txt'))
    logger.info("Output Path: {}".format(outdir))
    logger.info("<---- config details ---->")
    for key in config:
        logger.info("{}: {}".format(key, config[key]))
    logger.info("<---- end of config ---->")

    train_dev = pd.read_csv(config['train_dev'], sep=',')
    n_class = config['n_class']
    #train_set, dev_set, test_set = utils.train_dev_test_split(df, outdir)
    #train_set, dev_set = utils.train_dev_split(train_dev, outdir)
    train_set = train_dev.values
    test_set = pd.read_csv(config['test'], sep=',').values
    num = 5 if args.debug else None
    train_label = utils.one_hot(train_set, n_class, num)
    #dev_label = utils.one_hot(dev_set, n_class, num)
    test_label = utils.one_hot(test_set, n_class, num)
    logger.info("train set: {} samples".format(len(train_label)))
    #logger.info("dev set: {} samples".format(len(dev_label)))
    logger.info("test set: {} samples".format(len(test_label)))

    dim = config['pretrain_dim']
    model = getattr(M, config['model'])(dim, n_class=n_class, **config['model_param'])
    if config['pretrain']:
        obj = torch.load(config['pretrain_model'], lambda x, y: x)
        model.load_param(obj['param'])
    logger.info("model: {}".format(str(model)))

    origin_model = model
    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)
        logger.info("Use {} GPU(s)".format(torch.cuda.device_count()))
    model = model.to(device)

    optimizer = getattr(optim, config['optim'])(origin_model.parameters(), lr=config['other_lr'])
    lr_scheduler = getattr(optim.lr_scheduler, config['lr_scheduler'])(optimizer, **config['scheduler_param'])
    criterion = getattr(losses, config['Loss'])()

    train_dataloader = predict_dataloader(
        config['data_h5'], train_label,
        **config['dataloader_param']
    )
    # dev_dataloader = predict_dataloader(
    #     config['data_h5'], dev_label, test_transform,
    #     T=config['time_step'], **config['dataloader_param']
    # )
    test_dataloader = predict_dataloader(
        config['data_h5'], test_label,
        **config['dataloader_param']
    )

    # NOTE: there is no separate dev split in this variant; the test split
    # doubles as the validation set and is logged under "dev_loss".
    test_loss, f1_macro, f1_micro, acc, auc = utils.evaluate_folder(
        model, test_dataloader, device, criterion, config['threshold'])
    best_test_loss = test_loss
    best_f1 = f1_macro + f1_micro
    logger.info(
        "dev_loss: {:.4f}\tf1_macro: {:.4f}\tf1_micro: {:.4f}\tacc: {:.4f}\tauc: {:.4f}".format(
            test_loss, f1_macro, f1_micro, acc, auc))

    for epoch in range(1, config['n_epoch'] + 1):
        logger.info("<---- Epoch: {} start ---->".format(epoch))
        #if (epoch >= 10 and config['model_param']['Net_grad']):
        #    optimizer.param_groups[0]['lr'] = optimizer.param_groups[1]['lr'] / 1000
        train_loss = one_epoch(model, optimizer, criterion, train_dataloader, True, config['grad_clip'])
        test_loss, f1_macro, f1_micro, acc, auc = utils.evaluate_folder(
            model, test_dataloader, device, criterion, config['threshold'])
        logger.info("train_loss: {:.4f}\tdev_loss: {:.4f}".format(
            train_loss, test_loss))
        logger.info(
            "TEST: f1_macro: {:.4f}\tf1_micro: {:.4f}\tacc: {:.4f}\tauc: {:.4f}"
            .format(f1_macro, f1_micro, acc, auc))
        if epoch % config['saveinterval'] == 0:
            model_path = os.path.join(outdir, 'model_{}.th'.format(epoch))
            torch.save({
                "param": origin_model.state_dict(),
                "config": config
            }, model_path)
        if best_test_loss > test_loss:
            model_path = os.path.join(outdir, 'model.th')
            torch.save({
                "param": origin_model.state_dict(),
                "config": config
            }, model_path)
            best_test_loss = test_loss
        if best_f1 < f1_macro + f1_micro:
            model_path = os.path.join(outdir, 'model_acc.th')
            torch.save({
                "param": origin_model.state_dict(),
                "config": config
            }, model_path)
            best_f1 = f1_macro + f1_micro
        schedarg = test_loss if lr_scheduler.__class__.__name__ == 'ReduceLROnPlateau' else None
        lr_scheduler.step(schedarg)

    _, f1_macro, f1_micro, acc, auc = utils.evaluate_folder(
        model, test_dataloader, device, None, config['threshold'])
    logger.info("<---- test evaluation: ---->")
    logger.info(
        "f1_macro: {:.4f}\tf1_micro: {:.4f}\tacc: {:.4f}\tauc: {:.4f}".format(
            f1_macro, f1_micro, acc, auc))
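# The functions above reference a module-level `args` (args.debug, args.model,
# args.path, args.pred, args.disc) and take a config file path, so these
# scripts are presumably driven by an argparse entry point defined elsewhere.
# A hypothetical sketch of such an entry point (flag names and defaults are
# assumptions, not the repo's actual CLI):
#
# if __name__ == '__main__':
#     import argparse
#     parser = argparse.ArgumentParser()
#     parser.add_argument('config_file', help='path to the experiment config')
#     parser.add_argument('--debug', action='store_true',
#                         help='train on a handful of samples only')
#     parser.add_argument('--model', help='checkpoint .th file for evaluate()')
#     parser.add_argument('--path', help='output directory for predictions')
#     parser.add_argument('--pred', default='pred.csv')
#     parser.add_argument('--disc', default='disc.csv')
#     args = parser.parse_args()
#     run(args.config_file)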