def setup(args, cfg, train_ann, valid_ann):
    logger = logging.getLogger('root')
    train_loader = builder.build_dataloader(cfg, ann=train_ann, mode='train')
    valid_loader = builder.build_dataloader(cfg, ann=valid_ann, mode='valid')
    # Adjust steps per epoch if necessary (i.e., equal to 0)
    # We assume that if gradient accumulation is specified, the user has
    # already adjusted steps_per_epoch accordingly in the config file
    steps_per_epoch = cfg['train']['params']['steps_per_epoch']
    gradient_accmul = cfg['train']['params']['gradient_accumulation']
    if steps_per_epoch == 0:
        cfg['train']['params']['steps_per_epoch'] = len(train_loader)
    logger.info('Building [{}] architecture ...'.format(cfg['model']['config']))
    model = builder.build_model(cfg, args.gpu)
    model = model.train().cuda()
    optimizer = builder.build_optimizer(cfg['optimizer']['name'],
                                        model,
                                        cfg['optimizer']['params'])
    scheduler = builder.build_scheduler(cfg['scheduler']['name'],
                                        optimizer,
                                        cfg=cfg)
    return cfg, \
        train_loader, \
        valid_loader, \
        model, \
        optimizer, \
        scheduler

def create_model(cfg):
    model = builder.build_model(cfg['model']['name'], cfg['model']['params'])
    model.load_state_dict(
        torch.load(cfg['test']['checkpoint'],
                   map_location=lambda storage, loc: storage))
    model = model.eval().cuda()
    return model

def create_model(cfg, checkpoint):
    model = builder.build_model(cfg['model']['name'], cfg['model']['params'])
    print('Loading <{}> model from <{}> ...'.format(cfg['model']['name'], checkpoint))
    weights = torch.load(checkpoint, map_location=lambda storage, loc: storage)
    # Strip the 'module.' prefix added by nn.DataParallel so the weights
    # load into a single-GPU model
    weights = {k.replace('module.', ''): v for k, v in weights.items()}
    model.load_state_dict(weights)
    model = model.eval().cuda()
    return model

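# Illustration (not part of the original code): nn.DataParallel prefixes
# every state-dict key with 'module.', which is why create_model() above
# strips that prefix before loading into a single-GPU model.
import torch.nn as nn

net = nn.DataParallel(nn.Linear(4, 2))
print(list(net.state_dict().keys()))  # ['module.weight', 'module.bias']
stripped = {k.replace('module.', ''): v for k, v in net.state_dict().items()}
nn.Linear(4, 2).load_state_dict(stripped)  # loads cleanly once stripped
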
def test(args, cfg, test_df):
    if cfg['test'].get('csv_filename'):
        test_df = pd.read_csv(cfg['test']['csv_filename'])
    logger = logging.getLogger('root')
    logger.info('TESTING : START')
    logger.info('TEST: n={}'.format(len(test_df)))
    if cfg['test'].get('data_dir'):
        cfg['dataset']['data_dir'] = cfg['test']['data_dir']
    test_df = test_df[test_df['part'] != 45]
    test_images = [
        osp.join(cfg['dataset']['data_dir'], '{}'.format(_))
        for _ in test_df['vidfile']
    ]
    test_labels = np.asarray(test_df['label'])
    test_loader = builder.build_dataloader(cfg,
                                           data_info={
                                               'vidfiles': test_images,
                                               'labels': test_labels
                                           },
                                           mode='test')
    cfg['model']['params']['pretrained'] = None
    model = builder.build_model(cfg['model']['name'], cfg['model']['params'])
    model.load_state_dict(
        torch.load(cfg['test']['checkpoint'],
                   map_location=lambda storage, loc: storage))
    model = model.eval().cuda()
    if cfg['test'].get('params') is None:
        cfg['test']['params'] = {}
    predictor = getattr(factory_evaluate, cfg['test']['predictor'])
    predictor = predictor(loader=test_loader, **cfg['test']['params'])
    y_true, y_pred, _ = predictor.predict(model, criterion=None, epoch=None)
    if not osp.exists(cfg['test']['save_preds_dir']):
        os.makedirs(cfg['test']['save_preds_dir'])
    with open(osp.join(cfg['test']['save_preds_dir'], 'predictions.pkl'), 'wb') as f:
        pickle.dump(
            {
                'y_true': y_true,
                'y_pred': y_pred,
                'imgfiles': [im.split('/')[-1] for im in test_images]
            }, f)

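# Usage sketch (the path below is hypothetical; substitute
# cfg['test']['save_preds_dir']): reading back the predictions that
# test() pickles above.
import pickle

with open('predictions.pkl', 'rb') as f:
    preds = pickle.load(f)
print(sorted(preds.keys()))  # ['imgfiles', 'y_pred', 'y_true']
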
def predict(args, cfg):
    df = pd.read_csv(cfg['predict']['csv_filename'])
    logger = logging.getLogger('root')
    logger.info('PREDICT : START')
    logger.info('PREDICT: n={}'.format(len(df)))
    images = [osp.join(cfg['predict']['data_dir'], _) for _ in df['imgfile']]
    male = list(df['male'].astype('float32'))
    if cfg['predict']['coords']:
        coords = {k: np.asarray(df[k]) for k in ['x1', 'y1', 'x2', 'y2']}
    else:
        coords = None
    loader = builder.build_dataloader(cfg,
                                      data_info={'imgfiles': images,
                                                 'labels': [0] * len(images),
                                                 'male': male,
                                                 'coords': coords},
                                      mode='predict')
    model = builder.build_model(cfg['model']['name'], cfg['model']['params'])
    model.load_state_dict(torch.load(cfg['predict']['checkpoint'],
                                     map_location=lambda storage, loc: storage))
    model = model.eval().cuda()
    if cfg['predict']['params'] is None:
        cfg['predict']['params'] = {}
    if 'patch' in cfg['evaluation']['params'].keys():
        cfg['predict']['params']['patch'] = cfg['evaluation']['params']['patch']
    predictor = getattr(factory.evaluate, cfg['predict']['predictor'])
    predictor = predictor(loader=loader, **cfg['predict']['params'])
    _, y_pred, _ = predictor.predict(model, criterion=None, epoch=None)
    # Use .get() here since 'patch' is only set when present in the
    # evaluation params; indexing directly could raise a KeyError
    if 'percentile' in cfg['predict'].keys() and cfg['predict']['params'].get('patch'):
        y_pred = np.percentile(y_pred, cfg['predict']['percentile'], axis=1)
    if not osp.exists(cfg['predict']['save_preds_dir']):
        os.makedirs(cfg['predict']['save_preds_dir'])
    with open(osp.join(cfg['predict']['save_preds_dir'], 'predictions.pkl'), 'wb') as f:
        pickle.dump({
            'y_pred': y_pred,
            'imgfiles': [im.split('/')[-1] for im in images]
        }, f)

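# Sketch (toy values, not from the source) of the patch-percentile
# reduction used above: with patch-based inference, y_pred is assumed to
# have shape (n_images, n_patches), and np.percentile along axis=1
# collapses the patch scores into one score per image.
import numpy as np

y_pred = np.array([[0.2, 0.8, 0.5],
                   [0.1, 0.3, 0.9]])      # 2 images x 3 patches
print(np.percentile(y_pred, 50, axis=1))  # [0.5 0.3]
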
def __init__(self):
    root_path = os.path.dirname(os.path.dirname(__file__))
    if torch.cuda.is_available():
        self.device = torch.device("cuda")
        gpu_ids = list(range(torch.cuda.device_count()))
    else:
        self.device = torch.device("cpu")
        gpu_ids = []
    with open(os.path.join(root_path, "src", "configs", "experiment001.yaml")) as f:
        self.cfg = yaml.load(f, Loader=yaml.FullLoader)
    torch.hub.set_dir("/tmp")
    self.model = builder.build_model(self.cfg, 0)
    self.model.load_state_dict(
        torch.load(
            # self.cfg["predict"]["checkpoint"],
            os.path.join(root_path, "checkpoints", "experiment001",
                         "RET50_019_VM-0.2294.PTH"),
            map_location=lambda storage, loc: storage,
        ))
    self.model = self.model.eval()

def setup(args, cfg, train_df, valid_df):
    logger = logging.getLogger('root')
    if cfg['dataset']['name'] == 'FaceMaskDataset':
        train_images = [
            osp.join(cfg['dataset']['data_dir'], '{}'.format(_))
            for _ in train_df['imgfile']
        ]
        valid_images = [
            osp.join(cfg['dataset']['data_dir'], '{}'.format(_))
            for _ in valid_df['imgfile']
        ]
        train_labels = np.asarray(train_df['label'])
        valid_labels = np.asarray(valid_df['label'])
        train_masks = [
            osp.join(cfg['dataset']['data_dir'], '{}'.format(_))
            for _ in train_df['maskfile']
        ]
        valid_masks = [
            osp.join(cfg['dataset']['data_dir'], '{}'.format(_))
            for _ in valid_df['maskfile']
        ]
        train_loader = builder.build_dataloader(cfg,
                                                data_info={
                                                    'imgfiles': train_images,
                                                    'maskfiles': train_masks,
                                                    'labels': train_labels
                                                },
                                                mode='train')
        valid_loader = builder.build_dataloader(cfg,
                                                data_info={
                                                    'imgfiles': valid_images,
                                                    'maskfiles': valid_masks,
                                                    'labels': valid_labels
                                                },
                                                mode='valid')
    else:
        train_images = [
            osp.join(cfg['dataset']['data_dir'], '{}'.format(_))
            for _ in train_df['vidfile']
        ]
        valid_images = [
            osp.join(cfg['dataset']['data_dir'], '{}'.format(_))
            for _ in valid_df['vidfile']
        ]
        train_labels = np.asarray(train_df['label'])
        valid_labels = np.asarray(valid_df['label'])
        train_loader = builder.build_dataloader(cfg,
                                                data_info={
                                                    'vidfiles': train_images,
                                                    'labels': train_labels
                                                },
                                                mode='train')
        valid_loader = builder.build_dataloader(cfg,
                                                data_info={
                                                    'vidfiles': valid_images,
                                                    'labels': valid_labels
                                                },
                                                mode='valid')
    # Adjust steps per epoch if necessary (i.e., equal to 0)
    # We assume that if gradient accumulation is specified, the user has
    # already adjusted steps_per_epoch accordingly in the config file
    steps_per_epoch = cfg['train']['params']['steps_per_epoch']
    gradient_accmul = cfg['train']['params']['gradient_accumulation']
    if steps_per_epoch == 0:
        cfg['train']['params']['steps_per_epoch'] = len(train_loader)
    # if gradient_accmul > 1:
    #     new_steps_per_epoch = int(cfg['train']['params']['steps_per_epoch']
    #                               / gradient_accmul)
    #     cfg['train']['params']['steps_per_epoch'] = new_steps_per_epoch
    # Generic build function will work for model/loss
    logger.info('Building [{}] architecture ...'.format(cfg['model']['name']))
    if 'backbone' in cfg['model']['params'].keys():
        logger.info('  Using [{}] backbone ...'.format(
            cfg['model']['params']['backbone']))
    if 'pretrained' in cfg['model']['params'].keys():
        logger.info('  Pretrained weights : {}'.format(
            cfg['model']['params']['pretrained']))
    model = builder.build_model(cfg['model']['name'], cfg['model']['params'])
    model = model.train().cuda()
    if cfg['loss']['params'] is None:
        cfg['loss']['params'] = {}
    if re.search(r'^OHEM', cfg['loss']['name']):
        # OHEM-style losses need the total number of training steps
        cfg['loss']['params']['total_steps'] = (
            cfg['train']['params']['num_epochs'] *
            cfg['train']['params']['steps_per_epoch'])
    criterion = builder.build_loss(cfg['loss']['name'], cfg['loss']['params'])
    optimizer = builder.build_optimizer(cfg['optimizer']['name'],
                                        model.parameters(),
                                        cfg['optimizer']['params'])
    scheduler = builder.build_scheduler(cfg['scheduler']['name'],
                                        optimizer,
                                        cfg=cfg)
    if len(args.gpu) > 1:
        model = nn.DataParallel(model, device_ids=args.gpu)
    return cfg, \
        train_loader, \
        valid_loader, \
        model, \
        optimizer, \
        criterion, \
        scheduler

def setup(args, cfg, train_df, valid_df):
    logger = logging.getLogger('root')
    if isinstance(cfg['dataset']['data_dir'], list):
        # Map each ISIC release year to its image directory
        data_dir_dict = {
            2019: cfg['dataset']['data_dir'][0],
            2020: cfg['dataset']['data_dir'][1]
        }
        if len(cfg['dataset']['data_dir']) == 3:
            data_dir_dict[2021] = cfg['dataset']['data_dir'][2]
        train_images = []
        for rownum, row in train_df.iterrows():
            data_dir = data_dir_dict[row.isic]
            imgfile = osp.join(data_dir, f'{row.image}.jpg')
            train_images += [imgfile]
        valid_images = []
        for rownum, row in valid_df.iterrows():
            data_dir = data_dir_dict[row.isic]
            imgfile = osp.join(data_dir, f'{row.image}.jpg')
            valid_images += [imgfile]
    else:
        train_images = [osp.join(cfg['dataset']['data_dir'], f'{_}.jpg')
                        for _ in train_df['image'].values]
        valid_images = [osp.join(cfg['dataset']['data_dir'], f'{_}.jpg')
                        for _ in valid_df['image'].values]
    train_data_info = {
        'imgfiles': train_images,
        'labels': train_df['label'].values
    }
    valid_data_info = {
        'imgfiles': valid_images,
        'labels': valid_df['label'].values
    }
    if cfg['dataset'].pop('meta', False):
        train_data_info['meta'] = [
            dict(age=row['age_cat'],
                 sex=row['sex'],
                 ant=row['anatom_site_general_challenge'])
            for rownum, row in train_df.iterrows()
        ]
        valid_data_info['meta'] = [
            dict(age=row['age_cat'],
                 sex=row['sex'],
                 ant=row['anatom_site_general_challenge'])
            for rownum, row in valid_df.iterrows()
        ]
    train_loader = builder.build_dataloader(cfg,
                                            data_info=train_data_info,
                                            mode='train')
    valid_loader = builder.build_dataloader(cfg,
                                            data_info=valid_data_info,
                                            mode='valid')
    ARC = False
    if cfg['model']['name'] in ('ArcNet', 'SiameseNet'):
        # ArcNet/SiameseNet compare against a reference set of melanoma examples
        ARC = True
        if 'isic' in train_df.columns:
            mel_df = train_df[(train_df['label'] == 1) &
                              (train_df['isic'] == 2020)]
        else:
            mel_df = train_df[train_df['label'] == 1]
        mel_df = mel_df.drop_duplicates()
        arc_data_info = {
            'imgfiles': [osp.join(cfg['dataset']['data_dir'], f'{_}.jpg')
                         for _ in mel_df['image'].values],
            'labels': mel_df['label'].values
        }
        arc_loader = builder.build_dataloader(cfg,
                                              data_info=arc_data_info,
                                              mode='predict')
        print(f'{len(arc_loader)} melanoma examples will be used as reference ...')
    OHEM = False
    if 'ohem' in cfg['train']['params'] and cfg['train']['params']['ohem']:
        print('Creating benign loader ...')
        OHEM = True
        benign_df = train_df[train_df['label'] == 0]
        benign_data_info = {
            'imgfiles': [osp.join(cfg['dataset']['data_dir'], f'{_}.jpg')
                         for _ in benign_df['image'].values],
            'labels': benign_df['label'].values
        }
        benign_loader = builder.build_dataloader(cfg,
                                                 data_info=benign_data_info,
                                                 mode='predict')
    # Adjust steps per epoch if necessary (i.e., equal to 0)
    # We assume that if gradient accumulation is specified, the user has
    # already adjusted steps_per_epoch accordingly in the config file
    steps_per_epoch = cfg['train']['params']['steps_per_epoch']
    gradient_accmul = cfg['train']['params']['gradient_accumulation']
    if steps_per_epoch == 0:
        cfg['train']['params']['steps_per_epoch'] = len(train_loader)
    # Generic build function will work for model/loss
    logger.info('Building [{}] architecture ...'.format(cfg['model']['name']))
    if 'backbone' in cfg['model']['params'].keys():
        logger.info('  Using [{}] backbone ...'.format(cfg['model']['params']['backbone']))
    if 'pretrained' in cfg['model']['params'].keys():
        logger.info('  Pretrained weights : {}'.format(cfg['model']['params']['pretrained']))
    model = builder.build_model(cfg['model']['name'], cfg['model']['params'])
    model = model.train().cuda()
    if cfg['loss']['params'] is None:
        cfg['loss']['params'] = {}
    if re.search(r'^OHEM', cfg['loss']['name']):
        # OHEM-style losses need the total number of training steps
        cfg['loss']['params']['total_steps'] = (
            cfg['train']['params']['num_epochs'] *
            cfg['train']['params']['steps_per_epoch'])
    if cfg['loss']['name'] == 'CrossEntropyLoss':
        weighted = cfg['loss'].pop('weighted', False)
        if weighted:
            wts = get_invfreq_weights(train_data_info['labels'], scale=weighted)
            cfg['loss']['params']['weight'] = torch.tensor(wts)
            logger.info('Using the following class weights:')
            for i in range(len(wts)):
                logger.info(f'  Class {i} : {wts[i]:.4f}')
    criterion = builder.build_loss(cfg['loss']['name'], cfg['loss']['params'])
    optimizer = builder.build_optimizer(cfg['optimizer']['name'],
                                        model.parameters(),
                                        cfg['optimizer']['params'])
    scheduler = builder.build_scheduler(cfg['scheduler']['name'],
                                        optimizer,
                                        cfg=cfg)
    if len(args.gpu) > 1:
        print(f'DEVICES : {args.gpu}')
        model = nn.DataParallel(model, device_ids=args.gpu)
        if args.gpu[0] != 0:
            model.to(f'cuda:{model.device_ids[0]}')
    if ARC:
        valid_loader = (valid_loader, arc_loader)
    if OHEM:
        train_loader = (train_loader, benign_loader)
    return cfg, \
        train_loader, \
        valid_loader, \
        model, \
        optimizer, \
        criterion, \
        scheduler

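# get_invfreq_weights is called above but not defined in this snippet.
# A minimal sketch of what it might do, assuming inverse-frequency class
# weights with an optional scale factor; the name and signature come from
# the call site, the body is an assumption.
import numpy as np

def get_invfreq_weights(labels, scale=1.0):
    # Weight each class inversely to its frequency, normalized so that a
    # perfectly balanced dataset gets a weight of 1.0 per class
    _, counts = np.unique(labels, return_counts=True)
    wts = counts.sum() / (len(counts) * counts.astype('float64'))
    return wts * float(scale)
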
def predict_ensemble(args, cfg):
    df = pd.read_csv(cfg['predict']['csv_filename'])
    BATCH_SIZE = None
    if 'batch_size' in cfg['predict'].keys():
        BATCH_SIZE = cfg['predict']['batch_size']
    model_cfgs = []
    for cfgfile in cfg['model_configs']:
        with open(cfgfile) as f:
            model_cfgs.append(yaml.load(f, Loader=yaml.FullLoader))
    logger = logging.getLogger('root')
    logger.info('PREDICT : START')
    logger.info('PREDICT: n={}'.format(len(df)))
    images = [osp.join(cfg['predict']['data_dir'], _) for _ in df['imgfile']]
    male = list(df['male'].astype('float32'))
    if cfg['predict']['coords']:
        coords = {k: np.asarray(df[k]) for k in ['x1', 'y1', 'x2', 'y2']}
    else:
        coords = None
    loaders = []
    models = []
    for model_cfg in model_cfgs:
        model_cfg = set_inference_batch_size(model_cfg)
        if 'predict' not in model_cfg.keys():
            model_cfg['predict'] = copy.deepcopy(model_cfg['test'])
        if BATCH_SIZE:
            model_cfg['predict']['batch_size'] = BATCH_SIZE
        loaders.append(builder.build_dataloader(model_cfg,
                                                data_info={'imgfiles': images,
                                                           'labels': [0] * len(images),
                                                           'male': male,
                                                           'coords': coords},
                                                mode='predict'))
        model = builder.build_model(model_cfg['model']['name'],
                                    model_cfg['model']['params'])
        model.load_state_dict(torch.load(model_cfg['predict']['checkpoint'],
                                         map_location=lambda storage, loc: storage))
        model = model.eval().cuda()
        models.append(model)
    for model_cfg in model_cfgs:
        if model_cfg['predict']['params'] is None:
            model_cfg['predict']['params'] = {}
        if 'patch' in model_cfg['evaluation']['params'].keys():
            model_cfg['predict']['params']['patch'] = model_cfg['evaluation']['params']['patch']
    predictors = []
    for ind, model_cfg in enumerate(model_cfgs):
        predictor = getattr(factory.evaluate, model_cfg['predict']['predictor'])
        predictor = predictor(loader=loaders[ind], **model_cfg['predict']['params'])
        predictors.append(predictor)
    y_pred_list = []
    for ind, model_cfg in enumerate(model_cfgs):
        _, y_pred, _ = predictors[ind].predict(models[ind], criterion=None, epoch=None)
        # Use .get() here since 'patch' is only set when present in the
        # evaluation params; indexing directly could raise a KeyError
        if 'percentile' in model_cfg['predict'].keys() and model_cfg['predict']['params'].get('patch'):
            y_pred = np.percentile(y_pred, model_cfg['predict']['percentile'], axis=1)
        y_pred_list.append(y_pred)
    # Simple ensemble: average predictions across models
    y_pred = np.mean(np.asarray(y_pred_list), axis=0)
    if not osp.exists(cfg['predict']['save_preds_dir']):
        os.makedirs(cfg['predict']['save_preds_dir'])
    with open(osp.join(cfg['predict']['save_preds_dir'], 'predictions.pkl'), 'wb') as f:
        pickle.dump({
            'y_pred': y_pred,
            'imgfiles': [im.split('/')[-1] for im in images]
        }, f)

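# Sketch (toy values) of the ensemble step above: per-model prediction
# vectors are stacked and averaged along axis=0, giving one score per image.
import numpy as np

y_pred_list = [np.array([0.2, 0.6]), np.array([0.4, 0.8])]
print(np.mean(np.asarray(y_pred_list), axis=0))  # [0.3 0.7]
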
def setup(args, cfg, train_df, valid_df):
    logger = logging.getLogger('root')
    train_images = [osp.join(cfg['dataset']['data_dir'], _) for _ in train_df['imgfile']]
    valid_images = [osp.join(cfg['dataset']['data_dir'], _) for _ in valid_df['imgfile']]
    train_labels = list(train_df['boneage'])
    valid_labels = list(valid_df['boneage'])
    train_male = list(train_df['male'].astype('float32'))
    valid_male = list(valid_df['male'].astype('float32'))
    if cfg['dataset']['coords']:
        train_coords = {k: np.asarray(train_df[k]) for k in ['x1', 'y1', 'x2', 'y2']}
        valid_coords = {k: np.asarray(valid_df[k]) for k in ['x1', 'y1', 'x2', 'y2']}
    else:
        train_coords = None
        valid_coords = None
    train_loader = builder.build_dataloader(cfg,
                                            data_info={'imgfiles': train_images,
                                                       'labels': train_labels,
                                                       'male': train_male,
                                                       'coords': train_coords},
                                            mode='train')
    valid_loader = builder.build_dataloader(cfg,
                                            data_info={'imgfiles': valid_images,
                                                       'labels': valid_labels,
                                                       'male': valid_male,
                                                       'coords': valid_coords},
                                            mode='valid')
    # Adjust steps per epoch if necessary (i.e., equal to 0)
    # We assume that if gradient accumulation is specified, the user has
    # already adjusted steps_per_epoch accordingly in the config file
    steps_per_epoch = cfg['train']['params']['steps_per_epoch']
    gradient_accmul = cfg['train']['params']['gradient_accumulation']
    if steps_per_epoch == 0:
        cfg['train']['params']['steps_per_epoch'] = len(train_loader)
    # Generic build function will work for model/loss
    logger.info('Building [{}] architecture ...'.format(cfg['model']['name']))
    logger.info('  Using [{}] backbone ...'.format(cfg['model']['params']['backbone']))
    logger.info('  Pretrained weights : {}'.format(cfg['model']['params']['pretrained']))
    model = builder.build_model(cfg['model']['name'], cfg['model']['params'])
    model = model.train().cuda()
    if cfg['loss']['name'] == 'BalancedHybridLoss':
        # Bin bone age (in months) into 16 strata, then weight each stratum
        # by (max stratum count / its count) so rare ages get larger weights
        strata_weights = pd.cut(train_df['boneage'],
                                bins=[0, 24] + list(np.arange(12 * 3, 12 * 17, 12)) + [228],
                                labels=range(16))
        strata_weights = pd.DataFrame(strata_weights.value_counts()).reset_index().sort_values('index', ascending=True)
        strata_weights = strata_weights['boneage'].max() / strata_weights['boneage']
        strata_weights = np.asarray(strata_weights)
        cfg['loss']['params']['strata_weights'] = strata_weights
    criterion = builder.build_loss(cfg['loss']['name'], cfg['loss']['params'])
    optimizer = builder.build_optimizer(cfg['optimizer']['name'],
                                        model.parameters(),
                                        cfg['optimizer']['params'])
    scheduler = builder.build_scheduler(cfg['scheduler']['name'],
                                        optimizer,
                                        cfg=cfg)
    return cfg, \
        train_loader, \
        valid_loader, \
        model, \
        optimizer, \
        criterion, \
        scheduler

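# Worked example (toy ages, not from the source) of the boneage strata
# built above for BalancedHybridLoss: pd.cut maps bone age in months into
# 16 bins (0-24 months, one bin per year from age 2 through 16, and
# 192-228 months last), and the inverse-count weights are computed per bin.
import numpy as np
import pandas as pd

bins = [0, 24] + list(np.arange(12 * 3, 12 * 17, 12)) + [228]
ages = pd.Series([10, 30, 100, 200])
print(pd.cut(ages, bins=bins, labels=range(16)).tolist())  # [0, 1, 7, 15]
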