# Shared imports for the setup() variants below. `builder` and
# `get_invfreq_weights` are project-local; the exact import paths are
# assumptions and may differ in this repository.
import logging
import os.path as osp
import re

import numpy as np
import pandas as pd
import torch
from torch import nn

from factory import builder                      # assumed path
from factory.losses import get_invfreq_weights   # assumed path


# Variant 1: dataloaders built directly from annotation files.
def setup(args, cfg, train_ann, valid_ann):
    logger = logging.getLogger('root')

    train_loader = builder.build_dataloader(cfg, ann=train_ann, mode='train')
    valid_loader = builder.build_dataloader(cfg, ann=valid_ann, mode='valid')

    # Adjust steps per epoch if necessary (i.e., equal to 0).
    # We assume that if gradient accumulation is specified, the user has
    # already adjusted steps_per_epoch accordingly in the config file.
    steps_per_epoch = cfg['train']['params']['steps_per_epoch']
    gradient_accmul = cfg['train']['params']['gradient_accumulation']
    if steps_per_epoch == 0:
        cfg['train']['params']['steps_per_epoch'] = len(train_loader)

    logger.info('Building [{}] architecture ...'.format(cfg['model']['config']))
    model = builder.build_model(cfg, args.gpu)
    model = model.train().cuda()

    optimizer = builder.build_optimizer(
        cfg['optimizer']['name'],
        model,
        cfg['optimizer']['params'])
    scheduler = builder.build_scheduler(
        cfg['scheduler']['name'],
        optimizer,
        cfg=cfg)

    return cfg, \
        train_loader, \
        valid_loader, \
        model, \
        optimizer, \
        scheduler
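
# Usage sketch (not part of the original module): one way a train script
# might wire setup() into its entry point. The CLI flags, YAML config, and
# the `train_ann`/`valid_ann` config keys are all assumptions for
# illustration only.
def _example_main():
    import argparse
    import yaml

    parser = argparse.ArgumentParser()
    parser.add_argument('config')
    parser.add_argument('--gpu', type=int, nargs='+', default=[0])
    args = parser.parse_args()

    with open(args.config) as f:
        cfg = yaml.safe_load(f)

    cfg, train_loader, valid_loader, model, optimizer, scheduler = \
        setup(args, cfg,
              train_ann=cfg['dataset']['train_ann'],   # hypothetical keys
              valid_ann=cfg['dataset']['valid_ann'])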

# Variant 2: image/mask datasets (FaceMaskDataset) or video datasets.
def setup(args, cfg, train_df, valid_df):
    logger = logging.getLogger('root')

    if cfg['dataset']['name'] == 'FaceMaskDataset':
        train_images = [
            osp.join(cfg['dataset']['data_dir'], '{}'.format(_))
            for _ in train_df['imgfile']
        ]
        valid_images = [
            osp.join(cfg['dataset']['data_dir'], '{}'.format(_))
            for _ in valid_df['imgfile']
        ]
        train_labels = np.asarray(train_df['label'])
        valid_labels = np.asarray(valid_df['label'])
        train_masks = [
            osp.join(cfg['dataset']['data_dir'], '{}'.format(_))
            for _ in train_df['maskfile']
        ]
        valid_masks = [
            osp.join(cfg['dataset']['data_dir'], '{}'.format(_))
            for _ in valid_df['maskfile']
        ]
        train_loader = builder.build_dataloader(
            cfg,
            data_info={
                'imgfiles': train_images,
                'maskfiles': train_masks,
                'labels': train_labels
            },
            mode='train')
        valid_loader = builder.build_dataloader(
            cfg,
            data_info={
                'imgfiles': valid_images,
                'maskfiles': valid_masks,
                'labels': valid_labels
            },
            mode='valid')
    else:
        train_images = [
            osp.join(cfg['dataset']['data_dir'], '{}'.format(_))
            for _ in train_df['vidfile']
        ]
        valid_images = [
            osp.join(cfg['dataset']['data_dir'], '{}'.format(_))
            for _ in valid_df['vidfile']
        ]
        train_labels = np.asarray(train_df['label'])
        valid_labels = np.asarray(valid_df['label'])
        train_loader = builder.build_dataloader(
            cfg,
            data_info={'vidfiles': train_images, 'labels': train_labels},
            mode='train')
        valid_loader = builder.build_dataloader(
            cfg,
            data_info={'vidfiles': valid_images, 'labels': valid_labels},
            mode='valid')

    # Adjust steps per epoch if necessary (i.e., equal to 0).
    # We assume that if gradient accumulation is specified, the user has
    # already adjusted steps_per_epoch accordingly in the config file.
    steps_per_epoch = cfg['train']['params']['steps_per_epoch']
    gradient_accmul = cfg['train']['params']['gradient_accumulation']
    if steps_per_epoch == 0:
        cfg['train']['params']['steps_per_epoch'] = len(train_loader)

    # Generic build function will work for model/loss
    logger.info('Building [{}] architecture ...'.format(cfg['model']['name']))
    if 'backbone' in cfg['model']['params']:
        logger.info(' Using [{}] backbone ...'.format(
            cfg['model']['params']['backbone']))
    if 'pretrained' in cfg['model']['params']:
        logger.info(' Pretrained weights : {}'.format(
            cfg['model']['params']['pretrained']))
    model = builder.build_model(cfg['model']['name'], cfg['model']['params'])
    model = model.train().cuda()

    if cfg['loss']['params'] is None:
        cfg['loss']['params'] = {}
    if re.search(r'^OHEM', cfg['loss']['name']):
        cfg['loss']['params']['total_steps'] = \
            cfg['train']['params']['num_epochs'] * cfg['train']['params']['steps_per_epoch']
    criterion = builder.build_loss(cfg['loss']['name'], cfg['loss']['params'])
    optimizer = builder.build_optimizer(
        cfg['optimizer']['name'],
        model.parameters(),
        cfg['optimizer']['params'])
    scheduler = builder.build_scheduler(
        cfg['scheduler']['name'],
        optimizer,
        cfg=cfg)

    if len(args.gpu) > 1:
        model = nn.DataParallel(model, device_ids=args.gpu)

    return cfg, \
        train_loader, \
        valid_loader, \
        model, \
        optimizer, \
        criterion, \
        scheduler
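
# The steps_per_epoch comment above leans on a convention worth making
# explicit. A minimal sketch of that convention (hypothetical loop, not
# this repo's trainer): with gradient accumulation, the optimizer steps
# once every `accumulate` batches, so one optimizer step covers
# accumulate * batch_size samples.
def _accumulation_sketch(model, loader, optimizer, criterion, accumulate=1):
    optimizer.zero_grad()
    for i, (x, y) in enumerate(loader):
        loss = criterion(model(x.cuda()), y.cuda())
        (loss / accumulate).backward()   # average gradients over the window
        if (i + 1) % accumulate == 0:
            optimizer.step()
            optimizer.zero_grad()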

# Variant 3: melanoma classification (ISIC). Supports multi-year data
# directories, optional patient metadata, reference loaders for
# ArcNet/SiameseNet, and a benign loader for online hard example mining.
def setup(args, cfg, train_df, valid_df):
    logger = logging.getLogger('root')

    if isinstance(cfg['dataset']['data_dir'], list):
        # Map ISIC competition year to its data directory
        data_dir_dict = {
            2019: cfg['dataset']['data_dir'][0],
            2020: cfg['dataset']['data_dir'][1]
        }
        if len(cfg['dataset']['data_dir']) == 3:
            data_dir_dict[2021] = cfg['dataset']['data_dir'][2]
        train_images = []
        for rownum, row in train_df.iterrows():
            data_dir = data_dir_dict[row.isic]
            imgfile = osp.join(data_dir, f'{row.image}.jpg')
            train_images += [imgfile]
        valid_images = []
        for rownum, row in valid_df.iterrows():
            data_dir = data_dir_dict[row.isic]
            imgfile = osp.join(data_dir, f'{row.image}.jpg')
            valid_images += [imgfile]
    else:
        train_images = [osp.join(cfg['dataset']['data_dir'], f'{_}.jpg')
                        for _ in train_df['image'].values]
        valid_images = [osp.join(cfg['dataset']['data_dir'], f'{_}.jpg')
                        for _ in valid_df['image'].values]

    train_data_info = {
        'imgfiles': train_images,
        'labels': train_df['label'].values
    }
    valid_data_info = {
        'imgfiles': valid_images,
        'labels': valid_df['label'].values
    }
    if cfg['dataset'].pop('meta', False):
        train_data_info['meta'] = [
            dict(age=row['age_cat'],
                 sex=row['sex'],
                 ant=row['anatom_site_general_challenge'])
            for rownum, row in train_df.iterrows()
        ]
        valid_data_info['meta'] = [
            dict(age=row['age_cat'],
                 sex=row['sex'],
                 ant=row['anatom_site_general_challenge'])
            for rownum, row in valid_df.iterrows()
        ]
    train_loader = builder.build_dataloader(cfg, data_info=train_data_info, mode='train')
    valid_loader = builder.build_dataloader(cfg, data_info=valid_data_info, mode='valid')

    # ArcNet/SiameseNet need a loader of melanoma examples to use as references
    ARC = False
    if cfg['model']['name'] in ('ArcNet', 'SiameseNet'):
        ARC = True
        if 'isic' in train_df.columns:
            mel_df = train_df[(train_df['label'] == 1) & (train_df['isic'] == 2020)]
        else:
            mel_df = train_df[train_df['label'] == 1]
        mel_df = mel_df.drop_duplicates()
        arc_data_info = {
            'imgfiles': [osp.join(cfg['dataset']['data_dir'], f'{_}.jpg')
                         for _ in mel_df['image'].values],
            'labels': mel_df['label'].values
        }
        arc_loader = builder.build_dataloader(cfg, data_info=arc_data_info, mode='predict')
        print(f'{len(arc_loader)} melanoma examples will be used as reference ...')

    # Online hard example mining uses a separate loader of benign examples
    OHEM = False
    if cfg['train']['params'].get('ohem'):
        print('Creating benign loader ...')
        OHEM = True
        benign_df = train_df[train_df['label'] == 0]
        benign_data_info = {
            'imgfiles': [osp.join(cfg['dataset']['data_dir'], f'{_}.jpg')
                         for _ in benign_df['image'].values],
            'labels': benign_df['label'].values
        }
        benign_loader = builder.build_dataloader(cfg, data_info=benign_data_info, mode='predict')

    # Adjust steps per epoch if necessary (i.e., equal to 0).
    # We assume that if gradient accumulation is specified, the user has
    # already adjusted steps_per_epoch accordingly in the config file.
    steps_per_epoch = cfg['train']['params']['steps_per_epoch']
    gradient_accmul = cfg['train']['params']['gradient_accumulation']
    if steps_per_epoch == 0:
        cfg['train']['params']['steps_per_epoch'] = len(train_loader)

    # Generic build function will work for model/loss
    logger.info('Building [{}] architecture ...'.format(cfg['model']['name']))
    if 'backbone' in cfg['model']['params']:
        logger.info(' Using [{}] backbone ...'.format(
            cfg['model']['params']['backbone']))
    if 'pretrained' in cfg['model']['params']:
        logger.info(' Pretrained weights : {}'.format(
            cfg['model']['params']['pretrained']))
    model = builder.build_model(cfg['model']['name'], cfg['model']['params'])
    model = model.train().cuda()

    if cfg['loss']['params'] is None:
        cfg['loss']['params'] = {}
    if re.search(r'^OHEM', cfg['loss']['name']):
        cfg['loss']['params']['total_steps'] = \
            cfg['train']['params']['num_epochs'] * cfg['train']['params']['steps_per_epoch']
    if cfg['loss']['name'] == 'CrossEntropyLoss':
        weighted = cfg['loss'].pop('weighted', False)
        if weighted:
            wts = get_invfreq_weights(train_data_info['labels'], scale=weighted)
            cfg['loss']['params']['weight'] = torch.tensor(wts)
            logger.info('Using the following class weights:')
            for i in range(len(wts)):
                logger.info(f' Class {i} : {wts[i]:.4f}')
    criterion = builder.build_loss(cfg['loss']['name'], cfg['loss']['params'])
    optimizer = builder.build_optimizer(
        cfg['optimizer']['name'],
        model.parameters(),
        cfg['optimizer']['params'])
    scheduler = builder.build_scheduler(
        cfg['scheduler']['name'],
        optimizer,
        cfg=cfg)

    if len(args.gpu) > 1:
        print(f'DEVICES : {args.gpu}')
        model = nn.DataParallel(model, device_ids=args.gpu)
        if args.gpu[0] != 0:
            model.to(f'cuda:{model.device_ids[0]}')

    # Bundle the auxiliary loaders with their primary loaders
    if ARC:
        valid_loader = (valid_loader, arc_loader)
    if OHEM:
        train_loader = (train_loader, benign_loader)

    return cfg, \
        train_loader, \
        valid_loader, \
        model, \
        optimizer, \
        criterion, \
        scheduler
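
# get_invfreq_weights() is project-local and not shown here. Below is a
# minimal sketch of the inverse-frequency weighting it presumably
# implements; the `scale` handling is an assumption (above, the truthy
# `weighted` config value is passed through as `scale`).
def _invfreq_weights_sketch(labels, scale=1.0):
    _, counts = np.unique(labels, return_counts=True)
    weights = counts.max() / counts   # rarest class gets the largest weight
    return (weights * float(scale)).astype('float32')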

# Variant 4: bone age regression with sex input and optional hand
# bounding-box coordinates.
def setup(args, cfg, train_df, valid_df):
    logger = logging.getLogger('root')

    train_images = [osp.join(cfg['dataset']['data_dir'], _) for _ in train_df['imgfile']]
    valid_images = [osp.join(cfg['dataset']['data_dir'], _) for _ in valid_df['imgfile']]
    train_labels = list(train_df['boneage'])
    valid_labels = list(valid_df['boneage'])
    train_male = list(train_df['male'].astype('float32'))
    valid_male = list(valid_df['male'].astype('float32'))

    if cfg['dataset']['coords']:
        train_coords = {k: np.asarray(train_df[k]) for k in ['x1', 'y1', 'x2', 'y2']}
        valid_coords = {k: np.asarray(valid_df[k]) for k in ['x1', 'y1', 'x2', 'y2']}
    else:
        train_coords = None
        valid_coords = None

    train_loader = builder.build_dataloader(
        cfg,
        data_info={
            'imgfiles': train_images,
            'labels': train_labels,
            'male': train_male,
            'coords': train_coords
        },
        mode='train')
    valid_loader = builder.build_dataloader(
        cfg,
        data_info={
            'imgfiles': valid_images,
            'labels': valid_labels,
            'male': valid_male,
            'coords': valid_coords
        },
        mode='valid')

    # Adjust steps per epoch if necessary (i.e., equal to 0).
    # We assume that if gradient accumulation is specified, the user has
    # already adjusted steps_per_epoch accordingly in the config file.
    steps_per_epoch = cfg['train']['params']['steps_per_epoch']
    gradient_accmul = cfg['train']['params']['gradient_accumulation']
    if steps_per_epoch == 0:
        cfg['train']['params']['steps_per_epoch'] = len(train_loader)

    # Generic build function will work for model/loss
    logger.info('Building [{}] architecture ...'.format(cfg['model']['name']))
    logger.info(' Using [{}] backbone ...'.format(cfg['model']['params']['backbone']))
    logger.info(' Pretrained weights : {}'.format(cfg['model']['params']['pretrained']))
    model = builder.build_model(cfg['model']['name'], cfg['model']['params'])
    model = model.train().cuda()

    if cfg['loss']['name'] == 'BalancedHybridLoss':
        # Weight each age stratum by inverse frequency: one 0-24 month bin,
        # then 12-month bins from 24 to 192 months, then a final 192-228
        # month bin (16 strata total).
        strata_weights = pd.cut(
            train_df['boneage'],
            bins=[0, 24] + list(np.arange(12 * 3, 12 * 17, 12)) + [228],
            labels=range(16))
        # Note: relies on pre-2.0 pandas value_counts()/reset_index() naming
        strata_weights = pd.DataFrame(strata_weights.value_counts()) \
            .reset_index().sort_values('index', ascending=True)
        strata_weights = strata_weights['boneage'].max() / strata_weights['boneage']
        strata_weights = np.asarray(strata_weights)
        cfg['loss']['params']['strata_weights'] = strata_weights

    criterion = builder.build_loss(cfg['loss']['name'], cfg['loss']['params'])
    optimizer = builder.build_optimizer(
        cfg['optimizer']['name'],
        model.parameters(),
        cfg['optimizer']['params'])
    scheduler = builder.build_scheduler(
        cfg['scheduler']['name'],
        optimizer,
        cfg=cfg)

    return cfg, \
        train_loader, \
        valid_loader, \
        model, \
        optimizer, \
        criterion, \
        scheduler
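
# Illustrative check (not part of the original script) of the strata
# weighting above on synthetic bone ages in months. sort_index() is used
# here instead of the version-sensitive reset_index() dance.
def _strata_weights_demo():
    ages = pd.Series(np.random.randint(1, 228, size=1000), name='boneage')
    strata = pd.cut(ages,
                    bins=[0, 24] + list(np.arange(12 * 3, 12 * 17, 12)) + [228],
                    labels=range(16))
    counts = strata.value_counts().sort_index()
    print((counts.max() / counts).values)  # rare strata get larger weights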