Example #1
def setup(args, cfg, train_ann, valid_ann):

    logger = logging.getLogger('root')

    train_loader = builder.build_dataloader(cfg, ann=train_ann, mode='train')
    valid_loader = builder.build_dataloader(cfg, ann=valid_ann, mode='valid')

    # Adjust steps per epoch if necessary (i.e., equal to 0)
    # We assume if gradient accumulation is specified, then the user
    # has already adjusted the steps_per_epoch accordingly in the
    # config file
    steps_per_epoch = cfg['train']['params']['steps_per_epoch']
    gradient_accmul = cfg['train']['params']['gradient_accumulation']
    if steps_per_epoch == 0:
        cfg['train']['params']['steps_per_epoch'] = len(train_loader)

    logger.info('Building [{}] architecture ...'.format(
        cfg['model']['config']))
    model = builder.build_model(cfg, args.gpu)
    model = model.train().cuda()

    optimizer = builder.build_optimizer(cfg['optimizer']['name'], model,
                                        cfg['optimizer']['params'])
    scheduler = builder.build_scheduler(cfg['scheduler']['name'],
                                        optimizer,
                                        cfg=cfg)

    return cfg, \
           train_loader, \
           valid_loader, \
           model, \
           optimizer, \
           scheduler
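
The comment above assumes the user has already divided steps_per_epoch by the accumulation factor in the config. A minimal sketch of that bookkeeping (hypothetical helper, not part of this project), assuming one optimizer step per gradient_accumulation batches:

def effective_steps_per_epoch(num_batches, gradient_accumulation=1):
    # Hypothetical helper: one optimizer step is taken every
    # `gradient_accumulation` batches, so step-counting schedulers
    # see fewer steps per epoch.
    return num_batches // gradient_accumulation

effective_steps_per_epoch(1000, 4)  # -> 250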
Example #2
File: run.py Project: prithvi1998/DFDC
 def create_model(cfg):
     model = builder.build_model(cfg['model']['name'],
                                 cfg['model']['params'])
     model.load_state_dict(
         torch.load(cfg['test']['checkpoint'],
                    map_location=lambda storage, loc: storage))
     model = model.eval().cuda()
     return model
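
The map_location=lambda storage, loc: storage idiom deserializes every tensor onto CPU regardless of the device the checkpoint was saved from; the explicit .cuda() afterwards moves the model to the current GPU. A minimal sketch ('checkpoint.pth' is an illustrative path):

import torch

# Returning the storage unchanged keeps tensors on CPU at load time;
# in current PyTorch this is equivalent to map_location='cpu'.
state = torch.load('checkpoint.pth', map_location=lambda storage, loc: storage)
state = torch.load('checkpoint.pth', map_location='cpu')  # same effect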
Example #3
 def create_model(cfg, checkpoint):
     model = builder.build_model(cfg['model']['name'], cfg['model']['params'])
     print('Loading <{}> model from <{}> ...'.format(cfg['model']['name'], checkpoint))
     weights = torch.load(checkpoint, map_location=lambda storage, loc: storage)
     weights = {k.replace('module.', '') : v for k,v in weights.items()}
     model.load_state_dict(weights)
     model = model.eval().cuda()
     return model
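
Example #3 additionally strips the module. prefix that nn.DataParallel prepends to parameter names, so a checkpoint saved from a wrapped model loads into a bare one. A minimal sketch demonstrating the prefix:

import torch.nn as nn

net = nn.Linear(4, 2)
wrapped = nn.DataParallel(net)
list(wrapped.state_dict())[0]  # 'module.weight'

# Stripping the prefix lets the weights load into an unwrapped model
weights = {k.replace('module.', ''): v for k, v in wrapped.state_dict().items()}
nn.Linear(4, 2).load_state_dict(weights)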
Example #4
File: run.py Project: prithvi1998/DFDC
def test(args, cfg, test_df):

    if 'csv_filename' in cfg['test'].keys():
        if cfg['test']['csv_filename']:
            test_df = pd.read_csv(cfg['test']['csv_filename'])

    logger = logging.getLogger('root')
    logger.info('TESTING : START')
    logger.info('TEST: n={}'.format(len(test_df)))

    if 'data_dir' in cfg['test'].keys():
        if cfg['test']['data_dir']:
            cfg['dataset']['data_dir'] = cfg['test']['data_dir']

    test_df = test_df[test_df['part'] != 45]
    test_images = [
        osp.join(cfg['dataset']['data_dir'], '{}'.format(_))
        for _ in test_df['vidfile']
    ]
    test_labels = np.asarray(test_df['label'])

    test_loader = builder.build_dataloader(cfg,
                                           data_info={
                                               'vidfiles': test_images,
                                               'labels': test_labels
                                           },
                                           mode='test')

    cfg['model']['params']['pretrained'] = None
    model = builder.build_model(cfg['model']['name'], cfg['model']['params'])
    model.load_state_dict(
        torch.load(cfg['test']['checkpoint'],
                   map_location=lambda storage, loc: storage))
    model = model.eval().cuda()

    if cfg['test'].get('params') is None:
        cfg['test']['params'] = {}

    predictor = getattr(factory_evaluate, cfg['test']['predictor'])
    predictor = predictor(loader=test_loader, **cfg['test']['params'])

    y_true, y_pred, _ = predictor.predict(model, criterion=None, epoch=None)

    if not osp.exists(cfg['test']['save_preds_dir']):
        os.makedirs(cfg['test']['save_preds_dir'])

    with open(osp.join(cfg['test']['save_preds_dir'], 'predictions.pkl'),
              'wb') as f:
        pickle.dump(
            {
                'y_true': y_true,
                'y_pred': y_pred,
                'imgfiles': [im.split('/')[-1] for im in test_images]
            }, f)
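
Reading the pickle back is symmetric; a minimal sketch (the path is illustrative):

import pickle

with open('predictions.pkl', 'rb') as f:
    preds = pickle.load(f)
# preds['y_true'], preds['y_pred'], preds['imgfiles']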
Example #5
def predict(args, cfg):

    df = pd.read_csv(cfg['predict']['csv_filename'])

    logger = logging.getLogger('root')
    logger.info('PREDICT : START')
    logger.info('PREDICT: n={}'.format(len(df)))

    images = [osp.join(cfg['predict']['data_dir'], _) for _ in df['imgfile']]
    male = list(df['male'].astype('float32'))
    if cfg['predict']['coords']:
        coords = {k : np.asarray(df[k]) for k in ['x1','y1','x2','y2']}
    else:
        coords = None

    loader = builder.build_dataloader(cfg, data_info={'imgfiles': images, 'labels': [0]*len(images), 'male': male, 'coords': coords}, mode='predict')

    model = builder.build_model(cfg['model']['name'], cfg['model']['params'])
    model.load_state_dict(torch.load(cfg['predict']['checkpoint'], map_location=lambda storage, loc: storage))
    model = model.eval().cuda()

    if cfg['predict']['params'] is None:
        cfg['predict']['params'] = {}
        if 'patch' in cfg['evaluation']['params']:
            cfg['predict']['params']['patch'] = cfg['evaluation']['params']['patch']

    predictor = getattr(factory.evaluate, cfg['predict']['predictor'])
    predictor = predictor(loader=loader,
        **cfg['predict']['params'])

    _, y_pred, _ = predictor.predict(model, criterion=None, epoch=None)

    if 'percentile' in cfg['predict'] and cfg['predict']['params'].get('patch'):
        y_pred = np.percentile(y_pred, cfg['predict']['percentile'], axis=1)

    if not osp.exists(cfg['predict']['save_preds_dir']):
        os.makedirs(cfg['predict']['save_preds_dir'])

    with open(osp.join(cfg['predict']['save_preds_dir'], 'predictions.pkl'), 'wb') as f:
        pickle.dump({
            'y_pred': y_pred,
            'imgfiles': [im.split('/')[-1] for im in images]
        }, f)
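
When the predictor runs patch-wise, y_pred carries one score per patch; the percentile step collapses the patch axis into a single score per sample. A minimal sketch with dummy scores:

import numpy as np

y_pred = np.random.rand(8, 5)               # 8 samples x 5 patches
scores = np.percentile(y_pred, 75, axis=1)  # shape (8,): one score per sample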
Example #6
    def __init__(self):
        root_path = os.path.dirname(os.path.dirname(__file__))

        if torch.cuda.is_available():
            self.device = torch.device("cuda")
            gpu_ids = list(range(torch.cuda.device_count()))
        else:
            self.device = torch.device("cpu")
            gpu_ids = []

        with open(
                os.path.join(root_path, "src", "configs",
                             "experiment001.yaml")) as f:
            self.cfg = yaml.load(f, Loader=yaml.FullLoader)
        torch.hub.set_dir("/tmp")
        self.model = builder.build_model(self.cfg, 0)
        self.model.load_state_dict(
            torch.load(
                # self.cfg["predict"]["checkpoint"],
                os.path.join(root_path, "checkpoints", "experiment001",
                             "RET50_019_VM-0.2294.PTH"),
                map_location=lambda storage, loc: storage,
            ))
        self.model = self.model.eval()
Example #7
File: run.py Project: prithvi1998/DFDC
def setup(args, cfg, train_df, valid_df):

    logger = logging.getLogger('root')

    if cfg['dataset']['name'] == 'FaceMaskDataset':
        train_images = [
            osp.join(cfg['dataset']['data_dir'], '{}'.format(_))
            for _ in train_df['imgfile']
        ]
        valid_images = [
            osp.join(cfg['dataset']['data_dir'], '{}'.format(_))
            for _ in valid_df['imgfile']
        ]
        train_labels = np.asarray(train_df['label'])
        valid_labels = np.asarray(valid_df['label'])
        train_masks = [
            osp.join(cfg['dataset']['data_dir'], '{}'.format(_))
            for _ in train_df['maskfile']
        ]
        valid_masks = [
            osp.join(cfg['dataset']['data_dir'], '{}'.format(_))
            for _ in valid_df['maskfile']
        ]

        train_loader = builder.build_dataloader(cfg,
                                                data_info={
                                                    'imgfiles': train_images,
                                                    'maskfiles': train_masks,
                                                    'labels': train_labels
                                                },
                                                mode='train')
        valid_loader = builder.build_dataloader(cfg,
                                                data_info={
                                                    'imgfiles': valid_images,
                                                    'maskfiles': valid_masks,
                                                    'labels': valid_labels
                                                },
                                                mode='valid')

    else:
        train_images = [
            osp.join(cfg['dataset']['data_dir'], '{}'.format(_))
            for _ in train_df['vidfile']
        ]
        valid_images = [
            osp.join(cfg['dataset']['data_dir'], '{}'.format(_))
            for _ in valid_df['vidfile']
        ]
        train_labels = np.asarray(train_df['label'])
        valid_labels = np.asarray(valid_df['label'])

        train_loader = builder.build_dataloader(cfg,
                                                data_info={
                                                    'vidfiles': train_images,
                                                    'labels': train_labels
                                                },
                                                mode='train')
        valid_loader = builder.build_dataloader(cfg,
                                                data_info={
                                                    'vidfiles': valid_images,
                                                    'labels': valid_labels
                                                },
                                                mode='valid')

    # Adjust steps per epoch if necessary (i.e., equal to 0)
    # We assume if gradient accumulation is specified, then the user
    # has already adjusted the steps_per_epoch accordingly in the
    # config file
    steps_per_epoch = cfg['train']['params']['steps_per_epoch']
    gradient_accmul = cfg['train']['params']['gradient_accumulation']
    if steps_per_epoch == 0:
        cfg['train']['params']['steps_per_epoch'] = len(train_loader)
        # if gradient_accmul > 1:
        #     new_steps_per_epoch = int(cfg['train']['params']['steps_per_epoch']
        #                               / gradient_accmul)
        #     cfg['train']['params']['steps_per_epoch'] = new_steps_per_epoch

    # Generic build function will work for model/loss
    logger.info('Building [{}] architecture ...'.format(cfg['model']['name']))
    if 'backbone' in cfg['model']['params'].keys():
        logger.info('  Using [{}] backbone ...'.format(
            cfg['model']['params']['backbone']))
    if 'pretrained' in cfg['model']['params'].keys():
        logger.info('  Pretrained weights : {}'.format(
            cfg['model']['params']['pretrained']))
    model = builder.build_model(cfg['model']['name'], cfg['model']['params'])
    model = model.train().cuda()

    if cfg['loss']['params'] is None:
        cfg['loss']['params'] = {}

    if re.search(r'^OHEM', cfg['loss']['name']):
        cfg['loss']['params']['total_steps'] = cfg['train']['params'][
            'num_epochs'] * cfg['train']['params']['steps_per_epoch']

    criterion = builder.build_loss(cfg['loss']['name'], cfg['loss']['params'])
    optimizer = builder.build_optimizer(cfg['optimizer']['name'],
                                        model.parameters(),
                                        cfg['optimizer']['params'])
    scheduler = builder.build_scheduler(cfg['scheduler']['name'],
                                        optimizer,
                                        cfg=cfg)

    if len(args.gpu) > 1:
        model = nn.DataParallel(model, device_ids=args.gpu)

    return cfg, \
           train_loader, \
           valid_loader, \
           model, \
           optimizer, \
           criterion, \
           scheduler
Example #8
def setup(args, cfg, train_df, valid_df):

    logger = logging.getLogger('root')

    if isinstance(cfg['dataset']['data_dir'], list):
        data_dir_dict = {
            2019: cfg['dataset']['data_dir'][0],
            2020: cfg['dataset']['data_dir'][1]
        }
        if len(cfg['dataset']['data_dir']) == 3:
            data_dir_dict[2021] = cfg['dataset']['data_dir'][2]
        train_images = []
        for rownum, row in train_df.iterrows():
            data_dir = data_dir_dict[row.isic]
            imgfile = osp.join(data_dir, f'{row.image}.jpg')
            train_images += [imgfile]
        valid_images = []
        for rownum, row in valid_df.iterrows():
            data_dir = data_dir_dict[row.isic]
            imgfile = osp.join(data_dir, f'{row.image}.jpg')
            valid_images += [imgfile]
    else:
        train_images = [osp.join(cfg['dataset']['data_dir'], f'{_}.jpg') for _ in train_df['image'].values]
        valid_images = [osp.join(cfg['dataset']['data_dir'], f'{_}.jpg') for _ in valid_df['image'].values]
    train_data_info = {
        'imgfiles': train_images,
        'labels': train_df['label'].values
    }
    valid_data_info = {
        'imgfiles': valid_images,
        'labels': valid_df['label'].values
    }
    if cfg['dataset'].pop('meta', False):
        train_data_info['meta'] = [dict(age=row['age_cat'],sex=row['sex'],ant=row['anatom_site_general_challenge']) for rownum, row in train_df.iterrows()]
        valid_data_info['meta'] = [dict(age=row['age_cat'],sex=row['sex'],ant=row['anatom_site_general_challenge']) for rownum, row in valid_df.iterrows()]
    train_loader = builder.build_dataloader(cfg, data_info=train_data_info, mode='train')
    valid_loader = builder.build_dataloader(cfg, data_info=valid_data_info, mode='valid')

    ARC = False
    if cfg['model']['name'] in ('ArcNet', 'SiameseNet'):
        ARC = True
        if 'isic' in train_df.columns:
            mel_df = train_df[(train_df['label'] == 1) & (train_df['isic'] == 2020)]
        else:
            mel_df = train_df[train_df['label'] == 1]
        mel_df = mel_df.drop_duplicates()
        arc_data_info = {
            'imgfiles': [osp.join(cfg['dataset']['data_dir'], f'{_}.jpg') for _ in mel_df['image'].values],
            'labels': mel_df['label'].values        
        }
        arc_loader = builder.build_dataloader(cfg, data_info=arc_data_info, mode='predict')
        print(f'{len(arc_loader)} melanoma examples will be used as reference ...')

    OHEM = False
    if 'ohem' in cfg['train']['params'] and cfg['train']['params']['ohem']:
        print('Creating benign loader ...')
        OHEM = True
        benign_df = train_df[train_df['label'] == 0]
        benign_data_info = {
            'imgfiles': [osp.join(cfg['dataset']['data_dir'], f'{_}.jpg') for _ in benign_df['image'].values],
            'labels': benign_df['label'].values        
        }
        benign_loader = builder.build_dataloader(cfg, data_info=benign_data_info, mode='predict')

    # Adjust steps per epoch if necessary (i.e., equal to 0)
    # We assume if gradient accumulation is specified, then the user
    # has already adjusted the steps_per_epoch accordingly in the 
    # config file
    steps_per_epoch = cfg['train']['params']['steps_per_epoch']
    gradient_accmul = cfg['train']['params']['gradient_accumulation']
    if steps_per_epoch == 0:
        cfg['train']['params']['steps_per_epoch'] = len(train_loader)

    # Generic build function will work for model/loss
    logger.info('Building [{}] architecture ...'.format(cfg['model']['name']))
    if 'backbone' in cfg['model']['params'].keys():
        logger.info('  Using [{}] backbone ...'.format(cfg['model']['params']['backbone']))
    if 'pretrained' in cfg['model']['params'].keys():
        logger.info('  Pretrained weights : {}'.format(cfg['model']['params']['pretrained']))
    model = builder.build_model(cfg['model']['name'], cfg['model']['params'])
    model = model.train().cuda()

    if cfg['loss']['params'] is None:
        cfg['loss']['params'] = {}

    if re.search(r'^OHEM', cfg['loss']['name']):
        cfg['loss']['params']['total_steps'] = cfg['train']['params']['num_epochs'] * cfg['train']['params']['steps_per_epoch']

    if cfg['loss']['name'] == 'CrossEntropyLoss':
        weighted = cfg['loss'].pop('weighted', False)
        if weighted:
            wts = get_invfreq_weights(train_data_info['labels'], scale=weighted)
            cfg['loss']['params']['weight'] = torch.tensor(wts)
            logger.info('Using the following class weights:')
            for i in range(len(wts)):
                logger.info(f'  Class {i} : {wts[i]:.4f}')

    criterion = builder.build_loss(cfg['loss']['name'], cfg['loss']['params'])
    optimizer = builder.build_optimizer(
        cfg['optimizer']['name'], 
        model.parameters(), 
        cfg['optimizer']['params'])
    scheduler = builder.build_scheduler(
        cfg['scheduler']['name'], 
        optimizer, 
        cfg=cfg)

    if len(args.gpu) > 1:
        print(f'DEVICES : {args.gpu}')
        model = nn.DataParallel(model, device_ids=args.gpu)
        if args.gpu[0] != 0:
            model.to(f'cuda:{model.device_ids[0]}')
            
    if ARC: valid_loader = (valid_loader, arc_loader)
    if OHEM: train_loader = (train_loader, benign_loader)
    
    return cfg, \
           train_loader, \
           valid_loader, \
           model, \
           optimizer, \
           criterion, \
           scheduler
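
get_invfreq_weights is not shown in these excerpts; a plausible minimal reimplementation (an assumption, not the project's actual code), producing per-class weights suitable for the weight argument of CrossEntropyLoss:

import numpy as np

def get_invfreq_weights(labels, scale=1.0):
    # Hypothetical: weight each class by inverse frequency,
    # normalized so the mean weight is 1, then scaled.
    _, counts = np.unique(labels, return_counts=True)
    weights = 1.0 / counts
    return scale * weights / weights.mean()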
Example #9
def predict_ensemble(args, cfg):

    df = pd.read_csv(cfg['predict']['csv_filename'])

    BATCH_SIZE = None
    if 'batch_size' in cfg['predict'].keys():
        BATCH_SIZE = cfg['predict']['batch_size']

    model_cfgs = []
    for cfgfile in cfg['model_configs']:
        with open(cfgfile) as f:
            model_cfgs.append(yaml.load(f, Loader=yaml.FullLoader))


    logger = logging.getLogger('root')
    logger.info('PREDICT : START')
    logger.info('PREDICT: n={}'.format(len(df)))

    images = [osp.join(cfg['predict']['data_dir'], _) for _ in df['imgfile']]
    male = list(df['male'].astype('float32'))
    if cfg['predict']['coords']:
        coords = {k : np.asarray(df[k]) for k in ['x1','y1','x2','y2']}
    else:
        coords = None

    loaders = []
    models  = []
    for model_cfg in model_cfgs:
        model_cfg = set_inference_batch_size(model_cfg)
        if 'predict' not in model_cfg.keys():
            model_cfg['predict'] = copy.deepcopy(model_cfg['test'])
        if BATCH_SIZE:
            model_cfg['predict']['batch_size'] = BATCH_SIZE
        loaders.append(builder.build_dataloader(model_cfg, data_info={'imgfiles': images, 'labels': [0]*len(images), 'male': male, 'coords': coords}, mode='predict'))
        model = builder.build_model(model_cfg['model']['name'], model_cfg['model']['params'])
        model.load_state_dict(torch.load(model_cfg['predict']['checkpoint'], map_location=lambda storage, loc: storage))
        model = model.eval().cuda()
        models.append(model)

    for model_cfg in model_cfgs:
        if model_cfg['predict']['params'] is None:
            model_cfg['predict']['params'] = {}
            if 'patch' in model_cfg['evaluation']['params']:
                model_cfg['predict']['params']['patch'] = model_cfg['evaluation']['params']['patch']

    predictors = []
    for ind, model_cfg in enumerate(model_cfgs):
        predictor = getattr(factory.evaluate, model_cfg['predict']['predictor'])
        predictor = predictor(loader=loaders[ind],
            **model_cfg['predict']['params'])
        predictors.append(predictor)

    y_pred_list = []
    for ind, model_cfg in enumerate(model_cfgs):
        _, y_pred, _ = predictors[ind].predict(models[ind], criterion=None, epoch=None)
        if 'percentile' in model_cfg['predict'] and model_cfg['predict']['params'].get('patch'):
            y_pred = np.percentile(y_pred, model_cfg['predict']['percentile'], axis=1)
        y_pred_list.append(y_pred)

    y_pred = np.mean(np.asarray(y_pred_list), axis=0)

    if not osp.exists(cfg['predict']['save_preds_dir']):
        os.makedirs(cfg['predict']['save_preds_dir'])

    with open(osp.join(cfg['predict']['save_preds_dir'], 'predictions.pkl'), 'wb') as f:
        pickle.dump({
            'y_pred': y_pred,
            'imgfiles': [im.split('/')[-1] for im in images]
        }, f)
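
The ensemble itself is an unweighted average over models; a minimal sketch:

import numpy as np

y_pred_list = [np.array([0.2, 0.8]), np.array([0.4, 0.6])]
y_pred = np.mean(np.asarray(y_pred_list), axis=0)  # array([0.3, 0.7])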
Example #10
def setup(args, cfg, train_df, valid_df):

    logger = logging.getLogger('root')

    train_images = [osp.join(cfg['dataset']['data_dir'], _) for _ in train_df['imgfile']]
    valid_images = [osp.join(cfg['dataset']['data_dir'], _) for _ in valid_df['imgfile']]

    train_labels = list(train_df['boneage'])
    valid_labels = list(valid_df['boneage'])

    train_male = list(train_df['male'].astype('float32'))
    valid_male = list(valid_df['male'].astype('float32'))

    if cfg['dataset']['coords']:
        train_coords = {k : np.asarray(train_df[k]) for k in ['x1','y1','x2','y2']}
        valid_coords = {k : np.asarray(valid_df[k]) for k in ['x1','y1','x2','y2']}
    else:
        train_coords = None
        valid_coords = None

    train_loader = builder.build_dataloader(cfg, data_info={'imgfiles': train_images, 'labels': train_labels, 'male': train_male, 'coords': train_coords}, mode='train')
    valid_loader = builder.build_dataloader(cfg, data_info={'imgfiles': valid_images, 'labels': valid_labels, 'male': valid_male, 'coords': valid_coords}, mode='valid')
    
    # Adjust steps per epoch if necessary (i.e., equal to 0)
    # We assume if gradient accumulation is specified, then the user
    # has already adjusted the steps_per_epoch accordingly in the 
    # config file
    steps_per_epoch = cfg['train']['params']['steps_per_epoch']
    gradient_accmul = cfg['train']['params']['gradient_accumulation']
    if steps_per_epoch == 0:
        cfg['train']['params']['steps_per_epoch'] = len(train_loader)


    # Generic build function will work for model/loss
    logger.info('Building [{}] architecture ...'.format(cfg['model']['name']))
    logger.info('  Using [{}] backbone ...'.format(cfg['model']['params']['backbone']))
    logger.info('  Pretrained weights : {}'.format(cfg['model']['params']['pretrained']))
    model = builder.build_model(cfg['model']['name'], cfg['model']['params'])
    model = model.train().cuda()

    if cfg['loss']['name'] == 'BalancedHybridLoss':
        # Bin bone ages (months) into 16 strata -- 0-2 yrs, 1-yr bins from
        # 2-16 yrs, then 16-19 yrs -- and weight each stratum by inverse
        # frequency (the most populous stratum gets weight 1).
        strata_weights = pd.cut(train_df['boneage'], bins=[0,24]+list(np.arange(12*3, 12*17, 12))+[228], labels=range(16))
        strata_weights = pd.DataFrame(strata_weights.value_counts()).reset_index().sort_values('index', ascending=True)
        strata_weights = strata_weights['boneage'].max() / strata_weights['boneage']
        strata_weights = np.asarray(strata_weights)
        cfg['loss']['params']['strata_weights'] = strata_weights
    criterion = builder.build_loss(cfg['loss']['name'], cfg['loss']['params'])
    optimizer = builder.build_optimizer(
        cfg['optimizer']['name'], 
        model.parameters(), 
        cfg['optimizer']['params'])
    scheduler = builder.build_scheduler(
        cfg['scheduler']['name'], 
        optimizer, 
        cfg=cfg)

    return cfg, \
           train_loader, \
           valid_loader, \
           model, \
           optimizer, \
           criterion, \
           scheduler
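
A toy check of the strata binning used for BalancedHybridLoss (illustrative bone ages in months):

import numpy as np
import pandas as pd

ages = pd.Series([10, 30, 100, 200])
strata = pd.cut(ages, bins=[0, 24] + list(np.arange(36, 204, 12)) + [228],
                labels=range(16))
list(strata)  # [0, 1, 7, 15] -- stratum index per age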