예제 #1
0
def setup(args, cfg, train_ann, valid_ann):
    """Build the data loaders, model, optimizer and scheduler for training.

    Args:
        args: Parsed command-line arguments. Only ``args.gpu`` is read here;
            it is forwarded to ``builder.build_model``.
        cfg: Nested configuration dict. Reads ``cfg['train']['params']``,
            ``cfg['model']['config']``, ``cfg['optimizer']`` and
            ``cfg['scheduler']``. Mutated in place: ``steps_per_epoch`` is
            replaced by ``len(train_loader)`` when it is configured as 0.
        train_ann: Training annotations, passed to
            ``builder.build_dataloader`` as ``ann``.
        valid_ann: Validation annotations, passed to
            ``builder.build_dataloader`` as ``ann``.

    Returns:
        The tuple ``(cfg, train_loader, valid_loader, model, optimizer,
        scheduler)``.
    """
    logger = logging.getLogger('root')

    train_loader = builder.build_dataloader(cfg, ann=train_ann, mode='train')
    valid_loader = builder.build_dataloader(cfg, ann=valid_ann, mode='valid')

    # A configured value of 0 means "use the length of the training loader".
    # If gradient accumulation is specified, the user is assumed to have
    # already adjusted steps_per_epoch in the config file, so the value is
    # not rescaled here (and the accumulation setting is not read at all).
    train_params = cfg['train']['params']
    if train_params['steps_per_epoch'] == 0:
        train_params['steps_per_epoch'] = len(train_loader)

    logger.info('Building [{}] architecture ...'.format(
        cfg['model']['config']))
    model = builder.build_model(cfg, args.gpu)
    model = model.train().cuda()

    # NOTE: this variant hands the model itself (not model.parameters())
    # to the optimizer builder.
    optimizer = builder.build_optimizer(cfg['optimizer']['name'], model,
                                        cfg['optimizer']['params'])
    scheduler = builder.build_scheduler(cfg['scheduler']['name'],
                                        optimizer,
                                        cfg=cfg)

    return cfg, train_loader, valid_loader, model, optimizer, scheduler
예제 #2
0
파일: run.py 프로젝트: prithvi1998/DFDC
def setup(args, cfg, train_df, valid_df):
    """Build loaders, model, loss, optimizer and scheduler for training.

    Args:
        args: Parsed command-line arguments. ``args.gpu`` is read as a
            sequence of device ids; more than one id wraps the model in
            ``nn.DataParallel``.
        cfg: Nested configuration dict. Reads the ``dataset``, ``train``,
            ``model``, ``loss``, ``optimizer`` and ``scheduler`` sections.
            Mutated in place: ``steps_per_epoch`` is filled in when it is 0,
            ``cfg['loss']['params']`` is replaced by ``{}`` when it is
            ``None``, and ``total_steps`` is added to the loss params when
            the loss name starts with ``OHEM``.
        train_df: Training table. Must provide ``label`` plus either
            ``imgfile``/``maskfile`` (``FaceMaskDataset``) or ``vidfile``
            (any other dataset name).
        valid_df: Validation table with the same columns as ``train_df``.

    Returns:
        The tuple ``(cfg, train_loader, valid_loader, model, optimizer,
        criterion, scheduler)``.
    """
    logger = logging.getLogger('root')

    data_dir = cfg['dataset']['data_dir']

    def resolve(filenames):
        # Prefix every entry with the dataset root directory.
        return [osp.join(data_dir, '{}'.format(name)) for name in filenames]

    def describe(df):
        # Assemble the data_info dict expected for the configured dataset.
        if cfg['dataset']['name'] == 'FaceMaskDataset':
            return {
                'imgfiles': resolve(df['imgfile']),
                'maskfiles': resolve(df['maskfile']),
                'labels': np.asarray(df['label']),
            }
        return {
            'vidfiles': resolve(df['vidfile']),
            'labels': np.asarray(df['label']),
        }

    train_loader = builder.build_dataloader(cfg,
                                            data_info=describe(train_df),
                                            mode='train')
    valid_loader = builder.build_dataloader(cfg,
                                            data_info=describe(valid_df),
                                            mode='valid')

    # A configured value of 0 means "use the length of the training loader".
    # If gradient accumulation is specified, the user is assumed to have
    # already adjusted steps_per_epoch in the config file, so the value is
    # not rescaled here.
    train_params = cfg['train']['params']
    if train_params['steps_per_epoch'] == 0:
        train_params['steps_per_epoch'] = len(train_loader)

    # Generic build function will work for model/loss
    model_params = cfg['model']['params']
    logger.info('Building [{}] architecture ...'.format(cfg['model']['name']))
    if 'backbone' in model_params:
        logger.info('  Using [{}] backbone ...'.format(
            model_params['backbone']))
    if 'pretrained' in model_params:
        logger.info('  Pretrained weights : {}'.format(
            model_params['pretrained']))
    model = builder.build_model(cfg['model']['name'], model_params)
    model = model.train().cuda()

    if cfg['loss']['params'] is None:
        cfg['loss']['params'] = {}

    # OHEM-style losses are given the total number of optimisation steps.
    if re.search(r'^OHEM', cfg['loss']['name']):
        cfg['loss']['params']['total_steps'] = (
            train_params['num_epochs'] * train_params['steps_per_epoch'])

    criterion = builder.build_loss(cfg['loss']['name'], cfg['loss']['params'])
    optimizer = builder.build_optimizer(cfg['optimizer']['name'],
                                        model.parameters(),
                                        cfg['optimizer']['params'])
    scheduler = builder.build_scheduler(cfg['scheduler']['name'],
                                        optimizer,
                                        cfg=cfg)

    # Wrap only after the optimizer has been built from the bare model.
    if len(args.gpu) > 1:
        model = nn.DataParallel(model, device_ids=args.gpu)

    return (cfg, train_loader, valid_loader, model, optimizer, criterion,
            scheduler)
예제 #3
0
def setup(args, cfg, train_df, valid_df):
    """Build loaders, model, loss, optimizer and scheduler for training.

    Args:
        args: Parsed command-line arguments. ``args.gpu`` is read as a
            sequence of device ids; more than one id wraps the model in
            ``nn.DataParallel``.
        cfg: Nested configuration dict. ``cfg['dataset']['data_dir']`` is
            either a single directory or a list of two or three directories
            mapped, in order, to the ``isic`` values 2019, 2020 and 2021.
            Mutated in place: ``cfg['dataset']['meta']`` and
            ``cfg['loss']['weighted']`` are popped, ``steps_per_epoch`` is
            filled in when it is 0, ``cfg['loss']['params']`` is replaced by
            ``{}`` when it is ``None``, and ``total_steps`` / ``weight`` may
            be added to the loss params.
        train_df: Training table with ``image`` and ``label`` columns; an
            ``isic`` column is required when ``data_dir`` is a list, and
            ``age_cat``, ``sex`` and ``anatom_site_general_challenge`` are
            required when the ``meta`` option is set.
        valid_df: Validation table with the same columns as ``train_df``.

    Returns:
        The tuple ``(cfg, train_loader, valid_loader, model, optimizer,
        criterion, scheduler)``. ``valid_loader`` becomes
        ``(valid_loader, arc_loader)`` for the ``ArcNet``/``SiameseNet``
        models and ``train_loader`` becomes ``(train_loader, benign_loader)``
        when ``cfg['train']['params']['ohem']`` is truthy.
    """
    logger = logging.getLogger('root')

    data_dir = cfg['dataset']['data_dir']
    if isinstance(data_dir, list):
        year_to_dir = {2019: data_dir[0], 2020: data_dir[1]}
        if len(data_dir) == 3:
            year_to_dir[2021] = data_dir[2]

        def image_paths(df):
            # Each row picks its directory from its own ``isic`` value.
            return [osp.join(year_to_dir[year], f'{image}.jpg')
                    for year, image in zip(df['isic'], df['image'])]
    else:
        def image_paths(df):
            return [osp.join(data_dir, f'{image}.jpg')
                    for image in df['image'].values]

    def meta_records(df):
        # One dict of auxiliary features per row, in row order.
        columns = zip(df['age_cat'], df['sex'],
                      df['anatom_site_general_challenge'])
        return [dict(age=age, sex=sex, ant=ant) for age, sex, ant in columns]

    train_data_info = {
        'imgfiles': image_paths(train_df),
        'labels': train_df['label'].values
    }
    valid_data_info = {
        'imgfiles': image_paths(valid_df),
        'labels': valid_df['label'].values
    }
    # pop() removes the flag from cfg['dataset'] before the loaders are built.
    if cfg['dataset'].pop('meta', False):
        train_data_info['meta'] = meta_records(train_df)
        valid_data_info['meta'] = meta_records(valid_df)
    train_loader = builder.build_dataloader(cfg, data_info=train_data_info, mode='train')
    valid_loader = builder.build_dataloader(cfg, data_info=valid_data_info, mode='valid')

    use_arc = cfg['model']['name'] in ('ArcNet', 'SiameseNet')
    if use_arc:
        # Reference set: positive-label rows, limited to isic == 2020 when
        # that column is available.
        is_reference = train_df['label'] == 1
        if 'isic' in train_df.columns:
            is_reference = is_reference & (train_df['isic'] == 2020)
        mel_df = train_df[is_reference].drop_duplicates()
        arc_data_info = {
            # Resolved through image_paths so a list-valued data_dir works;
            # joining the raw config value raised TypeError in that case.
            'imgfiles': image_paths(mel_df),
            'labels': mel_df['label'].values
        }
        arc_loader = builder.build_dataloader(cfg, data_info=arc_data_info, mode='predict')
        print(f'{len(arc_loader)} melanoma examples will be used as reference ...')

    use_ohem = bool(cfg['train']['params'].get('ohem'))
    if use_ohem:
        print('Creating benign loader ...')
        benign_df = train_df[train_df['label'] == 0]
        benign_data_info = {
            'imgfiles': image_paths(benign_df),
            'labels': benign_df['label'].values
        }
        benign_loader = builder.build_dataloader(cfg, data_info=benign_data_info, mode='predict')

    # A configured value of 0 means "use the length of the training loader".
    # If gradient accumulation is specified, the user is assumed to have
    # already adjusted steps_per_epoch in the config file, so the value is
    # not rescaled here.
    train_params = cfg['train']['params']
    if train_params['steps_per_epoch'] == 0:
        train_params['steps_per_epoch'] = len(train_loader)

    # Generic build function will work for model/loss
    model_params = cfg['model']['params']
    logger.info('Building [{}] architecture ...'.format(cfg['model']['name']))
    if 'backbone' in model_params:
        logger.info('  Using [{}] backbone ...'.format(model_params['backbone']))
    if 'pretrained' in model_params:
        logger.info('  Pretrained weights : {}'.format(model_params['pretrained']))
    model = builder.build_model(cfg['model']['name'], model_params)
    model = model.train().cuda()

    if cfg['loss']['params'] is None:
        cfg['loss']['params'] = {}

    # OHEM-style losses are given the total number of optimisation steps.
    if re.search(r'^OHEM', cfg['loss']['name']):
        cfg['loss']['params']['total_steps'] = (
            train_params['num_epochs'] * train_params['steps_per_epoch'])

    if cfg['loss']['name'] == 'CrossEntropyLoss':
        # A truthy 'weighted' value is also forwarded as the ``scale``
        # argument of get_invfreq_weights.
        weighted = cfg['loss'].pop('weighted', False)
        if weighted:
            wts = get_invfreq_weights(train_data_info['labels'], scale=weighted)
            cfg['loss']['params']['weight'] = torch.tensor(wts)
            logger.info('Using the following class weights:')
            for i, wt in enumerate(wts):
                logger.info(f'  Class {i} : {wt:.4f}')

    criterion = builder.build_loss(cfg['loss']['name'], cfg['loss']['params'])
    optimizer = builder.build_optimizer(
        cfg['optimizer']['name'],
        model.parameters(),
        cfg['optimizer']['params'])
    scheduler = builder.build_scheduler(
        cfg['scheduler']['name'],
        optimizer,
        cfg=cfg)

    if len(args.gpu) > 1:
        print(f'DEVICES : {args.gpu}')
        model = nn.DataParallel(model, device_ids=args.gpu)
        # Move the wrapped model to the first listed device when that
        # device is not cuda:0.
        if args.gpu[0] != 0:
            model.to(f'cuda:{model.device_ids[0]}')

    if use_arc:
        valid_loader = (valid_loader, arc_loader)
    if use_ohem:
        train_loader = (train_loader, benign_loader)

    return (cfg, train_loader, valid_loader, model, optimizer, criterion,
            scheduler)
예제 #4
0
def setup(args, cfg, train_df, valid_df):
    """Build loaders, model, loss, optimizer and scheduler for training.

    Args:
        args: Parsed command-line arguments (not read by this function).
        cfg: Nested configuration dict. Reads the ``dataset``, ``train``,
            ``model``, ``loss``, ``optimizer`` and ``scheduler`` sections.
            Mutated in place: ``steps_per_epoch`` is filled in when it is 0
            and ``strata_weights`` is added to the loss params when the loss
            is ``BalancedHybridLoss``.
        train_df: Training table with ``imgfile``, ``boneage`` and ``male``
            columns, plus ``x1``, ``y1``, ``x2``, ``y2`` when
            ``cfg['dataset']['coords']`` is truthy.
        valid_df: Validation table with the same columns as ``train_df``.

    Returns:
        The tuple ``(cfg, train_loader, valid_loader, model, optimizer,
        criterion, scheduler)``.
    """
    logger = logging.getLogger('root')

    data_dir = cfg['dataset']['data_dir']
    use_coords = cfg['dataset']['coords']

    def describe(df):
        # Assemble the data_info dict handed to the dataloader builder.
        coords = None
        if use_coords:
            coords = {k: np.asarray(df[k]) for k in ['x1', 'y1', 'x2', 'y2']}
        return {
            'imgfiles': [osp.join(data_dir, _) for _ in df['imgfile']],
            'labels': list(df['boneage']),
            'male': list(df['male'].astype('float32')),
            'coords': coords,
        }

    train_loader = builder.build_dataloader(cfg, data_info=describe(train_df), mode='train')
    valid_loader = builder.build_dataloader(cfg, data_info=describe(valid_df), mode='valid')

    # A configured value of 0 means "use the length of the training loader".
    # If gradient accumulation is specified, the user is assumed to have
    # already adjusted steps_per_epoch in the config file, so the value is
    # not rescaled here.
    train_params = cfg['train']['params']
    if train_params['steps_per_epoch'] == 0:
        train_params['steps_per_epoch'] = len(train_loader)

    # Generic build function will work for model/loss
    model_params = cfg['model']['params']
    logger.info('Building [{}] architecture ...'.format(cfg['model']['name']))
    if 'backbone' in model_params:
        logger.info('  Using [{}] backbone ...'.format(model_params['backbone']))
    if 'pretrained' in model_params:
        logger.info('  Pretrained weights : {}'.format(model_params['pretrained']))
    model = builder.build_model(cfg['model']['name'], model_params)
    model = model.train().cuda()

    if cfg['loss']['name'] == 'BalancedHybridLoss':
        # Bin boneage into 16 strata: (0, 24], twelve-unit-wide bins up to
        # 192, then (192, 228].
        strata = pd.cut(train_df['boneage'], bins=[0,24]+list(np.arange(12*3, 12*17, 12))+[228], labels=range(16))
        # Per-stratum counts in stratum order. Sorting the value_counts()
        # index avoids relying on the column names produced by
        # reset_index(), which differ between pandas versions.
        counts = strata.value_counts().sort_index()
        # Weight of a stratum = largest stratum count / its own count.
        strata_weights = np.asarray(counts.max() / counts)
        if cfg['loss']['params'] is None:
            cfg['loss']['params'] = {}
        cfg['loss']['params']['strata_weights'] = strata_weights
    criterion = builder.build_loss(cfg['loss']['name'], cfg['loss']['params'])
    optimizer = builder.build_optimizer(
        cfg['optimizer']['name'],
        model.parameters(),
        cfg['optimizer']['params'])
    scheduler = builder.build_scheduler(
        cfg['scheduler']['name'],
        optimizer,
        cfg=cfg)

    return (cfg, train_loader, valid_loader, model, optimizer, criterion,
            scheduler)