Example no. 1
def get_dataloaders(path_to_data, datasets, args=ARGS):
    train_stages = datasets.keys()

    ## Data reader classes
    storms_data_reader = {
        'train': StormsDatasetSequence(datasets['train'], nb_imgs=args.sequence_length, gap=args.sequence_gap),
        'valid': StormsDatasetSequence(datasets['valid'], nb_imgs=args.sequence_length, gap=args.sequence_gap),
        'fold': StormsDatasetSequence(datasets['fold'], nb_imgs=args.sequence_length, gap=args.sequence_gap),
        'test': StormsDatasetSequence(datasets['test'], nb_imgs=args.sequence_length, gap=args.sequence_gap),
    }

    ## Load images function
    load_function = {
        'train': storms_data_reader['train'].read,
        'valid': storms_data_reader['valid'].read,
        'fold': storms_data_reader['fold'].read,
        'test': storms_data_reader['test'].read,
    }

    # Data Feeders
    dsmean, dsstd = DATASET_MEAN / 255, DATASET_STD / 255
    img_pre_process_tt = transforms.Compose([
        transforms.Normalize(mean=dsmean, std=dsstd)
    ])

    def preprocess(imgs):
        imgs = [img_pre_process_tt(s) for s in imgs]
        return torch.stack(imgs).to(torch.float32)

    transformations = args.transformations
    data_feeder_args = {
        'train': {},
        'valid': {},
        'fold': {},
        'test': {'predict': True},
    }

    # Data Feeders
    data_feeders = _generate_data_feeders(datasets, load_function, transformations, preprocess, train_stages,
                                          data_feeder_args)

    def worker_init_fn(worker_id):
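        # Re-seed NumPy in each DataLoader worker so random augmentations
        # differ across workers instead of replaying the parent's RNG stream.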
        np.random.seed(np.random.get_state()[1][0] + worker_id)

    ## Data Loader
    data_loader_args = {
        "train": {'batch_size': args.train_batch_size, 'shuffle': True, 'worker_init_fn': worker_init_fn, },
        "valid": {'batch_size': args.valid_batch_size, 'shuffle': False, 'worker_init_fn': worker_init_fn, },
        "fold": {'batch_size': args.valid_batch_size, 'shuffle': False, 'worker_init_fn': worker_init_fn, },
        "test": {'batch_size': args.test_batch_size, 'shuffle': False, 'worker_init_fn': worker_init_fn, },
    }
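    # On a CUDA device, pin host memory and use worker processes to speed up
    # host-to-GPU batch transfers.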
    if 'cuda' in DEVICE:
        for dct in data_loader_args.values():
            dct.update({'pin_memory': True, 'num_workers': args.num_workers})
    # Data Loaders
    data_loaders = _generate_data_loaders(data_feeders, train_stages, data_loader_args)

    return data_loaders
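
# Usage sketch for get_dataloaders above (hedged: the split DataFrames are
# placeholder arguments, not names from the original project).
def _example_usage(path_to_data, train_df, valid_df, fold_df, test_df):
    datasets = {'train': train_df, 'valid': valid_df,
                'fold': fold_df, 'test': test_df}
    data_loaders = get_dataloaders(path_to_data, datasets)
    # Each loader yields batches of stacked, normalized image sequences.
    for batch in data_loaders['train']:
        break
    return data_loaders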

Example no. 2
def main(only_test):

    # Get dataset
    dset_df = get_storms_df(os.path.join(PATH_TO_ROOT, DATA_DIR))
    if only_test:
        dset_df = dset_df[dset_df.test]

    # Output directory
    os.makedirs(SAVE_TO, exist_ok=True)

    global storms_data_reader
    storms_data_reader = StormsDatasetSequence(dset_df, nb_imgs=5, gap=0.5)

    max_workers = 16

    from concurrent.futures import ThreadPoolExecutor, as_completed
    with tqdm(total=len(storms_data_reader)) as pbar:
        with ThreadPoolExecutor(max_workers=max_workers) as ex:
            futures = [
                ex.submit(process_img, i_img)
                for i_img in range(len(storms_data_reader))
            ]
            for future in as_completed(futures):
                future.result()  # surface any exception raised in a worker
                pbar.update(1)
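
# process_img is not shown in this excerpt. A minimal sketch of what such a
# worker could look like, assuming StormsDatasetSequence.read(i) returns the
# i-th image sequence and that results are cached under SAVE_TO (both the
# body and the .npy layout are assumptions, not the original implementation):
def _process_img_sketch(i_img):
    imgs = storms_data_reader.read(i_img)               # load one sequence
    out_path = os.path.join(SAVE_TO, f'{i_img:06d}.npy')
    np.save(out_path, np.asarray(imgs))                 # cache to disk
    return out_path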

Example no. 3
def get_dataloaders(path_to_data, datasets, args=ARGS):
    train_stages = datasets.keys()

    ## Data reader classes
    storms_data_reader = {
        'train': StormsDatasetSequence(datasets['train'],
                                       nb_imgs=args.sequence_length,
                                       gap=args.sequence_gap,
                                       missing='black',
                                       max_block_size=5),
        'valid': StormsDatasetSequence(datasets['valid'],
                                       nb_imgs=args.sequence_length,
                                       gap=args.sequence_gap,
                                       missing='black'),
        'fold': StormsDatasetSequence(datasets['fold'],
                                      nb_imgs=args.sequence_length,
                                      gap=args.sequence_gap,
                                      missing='black'),
        'test': StormsDatasetSequence(datasets['test'],
                                      nb_imgs=args.sequence_length,
                                      gap=args.sequence_gap,
                                      missing='black'),
    }

    ## Load images function
    load_function = {
        'train': storms_data_reader['train'].read,
        'valid': storms_data_reader['valid'].read,
        'fold': storms_data_reader['fold'].read,
        'test': storms_data_reader['test'].read,
    }

    # Data Feeders
    dsmean, dsstd = DATASET_MEAN / 255, DATASET_STD / 255
    img_pre_process_tt = transforms.Compose([
        transforms.Normalize(mean=dsmean, std=dsstd)
    ])

    def preprocess(imgs):
        imgs = [img_pre_process_tt(s) for s in imgs]
        return torch.stack(imgs).to(torch.float32)

    transformations = args.transformations
    data_feeder_args = {
        'train': {'scale_df': 2},
        'valid': {},
        'fold': {},
        'test': {'predict': True},
    }

    # Data Feeders
    # Feed the feeders each reader's iteration DataFrame rather than the raw split.
    dts = {stage: storms_data_reader[stage].get_iter_df() for stage in train_stages}
    data_feeders = _generate_data_feeders(dts, load_function, transformations,
                                          preprocess, train_stages,
                                          data_feeder_args)

    def worker_init_fn(worker_id):
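        # Re-seed NumPy in each DataLoader worker so random augmentations
        # differ across workers instead of replaying the parent's RNG stream.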
        np.random.seed(np.random.get_state()[1][0] + worker_id)

    ## Data Loader
    data_loader_args = {
        "train": {
            'batch_size': args.train_batch_size,
            'shuffle': True,
            'worker_init_fn': worker_init_fn,
        },
        "valid": {
            'batch_size': args.valid_batch_size,
            'shuffle': False,
            'worker_init_fn': worker_init_fn,
        },
        "fold": {
            'batch_size': args.valid_batch_size,
            'shuffle': False,
            'worker_init_fn': worker_init_fn,
        },
        "test": {
            'batch_size': args.test_batch_size,
            'shuffle': False,
            'worker_init_fn': worker_init_fn,
        },
    }
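    # On a CUDA device, pin host memory and use worker processes to speed up
    # host-to-GPU batch transfers.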
    if 'cuda' in DEVICE:
        for dct in data_loader_args.values():
            dct.update({'pin_memory': True, 'num_workers': args.num_workers})
    # Data Loaders
    data_loaders = _generate_data_loaders(data_feeders, train_stages,
                                          data_loader_args)

    return data_loaders
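
# _generate_data_loaders is defined elsewhere in the project. A minimal
# sketch of the contract assumed here, wrapping each stage's feeder in a
# torch DataLoader with its per-stage arguments (an illustration, not the
# original helper):
from torch.utils.data import DataLoader

def _generate_data_loaders_sketch(data_feeders, stages, data_loader_args):
    # One DataLoader per stage, configured from data_loader_args[stage].
    return {stage: DataLoader(data_feeders[stage], **data_loader_args[stage])
            for stage in stages}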