Example #1
# `confounder_settings` (a dataset-to-constructor registry) and `DRODataset`
# are assumed to be defined elsewhere in the surrounding codebase.
def prepare_confounder_data(args, train, return_full_dataset=False):
    # Look up the dataset-specific constructor and build the full dataset.
    full_dataset = confounder_settings[args.dataset]['constructor'](
        root_dir=args.root_dir,
        target_name=args.target_name,
        confounder_names=args.confounder_names,
        model_type=args.model,
        augment_data=args.augment_data)
    if return_full_dataset:
        return DRODataset(full_dataset,
                          process_item_fn=None,
                          n_groups=full_dataset.n_groups,
                          n_classes=full_dataset.n_classes,
                          group_str_fn=full_dataset.group_str)
    if train:
        splits = ['train', 'val', 'test']
    else:
        splits = ['test']
    # Split the data, optionally subsampling the train split (train_frac)
    # and balancing it down to the smallest group.
    subsets = full_dataset.get_splits(
        splits,
        train_frac=args.fraction,
        subsample_to_minority=args.subsample_to_minority)
    dro_subsets = [
        DRODataset(
            subsets[split],
            process_item_fn=None,
            n_groups=full_dataset.n_groups,
            n_classes=full_dataset.n_classes,
            group_str_fn=full_dataset.group_str)
        for split in splits]
    return dro_subsets
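
For orientation, a hypothetical usage sketch (not part of the original source): the three returned DRODataset splits can be wrapped in standard PyTorch loaders. The args fields below are assumptions, and the real codebase may expose its own loader helper instead.

from torch.utils.data import DataLoader

# Hypothetical usage: unpack the splits returned by prepare_confounder_data
# and build plain PyTorch loaders, shuffling only the training split.
train_data, val_data, test_data = prepare_confounder_data(args, train=True)
loaders = {
    'train': DataLoader(train_data, batch_size=args.batch_size, shuffle=True),
    'val': DataLoader(val_data, batch_size=args.batch_size, shuffle=False),
    'test': DataLoader(test_data, batch_size=args.batch_size, shuffle=False),
}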
Example #2
def prepare_confounder_data(args, train, return_full_dataset=False):
    full_dataset = confounder_settings[args.dataset]['constructor'](
        root_dir=args.root_dir,
        target_name=args.target_name,
        confounder_names=args.confounder_names,
        model_type=args.model,
        augment_data=args.augment_data)
    if return_full_dataset: # never used
        return DRODataset(
            full_dataset,
            process_item_fn=None,
            n_groups=full_dataset.n_groups,
            n_classes=full_dataset.n_classes,
            group_str_fn=full_dataset.group_str)
    if train:
        splits = ['train', 'val', 'test'] # used
    else:
        splits = ['test']
    subsets = full_dataset.get_splits(splits, train_frac=args.fraction)

    # A disabled `args.ood` branch used to prepend a placeholder entry and
    # wrap only the val/test splits; the default path below is always taken.
    dro_subsets = [
        DRODataset(
            subsets[split],
            process_item_fn=None,
            n_groups=full_dataset.n_groups,
            n_classes=full_dataset.n_classes,
            group_str_fn=full_dataset.group_str,
            args=args)
        for split in splits]
    return dro_subsets
Example #3
def prepare_label_shift_data(args, train):
    settings = label_shift_settings[args.dataset]
    # When train=True, load_fn returns a (train, val) pair; otherwise it
    # returns an iterable of evaluation subsets.
    data = settings["load_fn"](args, train)
    n_classes = settings["n_classes"]
    if train:
        train_data, val_data = data
        if args.fraction < 1:
            train_data = subsample(train_data, args.fraction)
        # Induce the requested label shift on the (possibly subsampled)
        # training split; groups coincide with classes in this setting.
        train_data = apply_label_shift(
            train_data,
            n_classes,
            args.shift_type,
            args.minority_fraction,
            args.imbalance_ratio,
        )
        data = [train_data, val_data]
    dro_data = [
        DRODataset(
            subset,
            process_item_fn=settings["process_fn"],
            n_groups=n_classes,
            n_classes=n_classes,
            group_str_fn=settings["group_str_fn"],
        ) for subset in data
    ]
    return dro_data
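
subsample is referenced above but not shown. Below is a minimal sketch of a helper with that call shape, assuming it keeps a uniformly random fraction of the examples; it is an illustrative stand-in, not the original implementation.

import numpy as np
from torch.utils.data import Subset

def subsample(dataset, fraction, seed=0):
    # Hypothetical sketch: keep a random `fraction` of the examples.
    rng = np.random.default_rng(seed)
    n_keep = int(fraction * len(dataset))
    indices = rng.choice(len(dataset), size=n_keep, replace=False)
    return Subset(dataset, indices.tolist())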
Example #4
def prepare_confounder_data(args, train, return_full_dataset=False):
    # All datasets share the same constructor arguments; jigsaw additionally
    # needs the batch size, so build the kwargs once and extend them.
    constructor_kwargs = dict(
        root_dir=args.root_dir,
        target_name=args.target_name,
        confounder_names=args.confounder_names,
        model_type=args.model,
        augment_data=args.augment_data,
        metadata_csv_name=(args.metadata_csv_name
                           if args.metadata_csv_name is not None
                           else "metadata.csv"))
    if args.dataset == "jigsaw":
        constructor_kwargs["batch_size"] = args.batch_size
    full_dataset = confounder_settings[args.dataset]["constructor"](
        **constructor_kwargs)

    if return_full_dataset:
        return DRODataset(
            full_dataset,
            process_item_fn=None,
            n_groups=full_dataset.n_groups,
            n_classes=full_dataset.n_classes,
            group_str_fn=full_dataset.group_str,
        )
    if train:
        splits = ["train", "val", "test"]
    else:
        splits = ["test"]
    subsets = full_dataset.get_splits(splits, train_frac=args.fraction)
    dro_subsets = [
        DRODataset(
            subsets[split],
            process_item_fn=None,
            n_groups=full_dataset.n_groups,
            n_classes=full_dataset.n_classes,
            group_str_fn=full_dataset.group_str,
        ) for split in splits
    ]
    return dro_subsets
Example #5
def prepare_confounder_data(args, train, return_full_dataset=False):
    full_dataset = confounder_settings[args.dataset]['constructor'](args=args)
    if return_full_dataset:
        return DRODataset(full_dataset,
                          process_item_fn=None,
                          n_groups=full_dataset.n_groups,
                          n_classes=full_dataset.n_classes,
                          group_str_fn=full_dataset.group_str)
    if train:
        if args.dataset == 'CheXpert':
            splits = ['train', 'val']
        else:
            splits = ['train', 'val', 'test']
    else:
        splits = ['test']
    subsets = full_dataset.get_splits(splits)
    dro_subsets = [
        DRODataset(
            subsets[split],
            process_item_fn=None,
            n_groups=full_dataset.n_groups,
            n_classes=full_dataset.n_classes,
            group_str_fn=full_dataset.group_str)
        for split in splits]
    return dro_subsets
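
All five variants construct a DRODataset with the same keyword arguments. Below is a minimal sketch of the interface they assume; the body is illustrative only (the real class likely does more, such as computing per-group counts), and Example #2's extra args parameter is omitted.

from torch.utils.data import Dataset

class DRODataset(Dataset):
    # Illustrative sketch of the assumed interface, not the original class.
    def __init__(self, dataset, process_item_fn, n_groups, n_classes,
                 group_str_fn):
        self.dataset = dataset
        self.process_item = process_item_fn
        self.n_groups = n_groups        # number of (label, confounder) groups
        self.n_classes = n_classes
        self.group_str = group_str_fn   # maps a group index to a readable name

    def __getitem__(self, idx):
        item = self.dataset[idx]
        # process_item_fn, when given, maps raw items into model-ready tuples.
        return item if self.process_item is None else self.process_item(item)

    def __len__(self):
        return len(self.dataset)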