コード例 #1
0
def compute_features_learner(
        data, dataset_type: DatasetType, learn: Learner,
        embedding_layer: Module) -> Dict[str, np.array]:
    """Compute features for all images of a dataset split using mini-batching.

    Use this function to featurize the training, validation or test set of a
    learner. A ``SaveFeatures`` hook is attached to ``embedding_layer``,
    ``learn.get_preds`` is run over the requested split of ``data``, and the
    outputs recorded by the hook are paired with the items of that split.

    Args:
        data: Data object exposing ``train_ds``, ``valid_ds`` and ``test_ds``.
            It is temporarily assigned to ``learn.data`` while the features
            are computed; the learner's previous data is always restored.
        dataset_type: Specify train, valid or test set. ``DatasetType.Train``
            is treated as ``DatasetType.Fix`` (see the note in the body).
        learn: Trained model to use as featurizer.
        embedding_layer: Layer of the model whose output is recorded as the
            feature.

    Returns:
        Dictionary mapping ``str(item)`` of every item in the selected split
        to the feature recorded for it.

    Raises:
        ValueError: If ``dataset_type`` is not Train, Valid, Test or Fix.
        AssertionError: If the number of recorded features differs from the
            number of items in the selected split.
    """
    # Note: In Fastai, for DatasetType.Train, only the output of complete minibatches is computed. Ie if one has 101 images,
    # and uses a minibatch size of 16, then len(feats) is 96 and not 101. For DatasetType.Valid this is not the case,
    # and len(feats) is as expected 101. A way around this is to use DatasetType.Fix instead when referring to the training set.
    # See e.g. issue: https://forums.fast.ai/t/get-preds-returning-less-results-than-length-of-original-dataset/34148
    if dataset_type in (DatasetType.Train, DatasetType.Fix):
        # Training set without shuffling and no dropping of last batch. See note above.
        dataset_type = DatasetType.Fix
        label_list = list(data.train_ds.items)
    elif dataset_type == DatasetType.Valid:
        label_list = list(data.valid_ds.items)
    elif dataset_type == DatasetType.Test:
        label_list = list(data.test_ds.items)
    else:
        raise ValueError(
            "Dataset_type needs to be of type DatasetType.Train, DatasetType.Valid, DatasetType.Test or DatasetType.Fix."
        )

    # Update what data the learner object is using
    tmp_data = learn.data
    learn.data = data
    try:
        # Compute features: the hook collects the layer outputs during get_preds
        featurizer = SaveFeatures(embedding_layer)
        learn.get_preds(dataset_type)
        feats = featurizer.features[:]
    finally:
        # Set data back to before, even if featurization failed, so the
        # caller's learner is never left pointing at the wrong data.
        learn.data = tmp_data

    # Explicit check (not a bare assert, which is stripped under -O): zip()
    # below would otherwise silently truncate to the shorter sequence.
    if len(feats) != len(label_list):
        raise AssertionError(
            f"Number of features ({len(feats)}) does not match number of items ({len(label_list)})."
        )

    # Get corresponding image paths
    im_paths = [str(x) for x in label_list]
    return dict(zip(im_paths, feats))
コード例 #2
0
ファイル: model.py プロジェクト: sravan90/ComputerVision
def compute_features_learner(
        data, dataset_type: DatasetType, learn: Learner,
        embedding_layer: Module) -> Dict[str, np.array]:
    """Compute features for all images of a dataset split using mini-batching.

    Use this function to featurize the training, validation or test set of a
    learner. A ``SaveFeatures`` hook is attached to ``embedding_layer``,
    ``learn.get_preds`` is run for the requested split, and the outputs
    recorded by the hook are paired with the items of that split in ``data``.

    Args:
        data: Data object exposing ``train_ds``, ``valid_ds`` and ``test_ds``;
            used here only to look up the items of the split.
            NOTE(review): predictions are run on ``learn.data``, so this is
            presumably expected to be the same data object - confirm.
        dataset_type: Specify train, valid or test set. ``DatasetType.Train``
            is treated as ``DatasetType.Fix``.
        learn: Trained model to use as featurizer.
        embedding_layer: Layer of the model whose output is recorded as the
            feature.

    Returns:
        Dictionary mapping ``str(item)`` of every item in the selected split
        to the feature recorded for it.

    Raises:
        ValueError: If ``dataset_type`` is not Train, Valid, Test or Fix.
        AssertionError: If the number of recorded features differs from the
            number of items in the selected split.
    """
    if dataset_type in (DatasetType.Train, DatasetType.Fix):
        # In fastai the DatasetType.Train loader shuffles and drops the last
        # incomplete mini-batch, so get_preds would return fewer, reordered
        # outputs that cannot be matched to train_ds.items. DatasetType.Fix is
        # the training set without shuffling and without dropping a batch.
        dataset_type = DatasetType.Fix
        label_list = list(data.train_ds.items)
    elif dataset_type == DatasetType.Valid:
        label_list = list(data.valid_ds.items)
    elif dataset_type == DatasetType.Test:
        label_list = list(data.test_ds.items)
    else:
        raise ValueError(
            "Dataset_type needs to be of type DatasetType.Train, DatasetType.Valid, DatasetType.Test or DatasetType.Fix."
        )

    # The hook collects the layer outputs as a side effect of get_preds; the
    # predictions themselves are not needed.
    featurizer = SaveFeatures(embedding_layer)
    learn.get_preds(dataset_type)
    feats = featurizer.features[:]

    # Get corresponding image paths
    im_paths = [str(x) for x in label_list]

    # Explicit check (not a bare assert, which is stripped under -O): zip()
    # below would otherwise silently truncate to the shorter sequence.
    if len(feats) != len(im_paths):
        raise AssertionError(
            f"Number of features ({len(feats)}) does not match number of items ({len(im_paths)})."
        )
    return dict(zip(im_paths, feats))
コード例 #3
0
]
# Build the learner from the data object, the model and the training options.
learn = Learner(
    db,
    model,
    metrics=[rmse, mae],
    callback_fns=callback_fns,
    wd=args.wd,
    loss_func=contribs_rmse_loss,
)

# When resuming (start_epoch > 0) load the snapshot of the preceding epoch,
# otherwise load the weights stored under model_str.
if args.start_epoch > 0:
    learn.load(model_se_str + f'_{args.start_epoch-1}')
else:
    learn.load(model_str)
torch.cuda.empty_cache()
if distributed_train:
    learn = learn.to_distributed(args.local_rank)

learn.fit(args.epochs)

# make predictions: one column per snapshot, one row per sample
is_val_row = train_df['molecule_id'].isin(val_mol_ids)
n_val = len(train_df[is_val_row])
val_preds = np.zeros((n_val, args.epochs))
test_preds = np.zeros((len(test_df), args.epochs))
for m in range(args.epochs):
    print(f'Predicting for model {m}')
    learn.load(model_se_str + f'_{m}')
    val_contrib_preds = learn.get_preds(DatasetType.Valid)
    test_contrib_preds = learn.get_preds(DatasetType.Test)
    # Keep only the last column of the first element returned by get_preds.
    for out_matrix, contrib_preds in (
        (val_preds, val_contrib_preds),
        (test_preds, test_contrib_preds),
    ):
        out_matrix[:, m] = contrib_preds[0][:, -1].detach().numpy()

# Scale by C.SC_STD and shift by C.SC_MEAN
# (presumably undoing a standardisation of the target - confirm).
val_preds = val_preds * C.SC_STD + C.SC_MEAN
test_preds = test_preds * C.SC_STD + C.SC_MEAN

# store results
store_submit(pd.DataFrame(test_preds), snapshots_str, print_head=True)
store_oof(pd.DataFrame(val_preds), snapshots_str, print_head=True)