Example #1
# Assumed context: `korniatfm` is kornia.augmentation, and Pipeline, Normalize,
# imagenet_stats, and default_device come from fastai; RandomGaussianBlur is a
# custom transform defined alongside this function.
from fastai.vision.all import *
import kornia.augmentation as korniatfm


def get_kornia_batch_augs(size,
                          rotate=True,
                          jitter=True,
                          bw=True,
                          blur=True,
                          resize_scale=(0.2, 1.0),
                          resize_ratio=(3 / 4, 4 / 3),
                          rotate_deg=30,
                          jitter_s=.6,
                          blur_s=(4, 32),
                          same_on_batch=False,
                          flip_p=0.5,
                          jitter_p=0.3,
                          bw_p=0.3,
                          blur_p=0.3,
                          stats=imagenet_stats,
                          cuda=default_device().type == 'cuda',
                          xtra_tfms=None):
    "Input batch augmentations implemented in kornia"
    tfms = []
    # Random crop-and-resize to the target size, followed by a horizontal flip:
    tfms += [
        korniatfm.RandomResizedCrop((size, size),
                                    scale=resize_scale,
                                    ratio=resize_ratio,
                                    same_on_batch=same_on_batch)
    ]
    tfms += [korniatfm.RandomHorizontalFlip(p=flip_p)]

    if rotate:
        tfms += [
            korniatfm.RandomRotation(rotate_deg, same_on_batch=same_on_batch)
        ]

    if jitter:
        # SimCLR-style jitter: brightness/contrast/saturation scaled by
        # 0.8 * jitter_s, hue by 0.2 * jitter_s.
        tfms += [
            korniatfm.ColorJitter(0.8 * jitter_s,
                                  0.8 * jitter_s,
                                  0.8 * jitter_s,
                                  0.2 * jitter_s,
                                  p=jitter_p,
                                  same_on_batch=same_on_batch)
        ]
    if bw:
        tfms += [
            korniatfm.RandomGrayscale(p=bw_p, same_on_batch=same_on_batch)
        ]
    if blur:
        tfms += [
            RandomGaussianBlur(p=blur_p, s=blur_s, same_on_batch=same_on_batch)
        ]

    if stats is not None: tfms += [Normalize.from_stats(*stats, cuda=cuda)]

    if xtra_tfms: tfms += xtra_tfms
    # split_idx=0 restricts the pipeline to the training split in fastai.
    pipe = Pipeline(tfms, split_idx=0)
    return pipe
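
A minimal usage sketch for the pipeline above (the batch shape, the size value,
and the TensorImage wrapper are illustrative assumptions, not part of the
original):

import torch
from fastai.vision.all import TensorImage

# Dummy (B, C, H, W) batch; TensorImage keeps fastai's Normalize applicable.
batch = TensorImage(torch.rand(8, 3, 224, 224))
pipe = get_kornia_batch_augs(size=128, cuda=False)
out = pipe(batch)  # -> augmented batch of shape (8, 3, 128, 128)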
Example #2
# Assumed imports for this example: json, os, datetime.datetime, numpy as np,
# scipy.stats, torch, kornia.augmentation as aug, plus package-local helpers
# (_parse_args, FullDataset, BYOLHandler).
def main() -> None:
    """Parse arguments and run the relevant bits of the package as directed."""
    # Parse command-line arguments and set the device:
    args = _parse_args()
    timestamp = datetime.now().strftime("%m-%d-%Y_%H-%M-%S")
    print(timestamp)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Using device:", device)

    # Loading dataset: keep only channel 1, add a channel axis, then repeat it
    # three times so the single-channel images look like 3-channel RGB input.
    print("Loading dataset...")
    np_dataset = np.expand_dims(np.load(args.path_to_dataset)[:, :, :, 1],
                                axis=1)
    dataset = FullDataset(np.repeat(np_dataset, 3, axis=1))
    labels = np.load(args.path_to_labels)
    with open(args.path_to_label_dictionary, "r") as read_file:
        label_dict = json.load(read_file)

    # Defining augmentations (`aug` is presumably kornia.augmentation):
    augmentations = torch.nn.Sequential(
        aug.ColorJitter(brightness=0.8, contrast=0.8, p=0.3),
        aug.RandomSharpness(0.5),
        aug.RandomHorizontalFlip(),
        aug.RandomVerticalFlip(),
        aug.GaussianBlur(kernel_size=(3, 3), sigma=(1, 2), p=0.2),
        aug.RandomResizedCrop((64, 64)),
        aug.RandomAffine(degrees=180, p=0.2),
    )

    # Training BYOL:
    handler = BYOLHandler(device=device,
                          load_from_path=args.path_to_model,
                          augmentations=augmentations)
    if not args.no_train:
        print("Training handler...")
        handler.train(dataset, epochs=args.epochs, use_tqdm=args.tqdm)
        handler.save()

    # Generating embeddings, or loading precomputed ones if a path was given:
    if args.path_to_embeddings is None:
        embeddings = handler.infer(dataset, use_tqdm=args.tqdm)
        print("Embedding dimensions:", embeddings.shape)
    else:
        embeddings = np.load(args.path_to_embeddings)

    # Finding the most relevant features:
    print("Calculating batch scaled importance of features...")

    # Feature selection by robust dispersion. First map the control label
    # names passed on the command line to their integer codes:
    ctrl_labels: list[int] = []
    for num_label in label_dict:
        if label_dict[num_label] in args.ctrl_labels:
            ctrl_labels.append(int(num_label))

    # Boolean mask selecting all control samples:
    ctrl_mask = np.zeros(labels.shape[0], dtype=bool)
    for ctrl_label in ctrl_labels:
        ctrl_mask = ctrl_mask | (labels == ctrl_label)
    print("Control mask shape:", embeddings[ctrl_mask].shape)

    # Median absolute deviation of each feature over the control samples and
    # over the full dataset; features whose overall spread is large relative
    # to their control spread are treated as the most informative.
    ctrl_std = scipy.stats.median_abs_deviation(embeddings[ctrl_mask], axis=0)
    total_std = scipy.stats.median_abs_deviation(embeddings, axis=0)
    ratio_std = total_std / (ctrl_std + 1e-16)

    # argsort is ascending, so the highest-ratio features land in the last
    # columns; the slices like [:, -10:] below rely on that ordering.
    sorted_indices = np.argsort(ratio_std)
    sorted_embeddings = embeddings[:, sorted_indices]

    # Transforming into uniform distribution:
    # lambdas = 1 / sorted_embeddings.mean(axis=0)
    # uniform_embeddings = 1 - (lambdas * np.exp(sorted_embeddings * -lambdas))
    # print("Size of final embeddings array:", uniform_embeddings.shape)

    # zero_mask = np.min(sorted_embeddings[:, -5:], axis=1) > 0
    # zero_filter = sorted_embeddings[zero_mask]
    # zero_labels = labels[zero_mask]

    # Outlier mask; currently only consumed by the commented-out plotting call:
    outlier_mask = np.max(sorted_embeddings[:, -10:], axis=1) < 2
    # outlier_filtered_embeddings = sorted_embeddings[outlier_mask]
    # outlier_labels = labels[outlier_mask]

    # print("Size of zero filtered dataset:", zero_filter.shape)

    # Running inference:
    print("Performing UMAP analysis and plotting...")
    plots_dir = os.path.join("plots", timestamp)
    os.makedirs(plots_dir, exist_ok=True)

    # plot_umap_embeddings(
    #     sorted_embeddings[outlier_mask][::10, -750:],
    #     labels[outlier_mask][::10],
    #     plots_dir,
    #     ctrl_mask[outlier_mask][::10],
    #     np_dataset[outlier_mask][::10]
    # )

    embeddings_path = os.path.join(plots_dir, "embeddings.npy")
    np.save(embeddings_path, sorted_embeddings)

    labels_path = os.path.join(plots_dir, "labels.npy")
    np.save(labels_path, labels)

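
For context, here is a plausible sketch of the `_parse_args` helper this
example assumes. The attribute names are grounded in the code above, but the
flag spellings and defaults are guesses, not the original implementation:

import argparse

def _parse_args() -> argparse.Namespace:
    # Hypothetical CLI definition; only the dest names are taken from main().
    parser = argparse.ArgumentParser(
        description="BYOL training and embedding analysis")
    parser.add_argument("--path-to-dataset", dest="path_to_dataset",
                        required=True)
    parser.add_argument("--path-to-labels", dest="path_to_labels",
                        required=True)
    parser.add_argument("--path-to-label-dictionary",
                        dest="path_to_label_dictionary", required=True)
    parser.add_argument("--path-to-model", dest="path_to_model", default=None)
    parser.add_argument("--path-to-embeddings", dest="path_to_embeddings",
                        default=None)
    parser.add_argument("--ctrl-labels", dest="ctrl_labels", nargs="+",
                        default=[])
    parser.add_argument("--epochs", type=int, default=10)
    parser.add_argument("--no-train", dest="no_train", action="store_true")
    parser.add_argument("--tqdm", action="store_true")
    return parser.parse_args()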