Example #1
def evaluate(model, dataloader, nb_classes):
    model_is_training = model.training
    model.eval()

    # calculate embeddings with model, also get labels (returned for the whole set at once, not per batch)
    X, T = predict_batchwise(model, dataloader)

    # calculate NMI with kmeans clustering
    nmi = evaluation.calc_normalized_mutual_information(
        T, 
        evaluation.cluster_by_kmeans(
            X, nb_classes
        )
    )
    logging.info("NMI: {:.3f}".format(nmi * 100))

    # get predictions by assigning the labels of the 8 nearest neighbors (Euclidean distance)
    Y = evaluation.assign_by_euclidian_at_k(X, T, 8)
    
    # calculate recall @ 1, 2, 4, 8
    recall = []
    for k in [1, 2, 4, 8]:
        r_at_k = evaluation.calc_recall_at_k(T, Y, k)
        recall.append(r_at_k)
        logging.info("R@{} : {:.3f}".format(k, 100 * r_at_k))
    model.train(model_is_training) # revert to previous training state
    return nmi, recall
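Every example on this page calls a `predict_batchwise` helper that is not shown. A minimal sketch of what it might look like, assuming a PyTorch model that maps a batch of inputs to embeddings (some examples' real helper also returns extra values, e.g. sample indices, hence the `X, T, *_` unpacking further below):

import torch

def predict_batchwise(model, dataloader):
    # Hypothetical sketch: run the model over the whole dataloader and
    # return all embeddings X and labels T as two concatenated tensors.
    device = next(model.parameters()).device
    xs, ts = [], []
    with torch.no_grad():
        for x, t in dataloader:
            xs.append(model(x.to(device)).cpu())
            ts.append(t)
    return torch.cat(xs), torch.cat(ts)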
Example #2
def evaluate(model,
             dataloader,
             nb_classes,
             net_type='bn_inception',
             dataroot='CARS'):
    model_is_training = model.training
    model.eval()

    # calculate embeddings with model, also get labels (returned for the whole set at once, not per batch)
    X, T = predict_batchwise(model, dataloader, net_type)

    if dataroot != 'Stanford':
        # calculate NMI with kmeans clustering
        nmi = evaluation.calc_normalized_mutual_information(
            T, evaluation.cluster_by_kmeans(X, nb_classes))
        logging.info("NMI: {:.3f}".format(nmi * 100))
    else:
        nmi = -1

    recall = []
    if dataroot != 'Stanford':
        Y = evaluation.assign_by_euclidian_at_k(X, T, 8)
        which_nearest_neighbors = [1, 2, 4, 8]
    else:
        Y = evaluation.assign_by_euclidian_at_k(X, T, 1000)
        which_nearest_neighbors = [1, 10, 100, 1000]

    for k in which_nearest_neighbors:
        r_at_k = evaluation.calc_recall_at_k(T, Y, k)
        recall.append(r_at_k)
        logging.info("R@{} : {:.3f}".format(k, 100 * r_at_k))

    model.train(model_is_training)  # revert to previous training state
    return nmi, recall
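The NMI metric above is produced by two small utilities from the `evaluation` module. A plausible sketch built on scikit-learn, consistent with how the examples call them (the actual module may differ in details such as the KMeans initialization):

from sklearn.cluster import KMeans
from sklearn.metrics import normalized_mutual_info_score

def cluster_by_kmeans(X, nb_clusters):
    # cluster the embeddings into nb_clusters groups and return one
    # cluster id per sample
    return KMeans(n_clusters=nb_clusters).fit(X).labels_

def calc_normalized_mutual_information(ys, xs_clustered):
    # NMI between ground-truth labels and cluster assignments;
    # 1.0 means the clustering recovers the classes exactly
    return normalized_mutual_info_score(xs_clustered, ys)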
Example #3
def evaluate(model, dataloader, with_nmi=True):
    nb_classes = dataloader.dataset.nb_classes()

    # calculate embeddings with model and get targets
    X, T, *_ = predict_batchwise(model, dataloader)

    if with_nmi:
        # calculate NMI with kmeans clustering
        nmi = evaluation.calc_normalized_mutual_information(
            T,
            evaluation.cluster_by_kmeans(
                X, nb_classes
            )
        )
        logging.info("NMI: {:.3f}".format(nmi * 100))

    # get predictions by assigning the labels of the 8 nearest neighbors (Euclidean distance)
    Y = evaluation.assign_by_euclidian_at_k(X, T, 8)
    Y = torch.from_numpy(Y)

    # calculate recall @ 1, 2, 4, 8
    recall = []
    for k in [1, 2, 4, 8]:
        r_at_k = evaluation.calc_recall_at_k(T, Y, k)
        recall.append(r_at_k)
        logging.info("R@{} : {:.3f}".format(k, 100 * r_at_k))
    if with_nmi:
        return recall, nmi
    else:
        return recall
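`assign_by_euclidian_at_k` and `calc_recall_at_k` carry the actual Recall@k computation. A sketch consistent with the call sites above, assuming X is an [n, d] embedding matrix and T holds one integer label per row (the real `evaluation` module may implement this differently):

import numpy as np
from sklearn.metrics import pairwise_distances

def assign_by_euclidian_at_k(X, T, k):
    # for each sample, return the labels of its k nearest neighbours
    # under Euclidean distance, excluding the sample itself
    D = pairwise_distances(np.asarray(X))
    # column 0 of the argsort is the sample itself (distance 0), skip it
    nearest = np.argsort(D, axis=1)[:, 1:k + 1]
    return np.asarray(T)[nearest]

def calc_recall_at_k(T, Y, k):
    # Recall@k: fraction of samples whose k nearest neighbours contain
    # at least one sample of the same class
    hits = sum(t in y[:k] for t, y in zip(np.asarray(T), np.asarray(Y)))
    return hits / float(len(T))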
Example #4

def evaluate(model, dataloader=None, fc7=None, batch=None, calc_nmi=False):
    nb_classes = model.nb_classes

    model_is_training = model.training
    model.eval()

    # calculate embeddings with model, also get labels
    emb, labels = predict_batchwise(model,
                                    dataloader=dataloader,
                                    fc7=fc7,
                                    batch=batch)

    nmi = None
    if dataloader is not None and calc_nmi:
        nmi = evaluation.calc_normalized_mutual_information(
            labels, evaluation.cluster_by_kmeans(emb, nb_classes))

    recall = []
    # rank the nearest neighbors for each input
    k_pred_labels = evaluation.assign_by_euclidian_at_k(emb, labels, 1000)
    if batch is None:
        which_nearest_neighbors = [1, 10, 100, 1000]
    else:
        which_nearest_neighbors = [1]

    for k in which_nearest_neighbors:
        r_at_k = evaluation.calc_recall_at_k(labels, k_pred_labels, k)
        recall.append(r_at_k)
        logging.info("R@{} : {:.3f}".format(k, 100 * r_at_k))

    if model_is_training:
        model.train()  # revert to previous training state

    return recall, nmi
Example #5

    def on_epoch_end(self, epoch, logs=None):

        if epoch % self.interval == 0:

            get_intermediate_layer_output = backend.function(
                [self.model.input],
                [self.model.get_layer("predictions").output])
            y_given = []

            y_embedding = []
            nb_classes = 0

            print("Before getting validation samples:")
            print(datetime.datetime.now().time())

            for i in range(128):

                X_val_temp, y_val_temp = next(self.validation_data)

                nb_classes = y_val_temp.shape[1]

                y_given.append(y_val_temp)

                y_embedding_temp = get_intermediate_layer_output([X_val_temp
                                                                  ])[0]
                y_embedding.append(y_embedding_temp)

            print("After getting validation samples:")
            print(datetime.datetime.now().time())

            y_embedding = np.concatenate(y_embedding, axis=0)

            y_given = np.concatenate(y_given, axis=0)
            # recover integer class ids from the one-hot labels: argsort
            # along the class axis puts the argmax (the true class) last
            y_given_class_order = np.argsort(y_given, axis=-1)

            y_given_class = np.transpose(y_given_class_order)[-1]

            nmi = evaluation.calc_normalized_mutual_information(
                y_given_class,
                evaluation.cluster_by_kmeans(y_embedding, nb_classes))

            logging.info("NMI: {:.3f}".format(nmi * 100))

            # get predictions by assigning the labels of the 8 nearest neighbors (Euclidean distance)
            Y = evaluation.assign_by_euclidian_at_k(y_embedding, y_given_class,
                                                    8)

            # calculate recall @ 1, 2, 4, 8
            recall = []
            for k in [1, 2, 4, 8]:
                r_at_k = evaluation.calc_recall_at_k(y_given_class, Y, k)
                recall.append(r_at_k)
                logging.info("R@{} : {:.3f}".format(k, 100 * r_at_k))

            # note: Keras ignores return values from callbacks; this is
            # only useful if the method is called directly
            return nmi, recall
Example #6
def evaluate_inshop(model,
                    dl_query,
                    dl_gallery,
                    K=[1, 10, 20, 30, 40, 50],
                    with_nmi=False):

    # calculate embeddings with model and get targets
    X_query, T_query, *_ = predict_batchwise_inshop(model, dl_query)
    X_gallery, T_gallery, *_ = predict_batchwise_inshop(model, dl_gallery)

    nb_classes = dl_query.dataset.nb_classes()

    assert nb_classes == len(set(T_query))
    #assert nb_classes == len(T_query.unique())

    # calculate the full similarity matrix, then keep only the first
    # `len(X_query)` rows (queries) and the last `len(X_gallery)` columns
    # (gallery), i.e. the query-to-gallery distances
    T_eval = torch.cat(
        [torch.from_numpy(T_query),
         torch.from_numpy(T_gallery)])
    X_eval = torch.cat(
        [torch.from_numpy(X_query),
         torch.from_numpy(X_gallery)])
    D = similarity.pairwise_distance(X_eval)[:len(X_query), len(X_query):]

    #D = torch.from_numpy(D)
    # get top k labels with smallest (`largest = False`) distance
    Y = T_gallery[D.topk(k=max(K), dim=1, largest=False)[1]]

    recall = []
    for k in K:
        r_at_k = evaluation.calc_recall_at_k(T_query, Y, k)
        recall.append(r_at_k)
        logging.info("R@{} : {:.3f}".format(k, 100 * r_at_k))

    if with_nmi:
        # calculate NMI with kmeans clustering
        nmi = evaluation.calc_normalized_mutual_information(
            T_eval.numpy(),
            evaluation.cluster_by_kmeans(X_eval.numpy(), nb_classes))
        logging.info("NMI: {:.3f}".format(nmi * 100))
    else:
        nmi = 1

    return nmi, recall
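Unlike the other examples, the in-shop protocol compares each query embedding against a separate gallery set rather than against its own split, which is why the distance matrix is sliced to its query rows and gallery columns. `similarity.pairwise_distance` is not shown on this page; a minimal torch stand-in consistent with that slicing:

import torch

def pairwise_distance(X):
    # hypothetical stand-in for similarity.pairwise_distance: the full
    # [n, n] matrix of Euclidean distances between all rows of X
    return torch.cdist(X, X)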
Example #7
def evaluate(model, dataloader, eval_nmi=True, recall_list=(1, 2, 4, 8)):
    eval_time = time.time()
    nb_classes = dataloader.dataset.nb_classes()

    # calculate embeddings with model and get targets
    X, T, *_ = predict_batchwise(model, dataloader)

    print('done collecting predictions')

    #eval_time = time.time() - eval_time
    #logging.info('Eval time: %.2f' % eval_time)

    if eval_nmi:
        # calculate NMI with kmeans clustering
        nmi = evaluation.calc_normalized_mutual_information(
            T, evaluation.cluster_by_kmeans(X, nb_classes))
        logging.info("NMI: {:.3f}".format(nmi * 100))
    else:
        nmi = 1

    # get predictions by assigning the labels of the max(recall_list)
    # nearest neighbors (Euclidean distance)
    max_k = max(recall_list)
    Y = evaluation.assign_by_euclidian_at_k(X, T, max_k)
    Y = torch.from_numpy(Y)

    # calculate recall @ k for every k in recall_list
    recall = []
    for k in recall_list:
        r_at_k = evaluation.calc_recall_at_k(T, Y, k)
        recall.append(r_at_k)
        logging.info("R@{} : {:.3f}".format(k, 100 * r_at_k))

    # harmonic mean of NMI and R@1
    chmean = (2 * nmi * recall[0]) / (nmi + recall[0])
    logging.info("hmean: %s", str(chmean))

    eval_time = time.time() - eval_time
    logging.info('Eval time: %.2f' % eval_time)
    return nmi, recall
Example #8
def evaluate(model, dataloader, nb_classes):
    model_is_training = model.training
    model.eval()

    # calculate embeddings with model, also get labels (returned for the whole set at once, not per batch)
    X, T = predict_batchwise(model, dataloader)

    # calculate NMI with kmeans clustering
    nmi = evaluation.calc_normalized_mutual_information(
        T, evaluation.cluster_by_kmeans(X, nb_classes))
    logging.info("NMI: {:.3f}".format(nmi * 100))

    # get predictions by assigning the labels of the 8 nearest neighbors (Euclidean distance)
    Y = evaluation.assign_by_euclidian_at_k(X, T, 8)

    # calculate recall @ 1, 2, 4, 8
    recall = []
    for k in [1, 2, 4, 8]:
        r_at_k = evaluation.calc_recall_at_k(T, Y, k)
        recall.append(r_at_k)
        logging.info("R@{} : {:.3f}".format(k, 100 * r_at_k))
    model.train(model_is_training)  # revert to previous training state
    return nmi, recall
Example #9
    def validation_epoch_end(self, outputs: List[Dict[str, Any]]) -> None:
        """Compute metrics on the full validation set.

        Args:
            outputs (List[Dict[str, Any]]): Values collected from each
                validation batch run through the model in eval mode.
        """

        # Return early when PL is running the sanity check, before doing
        # any metric computation.
        if self.trainer.running_sanity_check:
            return

        val_Xs = torch.cat([h["Xs"] for h in outputs])
        val_Ts = torch.cat([h["Ts"] for h in outputs])
        val_indexes = torch.cat([h["index"] for h in outputs])
        Y = assign_by_euclidian_at_k(val_Xs.cpu(), val_Ts.cpu(), 8)
        Y = torch.from_numpy(Y)

        # Compute and Log R@k
        recall = []
        logs = {}
        for k in [1, 2, 4, 8]:
            r_at_k = 100 * calc_recall_at_k(val_Ts.cpu(), Y, k)
            recall.append(r_at_k)
            logs[f"val_R@{k}"] = r_at_k
        self.log_dict(logs)

        # Compute and log NMI
        nmi = 100 * calc_normalized_mutual_information(
            val_Ts.cpu(), cluster_by_kmeans(val_Xs.cpu(), self.hparams.num_classes)
        )
        self.log_dict({"NMI": nmi})

        # Inspect the embedding space in 2 and 3 dimensions.
        if 2 in self.hparams.vis_dim:
            pca = PCA(2)
            projected = pca.fit_transform(val_Xs.cpu())
            proxies = pca.transform(self.proxies.detach().cpu())
            fig_embedded_data = go.Figure()
            for cls_idx, cls_name in enumerate(self.val_dataset.classes):
                x_s = [
                    o
                    for i, o in enumerate(projected[:, 0])
                    if self.val_dataset.get_label_description(Y[i, 0]) == cls_name
                ]
                y_s = [
                    o
                    for i, o in enumerate(projected[:, 1])
                    if self.val_dataset.get_label_description(Y[i, 0]) == cls_name
                ]
                marker_color = colors_by_name[cls_idx % len(colors_by_name)]
                fig_embedded_data.add_scatter(
                    x=x_s,
                    y=y_s,
                    marker_color=marker_color,
                    text=cls_name,
                    name=cls_name,
                    mode="markers",
                )

            wandb.log({"Embedding of Validation Dataset 2D": fig_embedded_data})

            fig_embedded_proxies = go.Figure()
            for cls_name, x_y in zip(self.val_dataset.classes, proxies):
                marker_color = colors_by_name[
                    self.val_dataset.classes.index(cls_name) % len(colors_by_name)
                ]
                fig_embedded_proxies.add_scatter(
                    x=[x_y[0]],
                    y=[x_y[1]],
                    marker_color=marker_color,
                    text=cls_name,
                    name=cls_name,
                    mode="markers",
                )
            wandb.log(
                {"Embedding of Proxies (on validation data) 2D": fig_embedded_proxies}
            )

        if 3 in self.hparams.vis_dim:
            pca = PCA(3)
            projected = pca.fit_transform(val_Xs.cpu())
            proxies = pca.transform(self.proxies.detach().cpu())
            fig_embedded_data = go.Figure()

            for cls_idx, cls_name in enumerate(self.val_dataset.classes):
                x_s = [
                    o
                    for i, o in enumerate(projected[:, 0])
                    if self.val_dataset.get_label_description(Y[i, 0]) == cls_name
                ]
                y_s = [
                    o
                    for i, o in enumerate(projected[:, 1])
                    if self.val_dataset.get_label_description(Y[i, 0]) == cls_name
                ]
                z_s = [
                    o
                    for i, o in enumerate(projected[:, 2])
                    if self.val_dataset.get_label_description(Y[i, 0]) == cls_name
                ]
                marker_color = colors_by_name[cls_idx % len(colors_by_name)]
                fig_embedded_data.add_scatter3d(
                    x=x_s,
                    y=y_s,
                    z=z_s,
                    marker_color=marker_color,
                    text=cls_name,
                    name=cls_name,
                    mode="markers",
                )
            wandb.log({"Embedding of Validation Dataset 3D": fig_embedded_data})
            fig_embedded_proxies = go.Figure()
            for cls_name, x_y_z in zip(self.val_dataset.classes, proxies):
                marker_color = colors_by_name[
                    self.val_dataset.classes.index(cls_name) % len(colors_by_name)
                ]
                fig_embedded_proxies.add_scatter3d(
                    x=[x_y_z[0]],
                    y=[x_y_z[1]],
                    z=[x_y_z[2]],
                    marker_color=marker_color,
                    text=cls_name,
                    name=cls_name,
                    mode="markers",
                )
            wandb.log(
                {"Embedding of Proxies (on validation data) 3D": fig_embedded_proxies}
            )

        cm = confusion_matrix(
            y_true=val_Ts.cpu().numpy(),
            y_pred=Y[:, 0].cpu().numpy(),
            labels=[o for o in range(0, len(self.val_dataset.classes))],
        )
        fig_cm = ff.create_annotated_heatmap(
            cm,
            x=self.val_dataset.classes,
            y=self.val_dataset.classes,
            annotation_text=cm.astype(str),
            colorscale="Viridis",
        )
        wandb.log({"Confusion Matrix": fig_cm})

        # Log a query image and its top-4 retrievals
        image_dict = {}
        # top-5 neighbours; index 0 is the sample itself and serves as the query
        top_k_indices = torch.cdist(val_Xs, val_Xs).topk(5, largest=False).indices
        max_idx = len(top_k_indices) - 1
        for i, example_result in enumerate(
            top_k_indices[[randint(0, max_idx) for _ in range(0, 5)]]
        ):

            image_dict[f"global step {self.global_step} example: {i}"] = [
                wandb.Image(
                    Image.open(
                        self.val_dataset.im_paths[val_indexes[example_result[0]]]
                    ),
                    caption=f"query: {self.val_dataset.get_label_description(self.val_dataset.get_label(val_indexes[example_result[0]]))}",
                )
            ]
            image_dict[f"global step {self.global_step} example: {i}"].extend(
                [
                    wandb.Image(
                        Image.open(self.val_dataset.im_paths[val_indexes[idx]]),
                        caption=f"retrival:({rank}) {self.val_dataset.get_label_description(self.val_dataset.get_label(val_indexes[idx]))}",
                    )
                    for rank, idx in enumerate(example_result[1:])
                ]
            )
        self.logger.experiment.log(image_dict)

        # Since validation set samples are i.i.d., I prefer looking at a histogram of validation losses.
        wandb.log(
            {"val_loss_hist": wandb.Histogram([h["val_loss"] for h in outputs])}
        )