Example no. 1
0
def evaluate(gpu_device, model, dataloader, nb_classes, name='tra_similar.jpg'):
    """Evaluate *model* on *dataloader* and return ``(nmi, recall)``.

    Embeddings and labels are computed batch-wise on *gpu_device*.  Small
    evaluation sets (< 10000 samples) report Recall@k for k = 1, 2, 4, 8;
    larger sets use k = 1, 10, 100, 1000.

    NOTE(review): the k-means NMI computation was disabled in the
    original; a placeholder is returned instead (1.0 for small sets,
    0.0 otherwise).  Confirm callers do not rely on a real NMI score.
    The *name* parameter is unused here; it is kept for interface
    compatibility with sibling evaluate() variants.
    """
    # Calculate embeddings with model, also get labels (non-batch-wise).
    X, T = predict_batchwise(gpu_device, model, dataloader)

    if X.shape[0] < 10000:
        nmi = 1.0  # placeholder: real NMI (k-means) is disabled
        k_neighbors, k_values = 8, [1, 2, 4, 8]
    else:
        nmi = 0.0
        k_neighbors, k_values = 1000, [1, 10, 100, 1000]

    # Predictions: labels of the k nearest neighbors (euclidean distance).
    Y = evaluation.assign_by_euclidian_at_k(X, T, k_neighbors)

    # Recall@k for each configured k.
    recall = [evaluation.calc_recall_at_k(T, Y, k) for k in k_values]
    return nmi, recall
Example no. 2
0
def evaluate(model,
             dataloader,
             nb_classes,
             net_type='bn_inception',
             dataroot='CARS'):
    """Compute NMI and Recall@k for *model* on *dataloader*.

    For the 'Stanford' dataset NMI is skipped (returned as -1) and
    recall is measured at k = 1, 10, 100, 1000; every other dataset
    uses k = 1, 2, 4, 8 together with a real k-means NMI score.
    Restores the model's previous train/eval mode before returning.
    """
    was_training = model.training
    model.eval()

    # Embeddings and ground-truth labels for the whole evaluation set.
    X, T = predict_batchwise(model, dataloader, net_type)

    is_stanford = dataroot == 'Stanford'

    if is_stanford:
        nmi = -1
    else:
        # NMI between true labels and a k-means clustering of embeddings.
        nmi = evaluation.calc_normalized_mutual_information(
            T, evaluation.cluster_by_kmeans(X, nb_classes))
        logging.info("NMI: {:.3f}".format(nmi * 100))

    if is_stanford:
        Y = evaluation.assign_by_euclidian_at_k(X, T, 1000)
        k_values = [1, 10, 100, 1000]
    else:
        Y = evaluation.assign_by_euclidian_at_k(X, T, 8)
        k_values = [1, 2, 4, 8]

    recall = []
    for k in k_values:
        r_at_k = evaluation.calc_recall_at_k(T, Y, k)
        recall.append(r_at_k)
        logging.info("R@{} : {:.3f}".format(k, 100 * r_at_k))

    model.train(was_training)  # revert to the caller's training state
    return nmi, recall
Example no. 3
0
def evaluate(model, dataloader, nb_classes):
    """Return ``(nmi, recall@[1,2,4,8])`` for *model* on *dataloader*.

    The model is put in eval mode for the duration of the evaluation
    and restored to its previous mode afterwards.
    """
    was_training = model.training
    model.eval()

    # Embeddings and labels for the full evaluation set.
    X, T = predict_batchwise(model, dataloader)

    # NMI between ground-truth labels and a k-means clustering.
    clusters = evaluation.cluster_by_kmeans(X, nb_classes)
    nmi = evaluation.calc_normalized_mutual_information(T, clusters)
    logging.info("NMI: {:.3f}".format(nmi * 100))

    # Predicted labels from the 8 nearest neighbors (euclidean distance).
    Y = evaluation.assign_by_euclidian_at_k(X, T, 8)

    recall = []
    for k in (1, 2, 4, 8):
        r_at_k = evaluation.calc_recall_at_k(T, Y, k)
        recall.append(r_at_k)
        logging.info("R@{} : {:.3f}".format(k, 100 * r_at_k))

    model.train(was_training)  # revert to previous training state
    return nmi, recall
Example no. 4
0
def evaluate(model, dataloader, with_nmi=True):
    """Evaluate *model*; returns ``(recall, nmi)`` or just ``recall``.

    When *with_nmi* is true a k-means NMI score is computed as well and
    the return value is ``(recall, nmi)``; otherwise only the list of
    Recall@k values (k = 1, 2, 4, 8) is returned.
    """
    nb_classes = dataloader.dataset.nb_classes()

    # Embeddings and targets for every sample (extra outputs ignored).
    X, T, *_ = predict_batchwise(model, dataloader)

    if with_nmi:
        # NMI between targets and a k-means clustering of the embeddings.
        clusters = evaluation.cluster_by_kmeans(X, nb_classes)
        nmi = evaluation.calc_normalized_mutual_information(T, clusters)
        logging.info("NMI: {:.3f}".format(nmi * 100))

    # Predicted labels: the 8 nearest neighbors (euclidean distance).
    Y = torch.from_numpy(evaluation.assign_by_euclidian_at_k(X, T, 8))

    recall = []
    for k in (1, 2, 4, 8):
        r_at_k = evaluation.calc_recall_at_k(T, Y, k)
        recall.append(r_at_k)
        logging.info("R@{} : {:.3f}".format(k, 100 * r_at_k))

    return (recall, nmi) if with_nmi else recall
def evaluate(model, dataloader=None, fc7=None, batch=None, calc_nmi=False):
    """Evaluate either a full *dataloader* or a single (*fc7*, *batch*).

    Returns ``(recall, nmi)`` where *nmi* is ``None`` unless a
    dataloader was given and *calc_nmi* is true.  Recall is measured at
    k = 1, 10, 100, 1000 for full evaluation and only at k = 1 when a
    single batch is scored.  The model's training mode is restored on
    exit.
    """
    nb_classes = model.nb_classes

    was_training = model.training
    model.eval()

    # Embeddings and labels, either from the loader or from raw inputs.
    emb, labels = predict_batchwise(model,
                                    dataloader=dataloader,
                                    fc7=fc7,
                                    batch=batch)

    nmi = None
    if dataloader is not None and calc_nmi:
        # NMI between labels and a k-means clustering of the embeddings.
        clusters = evaluation.cluster_by_kmeans(emb, nb_classes)
        nmi = evaluation.calc_normalized_mutual_information(labels, clusters)

    # Rank the 1000 nearest neighbors for each input.
    k_pred_labels = evaluation.assign_by_euclidian_at_k(emb, labels, 1000)
    k_values = [1, 10, 100, 1000] if batch is None else [1]

    recall = []
    for k in k_values:
        r_at_k = evaluation.calc_recall_at_k(labels, k_pred_labels, k)
        recall.append(r_at_k)
        logging.info("R@{} : {:.3f}".format(k, 100 * r_at_k))

    if was_training:
        model.train()  # revert to previous training state

    return recall, nmi
    def on_epoch_end(self, epoch, logs=None):
        """Compute NMI and Recall@k on validation samples every
        ``self.interval`` epochs.

        *logs* is accepted for Keras-callback compatibility but unused.
        Returns ``(nmi, recall)`` on evaluation epochs, ``None``
        otherwise.

        BUG FIX: the original default was a mutable ``logs={}`` shared
        across calls; ``None`` is the safe, conventional default and is
        backward-compatible with the Keras callback protocol.
        """
        if epoch % self.interval != 0:
            return None

        # Function mapping model input -> output of the "predictions" layer.
        get_intermediate_layer_output = backend.function(
            [self.model.input],
            [self.model.get_layer("predictions").output])

        y_given = []
        y_embedding = []
        nb_classes = 0

        print("Before getting validation samples:")
        print(datetime.datetime.now().time())

        # Draw a fixed number of validation batches (128) and embed them.
        # NOTE(review): assumes the validation generator yields one-hot
        # targets of shape (batch, nb_classes) -- confirm upstream.
        for _ in range(128):
            X_val_temp, y_val_temp = self.validation_data.next()
            nb_classes = y_val_temp.shape[1]  # one-hot width
            y_given.append(y_val_temp)
            y_embedding.append(
                get_intermediate_layer_output([X_val_temp])[0])

        print("After getting validation samples:")
        print(datetime.datetime.now().time())

        y_embedding = np.concatenate(y_embedding, axis=0)
        y_given = np.concatenate(y_given, axis=0)

        # Recover integer class ids from the one-hot targets: the last
        # column of the transposed argsort is the per-sample argmax.
        y_given_class = np.transpose(np.argsort(y_given, axis=-1))[-1]

        # NMI between true classes and a k-means clustering of embeddings.
        nmi = evaluation.calc_normalized_mutual_information(
            y_given_class,
            evaluation.cluster_by_kmeans(y_embedding, nb_classes))
        logging.info("NMI: {:.3f}".format(nmi * 100))

        # Predicted labels: the 8 nearest neighbors (euclidean distance).
        Y = evaluation.assign_by_euclidian_at_k(y_embedding, y_given_class, 8)

        # Recall @ 1, 2, 4, 8.
        recall = []
        for k in [1, 2, 4, 8]:
            r_at_k = evaluation.calc_recall_at_k(y_given_class, Y, k)
            recall.append(r_at_k)
            logging.info("R@{} : {:.3f}".format(k, 100 * r_at_k))

        return nmi, recall
Example no. 7
0
def get_recall_results2(model, dataloader):
    """Return per-sample R@1 hits plus embeddings, targets and indices.

    For every sample the nearest neighbor's label (euclidean distance)
    is compared against the true label; the result list holds one
    boolean per sample.  Also returns the raw embeddings *X*, targets
    *T* and dataset indices *I*.

    Unused locals from the original (``eval_time``, ``nb_classes``)
    have been removed; neither value affected the result.
    """
    # Embeddings, targets and indices for the whole dataset.
    X, T, I = predict_batchwise(model, dataloader)

    # Label of the single nearest neighbor for each embedding.
    Y = torch.from_numpy(evaluation.assign_by_euclidian_at_k(X, T, 1))

    # True where the nearest neighbor shares the sample's label (R@1 hit).
    recall_results = [t in y[:1] for t, y in zip(T, Y)]

    return recall_results, X, T, I
Example no. 8
0
def evaluate(model, dataloader, eval_nmi=True, recall_list=(1, 2, 4, 8)):
    """Evaluate *model*; returns ``(nmi, recall)`` and logs timings.

    Recall is reported for every k in *recall_list*; when *eval_nmi* is
    false the NMI is not computed and a placeholder of 1 is used so the
    harmonic mean below stays defined.

    BUG FIX: the default *recall_list* was a shared mutable list; a
    tuple is backward-compatible (the value is only iterated and
    ``max()``'d).  The harmonic mean is also guarded against a 0/0
    division.
    """
    eval_time = time.time()
    nb_classes = dataloader.dataset.nb_classes()

    # Embeddings and targets for every sample (extra outputs ignored).
    X, T, *_ = predict_batchwise(model, dataloader)

    print('done collecting prediction')

    if eval_nmi:
        # NMI between targets and a k-means clustering of the embeddings.
        nmi = evaluation.calc_normalized_mutual_information(
            T, evaluation.cluster_by_kmeans(X, nb_classes))
    else:
        nmi = 1  # placeholder; keeps the harmonic mean well-defined

    logging.info("NMI: {:.3f}".format(nmi * 100))

    # Rank as many neighbors as the largest requested k.
    max_dist = max(recall_list)
    Y = torch.from_numpy(evaluation.assign_by_euclidian_at_k(X, T, max_dist))

    recall = []
    for k in recall_list:
        r_at_k = evaluation.calc_recall_at_k(T, Y, k)
        recall.append(r_at_k)
        logging.info("R@{} : {:.3f}".format(k, 100 * r_at_k))

    # Harmonic mean of NMI and the first Recall@k; guard against 0/0.
    denom = nmi + recall[0]
    chmean = (2 * nmi * recall[0]) / denom if denom else 0.0
    logging.info("hmean: %s", str(chmean))

    eval_time = time.time() - eval_time
    logging.info('Eval time: %.2f' % eval_time)
    return nmi, recall
Example no. 9
0
def evaluate(model, dataloader, nb_classes):
    """Return ``(nmi, recall)`` for *model* over *dataloader*.

    NMI compares a k-means clustering of the embeddings against the
    true labels; recall is measured at k = 1, 2, 4, 8.  The model's
    train/eval mode is restored before returning.
    """
    was_training = model.training
    model.eval()

    # Full-set embeddings and ground-truth labels.
    X, T = predict_batchwise(model, dataloader)

    # NMI between labels and a k-means clustering of the embeddings.
    kmeans_assignment = evaluation.cluster_by_kmeans(X, nb_classes)
    nmi = evaluation.calc_normalized_mutual_information(T, kmeans_assignment)
    logging.info("NMI: {:.3f}".format(nmi * 100))

    # Predicted labels from the 8 nearest neighbors (euclidean distance).
    Y = evaluation.assign_by_euclidian_at_k(X, T, 8)

    recall = []
    for k in (1, 2, 4, 8):
        r_at_k = evaluation.calc_recall_at_k(T, Y, k)
        recall.append(r_at_k)
        logging.info("R@{} : {:.3f}".format(k, 100 * r_at_k))

    model.train(was_training)  # put the model back in its prior mode
    return nmi, recall
Example no. 10
0
def evaluate(gpu_device,
             model,
             dataloader,
             nb_classes,
             name='tra_similar.jpg'):
    """Evaluate a multi-branch model; returns ``(nmi, recall)``.

    Embeddings from the three branches (X4, X5, X52) are concatenated
    feature-wise into a single representation.  Small evaluation sets
    (< 10000 samples) report recall at k = 1, 2, 4, 8 and write a
    similarity histogram to *name*; larger sets use k = 1, 10, 100,
    1000.

    NOTE(review): NMI is not actually computed -- a placeholder is
    returned (1.0 for small sets, 0.0 otherwise); confirm callers do
    not rely on it.  The original's dead/commented-out debugging code
    (per-branch recall runs, result.txt dump, pdb traces) has been
    removed; behavior is unchanged.
    """
    # Per-branch embeddings and labels for the whole set.
    X4, X5, X52, T = predict_batchwise(gpu_device, model, dataloader)
    # Concatenate the three branch embeddings along the feature axis.
    X = torch.cat([X4, X5, X52], dim=1)

    if X.shape[0] < 10000:
        nmi = 1.0  # placeholder: real k-means NMI is disabled
        # Nearest-neighbor labels; also writes the histogram image.
        Y, indices = evaluation.assign_by_euclidian_at_k_hist(X, T, 8, name)
        k_values = [1, 2, 4, 8]
    else:
        nmi = 0.0
        Y = evaluation.assign_by_euclidian_at_k(X, T, 1000)
        k_values = [1, 10, 100, 1000]

    # Recall@k for each configured k.
    recall = [evaluation.calc_recall_at_k(T, Y, k) for k in k_values]
    return nmi, recall
Example no. 11
0
    def validation_epoch_end(self, outputs: Dict[str, Any]) -> None:
        """Compute metrics on the full validation set.

        Aggregates per-batch embeddings, computes Recall@k and NMI,
        logs PCA visualisations of the embedding space and proxies,
        a confusion matrix, example retrievals, and a validation-loss
        histogram to wandb.

        Args:
            outputs (Dict[str, Any]): Dict of values collected over each batch put through model.eval()(..)
        """

        # Concatenate per-batch embeddings, targets and dataset indices.
        val_Xs = torch.cat([h["Xs"] for h in outputs])
        val_Ts = torch.cat([h["Ts"] for h in outputs])
        val_indexes = torch.cat([h["index"] for h in outputs])
        # Labels of the 8 nearest neighbours per embedding (euclidean).
        Y = assign_by_euclidian_at_k(val_Xs.cpu(), val_Ts.cpu(), 8)
        Y = torch.from_numpy(Y)

        # Return early when PL is running the sanity check.
        if self.trainer.running_sanity_check:
            return

        # Compute and Log R@k for k = 1, 2, 4, 8 (as percentages).
        recall = []
        logs = {}
        for k in [1, 2, 4, 8]:
            r_at_k = 100 * calc_recall_at_k(val_Ts.cpu(), Y, k)
            recall.append(r_at_k)
            logs[f"val_R@{k}"] = r_at_k
        self.log_dict(logs)

        # Compute and log NMI (k-means clustering vs. true labels).
        nmi = 100 * calc_normalized_mutual_information(
            val_Ts.cpu(), cluster_by_kmeans(val_Xs.cpu(), self.hparams.num_classes)
        )
        self.log_dict({"NMI": nmi})

        # Inspect the embedding space in 2 and 3 dimensions.
        if 2 in self.hparams.vis_dim:
            # Project embeddings and proxies onto the same 2-D PCA basis.
            pca = PCA(2)
            projected = pca.fit_transform(val_Xs.cpu())
            proxies = pca.transform(self.proxies.detach().cpu())
            fig_embedded_data = go.Figure()
            # One scatter trace per class, points selected by the
            # nearest-neighbour *predicted* class (Y[i, 0]).
            for cls_idx, cls_name in enumerate(self.val_dataset.classes):
                x_s = [
                    o
                    for i, o in enumerate(projected[:, 0])
                    if self.val_dataset.get_label_description(Y[i, 0]) == cls_name
                ]
                y_s = [
                    o
                    for i, o in enumerate(projected[:, 1])
                    if self.val_dataset.get_label_description(Y[i, 0]) == cls_name
                ]
                marker_color = colors_by_name[cls_idx % len(colors_by_name)]
                fig_embedded_data.add_scatter(
                    x=x_s,
                    y=y_s,
                    marker_color=marker_color,
                    text=cls_name,
                    name=cls_name,
                    mode="markers",
                )

            wandb.log({"Embedding of Validation Dataset 2D": fig_embedded_data})

            # One marker per proxy, coloured consistently with its class.
            fig_embedded_proxies = go.Figure()
            for cls_name, x_y in zip(self.val_dataset.classes, proxies):
                # NOTE(review): x_s / y_s below are computed but never
                # used -- the scatter plots x_y directly.  Looks like
                # leftover code; confirm before removing.
                x_s = [
                    o
                    for i, o in enumerate(proxies[:, 0])
                    if self.val_dataset.get_label_description(Y[i, 0]) == cls_name
                ]
                y_s = [
                    o
                    for i, o in enumerate(proxies[:, 1])
                    if self.val_dataset.get_label_description(Y[i, 0]) == cls_name
                ]
                marker_color = colors_by_name[
                    self.val_dataset.classes.index(cls_name) % len(colors_by_name)
                ]
                fig_embedded_proxies.add_scatter(
                    x=[x_y[0]],
                    y=[x_y[1]],
                    marker_color=marker_color,
                    text=cls_name,
                    name=cls_name,
                    mode="markers",
                )
            wandb.log(
                {"Embedding of Proxies (on validation data) 2D": fig_embedded_proxies}
            )

        if 3 in self.hparams.vis_dim:
            # Same visualisation in 3-D.
            pca = PCA(3)
            projected = pca.fit_transform(val_Xs.cpu())
            proxies = pca.transform(self.proxies.detach().cpu())
            fig_embedded_data = go.Figure()

            for cls_idx, cls_name in enumerate(self.val_dataset.classes):
                x_s = [
                    o
                    for i, o in enumerate(projected[:, 0])
                    if self.val_dataset.get_label_description(Y[i, 0]) == cls_name
                ]
                y_s = [
                    o
                    for i, o in enumerate(projected[:, 1])
                    if self.val_dataset.get_label_description(Y[i, 0]) == cls_name
                ]
                z_s = [
                    o
                    for i, o in enumerate(projected[:, 2])
                    if self.val_dataset.get_label_description(Y[i, 0]) == cls_name
                ]
                marker_color = colors_by_name[cls_idx % len(colors_by_name)]
                fig_embedded_data.add_scatter3d(
                    x=x_s,
                    y=y_s,
                    z=z_s,
                    marker_color=marker_color,
                    text=cls_name,
                    name=cls_name,
                    mode="markers",
                )
            wandb.log({"Embedding of Validation Dataset 3D": fig_embedded_data})
            fig_embedded_proxies = go.Figure()
            for cls_name, x_y_z in zip(self.val_dataset.classes, proxies):
                marker_color = colors_by_name[
                    self.val_dataset.classes.index(cls_name) % len(colors_by_name)
                ]
                fig_embedded_proxies.add_scatter3d(
                    x=[x_y_z[0]],
                    y=[x_y_z[1]],
                    z=[x_y_z[2]],
                    marker_color=marker_color,
                    text=cls_name,
                    name=cls_name,
                    mode="markers",
                )
            wandb.log(
                {"Embedding of Proxies (on validation data) 3D": fig_embedded_proxies}
            )

        # Confusion matrix: true labels vs. nearest-neighbour predictions.
        cm = confusion_matrix(
            y_true=val_Ts.cpu().numpy(),
            y_pred=Y[:, 0].cpu().numpy(),
            labels=[o for o in range(0, len(self.val_dataset.classes))],
        )
        fig_cm = ff.create_annotated_heatmap(
            cm,
            x=self.val_dataset.classes,
            y=self.val_dataset.classes,
            annotation_text=cm.astype(str),
            colorscale="Viridis",
        )
        wandb.log({"Confusion Matrix": fig_cm})

        # Log a query and top 4 selction
        # (top-5 includes the query itself as its own nearest neighbour).
        image_dict = {}
        top_k_indices = torch.cdist(val_Xs, val_Xs).topk(5, largest=False).indices
        max_idx = len(top_k_indices) - 1
        # Sample 5 random queries and log query + 4 retrieved images.
        for i, example_result in enumerate(
            top_k_indices[[randint(0, max_idx) for _ in range(0, 5)]]
        ):

            image_dict[f"global step {self.global_step} example: {i}"] = [
                wandb.Image(
                    Image.open(
                        self.val_dataset.im_paths[val_indexes[example_result[0]]]
                    ),
                    caption=f"query: {self.val_dataset.get_label_description(self.val_dataset.get_label(val_indexes[example_result[0]]))}",
                )
            ]
            image_dict[f"global step {self.global_step} example: {i}"].extend(
                [
                    wandb.Image(
                        Image.open(self.val_dataset.im_paths[val_indexes[idx]]),
                        caption=f"retrival:({rank}) {self.val_dataset.get_label_description(self.val_dataset.get_label(val_indexes[idx]))}",
                    )
                    for rank, idx in enumerate(example_result[1:])
                ]
            )
        self.logger.experiment.log(image_dict)

        # Since validation set samples are iid I prefer looking at a histogram of validation losses.
        # NOTE(review): the nested list wraps all losses in a single
        # bucket-sequence -- confirm wandb.Histogram receives the
        # intended shape here.
        wandb.log(
            {f"val_loss_hist": wandb.Histogram([[h["val_loss"] for h in outputs]])}
        )
 def update(self, fc7, batch):
     """Accumulate Recall@1 for one batch.

     Embeds *batch* via ``predict_batchwise`` (using the precomputed
     *fc7* features; no model is passed), ranks the 1000 nearest
     neighbours per sample and adds the batch's R@1 to ``self.recall``;
     ``self.total`` counts processed batches so a mean can be taken
     later.
     """
     emb, labels = predict_batchwise(None, fc7=fc7, batch=batch)
     # Labels of the 1000 nearest neighbours per sample (euclidean).
     k_pred_labels = evaluation.assign_by_euclidian_at_k(emb, labels, 1000)
     self.recall += torch.tensor(
         evaluation.calc_recall_at_k(labels, k_pred_labels, k=1))
     self.total += 1