Beispiel #1
0
 def fit(self, X, y):
     """Estimate the normalization statistics from labelled vectors.

     The following attributes are written on the instance:

     * ``_mean``: column-wise mean of ``X`` reshaped to ``(1, -1)``
     * ``_W``: result of ``compute_wccn`` (or the scalar ``1`` when WCCN
       is disabled)
     * ``_enroll_vecs``: per-class averages passed through
       ``self.normalize(..., concat=False)``
     * ``_vmin`` / ``_vmax``: minimum and maximum over axis 0 of the
       processed training data

     If an LDA model is attached (``self._lda``), it is fitted in place.

     Returns
     -------
     self
     """
     y, classes = self._initialize(X, y)
     # Per-class averages are taken from the input as given, i.e. before
     # the optional centering below.
     # NOTE(review): `compute_wccn` then receives possibly centered data
     # together with these un-centered averages -- confirm it is
     # insensitive to that offset.
     class_means = compute_class_avg(X, y, classes, sorting=True)
     global_mean = X.mean(axis=0).reshape(1, -1)
     self._mean = global_mean
     data = X - global_mean if self._centering else X
     # ====== WCCN: estimate, store, then apply ====== #
     if self._wccn:
         # original note: result is [feat_dim, feat_dim]
         whitener = compute_wccn(data, y, classes=None,
                                 class_avg=class_means)
         self._W = whitener
         data = np.dot(data, whitener)
     else:
         self._W = 1
     # ====== length normalization ====== #
     if self._unit_length:
         data = length_norm(data, axis=-1)
     # ====== linear discriminant analysis ====== #
     # NOTE(review): here LDA is fitted on length-normalized data, whereas
     # `normalize` applies length normalization after LDA -- confirm the
     # ordering difference is intended.
     has_lda = self._lda is not None
     if has_lda:
         self._lda.fit(data, y)
     # ====== enrollment vectors ====== #
     self._enroll_vecs = self.normalize(class_means, concat=False)
     # ====== value range of the processed training data ====== #
     if has_lda:
         data = length_norm(self._lda.transform(data), axis=-1, ord=2)
     self._vmin, self._vmax = (data.min(0, keepdims=True),
                               data.max(0, keepdims=True))
     return self
Beispiel #2
0
 def fit(self, X, y):
   """Fit the normalizer on training vectors `X` with labels `y`.

   Side effects on the instance: sets `_mean` (column-wise mean of `X`,
   shape `(1, -1)`), `_W` (output of `compute_wccn`, or `1` when WCCN is
   off), `_enroll_vecs` (class averages run through `self.normalize`
   with `concat=False`) and `_vmin` / `_vmax` (min / max over axis 0 of
   the processed data).  `self._lda`, when present, is fitted in place.

   Returns
   -------
   self
   """
   y, classes = self._initialize(X, y)
   # class averages come from the input before any centering
   # NOTE(review): they are handed to `compute_wccn` together with the
   # (possibly centered) data -- confirm that mismatch is harmless.
   class_avg = compute_class_avg(X, y, classes, sorting=True)
   mean_vec = X.mean(axis=0).reshape(1, -1)
   self._mean = mean_vec
   feats = X - mean_vec if self._centering else X
   # ====== WCCN: estimate, store, then whiten ====== #
   if self._wccn:
     # original note: result is [feat_dim, feat_dim]
     wccn_mat = compute_wccn(feats, y, classes=None, class_avg=class_avg)
     self._W = wccn_mat
     feats = np.dot(feats, wccn_mat)
   else:
     self._W = 1
   # ====== length normalization ====== #
   if self._unit_length:
     feats = length_norm(feats, axis=-1)
   # ====== linear discriminant analysis ====== #
   # NOTE(review): LDA sees length-normalized data here, while
   # `normalize` length-normalizes after LDA -- confirm this is intended.
   lda_enabled = self._lda is not None
   if lda_enabled:
     self._lda.fit(feats, y)
   # ====== enrollment vectors ====== #
   self._enroll_vecs = self.normalize(class_avg, concat=False)
   # ====== value range of the processed training data ====== #
   if lda_enabled:
     feats = length_norm(self._lda.transform(feats), axis=-1, ord=2)
   self._vmin, self._vmax = (feats.min(0, keepdims=True),
                             feats.max(0, keepdims=True))
   return self
Beispiel #3
0
 def normalize(self, X, concat=None):
     """Apply the fitted normalization pipeline to `X`.

     Steps, each applied only when the matching option is enabled:
     subtract `self._mean`, multiply by `self.W`, project with
     `self._lda` (optionally concatenating the untouched input along the
     last axis), and finally length-normalize along the last axis.

     Parameters
     ----------
     X : array [nb_samples, feat_dim]
     concat : {None, True, False}
       if not None, override the default `concat` attribute of
       this `VectorNormalizer`

     Returns
     -------
     The transformed array.

     Raises
     ------
     RuntimeError
       if `self.is_fitted` is false.
     """
     if not self.is_fitted:
         raise RuntimeError("VectorNormalizer has not been fitted.")
     do_concat = self._concat if concat is None else concat
     # Keep a handle on the raw input only when it will be concatenated;
     # non-ndarray inputs are sliced so the original sequence is copied.
     raw = None
     if do_concat:
         raw = X if isinstance(X, np.ndarray) else X[:]
     # ====== centering and whitening ====== #
     out = X - self._mean if self._centering else X
     if self._wccn:
         out = np.dot(out, self.W)
     # ====== LDA projection (with optional concatenation) ====== #
     if self._lda is not None:
         projected = self._lda.transform(out)
         if do_concat:
             out = np.concatenate((projected, raw), axis=-1)
         else:
             out = projected
     # ====== unit length normalization ====== #
     if self._unit_length:
         out = length_norm(out, axis=-1, ord=2)
     return out
Beispiel #4
0
 def normalize(self, X, concat=None):
   """Run `X` through the fitted normalization steps.

   In order, and only for the options that are switched on: remove
   `self._mean`, multiply by `self.W`, apply `self._lda.transform`
   (concatenating the raw input on the last axis if requested), then
   length-normalize along the last axis.

   Parameters
   ----------
   X : array [nb_samples, feat_dim]
   concat : {None, True, False}
     if not None, override the default `concat` attribute of
     this `VectorNormalizer`

   Returns
   -------
   The transformed array.

   Raises
   ------
   RuntimeError
     if `self.is_fitted` is false.
   """
   if not self.is_fitted:
     raise RuntimeError("VectorNormalizer has not been fitted.")
   want_concat = self._concat if concat is None else concat
   # The raw input is only remembered when it is going to be concatenated;
   # anything that is not an ndarray is sliced to obtain a copy.
   original = None
   if want_concat:
     original = X if isinstance(X, np.ndarray) else X[:]
   # ====== centering and whitening ====== #
   result = X - self._mean if self._centering else X
   if self._wccn:
     result = np.dot(result, self.W)
   # ====== LDA projection (with optional concatenation) ====== #
   if self._lda is not None:
     reduced = self._lda.transform(result)
     if want_concat:
       result = np.concatenate((reduced, original), axis=-1)
     else:
       result = reduced
   # ====== unit length normalization ====== #
   if self._unit_length:
     result = length_norm(result, axis=-1, ord=2)
   return result
Beispiel #5
0
def plot_distance_heatmap(X,
                          labels,
                          lognorm=True,
                          colormap='hot',
                          ax=None,
                          legend_enable=True,
                          legend_loc='upper center',
                          legend_ncol=3,
                          legend_colspace=0.2,
                          fontsize=10,
                          cbar=True,
                          title=None):
    r"""Draw the pairwise distance matrix of `X` as an image, grouped by label.

    The rows of `X` are length-normalized, sorted by label, and their
    pairwise distances (``scipy.spatial.distance_matrix``) are rendered
    with `colormap`.  A colored strip along the top and the left edge
    shows the label of every column / row.

    Arguments:
      X : (n_samples, n_features). Coordination for scatter points
      labels : (n_samples,). List of classes index or name. When the first
        label is a non-integer number, the labels are treated as continuous
        scores and rescaled to [-1, 1].
      lognorm : if true, ``log1p`` is applied to the distances.
      colormap : matplotlib colormap name used for the distances.
      ax : forwarded to `to_axis` to obtain the axis to draw on.
      legend_enable : whether to draw the label legend.
      legend_loc, legend_ncol, legend_colspace : forwarded to `ax.legend`
        as `loc`, `ncol` and `columnspacing`.
      fontsize : title font size; legend text uses ``fontsize - 1``.
      cbar : whether to draw a colorbar through `plot_colorbar`.
      title : optional axis title.

    Returns:
      the axis the heatmap was drawn on.
    """
    import seaborn as sns
    from matplotlib import pyplot as plt
    from matplotlib.lines import Line2D
    from odin import backend as K

    # prepare data
    X = K.length_norm(X, axis=-1, epsilon=np.finfo(X.dtype).eps)
    ax = to_axis(ax)
    n_samples, _ = X.shape  # also enforces a 2-D input
    # processing labels
    labels = np.array(labels).ravel()
    assert labels.shape[0] == n_samples, "labels must be 1-D array."
    # NOTE: only the first label is inspected to decide between
    # continuous scores and discrete classes
    is_continuous = isinstance(labels[0],
                               Number) and int(labels[0]) != labels[0]
    # float values label (normalize -1 to 1) or binary classification
    if is_continuous:
        min_val = np.min(labels)
        max_val = np.max(labels)
        if max_val > min_val:
            labels = 2 * (labels - min_val) / (max_val - min_val) - 1
        else:
            # constant scores: avoid 0 / 0 and place everything mid-scale
            labels = np.zeros(labels.shape, dtype='float64')
        n_labels = 2
        labels_name = {'-1': 0, '+1': 1}
    else:
        labels_name = {name: i for i, name in enumerate(np.unique(labels))}
        n_labels = len(labels_name)
        labels = np.array([labels_name[name] for name in labels])
    # ====== sorting label and X ====== #
    # a single stable argsort keeps the original order within equal labels
    order = np.argsort(labels, kind='stable')
    order_X = np.asarray(X)[order]
    order_label = labels[order].reshape(-1, 1)
    distance = sp.spatial.distance_matrix(order_X, order_X)
    if bool(lognorm):
        distance = np.log1p(distance)
    # lift the zero entries (e.g. the diagonal) to the smallest non-zero
    # distance; skipped when every distance is zero
    non_zero = distance[np.nonzero(distance)]
    if non_zero.size > 0:
        distance = np.clip(distance,
                           a_min=np.min(non_zero),
                           a_max=np.max(distance))
    dist_min, dist_max = np.min(distance), np.max(distance)
    # ====== convert data to image ====== #
    # Colormaps read float input as a position in [0, 1]; rescale so the
    # image spans the same [dist_min, dist_max] range given to the colorbar.
    if dist_max > dist_min:
        scaled = (distance - dist_min) / (dist_max - dist_min)
    else:
        scaled = np.zeros_like(distance)
    distance_img = plt.get_cmap(colormap)(scaled)
    # labels colormap
    width = max(int(0.032 * n_samples), 8)
    if n_labels == 2:
        label_cmap = plt.get_cmap('bwr')
        # The colormap must be given floats in [0, 1]: integer input would
        # be used as a raw lookup-table index, and values below 0 clip.
        if is_continuous:
            bar_pos = (order_label + 1.) / 2.
        else:
            bar_pos = order_label.astype('float64')
        horz_bar = np.repeat(label_cmap(bar_pos.T), repeats=width, axis=0)
        vert_bar = np.repeat(label_cmap(bar_pos), repeats=width, axis=1)
        all_colors = np.array((label_cmap(0.), label_cmap(1.)))
    else:  # use seaborn color palette here is better
        palette = [
            rgb + (1., ) for rgb in sns.color_palette(n_colors=n_labels)
        ]
        c = np.stack([palette[i] for i in order_label.ravel()])
        horz_bar = np.repeat(np.expand_dims(c, 0), repeats=width, axis=0)
        vert_bar = np.repeat(np.expand_dims(c, 1), repeats=width, axis=1)
        all_colors = palette
    # image: label strips on the top and left, distances in the remainder
    final_img = np.zeros(shape=(n_samples + width, n_samples + width,
                                distance_img.shape[2]),
                         dtype=distance_img.dtype)
    final_img[width:, width:] = distance_img
    final_img[:width, width:] = horz_bar
    final_img[width:, :width] = vert_bar
    assert np.sum(final_img[:width, :width]) == 0, \
    "Something wrong with my spacial coordination when writing this code!"
    # ====== plotting ====== #
    ax.imshow(final_img)
    ax.axis('off')
    # ====== legend ====== #
    if bool(legend_enable):
        legend_elements = [
            Line2D([0], [0],
                   marker='o',
                   color=color,
                   label=name,
                   linewidth=0,
                   linestyle=None,
                   markerfacecolor=color,
                   markersize=fontsize // 2)
            for color, name in zip(all_colors, labels_name)
        ]
        ax.legend(handles=legend_elements,
                  markerscale=1.,
                  scatterpoints=1,
                  scatteryoffsets=[0.375, 0.5, 0.3125],
                  loc=legend_loc,
                  bbox_to_anchor=(0.5, -0.01),
                  ncol=int(legend_ncol),
                  columnspacing=float(legend_colspace),
                  labelspacing=0.,
                  fontsize=fontsize - 1,
                  handletextpad=0.1)
    # ====== final configurations ====== #
    if title is not None:
        ax.set_title(str(title), fontsize=fontsize)
    if cbar:
        from odin.visual import plot_colorbar
        plot_colorbar(colormap,
                      vmin=dist_min,
                      vmax=dist_max,
                      ax=ax,
                      orientation='vertical')
    return ax
Beispiel #6
0
def plot_distance_heatmap(X,
                          labels,
                          labels_name=None,
                          lognorm=True,
                          colormap='hot',
                          ax=None,
                          legend_enable=True,
                          legend_loc='upper center',
                          legend_ncol=3,
                          legend_colspace=0.2,
                          fontsize=10,
                          show_colorbar=True,
                          title=None):
    r"""Draw the pairwise distance matrix of `X` as an image, grouped by label.

    The rows of `X` are length-normalized, sorted by label, and their
    pairwise distances (``scipy.spatial.distance_matrix``) are rendered
    with `colormap`.  A colored strip along the top and the left edge
    shows the label of every column / row.

    Parameters
    ----------
    X : (n_samples, n_features)
      coordination for scatter points

    labels : (n_samples, n_classes) or (n_samples, 1) or (n_samples,)
      list of classes index, in case of binary classification,
      the list can be float value represent confidence value for
      positive class. A 2-D array with more than one column is reduced
      with ``argmax`` over the last axis.

    labels_name : (n_classes,)
      list of classes' name, this will be used to determine
      number of classes. The legend is only drawn when this is given.

    lognorm : if true, ``log1p`` is applied to the distances.

    colormap : matplotlib colormap name used for the distances.

    ax : forwarded to `to_axis` to obtain the axis to draw on.

    legend_enable : whether to draw the legend (requires `labels_name`).

    legend_loc, legend_ncol, legend_colspace :
      forwarded to `ax.legend` as `loc`, `ncol` and `columnspacing`.

    fontsize : title font size; legend text uses ``fontsize - 1``.

    show_colorbar : whether to draw a colorbar through `plot_colorbar`.

    title : optional axis title.

    Returns
    -------
    the axis the heatmap was drawn on.

    Raises
    ------
    ValueError
      if `labels` has more than 2 dimensions.
    """
    from matplotlib.lines import Line2D
    X = K.length_norm(X, axis=-1, epsilon=np.finfo(X.dtype).eps)

    ax = to_axis(ax)
    n_samples, _ = X.shape  # also enforces a 2-D input

    # processing labels
    labels = np.array(labels)
    if labels.ndim == 2:
        if labels.shape[1] == 1:
            labels = labels.ravel()
        else:
            labels = np.argmax(labels, axis=-1)
    elif labels.ndim > 2:
        raise ValueError("Only support 1-D or 2-D labels")

    labels_int = labels.astype('int32')
    # float values label (normalize -1 to 1) or binary classification
    use_diverging = (not np.all(labels_int == labels) or
                     (labels_name is not None and len(labels_name) == 2) or
                     len(np.unique(labels)) == 2)
    if use_diverging:
        min_val = np.min(labels)
        max_val = np.max(labels)
        if max_val > min_val:
            labels = 2 * (labels - min_val) / (max_val - min_val) - 1
        else:
            # a single distinct value: avoid 0 / 0 and place it mid-scale
            labels = np.zeros(labels.shape, dtype='float64')
        label_colormap = 'bwr'
    # integer values label and multiple classes classification
    else:
        labels = labels_int
        label_colormap = 'Dark2'

    # ====== sorting label and X ====== #
    # a single stable argsort keeps the original order within equal labels
    order = np.argsort(labels, kind='stable')
    order_X = np.asarray(X)[order]
    order_label = labels[order].reshape(-1, 1)
    distance = sp.spatial.distance_matrix(order_X, order_X)
    if bool(lognorm):
        distance = np.log1p(distance)
    # lift the zero entries (e.g. the diagonal) to the smallest non-zero
    # distance; skipped when every distance is zero
    non_zero = distance[np.nonzero(distance)]
    if non_zero.size > 0:
        distance = np.clip(distance,
                           a_min=np.min(non_zero),
                           a_max=np.max(distance))
    dist_min, dist_max = np.min(distance), np.max(distance)

    # ====== convert data to image ====== #
    # Colormaps read float input as a position in [0, 1]; rescale so the
    # image spans the same [dist_min, dist_max] range given to the colorbar.
    if dist_max > dist_min:
        scaled = (distance - dist_min) / (dist_max - dist_min)
    else:
        scaled = np.zeros_like(distance)
    distance_img = plt.get_cmap(colormap)(scaled)
    # diagonal black line (i.e. zero distance)
    diagonal = np.arange(n_samples)
    distance_img[diagonal, diagonal] = (0, 0, 0, 1)

    label_cmap = plt.get_cmap(label_colormap)
    width = max(int(0.032 * n_samples), 8)
    if use_diverging:
        # labels are in [-1, 1] but the colormap expects [0, 1]; without
        # this shift the whole negative half collapses to a single color
        bar_values = (order_label + 1.) / 2.
    else:
        # integer labels index the colormap's lookup table directly
        bar_values = order_label
    horz_bar = np.repeat(label_cmap(bar_values.T), repeats=width, axis=0)
    vert_bar = np.repeat(label_cmap(bar_values), repeats=width, axis=1)

    # label strips on the top and left, distances in the remainder
    final_img = np.zeros(shape=(n_samples + width, n_samples + width,
                                distance_img.shape[2]),
                         dtype=distance_img.dtype)
    final_img[width:, width:] = distance_img
    final_img[:width, width:] = horz_bar
    final_img[width:, :width] = vert_bar
    assert np.sum(final_img[:width, :width]) == 0, \
    "Something wrong with my spacial coordination when writing this code!"
    # ====== plotting ====== #
    ax.imshow(final_img)
    ax.axis('off')
    # ====== legend ====== #
    if labels_name is not None and bool(legend_enable):
        labels_name = np.asarray(labels_name)
        if len(labels_name) == 2:  # binary: the two ends of the colormap
            all_colors = np.array((label_cmap(0.), label_cmap(1.)))
        else:  # multiple classes: one lookup-table entry per class
            # NOTE(review): if the diverging colormap was selected while
            # `labels_name` has more than two entries, these integer
            # indices pick adjacent, near-identical colors -- confirm
            # whether that combination needs support.
            all_colors = label_cmap(list(range(len(labels_name))))
        legend_elements = [
            Line2D([0], [0],
                   marker='o',
                   color=color,
                   label=name,
                   linewidth=0,
                   linestyle=None,
                   markerfacecolor=color,
                   markersize=fontsize // 2)
            for color, name in zip(all_colors, labels_name)
        ]
        ax.legend(handles=legend_elements,
                  markerscale=1.,
                  scatterpoints=1,
                  scatteryoffsets=[0.375, 0.5, 0.3125],
                  loc=legend_loc,
                  bbox_to_anchor=(0.5, -0.01),
                  ncol=int(legend_ncol),
                  columnspacing=float(legend_colspace),
                  labelspacing=0.,
                  fontsize=fontsize - 1,
                  handletextpad=0.1)
    # ====== final configurations ====== #
    if title is not None:
        ax.set_title(str(title), fontsize=fontsize)
    if show_colorbar:
        plot_colorbar(colormap,
                      vmin=dist_min,
                      vmax=dist_max,
                      ax=ax,
                      orientation='vertical')
    return ax