Beispiel #1
0
def data_clustering(data, distance=Orange.distance.Euclidean, linkage=AVERAGE):
    """
    Cluster the rows of a data set hierarchically.

    The distance measure is called on `data` to obtain a distance matrix,
    which is then handed to `dist_matrix_clustering`.

    :param Orange.data.Table data: Data set whose rows are clustered.
    :param Orange.distance.Distance distance: Distance measure, invoked as
        ``distance(data)``.
    :param str linkage: Linkage forwarded unchanged to
        `dist_matrix_clustering`.
    :return: The result of `dist_matrix_clustering` for the computed matrix.
    """
    row_distances = distance(data)
    clustering = dist_matrix_clustering(row_distances, linkage=linkage)
    return clustering
Beispiel #2
0
 def compute_distances(self):
     """
     Recompute `self.distances` from `self.data` and `self.reference`.

     When either table is missing or empty, `self.distances` is set to
     None. Otherwise it becomes the minimum over axis 1 of the
     data-versus-reference distance matrix (presumably each data row's
     distance to its closest reference row -- confirm against the metric).
     """
     if self.data is None or len(self.data) == 0:
         self.distances = None
         return
     if self.reference is None or len(self.reference) == 0:
         self.distances = None
         return

     metric = METRICS[self.distance_index][1]
     ref_count = len(self.reference)

     # Both tables are preprocessed as one table so that column removal and
     # imputation are applied to them jointly; they are split again below.
     stacked = Table.concatenate([self.reference, self.data], 0)
     without_nan_columns = RemoveNaNColumns()(stacked)
     cleaned = Impute()(without_nan_columns)

     ref_rows = cleaned[:ref_count]
     data_rows = cleaned[ref_count:]
     pairwise = metric(data_rows, ref_rows)
     self.distances = pairwise.min(axis=1)
Beispiel #3
0
def feature_clustering(data, distance=Orange.distance.PearsonR,
                       linkage=AVERAGE):
    """
    Return the hierarchical clustering of the data set's columns.

    :param Orange.data.Table data: Data set to cluster.
    :param Orange.distance.Distance distance: A distance measure; it is
        called as ``distance(data, axis=0)``.
    :param str linkage: Linkage forwarded unchanged to
        `dist_matrix_clustering`.
    :return: The result of `dist_matrix_clustering` for the column
        distance matrix.
    """
    # Orange distance measures use axis=1 for rows and axis=0 for columns.
    # Features are columns, so axis=0 is required here; axis=1 would
    # compute row distances, the same orientation data_clustering uses.
    matrix = distance(data, axis=0)
    return dist_matrix_clustering(matrix, linkage=linkage)
Beispiel #4
0
def data_clustering(data, distance=Orange.distance.Euclidean,
                    linkage=AVERAGE):
    """
    Build a hierarchical clustering over the rows of `data`.

    :param Orange.data.Table data: Data set whose rows are clustered.
    :param Orange.distance.Distance distance: Distance measure, invoked as
        ``distance(data)`` to produce the distance matrix.
    :param str linkage: Linkage forwarded unchanged to
        `dist_matrix_clustering`.
    :return: The result of `dist_matrix_clustering` for the computed matrix.
    """
    return dist_matrix_clustering(distance(data), linkage=linkage)
Beispiel #5
0
def feature_clustering(data, distance=Orange.distance.PearsonR,
                       linkage=AVERAGE):
    """
    Build a hierarchical clustering over the columns of `data`.

    :param Orange.data.Table data: Data set whose columns are clustered.
    :param Orange.distance.Distance distance: Distance measure, invoked as
        ``distance(data, axis=0)`` to produce the distance matrix.
    :param str linkage: Linkage forwarded unchanged to
        `dist_matrix_clustering`.
    :return: The result of `dist_matrix_clustering` for the computed matrix.
    """
    column_distances = distance(data, axis=0)
    clustering = dist_matrix_clustering(column_distances, linkage=linkage)
    return clustering
Beispiel #6
0
    def compute_distances(self):
        """
        Recompute `self.distances` from `self.data` and `self.reference`.

        The `diff_domains` error is cleared first. `self.distances` is set
        to None when either table is missing or empty, or when the two
        domains differ (in which case `diff_domains` is raised as well).
        Otherwise it becomes the minimum over axis 1 of the
        data-versus-reference distance matrix.
        """
        self.Error.diff_domains.clear()

        if self.data is None or len(self.data) == 0:
            self.distances = None
            return
        if self.reference is None or len(self.reference) == 0:
            self.distances = None
            return

        if self.reference.domain != self.data.domain:
            self.Error.diff_domains()
            self.distances = None
            return

        metric = METRICS[self.distance_index][1]
        ref_count = len(self.reference)

        # Preprocess reference and data as a single table, then split them
        # back apart at the reference row count.
        stacked = Table.concatenate([self.reference, self.data], 0)
        without_nan_columns = RemoveNaNColumns()(stacked)
        cleaned = Impute()(without_nan_columns)

        ref_rows = cleaned[:ref_count]
        data_rows = cleaned[ref_count:]
        pairwise = metric(data_rows, ref_rows)
        self.distances = pairwise.min(axis=1)
 def apply(self):
     """
     Send the data rows nearest to the reference on the "Neighbors" output.

     None is sent when either input table is missing. Otherwise rows of
     `self.data` are visited in order of increasing data-to-reference
     distance until `self.n_neighbors` distinct rows are collected. Rows
     that are contained in the reference are skipped when
     `self.exclude_reference` is set.
     """
     if self.data is None or self.reference is None:
         self.send("Neighbors", None)
         return

     metric = METRICS[self.distance_index][1]
     n_data = len(self.data)
     n_ref = len(self.reference)

     # Preprocess both tables together, then split them back apart.
     combined = Table.concatenate([self.reference, self.data], 0)
     cleaned = Impute()(RemoveNaNColumns()(combined))
     pp_data = cleaned[n_ref:]
     pp_reference = cleaned[:n_ref]

     # Data rows are stacked first, so this slice keeps the block whose
     # rows index data and whose columns index reference.
     stacked = np.vstack((pp_data, pp_reference))
     dist = metric(stacked)[:n_data, n_data:]
     data = self._add_similarity(self.data, dist)

     chosen = []
     for flat_index in np.argsort(dist.flatten()):
         if len(chosen) >= self.n_neighbors:
             break
         # A flat position in the (data x reference) block maps back to
         # its data row by dividing by the number of reference rows.
         row = int(flat_index / len(self.reference))
         allowed = (self.data[row] not in self.reference or
                    not self.exclude_reference)
         if allowed and row not in chosen:
             chosen.append(row)

     neighbors = data[chosen]
     neighbors.attributes = self.data.attributes
     self.send("Neighbors", neighbors)
 def apply(self):
     """
     Select the nearest data rows and send them on the "Neighbors" output.

     When `self.data` or `self.reference` is None, None is sent instead.
     Candidate rows are taken in order of increasing distance to the
     reference rows; each data row is picked at most once, rows found in
     the reference are skipped if `self.exclude_reference` is true, and
     selection stops at `self.n_neighbors` rows.
     """
     if self.data is None or self.reference is None:
         self.send("Neighbors", None)
         return

     metric = METRICS[self.distance_index][1]
     n_data = len(self.data)
     n_ref = len(self.reference)

     # Reference and data are preprocessed as one table and then split.
     merged = Table.concatenate([self.reference, self.data], 0)
     prepared = Impute()(RemoveNaNColumns()(merged))
     pp_data = prepared[n_ref:]
     pp_reference = prepared[:n_ref]

     # Keep only the data-rows by reference-columns part of the matrix.
     full_matrix = metric(np.vstack((pp_data, pp_reference)))
     dist = full_matrix[:n_data, n_data:]
     data = self._add_similarity(self.data, dist)

     # Reversed so that pop() yields flat positions by increasing distance.
     pending = list(np.argsort(dist.flatten()))
     pending.reverse()

     picked = []
     while pending and len(picked) < self.n_neighbors:
         flat_position = pending.pop()
         row = int(flat_position / len(self.reference))
         passes_filter = (self.data[row] not in self.reference
                          or not self.exclude_reference)
         if passes_filter and row not in picked:
             picked.append(row)

     neighbours = data[picked]
     neighbours.attributes = self.data.attributes
     self.send("Neighbors", neighbours)
Beispiel #9
0
    def compute_distances(self):
        """
        Recompute `self.distances` from `self.data` and `self.reference`.

        The `diff_domains` error is cleared first. `self.distances` is set
        to None when either table is missing or empty, or when the two
        tables do not have the same set of attributes (`diff_domains` is
        raised in that case). Otherwise both tables are reduced to the
        attributes of `self.data`, preprocessed together, and
        `self.distances` becomes the minimum over axis 1 of the
        data-versus-reference distance matrix.
        """
        self.Error.diff_domains.clear()

        if self.data is None or len(self.data) == 0:
            self.distances = None
            return
        if self.reference is None or len(self.reference) == 0:
            self.distances = None
            return

        reference_attrs = set(self.reference.domain.attributes)
        data_attrs = set(self.data.domain.attributes)
        if reference_attrs != data_attrs:
            self.Error.diff_domains()
            self.distances = None
            return

        metric = METRICS[self.distance_index][1]
        ref_count = len(self.reference)

        # Only attributes take part in the comparison; metas and class
        # variables are dropped by transforming into this domain.
        attrs_only = Domain(self.data.domain.attributes)
        reference = self.reference.transform(attrs_only)
        data = self.data.transform(attrs_only)

        stacked = Table.concatenate([reference, data], 0)
        without_nan_columns = RemoveNaNColumns()(stacked)
        cleaned = Impute()(without_nan_columns)

        ref_rows = cleaned[:ref_count]
        data_rows = cleaned[ref_count:]
        pairwise = metric(data_rows, ref_rows)
        self.distances = pairwise.min(axis=1)