def data_clustering(data, distance=Orange.distance.Euclidean,
                    linkage=AVERAGE):
    """
    Cluster the rows of a data set hierarchically.

    :param Orange.data.Table data: Data set whose rows are clustered.
    :param Orange.distance.Distance distance: Distance measure; it is
        called as ``distance(data)`` to obtain the distance matrix.
    :param str linkage: Linkage criterion, forwarded unchanged to
        :func:`dist_matrix_clustering`.
    :return: Whatever :func:`dist_matrix_clustering` returns for the
        computed distance matrix.
    """
    return dist_matrix_clustering(distance(data), linkage=linkage)
def compute_distances(self):
    """
    Recompute ``self.distances`` from ``self.data`` and ``self.reference``.

    When either input is ``None`` or has length zero, ``self.distances``
    is set to ``None``. Otherwise both tables are concatenated (reference
    first), preprocessed together, split again, and the selected metric is
    evaluated as ``metric(data, reference)``; the minimum along axis 1 of
    that result is stored (presumably one value per data row -- the
    nearest reference row).
    """
    inputs = (self.data, self.reference)
    if any(table is None or len(table) == 0 for table in inputs):
        self.distances = None
        return

    # METRICS entries are indexed by position; element 1 is the callable.
    metric = METRICS[self.distance_index][1]
    ref_count = len(self.reference)

    # Preprocess reference and data jointly so both halves go through the
    # same column removal and imputation.
    stacked = Table.concatenate([self.reference, self.data], 0)
    impute = Impute()
    drop_nan_columns = RemoveNaNColumns()
    preprocessed = impute(drop_nan_columns(stacked))

    pp_reference = preprocessed[:ref_count]
    pp_data = preprocessed[ref_count:]
    self.distances = metric(pp_data, pp_reference).min(axis=1)
def feature_clustering(data, distance=Orange.distance.PearsonR,
                       linkage=AVERAGE):
    """
    Return the hierarchical clustering of the data set's columns.

    :param Orange.data.Table data: Data set to cluster.
    :param Orange.distance.Distance distance: A distance measure; it is
        called as ``distance(data, axis=0)``.
    :param str linkage: Linkage criterion, passed unchanged to
        :func:`dist_matrix_clustering`.
    :return: The result of :func:`dist_matrix_clustering` for the
        column distance matrix.
    """
    # Orange distances take ``axis=1`` (the default) for rows and
    # ``axis=0`` for columns; clustering features needs the latter.
    matrix = distance(data, axis=0)
    return dist_matrix_clustering(matrix, linkage=linkage)
def feature_clustering(data, distance=Orange.distance.PearsonR,
                       linkage=AVERAGE):
    """
    Cluster the columns (features) of a data set hierarchically.

    :param Orange.data.Table data: Data set whose columns are clustered.
    :param Orange.distance.Distance distance: Distance measure; it is
        called as ``distance(data, axis=0)``.
    :param str linkage: Linkage criterion, forwarded unchanged to
        :func:`dist_matrix_clustering`.
    :return: Whatever :func:`dist_matrix_clustering` returns for the
        computed distance matrix.
    """
    column_distances = distance(data, axis=0)
    clustering = dist_matrix_clustering(column_distances, linkage=linkage)
    return clustering
def compute_distances(self):
    """
    Recompute ``self.distances`` from ``self.data`` and ``self.reference``.

    The ``diff_domains`` error is cleared first. ``self.distances`` is set
    to ``None`` when either input is ``None`` or has length zero, or when
    the two tables' domains compare unequal (in which case the
    ``diff_domains`` error is also raised). Otherwise both tables are
    concatenated (reference first), preprocessed together, split again,
    and the minimum along axis 1 of ``metric(data, reference)`` is stored.
    """
    self.Error.diff_domains.clear()

    data, reference = self.data, self.reference
    usable = (data is not None and len(data) != 0
              and reference is not None and len(reference) != 0)

    # Domains are only compared once both tables are known to be present.
    if usable and reference.domain != data.domain:
        self.Error.diff_domains()
        usable = False

    if not usable:
        self.distances = None
        return

    # METRICS entries are indexed by position; element 1 is the callable.
    metric = METRICS[self.distance_index][1]
    ref_count = len(reference)

    stacked = Table.concatenate([reference, data], 0)
    impute = Impute()
    drop_nan_columns = RemoveNaNColumns()
    preprocessed = impute(drop_nan_columns(stacked))

    pp_reference = preprocessed[:ref_count]
    pp_data = preprocessed[ref_count:]
    self.distances = metric(pp_data, pp_reference).min(axis=1)
def apply(self):
    """
    Select rows of ``self.data`` by distance to ``self.reference`` and
    send them on the "Neighbors" output.

    ``None`` is sent when either input is missing. Otherwise the
    data-to-reference block of the distance matrix is computed, its
    entries are visited in ascending order, and the data row of each entry
    is collected until ``self.n_neighbors`` distinct rows are chosen. Rows
    that are contained in ``self.reference`` are skipped when
    ``self.exclude_reference`` is set.
    """
    if self.data is None or self.reference is None:
        self.send("Neighbors", None)
        return

    # METRICS entries are indexed by position; element 1 is the callable.
    metric = METRICS[self.distance_index][1]
    n_data = len(self.data)
    n_ref = len(self.reference)

    # Preprocess reference and data jointly, then split them apart again.
    stacked = Table.concatenate([self.reference, self.data], 0)
    impute = Impute()
    drop_nan_columns = RemoveNaNColumns()
    preprocessed = impute(drop_nan_columns(stacked))
    pp_data = preprocessed[n_ref:]
    pp_reference = preprocessed[:n_ref]

    # The metric runs on data stacked above reference; rows [:n_data] and
    # columns [n_data:] form the data-versus-reference block.
    full_matrix = metric(np.vstack((pp_data, pp_reference)))
    dist = full_matrix[:n_data, n_data:]
    data_with_similarity = self._add_similarity(self.data, dist)

    selected = []
    for flat_index in np.argsort(dist.flatten()):
        if len(selected) >= self.n_neighbors:
            break
        # A flat index into the block maps back to its data row.
        row = int(flat_index / len(self.reference))
        excluded = self.data[row] in self.reference and self.exclude_reference
        if not excluded and row not in selected:
            selected.append(row)

    neighbors = data_with_similarity[selected]
    neighbors.attributes = self.data.attributes
    self.send("Neighbors", neighbors)
def apply(self):
    """
    Pick the rows of ``self.data`` with the smallest distances to
    ``self.reference`` and emit them on the "Neighbors" output.

    If either input is ``None``, ``None`` is emitted instead. At most
    ``self.n_neighbors`` distinct rows are emitted; with
    ``self.exclude_reference`` set, rows that are contained in
    ``self.reference`` are left out.
    """
    if self.data is None or self.reference is None:
        self.send("Neighbors", None)
        return

    # METRICS entries are indexed by position; element 1 is the callable.
    measure = METRICS[self.distance_index][1]
    n_data, n_ref = len(self.data), len(self.reference)

    # Reference and data are preprocessed as one table and split afterwards.
    combined = Table.concatenate([self.reference, self.data], 0)
    impute = Impute()
    drop_nan_columns = RemoveNaNColumns()
    cleaned = impute(drop_nan_columns(combined))
    pp_data, pp_reference = cleaned[n_ref:], cleaned[:n_ref]

    # Keep only the data-rows x reference-columns block of the matrix.
    dist = measure(np.vstack((pp_data, pp_reference)))[:n_data, n_data:]
    annotated = self._add_similarity(self.data, dist)

    picked = []

    def _eligible(row):
        # Reference membership is tested first, then the exclusion flag,
        # then whether the row has already been picked.
        if self.data[row] in self.reference and self.exclude_reference:
            return False
        return row not in picked

    # Walk the flattened block from the smallest distance upwards.
    ranked = list(np.argsort(dist.flatten()))
    position = 0
    while position < len(ranked) and len(picked) < self.n_neighbors:
        row = int(ranked[position] / len(self.reference))
        position += 1
        if _eligible(row):
            picked.append(row)

    neighbours = annotated[picked]
    neighbours.attributes = self.data.attributes
    self.send("Neighbors", neighbours)
def compute_distances(self):
    """
    Recompute ``self.distances`` from ``self.data`` and ``self.reference``.

    The ``diff_domains`` error is cleared first. ``self.distances`` is set
    to ``None`` when either input is ``None`` or has length zero, or when
    the two tables do not have the same set of attributes (the
    ``diff_domains`` error is raised in that case). Otherwise both tables
    are transformed to a domain holding only the data table's attributes,
    concatenated (reference first), preprocessed together, split again,
    and the minimum along axis 1 of ``metric(data, reference)`` is stored.
    """
    self.Error.diff_domains.clear()

    for table in (self.data, self.reference):
        if table is None or len(table) == 0:
            self.distances = None
            return

    reference_attrs = self.reference.domain.attributes
    data_attrs = self.data.domain.attributes
    # Compared as sets, so attribute order does not matter here.
    if set(reference_attrs) != set(data_attrs):
        self.Error.diff_domains()
        self.distances = None
        return

    # METRICS entries are indexed by position; element 1 is the callable.
    metric = METRICS[self.distance_index][1]
    ref_count = len(self.reference)

    # comparing only attributes, no metas and class-vars
    attributes_only = Domain(data_attrs)
    reference = self.reference.transform(attributes_only)
    data = self.data.transform(attributes_only)

    stacked = Table.concatenate([reference, data], 0)
    impute = Impute()
    drop_nan_columns = RemoveNaNColumns()
    preprocessed = impute(drop_nan_columns(stacked))

    pp_reference = preprocessed[:ref_count]
    pp_data = preprocessed[ref_count:]
    self.distances = metric(pp_data, pp_reference).min(axis=1)