def update_centroids(self):
    """Recompute ``self.centroids`` from the current label assignment.

    For every cluster id in ``range(self.k)`` the entries of
    ``self._points`` whose label equals that id are gathered and reduced
    with ``mean(axis=1)``.  The per-cluster results are concatenated in
    cluster order and reshaped to ``[self.k, len(self._points.output[0])]``
    before being stored on ``self.centroids``.

    NOTE(review): a cluster id that matches no label is not special-cased
    here -- confirm callers guarantee every cluster has members.
    """
    # Cluster 0 seeds the accumulator that the remaining clusters extend.
    member_idx = R.find_indices(self._label, Tensor([0]))
    stacked = self._points.gather(member_idx).mean(axis=1)

    for cluster_id in range(1, self.k):
        member_idx = R.find_indices(self._label, Tensor([cluster_id]))
        cluster_mean = R.gather(self._points, member_idx).mean(axis=1)
        stacked = R.concat(stacked, cluster_mean)

    # Row width is taken from the first entry of the points' output
    # (presumably the feature count -- TODO confirm).
    target_shape = [self.k, len(self._points.output[0])]
    self.centroids = stacked.reshape(shape=target_shape)
def update_centroids(self):
    """Rebuild ``self.centroids`` as per-cluster means, then notify the server.

    Each cluster id in ``range(self.k)`` contributes one block: the entries
    of ``self.points`` whose value in ``self.label`` equals the id, reduced
    with ``mean(axis=1)``.  Blocks are concatenated in cluster order,
    reshaped to ``[self.k, len(self.points.output[0])]`` and assigned to
    ``self.centroids``.  ``inform_server()`` is called last.

    NOTE(review): a cluster id that matches no label is not special-cased
    here -- confirm callers guarantee every cluster has members.
    """
    # Start from cluster 0 so there is always something to concatenate onto.
    centroid_rows = R.gather(
        self.points, R.find_indices(self.label, values=[0])
    ).mean(axis=1)

    for cluster_id in range(1, self.k):
        member_idx = R.find_indices(self.label, values=[cluster_id])
        centroid_rows = R.concat(
            centroid_rows, R.gather(self.points, member_idx).mean(axis=1)
        )

    # Row width comes from the first entry of the points' output
    # (presumably the feature count -- TODO confirm).
    row_width = len(self.points.output[0])
    self.centroids = centroid_rows.reshape(shape=[self.k, row_width])

    inform_server()
def update_centroids(self, points, label):
    """Return updated centroids for ``points`` given their cluster ``label``.

    Blocks until ``label`` reports status ``'computed'``, then builds one
    row per cluster id in ``range(self.k)``:

    * if the id occurs in ``label.output``, the row is ``mean(axis=1)`` of
      the entries of ``points`` gathered for that id;
    * otherwise the matching row of ``self.centroids`` is reused, so a
      cluster without members keeps its previous centroid.

    Args:
        points: op holding the observations to average.
        label: op holding the cluster id assigned to each observation.

    Returns:
        The concatenated rows reshaped to
        ``[self.k, len(self.points.output[0])]``.

    NOTE(review): the row width is read from ``self.points`` rather than
    the ``points`` argument -- confirm this is intentional.
    """
    # Spin until the label op has been evaluated; its output is read below.
    # NOTE(review): this busy-wait never yields; consider sleeping between
    # polls if CPU usage while waiting matters.
    while label.status != 'computed':
        pass

    # Read the computed labels once instead of once per cluster.
    assigned = label.output

    def centroid_row(cluster_id):
        # One centroid row: mean of the cluster's members, or the previous
        # centroid when no observation carries this cluster id.
        if cluster_id in assigned:
            members = R.find_indices(label, values=[cluster_id])
            return R.gather(points, members).mean(axis=1)
        return R.gather(self.centroids, Tensor([cluster_id])).expand_dims(axis=0)

    rows = centroid_row(0)
    for cluster_id in range(1, self.k):
        row = centroid_row(cluster_id)
        rows = R.concat(rows, row)
        # Wait on the row inside the iteration that created it, so no name
        # is referenced unbound when self.k == 1 and the loop never runs.
        while row.status != 'computed':
            pass

    return rows.reshape(shape=[self.k, len(self.points.output[0])])
def find_split(self, X, y):
    """Search all feature columns for the split with the lowest Gini value.

    For each column index in ``range(self.num_features)`` the observations
    are reordered by that column, and every position ``i`` between two
    consecutive observations is scored with the size-weighted average of
    the left and right Gini values.  A candidate replaces the current best
    when its score is lower than ``best_gini``.

    Args:
        X: op holding the feature data; it is transposed and indexed by
            column number (assumes one row per observation -- TODO confirm).
        y: op holding the class id of each observation; ids are compared
            against ``range(self.num_classes)``.

    Returns:
        ``(ideal_col, ideal_threshold)``.  Both are ``None`` when there is
        at most one observation or no candidate was accepted.  Otherwise
        ``ideal_col`` is the column index and ``ideal_threshold`` is the op
        produced by averaging the two neighbouring threshold values.
    """
    ideal_col = None
    ideal_threshold = None

    # The observation count is needed as a Python int, so block until the
    # shape op has been evaluated.
    num_observations = y.shape_().gather(R.Scalar(0))
    while num_observations.status != 'computed':
        pass
    num_observations = int(num_observations.output)
    # Nothing to split with zero or one observation.
    if num_observations <= 1:
        return ideal_col, ideal_threshold

    y = y.reshape(shape=[num_observations])
    # Per-class observation counts for the unsplit node, built one class at
    # a time by counting the entries of y equal to that class id.
    count_in_parent = R.Tensor([])
    for c in range(self.num_classes):
        count_in_parent = count_in_parent.concat(
            R.sum(R.equal(y, R.Scalar(c))).expand_dims())
    # Gini of the unsplit node: 1 - sum((count / n) ** 2).
    gini = R.square(
        count_in_parent.foreach(operation='div', params=num_observations))
    best_gini = R.sub(R.Scalar(1.0), R.sum(gini))
    temp_y = y.reshape(shape=[num_observations, 1])

    for col in range(self.num_features):
        # Pair the current feature column with the labels, one row per
        # observation: [feature value, class id].
        temp_X = R.gather(
            R.transpose(X), R.Scalar(col)).reshape(shape=[num_observations, 1])
        all_data = R.concat(temp_X, temp_y, axis=1)

        # Indices of the column's entries for each sorted unique value;
        # they are iterated in Python below, so wait for the result.
        column = R.gather(R.transpose(X), R.Scalar(col))
        ind = column.find_indices(R.sort(R.unique(column)))
        while ind.status != "computed":
            pass
        inform_server()

        # Rebuild the rows in the order given by ``ind``.
        sorted_data = R.Tensor([])
        for i in ind.output:
            sorted_data = sorted_data.concat(all_data.gather(
                R.Tensor(i)))  # need to find another way to sort
        sorted_data_tpose = sorted_data.transpose()
        # Row 0 of the transposed data is taken as the candidate thresholds
        # and row 1 as the class ids.
        # NOTE(review): the second gather(R.Scalar(0)) presumably strips an
        # extra leading axis -- confirm against the op shapes.
        thresholds = sorted_data_tpose.gather(R.Scalar(0)).gather(
            R.Scalar(0))
        obs_classes = sorted_data_tpose.gather(R.Scalar(1)).gather(
            R.Scalar(0))

        # Start with every observation on the right side of the split.
        num_left = R.Tensor([0] * self.num_classes)  # need ops
        num_right = count_in_parent
        for i in range(1, num_observations):
            # Move observation i - 1 from the right side to the left side
            # by adding/subtracting its one-hot class vector.
            class_ = R.gather(obs_classes, R.Tensor([i - 1]))
            classencoding = R.one_hot_encoding(
                class_, depth=self.num_classes).gather(R.Scalar(0))
            num_left = num_left.add(classencoding)
            num_right = num_right.sub(classencoding)

            # Gini of each side: i observations left, the rest right.
            gini_left = R.sub(
                R.Scalar(1),
                R.sum(
                    R.square(R.foreach(num_left, operation='div', params=i))))
            gini_right = R.sub(
                R.Scalar(1),
                R.sum(
                    R.square(
                        R.foreach(num_right, operation='div',
                                  params=num_observations - i))))
            # Size-weighted average of the two sides.  This rebinds the
            # ``gini`` name that held the parent's squared proportions.
            gini = R.div(
                R.add(
                    R.multiply(R.Scalar(i), gini_left),
                    R.multiply(R.Scalar(num_observations - i), gini_right)),
                R.Scalar(num_observations))

            # NOTE(review): decision1 is the op returned by logical_and on
            # the two neighbouring thresholds; it is never waited on and is
            # compared to 1 below as an op, not via ``.output``.  Presumably
            # meant to skip equal neighbouring thresholds -- confirm.
            decision1 = R.logical_and(thresholds.gather(R.Tensor([i])),
                                      thresholds.gather(R.Tensor([i - 1])))
            # decision2 drives Python control flow, so wait for its value.
            decision2 = gini.less(best_gini)
            while decision2.status != "computed":
                pass

            # NOTE(review): looks like leftover debug output.
            print(decision2.output == 1)
            if decision2.output == 1 and decision1 != 1:
                best_gini = gini
                ideal_col = col
                # Threshold is the midpoint of the two neighbouring values.
                ideal_threshold = R.div(
                    R.add(thresholds.gather(R.Tensor([i])),
                          thresholds.gather(R.Tensor([i - 1]))),
                    R.Scalar(2))

    # NOTE(review): looks like leftover debug output.
    print(ideal_col, ideal_threshold)
    return ideal_col, ideal_threshold