Exemple #1
0
    def update_centroids(self):
        """Recompute ``self.centroids`` from ``self._points`` and ``self._label``.

        For every cluster id in ``range(self.k)`` the rows of ``self._points``
        selected by ``R.find_indices(self._label, Tensor([id]))`` are gathered
        and reduced with ``mean(axis=1)``.  The per-cluster results are
        concatenated in cluster order and reshaped to
        ``[self.k, len(self._points.output[0])]``.

        Nothing is returned; the result is stored on ``self.centroids``.
        """
        # Cluster 0 seeds the accumulator that later clusters are appended to.
        first_members = R.find_indices(self._label, Tensor([0]))
        stacked_means = self._points.gather(first_members).mean(axis=1)

        for cluster_id in range(1, self.k):
            members = R.find_indices(self._label, Tensor([cluster_id]))
            cluster_mean = R.gather(self._points, members).mean(axis=1)
            stacked_means = R.concat(stacked_means, cluster_mean)

        # Width of one centroid row, taken from the first entry of the points'
        # output (presumably the feature count -- confirm against the caller).
        row_width = len(self._points.output[0])
        self.centroids = stacked_means.reshape(shape=[self.k, row_width])
Exemple #2
0
    def update_centroids(self):
        """Recompute ``self.centroids`` from ``self.points`` and ``self.label``.

        For every cluster id in ``range(self.k)`` the rows of ``self.points``
        selected by ``R.find_indices(self.label, values=[id])`` are gathered
        and reduced with ``mean(axis=1)``.  The per-cluster results are
        concatenated in cluster order, reshaped to
        ``[self.k, len(self.points.output[0])]`` and stored on
        ``self.centroids``.  ``inform_server()`` is called last.
        """
        # Cluster 0 seeds the accumulator that later clusters are appended to.
        first_members = R.find_indices(self.label, values=[0])
        stacked_means = R.gather(self.points, first_members).mean(axis=1)

        for cluster_id in range(1, self.k):
            members = R.find_indices(self.label, values=[cluster_id])
            cluster_mean = R.gather(self.points, members).mean(axis=1)
            stacked_means = R.concat(stacked_means, cluster_mean)

        # Width of one centroid row, taken from the first entry of the points'
        # output (presumably the feature count -- confirm against the caller).
        row_width = len(self.points.output[0])
        self.centroids = stacked_means.reshape(shape=[self.k, row_width])
        inform_server()
Exemple #3
0
    def update_centroids(self, points, label):
        """Build and return updated centroids for ``points`` given ``label``.

        Blocks (busy-waits) until ``label.status`` is ``'computed'``.  For each
        cluster id in ``range(self.k)``:

        * if the id occurs in ``label.output``, the rows of ``points`` selected
          by ``R.find_indices(label, values=[id])`` are gathered and reduced
          with ``mean(axis=1)``;
        * otherwise the existing row ``id`` of ``self.centroids`` is reused
          (gathered and given a leading axis via ``expand_dims(axis=0)``).

        The rows are concatenated in cluster order.  For every cluster after
        the first, the method also waits for that cluster's row to reach
        status ``'computed'`` before moving on.

        Returns the concatenation reshaped to
        ``[self.k, len(self.points.output[0])]``; ``self.centroids`` is not
        assigned here.
        """

        def centroid_row(cluster_id):
            # Clusters with no assigned label keep their previous centroid.
            if cluster_id in label.output:
                members = R.find_indices(label, values=[cluster_id])
                return R.gather(points, members).mean(axis=1)
            previous = R.gather(self.centroids, Tensor([cluster_id]))
            return previous.expand_dims(axis=0)

        # label.output is read below, so the labels must be computed first.
        while label.status != 'computed':
            pass

        stacked_rows = centroid_row(0)
        for cluster_id in range(1, self.k):
            row = centroid_row(cluster_id)
            stacked_rows = R.concat(stacked_rows, row)

            # Wait for this cluster's row before creating the next one.
            while row.status != 'computed':
                pass

        # NOTE(review): the row width is read from self.points, not from the
        # `points` argument -- confirm this is intentional.
        row_width = len(self.points.output[0])
        return stacked_rows.reshape(shape=[self.k, row_width])
Exemple #4
0
    def find_split(self, X, y):
        """Search every feature column for the split with the lowest Gini value.

        The parent impurity ``1 - sum((count_c / n) ** 2)`` is used as the
        initial ``best_gini``.  For each column in ``range(self.num_features)``
        and each split position ``i`` in ``1 .. n - 1`` the weighted impurity
        ``(i * gini_left + (n - i) * gini_right) / n`` is built with ``R`` ops
        and compared against ``best_gini``.

        Args:
            X: feature data; assumed to be 2-D (observations x features) since
                it is transposed and indexed by column -- TODO confirm.
            y: class labels; reshaped here to ``[num_observations]``.

        Returns:
            ``(ideal_col, ideal_threshold)``.  ``ideal_col`` is the integer
            column index and ``ideal_threshold`` is the ``R.div`` result for
            the midpoint of two adjacent threshold values.  Both are ``None``
            when there is at most one observation or no candidate split was
            accepted.

        Side effects visible here: busy-waits on several op statuses, calls
        ``inform_server()`` once per column, and prints debug output.
        """
        ideal_col = None
        ideal_threshold = None

        # First entry of y's shape is taken as the number of observations.
        num_observations = y.shape_().gather(R.Scalar(0))
        # Busy-wait: the value is needed as a plain int below.
        while num_observations.status != 'computed':
            pass
        num_observations = int(num_observations.output)
        if num_observations <= 1:
            return ideal_col, ideal_threshold

        y = y.reshape(shape=[num_observations])
        # One entry per class: how many labels in y equal that class id.
        count_in_parent = R.Tensor([])
        for c in range(self.num_classes):
            count_in_parent = count_in_parent.concat(
                R.sum(R.equal(y, R.Scalar(c))).expand_dims())
        # Parent impurity: 1 - sum((count / n) ** 2); baseline to beat.
        gini = R.square(
            count_in_parent.foreach(operation='div', params=num_observations))
        best_gini = R.sub(R.Scalar(1.0), R.sum(gini))
        temp_y = y.reshape(shape=[num_observations, 1])

        for col in range(self.num_features):
            # Pair this feature column with the labels as an [n, 2] tensor.
            temp_X = R.gather(
                R.transpose(X),
                R.Scalar(col)).reshape(shape=[num_observations, 1])
            all_data = R.concat(temp_X, temp_y, axis=1)

            # Indices used to reorder the rows; presumably the positions of
            # each sorted unique value within the column -- TODO confirm the
            # exact semantics of find_indices.
            column = R.gather(R.transpose(X), R.Scalar(col))
            ind = column.find_indices(R.sort(R.unique(column)))
            # ind.output is iterated below, so it must be computed first.
            while ind.status != "computed":
                pass
            inform_server()
            # Rebuild the (feature, label) rows in the order given by ind.
            sorted_data = R.Tensor([])
            for i in ind.output:
                sorted_data = sorted_data.concat(all_data.gather(
                    R.Tensor(i)))  # need to find another way to sort
            sorted_data_tpose = sorted_data.transpose()
            # Row 0 of the transposed data: feature values (split candidates).
            thresholds = sorted_data_tpose.gather(R.Scalar(0)).gather(
                R.Scalar(0))
            # Row 1 of the transposed data: the matching class labels.
            obs_classes = sorted_data_tpose.gather(R.Scalar(1)).gather(
                R.Scalar(0))

            # Per-class counts on each side; everything starts on the right.
            num_left = R.Tensor([0] * self.num_classes)  # need ops
            num_right = count_in_parent
            for i in range(1, num_observations):
                # Move observation i - 1 from the right side to the left side.
                class_ = R.gather(obs_classes, R.Tensor([i - 1]))
                classencoding = R.one_hot_encoding(
                    class_, depth=self.num_classes).gather(R.Scalar(0))
                num_left = num_left.add(classencoding)
                num_right = num_right.sub(classencoding)

                # Impurity of the i observations on the left.
                gini_left = R.sub(
                    R.Scalar(1),
                    R.sum(
                        R.square(R.foreach(num_left, operation='div',
                                           params=i))))
                # Impurity of the remaining n - i observations on the right.
                gini_right = R.sub(
                    R.Scalar(1),
                    R.sum(
                        R.square(
                            R.foreach(num_right,
                                      operation='div',
                                      params=num_observations - i))))
                # Size-weighted average of the two child impurities.
                gini = R.div(
                    R.add(
                        R.multiply(R.Scalar(i), gini_left),
                        R.multiply(R.Scalar(num_observations - i),
                                   gini_right)), R.Scalar(num_observations))

                # NOTE(review): logical_and of two adjacent threshold values;
                # this looks like it was meant to detect equal neighbours
                # (no valid split between them) -- confirm the intended op.
                decision1 = R.logical_and(thresholds.gather(R.Tensor([i])),
                                          thresholds.gather(R.Tensor([i - 1])))
                decision2 = gini.less(best_gini)
                # Only decision2 is awaited before its output is read.
                while decision2.status != "computed":
                    pass

                print(decision2.output == 1)
                # NOTE(review): decision1 is compared as the object returned
                # by R.logical_and, not via its .output, and is never waited
                # on -- verify this condition behaves as intended.
                if decision2.output == 1 and decision1 != 1:
                    best_gini = gini
                    ideal_col = col
                    # Threshold is the midpoint of the two adjacent values.
                    ideal_threshold = R.div(
                        R.add(thresholds.gather(R.Tensor([i])),
                              thresholds.gather(R.Tensor([i - 1]))),
                        R.Scalar(2))
        print(ideal_col, ideal_threshold)
        return ideal_col, ideal_threshold