def grow_tree(self, X, y, depth=0):
    # Count how many observations of each class are present at this node.
    pop_per_class = R.Tensor([])
    for c in range(self.num_classes):
        pop_per_class = pop_per_class.concat(
            R.sum(R.equal(y, R.Scalar(c))).expand_dims())
    predicted_class = R.argmax(pop_per_class)

    node = Node(predicted_class=predicted_class, depth=depth)
    node.samples = R.shape(y).gather(R.Scalar(0))

    if depth < self.max_depth:
        # Placeholder split: the real search is done by self.find_split(X, y);
        # here the column and threshold are hardcoded for a first test run.
        # col, threshold = self.find_split(X, y)
        col, threshold = 0, R.Tensor([12.895])

        decision = R.Scalar(col).logical_and(threshold)
        # Block until the op has been computed by the network.
        while decision.status != "computed":
            pass
        if decision.output == 1:
            # Boolean masks selecting rows on each side of the threshold.
            indices_left = X.transpose().gather(
                R.Scalar(col)).less(threshold)
            X_left, y_left = X.gather(indices_left), y.gather(indices_left)
            indices_right = X.transpose().gather(
                R.Scalar(col)).greater_equal(threshold)
            X_right, y_right = X.gather(indices_right), y.gather(
                indices_right)

            node.feature_index = col
            node.threshold = threshold
            node.left = self.grow_tree(X_left, y_left, depth + 1)
            node.left.leftbranch = True
            node.right = self.grow_tree(X_right, y_right, depth + 1)
            node.right.rightbranch = True
    return node
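# The Node container used above is not shown in this snippet. A minimal
# sketch, assuming it only needs to hold the attributes that grow_tree
# assigns (predicted_class, depth, samples, feature_index, threshold,
# children and branch flags), could look like this:
class Node:
    def __init__(self, predicted_class=None, depth=0):
        self.predicted_class = predicted_class
        self.depth = depth
        self.samples = None          # number of observations at this node
        self.feature_index = None    # column used for the split, if any
        self.threshold = None        # split threshold (an R op)
        self.left = None             # left child: feature value < threshold
        self.right = None            # right child: feature value >= threshold
        self.leftbranch = False
        self.rightbranch = False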
def get_TP_TN_FN_FP(true_labels, pred_labels):
    # Element-wise confusion-matrix components for binary labels.
    matches = R.equal(true_labels, pred_labels)
    TP = R.logical_and(true_labels, pred_labels)
    TN = R.logical_not(R.logical_or(true_labels, pred_labels))
    FN = R.logical_not(R.logical_or(pred_labels, matches))
    FP = R.logical_and(pred_labels, R.logical_not(true_labels))
    return [R.sum(TP), R.sum(TN), R.sum(FN), R.sum(FP)]
def accuracy(y_true, y_pred):
    # Wrap raw Python inputs so they become ops on the network.
    if not isinstance(y_true, (R.Tensor, R.Op)):
        y_true = R.Tensor(y_true)
    if not isinstance(y_pred, (R.Tensor, R.Op)):
        y_pred = R.Tensor(y_pred)
    # Fraction of predictions that match the true labels.
    return R.div(R.sum(R.equal(y_pred, y_true)), y_pred.shape_())
def grow_tree(self, X, y, depth=0):
    # Count how many observations of each class are present at this node.
    pop_per_class = R.Tensor([])
    for c in range(self.num_classes):
        pop_per_class = pop_per_class.concat(
            R.sum(R.equal(y, R.Scalar(c))).expand_dims())
    predicted_class = R.argmax(pop_per_class)
    # Block until the op has been computed by the network.
    while predicted_class.status != "computed":
        pass

    node = Node(predicted_class=predicted_class.output, depth=depth)
    node.samples = R.shape(y).gather(R.Scalar(0))

    if depth < self.max_depth:
        col, threshold = self.find_split(X, y)
        # find_split returns (None, None) when no split improves the node,
        # so only wait on and use the threshold op when a split was found.
        if col is not None and threshold is not None:
            while threshold.status != "computed":
                pass

            # Boolean masks for rows going to the left/right child, converted
            # to index tensors before gathering.
            indices_left = X.transpose().gather(
                R.Scalar(col)).less(threshold)
            X_left = X.gather(
                R.find_indices(indices_left, R.Tensor([1])).reshape(
                    shape=R.sum(indices_left).expand_dims()))
            y_left = y.gather(
                R.find_indices(indices_left, R.Tensor([1])).reshape(
                    shape=R.sum(indices_left).expand_dims()))
            indices_right = X.transpose().gather(
                R.Scalar(col)).greater_equal(threshold)
            X_right = X.gather(
                R.find_indices(indices_right, R.Tensor([1])).reshape(
                    shape=R.sum(indices_right).expand_dims()))
            y_right = y.gather(
                R.find_indices(indices_right, R.Tensor([1])).reshape(
                    shape=R.sum(indices_right).expand_dims()))

            node.feature_index = col
            node.threshold = threshold
            node.left = self.grow_tree(X_left, y_left, depth + 1)
            node.left.leftbranch = True
            node.right = self.grow_tree(X_right, y_right, depth + 1)
            node.right.rightbranch = True
    return node
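# Prediction is not part of the snippet above. A hypothetical traversal
# sketch, assuming a plain Python feature vector and that each internal
# node's threshold op can be resolved to a scalar value:
def predict_row(node, row):
    # Descend while the current node has both children defined.
    while node.left is not None and node.right is not None:
        threshold = node.threshold
        if hasattr(threshold, "output"):
            while threshold.status != "computed":
                pass
            threshold = threshold.output
            # Depending on how the op materializes, the output may be a
            # one-element list that still needs unwrapping.
            if isinstance(threshold, (list, tuple)):
                threshold = threshold[0]
        if row[node.feature_index] < threshold:
            node = node.left
        else:
            node = node.right
    return node.predicted_class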
def find_split(self, X, y):
    ideal_col = None
    ideal_threshold = None

    num_observations = y.shape_().gather(R.Scalar(0))
    while num_observations.status != 'computed':
        pass
    num_observations = int(num_observations.output)
    if num_observations <= 1:
        return ideal_col, ideal_threshold

    y = y.reshape(shape=[num_observations])

    # Class counts at the parent node and the parent's Gini impurity.
    count_in_parent = R.Tensor([])
    for c in range(self.num_classes):
        count_in_parent = count_in_parent.concat(
            R.sum(R.equal(y, R.Scalar(c))).expand_dims())
    gini = R.square(
        count_in_parent.foreach(operation='div', params=num_observations))
    best_gini = R.sub(R.Scalar(1.0), R.sum(gini))

    temp_y = y.reshape(shape=[num_observations, 1])
    for col in range(self.num_features):
        # Pair the feature column with the labels and sort by feature value.
        temp_X = R.gather(
            R.transpose(X),
            R.Scalar(col)).reshape(shape=[num_observations, 1])
        all_data = R.concat(temp_X, temp_y, axis=1)
        column = R.gather(R.transpose(X), R.Scalar(col))
        ind = column.find_indices(R.sort(R.unique(column)))
        while ind.status != "computed":
            pass
        inform_server()

        sorted_data = R.Tensor([])
        for i in ind.output:
            sorted_data = sorted_data.concat(all_data.gather(
                R.Tensor(i)))  # need to find another way to sort
        sorted_data_tpose = sorted_data.transpose()
        thresholds = sorted_data_tpose.gather(R.Scalar(0)).gather(
            R.Scalar(0))
        obs_classes = sorted_data_tpose.gather(R.Scalar(1)).gather(
            R.Scalar(0))

        num_left = R.Tensor([0] * self.num_classes)  # need ops
        num_right = count_in_parent
        for i in range(1, num_observations):
            # Move observation i-1 from the right partition to the left one.
            class_ = R.gather(obs_classes, R.Tensor([i - 1]))
            classencoding = R.one_hot_encoding(
                class_, depth=self.num_classes).gather(R.Scalar(0))
            num_left = num_left.add(classencoding)
            num_right = num_right.sub(classencoding)

            # Weighted Gini impurity of the candidate split at position i.
            gini_left = R.sub(
                R.Scalar(1),
                R.sum(
                    R.square(R.foreach(num_left, operation='div',
                                       params=i))))
            gini_right = R.sub(
                R.Scalar(1),
                R.sum(
                    R.square(
                        R.foreach(num_right,
                                  operation='div',
                                  params=num_observations - i))))
            gini = R.div(
                R.add(
                    R.multiply(R.Scalar(i), gini_left),
                    R.multiply(R.Scalar(num_observations - i), gini_right)),
                R.Scalar(num_observations))

            # Skip positions where the two neighbouring thresholds are equal
            # (no real split between them) and keep the split only if it
            # improves on the best Gini found so far.
            decision1 = R.sum(
                R.equal(thresholds.gather(R.Tensor([i])),
                        thresholds.gather(R.Tensor([i - 1]))))
            decision2 = gini.less(best_gini)
            while (decision1.status != "computed"
                   or decision2.status != "computed"):
                pass
            if decision2.output == 1 and decision1.output == 0:
                best_gini = gini
                ideal_col = col
                ideal_threshold = R.div(
                    R.add(thresholds.gather(R.Tensor([i])),
                          thresholds.gather(R.Tensor([i - 1]))),
                    R.Scalar(2))
    return ideal_col, ideal_threshold
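# find_split scores each candidate split with the weighted Gini impurity
#   G = (n_left * G_left + n_right * G_right) / n,  where G_side = 1 - sum_c p_c^2.
# A small plain-Python sketch of the same criterion (no R ops), handy for
# checking the distributed version against locally computed values:
def gini_impurity(class_counts, n):
    # Impurity of one partition given its per-class counts and size n.
    return 1.0 - sum((count / n) ** 2 for count in class_counts)

def weighted_gini(left_counts, right_counts):
    n_left, n_right = sum(left_counts), sum(right_counts)
    n = n_left + n_right
    return (n_left * gini_impurity(left_counts, n_left)
            + n_right * gini_impurity(right_counts, n_right)) / n

# Example: a parent with counts [5, 5] (impurity 0.5) split into [4, 1] and
# [1, 4] gives weighted_gini([4, 1], [1, 4]) == 0.32, so the split is kept.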
def recall(true_labels, pred_labels):
    # Recall = TP / (TP + FN).
    [TP, TN, FN, FP] = get_TP_TN_FN_FP(true_labels, pred_labels)
    return R.div(TP, R.add(TP, FN))
def precision(true_labels, pred_labels):
    # Precision = TP / (TP + FP).
    [TP, TN, FN, FP] = get_TP_TN_FN_FP(true_labels, pred_labels)
    return R.div(TP, R.add(TP, FP))
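# A hedged usage sketch for the metric helpers above, assuming binary label
# vectors wrapped in R.Tensor and the same wait-for-computation pattern used
# in the training code:
y_true = R.Tensor([1, 0, 1, 1, 0, 1])
y_pred = R.Tensor([1, 0, 0, 1, 1, 1])

acc = accuracy(y_true, y_pred)
prec = precision(y_true, y_pred)
rec = recall(y_true, y_pred)

for metric in (acc, prec, rec):
    while metric.status != "computed":
        pass
print("accuracy:", acc.output)
print("precision:", prec.output)
print("recall:", rec.output)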