def predict(self, selection: List[int], dataset) -> List[Prediction]:
    """Build one Prediction per column of the processed output.

    Args:
        selection: Per-row selection flags. Converted to a NumPy array,
            passed to ``rank_jaccard`` and, cast to bool, used as a mask
            over ``dataset["id"]``.
        dataset: Table providing the ``"id"`` column and the columns named
            by ``self.getDimensionArr()``.

    Returns:
        A list with one ``Prediction`` for every (column, unique value)
        pair obtained from ``self.processOutput()``.
    """
    output, unique_vals = self.processOutput()
    sels = np.array(selection)
    ids = dataset["id"]
    # Loop-invariant: ids of the selected rows, needed by every getStats call.
    selected_ids = ids[sels.astype(bool)].tolist()

    preds: List[Prediction] = []
    # iteritems() was removed in pandas 2.0; items() yields the same
    # (column label, column) pairs and is available in older releases too.
    # NOTE(review): assumes `output` is a pandas DataFrame - confirm.
    for (_, vals), u in zip(output.items(), unique_vals):
        flags = vals.values
        # Resolve these once per column instead of twice each.
        member_ids = self.getMemberIds(flags, ids)
        dimensions = self.getDimensionArr()
        preds.append(
            Prediction(
                rank=rank_jaccard(flags, sels),
                intent=self.intentType,
                memberIds=member_ids,
                dimensions=dimensions,
                info=self.getClusterParams(
                    dataset.loc[flags.astype(bool), dimensions].values,
                    u,
                ),
                algorithm=self.algorithm,
                membership=getStats(member_ids, selected_ids),
                description=self.description,
            )
        )
    return preds
def range_intent(dataset, dimensions, selection, max_depth=None) -> List[Prediction]:
    """Describe the selection as decision-tree rules over ``dimensions``.

    A ``DecisionTreeClassifier`` is fitted on ``dataset[dimensions]`` with the
    selection as target. The rules returned by ``get_decision_paths`` are
    turned into a row mask by ``get_mask_from_rules``; when that mask is
    ``None`` every id in the dataset is treated as a member.

    Args:
        dataset: Table with an ``"id"`` column and the ``dimensions`` columns.
        dimensions: Column labels used as features for the tree.
        selection: Per-row selection flags (converted to a NumPy array).
        max_depth: Depth limit for the tree. ``None`` produces a "Range"
            prediction; any other value produces a "SimplifiedRange" one.

    Returns:
        ``[prediction]``. When ``max_depth`` is ``None`` and the fitted tree
        is deeper than 1, the list is followed by the result of one further
        call limited to ``depth - 1``.
    """
    selection = np.array(selection)
    picked_ids = dataset.loc[selection.astype(bool), "id"]
    features = dataset[dimensions]

    classifier = tree.DecisionTreeClassifier(max_depth=max_depth)
    classifier.fit(features, selection)
    rules = get_decision_paths(classifier, features, selection)

    # Fall back to "everything is a member" when the rules give no mask.
    member_ids = dataset.id.tolist()
    rule_mask: Any = get_mask_from_rules(features, rules)
    if rule_mask is not None:
        member_ids = dataset.loc[rule_mask, "id"].tolist()

    depth = classifier.get_depth()
    unrestricted = max_depth is None
    algorithm = "DecisionTree"

    prediction = Prediction(
        # Deeper trees score lower: 1 / (depth^2 + 1).
        rank=1 / (depth ** 2 + 1),
        intent="Range" if unrestricted else "SimplifiedRange",
        memberIds=member_ids,
        dimensions=dimensions,
        info={"depth": depth, "rules": rules},
        algorithm=algorithm,
        membership=getStats(member_ids, picked_ids.tolist()),
        description=f"Range-{algorithm}",
    )

    if depth <= 1 or not unrestricted:
        return [prediction]

    # The nested call receives an explicit max_depth, so it does not recurse
    # any further itself.
    simplified = range_intent(dataset, dimensions, selection, depth - 1)
    return [prediction, *simplified]
def predict(self, selection: List[int], dataset):
    """Build one Prediction per column of the processed output.

    The intent of each prediction is the string form of its column label.

    Args:
        selection: Per-row selection flags. Converted to a NumPy array,
            passed to ``rank_jaccard`` and, cast to bool, used as a mask
            over ``dataset["id"]``.
        dataset: Table providing the ``"id"`` column.

    Returns:
        A list of ``Prediction`` objects, one per column yielded by
        ``self.processOutput()``.
    """
    output = self.processOutput()
    sels = np.array(selection)
    ids = dataset["id"]
    # Loop-invariant: ids of the selected rows, needed by every getStats call.
    selected_ids = ids[sels.astype(bool)].tolist()

    preds: List[Prediction] = []
    # iteritems() was removed in pandas 2.0; items() yields the same
    # (column label, column) pairs and is available in older releases too.
    # NOTE(review): assumes `output` is a pandas DataFrame - confirm.
    for col, vals in output.items():
        flags = vals.values
        # Resolve the member ids once per column instead of twice.
        member_ids = self.getMemberIds(flags, ids)
        preds.append(
            Prediction(
                rank=rank_jaccard(flags, sels),
                intent=str(col),
                memberIds=member_ids,
                dimensions=self.getDimensionArr(),
                info=self.getInfo(),
                algorithm=self.algorithm,
                membership=getStats(member_ids, selected_ids),
                description=self.description,
            )
        )
    return preds
def predict(self, selection: List[int], dataset):
    """Build the single Prediction for this intent's processed output.

    Args:
        selection: Per-row selection flags. Converted to a NumPy array,
            passed to ``rank_jaccard`` and, cast to bool, used as a mask
            over ``dataset["id"]``.
        dataset: Table providing the ``"id"`` column; also handed to
            ``self.getSkylineInfo``.

    Returns:
        A one-element list containing the ``Prediction``.
    """
    output = self.processOutput()
    sels = np.array(selection)
    ids = dataset["id"]

    # Previously evaluated three times with identical arguments; resolved
    # once here and shared by memberIds, info and membership.
    # NOTE(review): assumes getMemberIds has no side effects - confirm.
    member_ids = self.getMemberIds(output, ids)

    preds: List[Prediction] = [
        Prediction(
            rank=rank_jaccard(output, sels),
            intent=self.intentType,
            memberIds=member_ids,
            dimensions=self.getDimensionArr(),
            info=self.getSkylineInfo(dataset, member_ids),
            algorithm=self.algorithm,
            membership=getStats(member_ids, ids[sels.astype(bool)].tolist()),
            description=self.description,
        )
    ]
    return preds
def applyPrediction(
    prediction: Prediction,
    selections: List[str],
    target: pd.DataFrame,
    target_id: str,
) -> Prediction:
    """Re-evaluate an existing prediction against ``target``.

    The algorithm recorded on ``prediction`` selects which ``apply*`` helper
    recomputes the member ids (and, for some algorithms, parts of ``info``)
    on the target table.

    Args:
        prediction: Prediction to re-apply. Its ``algorithm`` and ``intent``
            must be valid ``Algorithms`` / ``Intents`` values.
        selections: Ids considered selected in ``target``.
        target: Table with an ``id`` column to apply the prediction to.
        target_id: Identifier of ``target``; compared with
            ``prediction.original_id``.

    Returns:
        ``prediction`` itself when it already belongs to ``target_id``; the
        result of ``apply_linear_regression`` for the LR algorithm; otherwise
        a new ``Prediction``. If no branch produces ids, the new prediction
        has an empty member list.
    """
    # A prediction computed on this very target needs no re-application.
    if prediction.original_id is not None and prediction.original_id == target_id:
        return prediction

    algorithm = Algorithms(prediction.algorithm)
    intent = Intents(prediction.intent)
    dimensions = prediction.dimensions
    info = prediction.info

    sels = target.id.isin(selections)
    ids = np.array([])  # default when no branch below assigns member ids
    # Work on a copy so the source prediction's info is never mutated.
    new_info = deepcopy(info)

    if algorithm == Algorithms.KMEANS:
        ids, centers, hull, closest_center = applyKMeans(
            target,
            dimensions,
            info["params"]["n_clusters"],
            info["selected_center"],
            np.array(info["centers"]),
        )
        new_info["centers"] = centers.tolist()
        new_info["hull"] = hull
        new_info["selected_center"] = closest_center
    elif algorithm == Algorithms.DBSCAN:
        eps = info["params"]["eps"]
        min_samples = info["params"]["min_samples"]
        if intent == Intents.CLUSTER:
            ids, hull = applyDBScanCluster(
                target, dimensions, eps, min_samples, prediction.memberIds
            )
            new_info["hull"] = hull
        elif intent in (Intents.OUTLIER, Intents.NONOUTLIER):
            ids = applyDBScanOutlier(
                target,
                dimensions,
                eps,
                min_samples,
                # True for OUTLIER, False for NONOUTLIER.
                intent != Intents.NONOUTLIER,
            )
    elif algorithm == Algorithms.DECISIONTREE:
        ids = apply_range(target, info["rules"])
    elif algorithm == Algorithms.BNL:
        # applySkyline supplies a fresh info object replacing the copy above.
        ids, new_info = applySkyline(target, dimensions, info["sense"])
        ids = ids.astype(bool)
        new_info["frontier"] = target[ids][dimensions].values.tolist()
        ids = target[ids].id
    elif algorithm == Algorithms.LR:
        return apply_linear_regression(target, prediction, sels)

    intents = target.id.isin(ids)
    # Normalise to a plain list once. isinstance (rather than an exact type
    # comparison) also accepts list subclasses, which have no .tolist().
    member_ids = ids if isinstance(ids, list) else ids.tolist()

    return Prediction(
        rank=rank_jaccard(intents, sels),
        intent=intent.value,
        memberIds=member_ids,
        dimensions=dimensions,
        info=new_info,
        algorithm=algorithm.value,
        # Pass the same list that is stored as memberIds, as range_intent
        # does, instead of the raw ndarray/Series.
        membership=getStats(member_ids, selections),
        description=prediction.description,
    )