Exemple #1
0
 def get_tree_loc_auc(self, data, i):
     """Return the "loc"-ordered AUC of tree ``i``'s predictions on ``data``.

     Args:
         data: table-like object supporting ``"loc" in data`` and
             ``sort_values``; it is handed to ``self.predict``, which
             adds/modifies its 'prediction' column.
         i: tree identifier, passed unchanged to ``self.predict``.

     Returns:
         ``get_auc`` of ``data`` ordered by descending "prediction" and,
         within equal predictions, ascending "loc". Returns ``0`` when
         ``data`` has no "loc" column.
     """
     # Without a "loc" column there is nothing to rank by; use 0, the same
     # fallback eval_range_split and eval_decision use for a missing "loc".
     if "loc" not in data:
         return 0
     # self.predict will add/modify the 'prediction' column to the data
     self.predict(data, i)
     sorted_data = data.sort_values(by=["prediction", "loc"],
                                    ascending=[False, True])
     return get_auc(sorted_data)
Exemple #2
0
 def eval_range_split(self, level, cur_selected, cur_performance, data, cue,
                      indexes, interval, decision):
     """Score an interval split on ``cue`` and keep it if it beats the best.

     The rows selected by ``indexes`` are the ones the rule decides on;
     every other row of ``data`` stays undecided. With ``decision == 1``
     the selected rows are treated as predicted positive and the rest as
     negative; with any other value the roles are swapped.

     Args:
         level: current tree level, forwarded to ``self.update_metrics``.
         cur_selected: best candidate found so far (a dict with at least a
             'score' key) or a falsy value when there is none yet.
         cur_performance: ``(TP, FP, TN, FN)`` accumulated before this split.
         data: DataFrame holding the rows to split; must contain the
             ``self.target`` column.
         cue: name of the feature the rule is built on (only stored in the
             returned rule).
         indexes: row selector handed to ``data.iloc`` for the in-range rows.
         interval: the interval of the rule (only stored in the rule).
         decision: 1 to label in-range rows positive, otherwise negative.

     Returns:
         ``cur_selected`` unchanged if the new score is not strictly lower,
         otherwise a new dict with keys 'rule', 'undecided', 'metrics'
         (cumulative ``[TP, FP, TN, FN]``) and 'score'.
     """
     TP, FP, TN, FN = cur_performance
     in_range = data.iloc[indexes]
     # NOTE(review): rows are taken by position above but excluded by index
     # label here; the two only agree when data's index labels equal row
     # positions -- confirm against callers.
     out_of_range = data.iloc[~data.index.isin(indexes)]
     if decision == 1:
         pos, neg = in_range, out_of_range
     else:
         pos, neg = out_of_range, in_range
     # Whatever the decision, the rows outside the range are left undecided.
     undecided = out_of_range
     # get auc for loc.
     if "loc" in data:
         sorted_data = pd.concat([
             df.sort_values(by=["loc"], ascending=True)
             for df in [pos, neg]
         ])
         loc_auc = get_auc(sorted_data)
     else:
         loc_auc = 0
     # Confusion-matrix counts of this split alone, as a real list so the
     # value is reusable (map() is a one-shot iterator on Python 3).
     metrics = [
         len(pos.loc[pos[self.target] == 1]),  # tp
         len(pos.loc[pos[self.target] == 0]),  # fp
         len(neg.loc[neg[self.target] == 0]),  # tn
         len(neg.loc[neg[self.target] == 1]),  # fn
     ]
     tp, fp, tn, fn = self.update_metrics(level, self.max_depth, decision,
                                          metrics)
     totals = [TP + tp, FP + fp, TN + tn, FN + fn]
     # if the decision lead to no data, punish the score
     # (lower scores win below, so +inf can never replace a real candidate)
     if sum([tp, fp, tn, fn]) == 0:
         score = float('inf')
     elif self.criteria == "LOC_AUC":
         score = loc_auc
     else:
         # Pass a copy so the stored 'metrics' list is never aliased.
         score = get_score(self.criteria, list(totals))
     if not cur_selected or score < cur_selected['score']:
         direction = "inside" if decision else "outside"
         cur_selected = {
             'rule': (cue, direction, interval, decision),
             'undecided': undecided,
             'metrics': totals,
             'score': score,
         }
     return cur_selected
Exemple #3
0
 def eval_decision(self, data, cue, direction, threshold, decision):
     """Split ``data`` on ``cue`` and count the resulting confusion matrix.

     Args:
         data: DataFrame containing the ``cue`` and ``self.target`` columns.
         cue: name of the column the rule tests.
         direction: ``">"`` decides rows with ``cue > threshold``; any other
             value decides rows with ``cue < threshold``. Ignored when
             ``threshold`` is an array.
         threshold: a scalar cut-off, or a numpy array whose first two
             items are the ``[low, high)`` bounds of the decided rows.
         decision: 1 to label the decided rows positive, otherwise the
             decided rows are labelled negative.

     Returns:
         ``(undecided, [tp, fp, tn, fn], loc_auc)`` where ``undecided`` holds
         the rows the rule does not decide and ``loc_auc`` is 0 when ``data``
         has no "loc" column. If the split itself raises, "Exception" is
         printed and the placeholder tuple ``(1, 2, 3)`` is returned.
     """
     try:
         if not isinstance(threshold, np.ndarray):
             if direction == ">":
                 decided = data.loc[data[cue] > threshold]
                 undecided = data.loc[data[cue] <= threshold]
             else:
                 decided = data.loc[data[cue] < threshold]
                 undecided = data.loc[data[cue] >= threshold]
         else:
             low, high = threshold[0], threshold[1]
             decided = data.loc[(data[cue] >= low) & (data[cue] < high)]
             undecided = data.loc[(data[cue] < low) | (data[cue] >= high)]
     except Exception:
         # Best-effort contract kept from the original: report and return a
         # placeholder triple. Only the catch is narrowed, so Ctrl-C and
         # SystemExit are no longer swallowed.
         print("Exception")
         return 1, 2, 3
     if decision == 1:
         pos, neg = decided, undecided
     else:
         pos, neg = undecided, decided
     # get auc for loc.
     if "loc" in data:
         sorted_data = pd.concat([
             df.sort_values(by=["loc"], ascending=True)
             for df in [pos, neg]
         ])
         loc_auc = get_auc(sorted_data)
     else:
         loc_auc = 0
     tp = pos.loc[pos[self.target] == 1]
     fp = pos.loc[pos[self.target] == 0]
     tn = neg.loc[neg[self.target] == 0]
     fn = neg.loc[neg[self.target] == 1]
     # A real list (not map()) so callers get a reusable sequence on both
     # Python 2 and Python 3.
     return undecided, [len(tp), len(fp), len(tn), len(fn)], loc_auc