def transform(self, **Xy):
    if not self.slices:
        raise ValueError("Slicing hasn't been initialized. "
                         "Slicer constructors such as CV or Perm should be "
                         "called with a sample. Ex.: CV(..., y=y), Perm(..., y=y)")
    data_keys = self.apply_on if self.apply_on else list(Xy.keys())
    # Filter out entries that are not arrays or whose first dimension does not
    # match the number of samples (built as a new list to avoid mutating the
    # sequence while iterating over it).
    data_keys = [k for k in data_keys
                 if hasattr(Xy[k], "shape") and Xy[k].shape[0] == self.n]
    for data_key in data_keys:  # slice input data
        dat = Xy.pop(data_key)
        if isinstance(self.slices, dict):
            # Dict of slices (e.g. train/test): emit one key per sample set.
            Xy[conf.KW_SPLIT_TRAIN_TEST] = True
            for sample_set in self.slices:
                if len(dat.shape) == 2:
                    Xy[key_push(data_key, sample_set)] = \
                        dat[self.slices[sample_set], :]
                else:
                    Xy[key_push(data_key, sample_set)] = \
                        dat[self.slices[sample_set]]
        else:
            # Single slice: replace the value under the same key.
            if len(dat.shape) == 2:
                Xy[data_key] = dat[self.slices, :]
            else:
                Xy[data_key] = dat[self.slices]
    return Xy
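# --- Illustration (not from the library) -------------------------------------
# Minimal standalone sketch of what the dict-of-slices branch above does:
# every sample-aligned array is split into one entry per sample set.  The
# plain "/"-joined keys and the variable names here are illustrative
# assumptions; the real method builds its keys with key_push and conf.
import numpy as np

X = np.arange(12).reshape(6, 2)            # 2-D data: sliced on axis 0
y = np.array([0, 0, 1, 1, 0, 1])           # 1-D data: indexed directly
slices = {"train": np.array([0, 1, 2, 3]), "test": np.array([4, 5])}

Xy = {"X": X, "y": y}
out = {}
for data_key, dat in Xy.items():
    for sample_set, idx in slices.items():
        out["%s/%s" % (data_key, sample_set)] = \
            dat[idx, :] if dat.ndim == 2 else dat[idx]

print(sorted(out))   # ['X/test', 'X/train', 'y/test', 'y/train']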
def reduce(self, result):
    if self.select_regexp:
        inputs = [key3 for key3 in result
                  if re.search(self.select_regexp, str(key3))]
    else:
        inputs = result.keys()
    if len(inputs) != 2:
        raise KeyError("Need to find exactly two results to compute a score."
                       " Found %i: %s" % (len(inputs), inputs))
    key_true = [k for k in inputs if k.find(conf.TRUE) != -1][0]
    key_pred = [k for k in inputs if k.find(conf.PREDICTION) != -1][0]
    y_true = result[key_true]
    y_pred = result[key_pred]
    try:  # If list of arrays (CV, LOO, etc.) concatenate them
        y_true = np.concatenate(y_true)
        y_pred = np.concatenate(y_pred)
    except ValueError:
        pass
    out = Result(key=result["key"])
    p, r, f1, s = precision_recall_fscore_support(y_true, y_pred, average=None)
    key, _ = key_pop(key_pred, -1)
    out[key_push(key, conf.SCORE_PRECISION)] = p
    out[key_push(key, conf.SCORE_RECALL)] = r
    out[key_push(key, conf.SCORE_RECALL_MEAN)] = r.mean()
    out[key_push(key, conf.SCORE_F1)] = f1
    out[key_push(key, conf.SCORE_ACCURACY)] = accuracy_score(y_true, y_pred)
    if self.keep:
        out.update(result)
    return out
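# --- Illustration (not from the library) -------------------------------------
# Reminder of the sklearn calls the scorer above relies on:
# precision_recall_fscore_support(..., average=None) returns one value per
# class, so SCORE_RECALL_MEAN is an unweighted mean of per-class recalls.
# Standalone check, independent of the Result/key machinery:
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

y_true = np.array([0, 0, 0, 1, 1, 1])
y_pred = np.array([0, 0, 1, 1, 1, 0])

p, r, f1, s = precision_recall_fscore_support(y_true, y_pred, average=None)
print(r)                               # per-class recall: 2/3 for each class
print(r.mean())                        # 0.666..., the reported recall mean
print(accuracy_score(y_true, y_pred))  # 0.666...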
def transform(self, **Xy):
    if not self.slices:
        raise ValueError("Slicing hasn't been initialized.")
    data_keys = self.apply_on if self.apply_on else Xy.keys()
    for slice_key in self.slices.keys():
        if slice_key in data_keys:
            data_key = slice_key
            dat = Xy.pop(data_key)
            if len(dat.shape) == 2:
                if self.col_or_row:
                    Xy[data_key] = dat[:, self.slices[data_key]]
                else:
                    Xy[data_key] = dat[self.slices[data_key], :]
            else:
                Xy[data_key] = dat[self.slices[data_key]]
    # only for cross-validation
    if conf.TRAIN in self.slices.keys() \
            and conf.TEST in self.slices.keys():
        Xy[conf.KW_SPLIT_TRAIN_TEST] = True
        for data_key in list(data_keys):
            dat = Xy.pop(data_key)
            for sample_set in self.slices:
                if len(dat.shape) == 2:
                    if self.col_or_row:
                        Xy[key_push(data_key, sample_set)] = \
                            dat[:, self.slices[sample_set]]
                    else:
                        Xy[key_push(data_key, sample_set)] = \
                            dat[self.slices[sample_set], :]
                else:
                    Xy[key_push(data_key, sample_set)] = \
                        dat[self.slices[sample_set]]
    return Xy
def transform(self, **Xy):
    if not self.slices:
        raise ValueError(
            "Slicing hasn't been initialized. "
            "Slicer constructors such as CV or Perm should be called "
            "with a sample. Ex.: CV(..., y=y), Perm(..., y=y)")
    data_keys = self.apply_on if self.apply_on else list(Xy.keys())
    # Filter out entries that are not arrays or whose first dimension does not
    # match the number of samples (built as a new list to avoid mutating the
    # sequence while iterating over it).
    data_keys = [k for k in data_keys
                 if hasattr(Xy[k], "shape") and Xy[k].shape[0] == self.n]
    for data_key in data_keys:  # slice input data
        dat = Xy.pop(data_key)
        if isinstance(self.slices, dict):
            # Dict of slices (e.g. train/test): emit one key per sample set.
            Xy[conf.KW_SPLIT_TRAIN_TEST] = True
            for sample_set in self.slices:
                if len(dat.shape) == 2:
                    Xy[key_push(data_key, sample_set)] = \
                        dat[self.slices[sample_set], :]
                else:
                    Xy[key_push(data_key, sample_set)] = \
                        dat[self.slices[sample_set]]
        else:
            # Single slice: replace the value under the same key.
            if len(dat.shape) == 2:
                Xy[data_key] = dat[self.slices, :]
            else:
                Xy[data_key] = dat[self.slices]
    return Xy
def transform(self, **Xy):
    if not self.slices:
        raise ValueError("Slicing hasn't been initialized.")
    data_keys = self.apply_on if self.apply_on else Xy.keys()
    for slice_key in self.slices.keys():
        if slice_key in data_keys:
            data_key = slice_key
            dat = Xy.pop(data_key)
            if len(dat.shape) == 2:
                if self.col_or_row:
                    Xy[data_key] = dat[:, self.slices[data_key]]
                else:
                    Xy[data_key] = dat[self.slices[data_key], :]
            else:
                Xy[data_key] = dat[self.slices[data_key]]
    # only for cross-validation
    if conf.TRAIN in self.slices.keys() \
            and conf.TEST in self.slices.keys():
        Xy[conf.KW_SPLIT_TRAIN_TEST] = True
        # Copy the keys before looping: Xy is mutated by pop() below.
        for data_key in list(data_keys):
            dat = Xy.pop(data_key)
            for sample_set in self.slices:
                if len(dat.shape) == 2:
                    if self.col_or_row:
                        Xy[key_push(data_key, sample_set)] = \
                            dat[:, self.slices[sample_set]]
                    else:
                        Xy[key_push(data_key, sample_set)] = \
                            dat[self.slices[sample_set], :]
                else:
                    Xy[key_push(data_key, sample_set)] = \
                        dat[self.slices[sample_set]]
    return Xy
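# --- Illustration (not from the library) -------------------------------------
# The col_or_row flag above only selects which axis of a 2-D array the slice
# is applied to; 1-D arrays are always indexed directly.  Names below are
# illustrative, not the class's attributes.
import numpy as np

X = np.arange(20).reshape(4, 5)
idx = np.array([0, 2])

rows = X[idx, :]   # col_or_row falsy: keep selected samples, shape (2, 5)
cols = X[:, idx]   # col_or_row truthy: keep selected features, shape (4, 2)
print(rows.shape, cols.shape)   # (2, 5) (4, 2)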
def reduce(self, store_results=True):
    # 1) Build sub-aggregates over children
    children_result_set = [child.reduce(store_results=False)
                           for child in self.children]
    result_set = ResultSet(*children_result_set)
    # Append node signature in the keys
    for result in result_set:
        result["key"] = key_push(self.get_signature(), result["key"])
    return result_set
def reduce(self, result):
    if self.select_regexp:
        inputs = [key3 for key3 in result
                  if re.search(self.select_regexp, str(key3))]
    else:
        inputs = result.keys()
    if len(inputs) != 2:
        raise KeyError("Need to find exactly two results to compute a "
                       "score. Found %i: %s" % (len(inputs), inputs))
    key_true = [k for k in inputs if k.find(conf.TRUE) != -1][0]
    key_pred = [k for k in inputs if k.find(conf.PREDICTION) != -1][0]
    y_true = result[key_true]
    y_pred = result[key_pred]
    try:  # If list of arrays (CV, LOO, etc.) concatenate them
        y_true = np.concatenate(y_true)
        y_pred = np.concatenate(y_pred)
    except ValueError:
        pass
    out = Result(key=result["key"])
    p, r, f1, s = precision_recall_fscore_support(y_true, y_pred, average=None)

    # Compute p-value of recall for each class (one-sided binomial test)
    def recall_test(recall, n_trials, apriori_p):
        n_success = recall * n_trials
        pval = binom_test(n_success, n=n_trials, p=apriori_p)
        if recall > apriori_p:
            return pval / 2
        else:
            return 1 - (pval / 2)

    n_classes = len(s)                    # Number of classes
    n_obs = len(y_true)
    prior_p = s.astype(float) / s.sum()   # A priori probability of each class
    r_pvalues = np.zeros_like(r)
    for class_index in range(n_classes):
        n_trials = s[class_index]
        r_pvalues[class_index] = recall_test(r[class_index], n_trials,
                                             prior_p[class_index])
    # Compute p-value of mean recall
    mean_r = r.mean()
    mean_r_pvalue = binom_test(int(mean_r * n_obs), n=n_obs, p=.5)

    key, _ = key_pop(key_pred, -1)
    out[key_push(key, conf.SCORE_PRECISION)] = p
    out[key_push(key, conf.SCORE_RECALL)] = r
    out[key_push(key, conf.SCORE_RECALL_PVALUES)] = r_pvalues
    out[key_push(key, conf.SCORE_RECALL_MEAN)] = mean_r
    out[key_push(key, conf.SCORE_RECALL_MEAN_PVALUE)] = mean_r_pvalue
    out[key_push(key, conf.SCORE_F1)] = f1
    out[key_push(key, conf.SCORE_ACCURACY)] = accuracy_score(y_true, y_pred)
    if self.keep:
        out.update(result)
    return out
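# --- Illustration (not from the library) -------------------------------------
# The recall p-values above come from a binomial test: under random labelling
# a class with a-priori probability p is recovered at rate p, so
# recall * support successes out of support trials are tested against p.
# Standalone sketch with recent SciPy (binomtest replaces the deprecated
# binom_test used above); it asks the one-sided question directly instead of
# halving a two-sided p-value, which agrees up to binomial discreteness.
from scipy.stats import binomtest

support, prior_p, recall = 40, 0.5, 28 / 40   # assumed class support and recall

n_success = int(round(recall * support))
pval = binomtest(n_success, n=support, p=prior_p, alternative="greater").pvalue
print(pval)   # ~0.008: a recall of 0.7 is unlikely under chance-level labelling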
def train_test_merge(Xy_train, Xy_test):
    """Merge two dicts, avoiding key collisions.

    Parameters
    ----------
    Xy_train: dict

    Xy_test: dict

    Returns
    -------
    dict : merged dictionary

    Example
    -------
    >>> train_test_merge(dict(a=1, b=2), dict(a=33, b=44, c=55)) == {'a/test': 33, 'a/train': 1, 'b/test': 44, 'b/train': 2, 'c/test': 55}
    True
    """
    Xy_train = {key_push(k, conf.TRAIN): Xy_train[k] for k in Xy_train}
    Xy_test = {key_push(k, conf.TEST): Xy_test[k] for k in Xy_test}
    Xy_train.update(Xy_test)
    return Xy_train
def train_test_merge(Xy_train, Xy_test):
    """Merge two dicts, avoiding key collisions.

    Parameters
    ----------
    Xy_train: dict

    Xy_test: dict

    Returns
    -------
    dict : merged dictionary

    Example
    -------
    >>> train_test_merge(dict(a=1, b=2), dict(a=33, b=44, c=55)) == {'a/test': 33, 'a/train': 1, 'b/test': 44, 'b/train': 2, 'c/test': 55}
    True
    """
    Xy_train = {key_push(k, conf.TRAIN): Xy_train[k] for k in Xy_train}
    Xy_test = {key_push(k, conf.TEST): Xy_test[k] for k in Xy_test}
    Xy_train.update(Xy_test)
    return Xy_train
def reduce(self, result):
    if self.select_regexp:
        select_keys = [key for key in result
                       if re.search(self.select_regexp, str(key))]
    else:
        select_keys = result.keys()
    out = Result(key=result.key())
    for key in select_keys:
        # result[key][0] is the score on the true (un-permuted) data,
        # result[key][1:] are the scores obtained on permuted data.
        out[key] = result[key][0]
        randm_res = np.vstack(result[key][1:])
        # Empirical one-sided p-value: fraction of permutations that beat
        # the true score.
        count = np.sum(randm_res > result[key][0], axis=0).astype("float")
        pval = count / randm_res.shape[0]
        out[key_push(key, "pval")] = pval
    if self.keep:
        out.update(result)
    return out
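# --- Illustration (not from the library) -------------------------------------
# Standalone sketch of the permutation p-value computed above, with made-up
# numbers: the true score is compared against the scores obtained on
# permuted data, and the p-value is the fraction of permutations that win.
import numpy as np

true_score = 0.82                                       # score on original labels
perm_scores = np.array([0.55, 0.61, 0.84, 0.49, 0.58,
                        0.60, 0.52, 0.47, 0.66, 0.71])  # scores on permuted labels

count = np.sum(perm_scores > true_score).astype(float)
pval = count / perm_scores.shape[0]
print(pval)   # 0.1 -> 1 of 10 permutations beat the true score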
def reduce(self, store_results=True):
    results = ResultSet(self.children[0].reduce(store_results=False))
    for result in results:
        result["key"] = key_push(self.get_signature(), result["key"])
    return results