Beispiel #1
0
 def transform(self, **Xy):
     """Slice every eligible array in *Xy* along its first axis.

     Parameters
     ----------
     **Xy : dict
         Data arrays keyed by name. Only values that have a ``shape``
         attribute with ``shape[0] == self.n`` are sliced.

     Returns
     -------
     dict
         *Xy* with each selected array replaced by its sliced version.
         When ``self.slices`` is a dict (train/test split), each key
         ``k`` is replaced by ``k/<sample_set>`` entries and
         ``conf.KW_SPLIT_TRAIN_TEST`` is set to ``True``.

     Raises
     ------
     ValueError
         If ``self.slices`` has not been initialized.
     """
     if not self.slices:
         raise ValueError("Slicing hasn't been initialized. "
         "Slicers constructors such as CV or Perm should be called "
         "with a sample. Ex.: CV(..., y=y), Perm(..., y=y)")
     data_keys = self.apply_on if self.apply_on else Xy.keys()
     # BUGFIX: the original called data_keys.remove(k) while iterating
     # data_keys. In Python 3 dict.keys() is a view without .remove(),
     # and removing from a list during iteration skips elements. Build
     # a filtered list instead of mutating in place.
     data_keys = [k for k in data_keys
                  if hasattr(Xy[k], "shape") and Xy[k].shape[0] == self.n]
     for data_key in data_keys:  # slice input data
         dat = Xy.pop(data_key)
         if isinstance(self.slices, dict):
             # Train/test split: one sliced copy per sample set.
             Xy[conf.KW_SPLIT_TRAIN_TEST] = True
             for sample_set in self.slices:
                 if len(dat.shape) == 2:
                     Xy[key_push(data_key, sample_set)] = \
                         dat[self.slices[sample_set], :]
                 else:
                     Xy[key_push(data_key, sample_set)] = \
                         dat[self.slices[sample_set]]
         else:
             # Single slice (e.g. permutation): replace in place.
             if len(dat.shape) == 2:
                 Xy[data_key] = dat[self.slices, :]
             else:
                 Xy[data_key] = dat[self.slices]
     return Xy
Beispiel #2
0
 def reduce(self, result):
     """Compute classification scores from a pair of true/predicted results.

     Selects exactly two entries from *result* (optionally filtered by
     ``self.select_regexp``), identifies the ground-truth and prediction
     arrays, and returns a ``Result`` holding precision, recall, mean
     recall, F1 and accuracy. Raises ``KeyError`` unless exactly two
     entries are selected.
     """
     if self.select_regexp:
         inputs = [candidate for candidate in result
                   if re.search(self.select_regexp, str(candidate))]
     else:
         inputs = result.keys()
     if len(inputs) != 2:
         raise KeyError("Need to find exactly two results to compute a score. Found %i: %s"
                        % (len(inputs), inputs))
     true_keys = [k for k in inputs if k.find(conf.TRUE) != -1]
     pred_keys = [k for k in inputs if k.find(conf.PREDICTION) != -1]
     key_true = true_keys[0]
     key_pred = pred_keys[0]
     y_true, y_pred = result[key_true], result[key_pred]
     # Lists of per-fold arrays (CV, LOO, ...) are concatenated; plain
     # arrays raise ValueError and are kept as-is.
     try:
         y_true = np.concatenate(y_true)
         y_pred = np.concatenate(y_pred)
     except ValueError:
         pass
     p, r, f1, s = precision_recall_fscore_support(y_true, y_pred, average=None)
     out = Result(key=result["key"])
     base_key, _ = key_pop(key_pred, -1)
     out[key_push(base_key, conf.SCORE_PRECISION)] = p
     out[key_push(base_key, conf.SCORE_RECALL)] = r
     out[key_push(base_key, conf.SCORE_RECALL_MEAN)] = r.mean()
     out[key_push(base_key, conf.SCORE_F1)] = f1
     out[key_push(base_key, conf.SCORE_ACCURACY)] = accuracy_score(y_true, y_pred)
     if self.keep:
         out.update(result)
     return out
Beispiel #3
0
 def transform(self, **Xy):
     """Slice arrays in *Xy* using named slices in ``self.slices``.

     Two modes:
     1. Per-key slicing: for each key shared between ``self.slices`` and
        the data, slice that array along rows or columns depending on
        ``self.col_or_row``.
     2. Cross-validation: when both ``conf.TRAIN`` and ``conf.TEST`` are
        slice keys, every data array is split into ``key/train`` and
        ``key/test`` entries and ``conf.KW_SPLIT_TRAIN_TEST`` is set.

     Raises
     ------
     ValueError
         If ``self.slices`` has not been initialized.
     """
     if not self.slices:
         raise ValueError("Slicing hasn't been initialized. ")
     # BUGFIX: materialize the key list up front. The original kept a
     # live Xy.keys() view; setting Xy[conf.KW_SPLIT_TRAIN_TEST] below
     # added that key to the view before it was listed, so the boolean
     # flag itself got popped and sliced (AttributeError on .shape).
     data_keys = list(self.apply_on) if self.apply_on else list(Xy.keys())
     for slice_key in self.slices.keys():
         if slice_key in data_keys:
             data_key = slice_key
             dat = Xy.pop(data_key)
             if len(dat.shape) == 2:
                 # col_or_row selects the axis for 2-D arrays.
                 if self.col_or_row:
                     Xy[data_key] = dat[:, self.slices[data_key]]
                 else:
                     Xy[data_key] = dat[self.slices[data_key], :]
             else:
                 Xy[data_key] = dat[self.slices[data_key]]
     # only for cross-validation
     if conf.TRAIN in self.slices.keys() \
             and conf.TEST in self.slices.keys():
         Xy[conf.KW_SPLIT_TRAIN_TEST] = True
         for data_key in data_keys:
             dat = Xy.pop(data_key)
             for sample_set in self.slices:
                 if len(dat.shape) == 2:
                     if self.col_or_row:
                         Xy[key_push(data_key, sample_set)] = \
                             dat[:, self.slices[sample_set]]
                     else:
                         Xy[key_push(data_key, sample_set)] = \
                             dat[self.slices[sample_set], :]
                 else:
                     Xy[key_push(data_key, sample_set)] = \
                         dat[self.slices[sample_set]]
     return Xy
Beispiel #4
0
 def transform(self, **Xy):
     """Slice every eligible array in *Xy* along its first axis.

     Parameters
     ----------
     **Xy : dict
         Data arrays keyed by name. Only values with a ``shape``
         attribute and ``shape[0] == self.n`` are sliced.

     Returns
     -------
     dict
         *Xy* with sliced arrays. When ``self.slices`` is a dict
         (train/test split), each key is expanded to
         ``key/<sample_set>`` entries and ``conf.KW_SPLIT_TRAIN_TEST``
         is set to ``True``.

     Raises
     ------
     ValueError
         If ``self.slices`` has not been initialized.
     """
     if not self.slices:
         raise ValueError(
             "Slicing hasn't been initialized. "
             "Slicers constructors such as CV or Perm should be called "
             "with a sample. Ex.: CV(..., y=y), Perm(..., y=y)")
     data_keys = self.apply_on if self.apply_on else Xy.keys()
     # BUGFIX: the original called data_keys.remove(k) while iterating
     # data_keys (skips elements; Py3 dict views have no .remove()).
     # Build a filtered list instead of mutating in place.
     data_keys = [k for k in data_keys
                  if hasattr(Xy[k], "shape") and Xy[k].shape[0] == self.n]
     for data_key in data_keys:  # slice input data
         dat = Xy.pop(data_key)
         if isinstance(self.slices, dict):
             # Train/test split: one sliced copy per sample set.
             Xy[conf.KW_SPLIT_TRAIN_TEST] = True
             for sample_set in self.slices:
                 if len(dat.shape) == 2:
                     Xy[key_push(data_key, sample_set)] = \
                         dat[self.slices[sample_set], :]
                 else:
                     Xy[key_push(data_key, sample_set)] = \
                         dat[self.slices[sample_set]]
         else:
             # BUGFIX: the original's else branch referenced sample_set,
             # which is undefined here (NameError) and indexed
             # self.slices with it; a single slice object must be
             # applied directly, as in the dict-free path elsewhere.
             if len(dat.shape) == 2:
                 Xy[data_key] = dat[self.slices, :]
             else:
                 Xy[data_key] = dat[self.slices]
     return Xy
Beispiel #5
0
 def transform(self, **Xy):
     """Slice arrays in *Xy* using named slices in ``self.slices``.

     Two modes:
     1. Per-key slicing for keys shared between ``self.slices`` and the
        data, along rows or columns depending on ``self.col_or_row``.
     2. Cross-validation: when both ``conf.TRAIN`` and ``conf.TEST`` are
        slice keys, every data array is split into ``key/train`` and
        ``key/test`` entries and ``conf.KW_SPLIT_TRAIN_TEST`` is set.

     Raises
     ------
     ValueError
         If ``self.slices`` has not been initialized.
     """
     if not self.slices:
         raise ValueError("Slicing hasn't been initialized. ")
     # BUGFIX: materialize the key list up front. The original kept a
     # live Xy.keys() view and then (a) added conf.KW_SPLIT_TRAIN_TEST
     # to Xy, polluting the view, and (b) popped keys while iterating
     # the view, which raises RuntimeError in Python 3.
     data_keys = list(self.apply_on) if self.apply_on else list(Xy.keys())
     for slice_key in self.slices.keys():
         if slice_key in data_keys:
             data_key = slice_key
             dat = Xy.pop(data_key)
             if len(dat.shape) == 2:
                 # col_or_row selects the axis for 2-D arrays.
                 if self.col_or_row:
                     Xy[data_key] = dat[:, self.slices[data_key]]
                 else:
                     Xy[data_key] = dat[self.slices[data_key], :]
             else:
                 Xy[data_key] = dat[self.slices[data_key]]
     # only for cross-validation
     if conf.TRAIN in self.slices.keys() \
             and conf.TEST in self.slices.keys():
         Xy[conf.KW_SPLIT_TRAIN_TEST] = True
         for data_key in data_keys:
             dat = Xy.pop(data_key)
             for sample_set in self.slices:
                 if len(dat.shape) == 2:
                     if self.col_or_row:
                         Xy[key_push(data_key, sample_set)] = \
                             dat[:, self.slices[sample_set]]
                     else:
                         Xy[key_push(data_key, sample_set)] = \
                             dat[self.slices[sample_set], :]
                 else:
                     Xy[key_push(data_key, sample_set)] = \
                         dat[self.slices[sample_set]]
     return Xy
Beispiel #6
0
 def reduce(self, store_results=True):
     """Aggregate the results of all children into one ResultSet,
     prefixing every result key with this node's signature."""
     sub_results = []
     for child in self.children:
         sub_results.append(child.reduce(store_results=False))
     aggregated = ResultSet(*sub_results)
     signature = self.get_signature()
     # Prepend this node's signature so keys stay unique in the tree.
     for res in aggregated:
         res["key"] = key_push(signature, res["key"])
     return aggregated
Beispiel #7
0
    def reduce(self, result):
        """Compute classification scores plus recall p-values.

        Selects exactly two entries from *result* (optionally filtered by
        ``self.select_regexp``): the ground truth and the prediction.
        Returns a ``Result`` with precision, recall, per-class recall
        p-values (binomial test against the class prior), mean recall and
        its p-value, F1 and accuracy.

        Raises
        ------
        KeyError
            If the selection does not yield exactly two entries.
        """
        if self.select_regexp:
            inputs = [key3 for key3 in result
                      if re.search(self.select_regexp, str(key3))]
        else:
            inputs = result.keys()
        if len(inputs) != 2:
            raise KeyError("Need to find exactly two results to compute a "
                           "score. Found %i: %s" % (len(inputs), inputs))
        key_true = [k for k in inputs if k.find(conf.TRUE) != -1][0]
        key_pred = [k for k in inputs if k.find(conf.PREDICTION) != -1][0]
        y_true = result[key_true]
        y_pred = result[key_pred]
        try:  # If list of arrays (CV, LOO, etc.) concatenate them
            y_true = np.concatenate(y_true)
            y_pred = np.concatenate(y_pred)
        except ValueError:
            pass
        out = Result(key=result["key"])
        p, r, f1, s = precision_recall_fscore_support(y_true,
                                                      y_pred,
                                                      average=None)

        # Compute p-value of recall for each class: one-sided binomial
        # test of the observed recall against the class prior.
        def recall_test(recall, n_trials, apriori_p):
            n_success = recall * n_trials
            pval = binom_test(n_success, n=n_trials, p=apriori_p)
            if recall > apriori_p:
                return (pval / 2)
            else:
                return 1 - (pval / 2)

        n_classes = len(s)  # Number of classes
        n_obs = len(y_true)
        # BUGFIX: np.float was deprecated in NumPy 1.20 and removed in
        # 1.24; the builtin float is the documented replacement.
        prior_p = s.astype(float) / s.sum()  # A priori probability of each class
        r_pvalues = np.zeros_like(r)
        for class_index in range(n_classes):
            n_trials = s[class_index]
            r_pvalues[class_index] = recall_test(r[class_index],
                                                 n_trials,
                                                 prior_p[class_index])

        # Compute p-value of mean recall (two-sided against chance = .5)
        mean_r = r.mean()
        mean_r_pvalue = binom_test(int(mean_r * n_obs), n=n_obs, p=.5)

        key, _ = key_pop(key_pred, -1)
        out[key_push(key, conf.SCORE_PRECISION)] = p
        out[key_push(key, conf.SCORE_RECALL)] = r
        out[key_push(key, conf.SCORE_RECALL_PVALUES)] = r_pvalues
        out[key_push(key, conf.SCORE_RECALL_MEAN)] = mean_r
        out[key_push(key, conf.SCORE_RECALL_MEAN_PVALUE)] = mean_r_pvalue
        out[key_push(key, conf.SCORE_F1)] = f1
        out[key_push(key, conf.SCORE_ACCURACY)] = accuracy_score(y_true,
                                                                 y_pred)
        if self.keep:
            out.update(result)
        return out
Beispiel #8
0
def train_test_merge(Xy_train, Xy_test):
    """Merge two dict avoiding keys collision.

    Parameters
    ----------
    Xy_train: dict
    Xy_test: dict

    Returns
    -------
    dict : merged dictionary

    Example
    -------
    >>> train_test_merge(dict(a=1, b=2), dict(a=33, b=44, c=55)) == {'a/test': 33, 'a/train': 1, 'b/test': 44, 'b/train': 2, 'c/test': 55}
    True
    """
    # BUGFIX: the doctest previously showed a sorted dict repr, but dicts
    # preserve insertion order ('a/train' comes first), so the doctest
    # failed under Python 3. Comparing with == is order-independent.
    Xy_train = {key_push(k, conf.TRAIN): Xy_train[k] for k in Xy_train}
    Xy_test = {key_push(k, conf.TEST): Xy_test[k] for k in Xy_test}
    Xy_train.update(Xy_test)
    return Xy_train
Beispiel #9
0
def train_test_merge(Xy_train, Xy_test):
    """Merge two dict avoiding keys collision.

    Parameters
    ----------
    Xy_train: dict
    Xy_test: dict

    Returns
    -------
    dict : merged dictionary

    Example
    -------
    >>> train_test_merge(dict(a=1, b=2), dict(a=33, b=44, c=55)) == {'a/test': 33, 'a/train': 1, 'b/test': 44, 'b/train': 2, 'c/test': 55}
    True
    """
    # Suffix every key with its sample-set name before merging so the
    # two dicts can never collide.
    merged = {key_push(k, conf.TRAIN): v for k, v in Xy_train.items()}
    merged.update({key_push(k, conf.TEST): v for k, v in Xy_test.items()})
    return merged
Beispiel #10
0
 def reduce(self, result):
     """For each selected key, compare the true statistic (first entry)
     against the permutation statistics (remaining entries) and store an
     empirical p-value under ``key/pval``."""
     if self.select_regexp:
         select_keys = [key for key in result
                        if re.search(self.select_regexp, str(key))]
     else:
         select_keys = result.keys()
     out = Result(key=result.key())
     for key in select_keys:
         true_stat = result[key][0]
         out[key] = true_stat
         perm_stats = np.vstack(result[key][1:])
         # Fraction of permutations that beat the true statistic.
         n_greater = np.sum(perm_stats > true_stat, axis=0).astype("float")
         out[key_push(key, "pval")] = n_greater / perm_stats.shape[0]
     if self.keep:
         out.update(result)
     return out
Beispiel #11
0
 def reduce(self, store_results=True):
     """Reduce the single child and prefix its result keys with this
     node's signature."""
     child_results = self.children[0].reduce(store_results=False)
     results = ResultSet(child_results)
     signature = self.get_signature()
     for res in results:
         res["key"] = key_push(signature, res["key"])
     return results
Beispiel #12
0
 def reduce(self, store_results=True):
     """Delegate reduction to the only child, then namespace every
     result key under this node's signature."""
     reduced = ResultSet(self.children[0].reduce(store_results=False))
     for item in reduced:
         item["key"] = key_push(self.get_signature(), item["key"])
     return reduced