Example #1
# NB: this snippet relies on numpy, OrderedDict and the project-specific
# `array_utils` and `metrics` modules; the imports below are assumed.
# `N_COMP` is expected to be defined at module level by the framework.
from collections import OrderedDict

import numpy as np


def reducer(key, values):
    output_collectors = values
    global N_COMP
    import mapreduce as GLOBAL
    N_FOLDS = GLOBAL.N_FOLDS
    # Pre-allocate per-fold result arrays (last axis = 5 folds); 67665 is the
    # hard-coded number of features (voxels) for this dataset.
    components = np.zeros((67665, N_COMP, 5))
    frobenius_train = np.zeros((N_COMP, 5))
    frobenius_test = np.zeros((1, 5))
    evr_train = np.zeros((N_COMP, 5))
    evr_test = np.zeros((N_COMP, 5))
    l0 = np.zeros((N_COMP, 5))
    l1 = np.zeros((N_COMP, 5))
    l2 = np.zeros((N_COMP, 5))
    tv = np.zeros((N_COMP, 5))
    # N_FOLDS is the number of true folds (not the number of resamplings).
    # key: intermediary key identifying the model and its parameters.
    # Each output collector's load() returns the dict produced by the mapper;
    # results must be loaded before use.
    # Fold 0 is not taken into account (collectors are loaded from index 1).
    for item in output_collectors:
        if item != N_FOLDS:
            values = output_collectors[item + 1].load()
            components[:, :, item] = values["components"]
            frobenius_train[:, item] = values["frobenius_train"]
            frobenius_test[:, item] = values["frobenius_test"]
#            l0[:,item] = values["l0"]
#            l1[:,item] = values["l1"]
#            l2[:,item] = values["l2"]
#            tv[:,item] = values["tv"]
#            evr_train[:,item] = values["evr_train"]
#            evr_test[:,item] = values["evr_test"]
#            times[:,item] = values["time"]

    # Solve the non-identifiability problem (baseline = first fold): if, for a
    # given fold, component k+1 matches the reference fold's component k better
    # than component k does, the two components are swapped.
    for i in range(1, 5):
        if np.abs(np.corrcoef(
                components[:, 0, 0], components[:, 0, i])[0, 1]) < np.abs(
                    np.corrcoef(components[:, 0, 0], components[:, 1, i])[0,
                                                                          1]):
            print("components inverted")
            print(i)
            temp_comp1 = np.copy(components[:, 1, i])
            components[:, 1, i] = components[:, 0, i]
            components[:, 0, i] = temp_comp1

        if np.abs(np.corrcoef(
                components[:, 1, 0], components[:, 1, i])[0, 1]) < np.abs(
                    np.corrcoef(components[:, 1, 0], components[:, 2, i])[0,
                                                                          1]):
            print("components inverted")
            print(i)
            temp_comp2 = np.copy(components[:, 2, i])
            components[:, 2, i] = components[:, 1, i]
            components[:, 1, i] = temp_comp2

    # Thresholded components (list of tuples (comp, threshold))
    thresh_components = np.empty(components.shape)
    thresholds = np.empty((N_COMP, N_FOLDS))
    for l in range(N_FOLDS):
        for k in range(N_COMP):
            thresh_comp, t = array_utils.arr_threshold_from_norm2_ratio(
                components[:, k, l], .99)
            thresh_components[:, k, l] = thresh_comp
            thresholds[k, l] = t
    # Average Frobenius norms across folds for each component
    av_frobenius_train = frobenius_train.mean(axis=1)
    av_frobenius_test = frobenius_test.mean(axis=1)

    # Align sign of loading vectors to the first fold for each component
    aligned_thresh_comp = np.copy(thresh_components)
    REF_FOLD_NUMBER = 0
    for k in range(N_COMP):
        for i in range(N_FOLDS):
            ref = thresh_components[:, k, REF_FOLD_NUMBER].T
            if i != REF_FOLD_NUMBER:
                r = np.corrcoef(thresh_components[:, k, i].T, ref)
                if r[0, 1] < 0:
                    #print "Reverting comp {k} of fold {i} for model {key}".format(i=i+1, k=k, key=key)
                    aligned_thresh_comp[:, k, i] *= -1
    # Compute fleiss_kappa and DICE on thresholded components
    fleiss_kappas = np.empty(N_COMP)
    dice_bars = np.empty(N_COMP)
    dices = np.zeros((10, N_COMP))  # presumably the C(5, 2) = 10 pairwise Dice values

    for k in range(N_COMP):
        # One component across folds
        thresh_comp = aligned_thresh_comp[:, k, :]
        fleiss_kappas[k] = metrics.fleiss_kappa(thresh_comp)
        dice_bars[k], dices[:, k] = metrics.dice_bar(thresh_comp)


#    print dices.mean(axis=1)
#    dices_mean_path = os.path.join(OUTPUT_DIR,'fmri_5folds/results','dices_mean_%s.npy' %key[0])
#    if key[0] == 'struct_pca' and key[2]==1e-6:
#        dices_mean_path = os.path.join(OUTPUT_DIR,'fmri_5folds/results','dices_mean_%s.npy' %'enet_pca')

#    print dices_mean_path
#    np.save(dices_mean_path,dices.mean(axis=1) )

    print(key)
    scores = OrderedDict((
        ('model', key[0]),
        ('global_pen', key[1]),
        ('tv_ratio', key[2]),
        ('l1_ratio', key[3]),
        ('frobenius_train', av_frobenius_train[0]),
        ('frobenius_test', av_frobenius_test[0]),
        ('kappa_0', fleiss_kappas[0]),
        ('kappa_1', fleiss_kappas[1]),
        ('kappa_2', fleiss_kappas[2]),
        ('kappa_mean', np.mean(fleiss_kappas)),
        ('dice_bar_0', dice_bars[0]),
        ('dice_bar_1', dice_bars[1]),
        ('dice_bar_2', dice_bars[2]),
        ('dices_mean', dice_bars.mean()),
        ('evr_test_0', evr_test.mean(axis=1)[0]),
        ('evr_test_1', evr_test.mean(axis=1)[1]),
        ('evr_test_2', evr_test.mean(axis=1)[2]),
        ('evr_test_sum', evr_test.mean(axis=1)[0] + evr_test.mean(axis=1)[1] +
         evr_test.mean(axis=1)[2]),
        ('frob_test_fold1', frobenius_test[0][0]),
        ('frob_test_fold2', frobenius_test[0][1]),
        ('frob_test_fold3', frobenius_test[0][2]),
        ('frob_test_fold4', frobenius_test[0][3]),
        ('frob_test_fold5', frobenius_test[0][4])))

    return scores
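
The mapreduce framework and the mapper that feed this reducer are not shown, so the shape of `output_collectors` has to be inferred from the code above. The sketch below (all names such as MockCollector and N_FEATURES are hypothetical) illustrates the minimal contract the reducer appears to rely on: a mapping from fold index to an object whose load() returns the per-fold result dict with the expected array shapes.

import numpy as np

N_COMP = 3          # hypothetical number of components
N_FEATURES = 67665  # feature count hard-coded in the reducer above


class MockCollector:
    """Stand-in for a mapreduce output collector: load() returns the dict
    that the mapper is assumed to have saved for one fold."""

    def __init__(self, seed):
        self.rng = np.random.RandomState(seed)

    def load(self):
        return {
            "components": self.rng.randn(N_FEATURES, N_COMP),
            "frobenius_train": self.rng.rand(N_COMP),
            "frobenius_test": self.rng.rand(1),
        }


# One collector per fold (1..5) plus the extra entry 0 that the reducer skips
output_collectors = {i: MockCollector(i) for i in range(6)}
print(output_collectors[1].load()["components"].shape)  # (67665, 3)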
Example #2
# NB: as above, the numpy/os/OrderedDict imports are assumed; `array_utils`
# and `metrics` are project-specific modules imported elsewhere in the file.
import os
from collections import OrderedDict

import numpy as np


def reducer(key, values):
    output_collectors = values
    global N_COMP
    import mapreduce as GLOBAL
    N_FOLDS = GLOBAL.N_FOLDS
    # N_FOLDS is the number of true folds (not the number of resamplings).
    # key: intermediary key identifying the model and its parameters.
    # Each output collector's load() returns the dict produced by the mapper;
    # results must be loaded before use.
    # Do not take fold 0 into account (it holds the full resample when
    # FULL_RESAMPLE is set).
    if GLOBAL.FULL_RESAMPLE:
        values = [item.load() for item in values[1:]]
    else:
        values = [item.load() for item in values]

    # Load components: each fold provides an n_voxels x N_COMP matrix.
    # They are stacked along the third dimension (folds).
    components = np.dstack([item["components"] for item in values])
    # Thresholded components (list of tuples (comp, threshold))
    thresh_components = np.empty(components.shape)
    thresholds = np.empty((N_COMP, N_FOLDS))
    for l in range(N_FOLDS):
        for k in range(N_COMP):
            thresh_comp, t = array_utils.arr_threshold_from_norm2_ratio(
                components[:, k, l], .99)
            thresh_components[:, k, l] = thresh_comp
            thresholds[k, l] = t
    # Save thresholded comp
    for l, oc in zip(range(N_FOLDS), output_collectors[1:]):
        filename = os.path.join(oc.output_dir, "thresh_comp.npz")
        np.savez(filename, thresh_components[:, :, l])
    frobenius_train = np.vstack([item["frobenius_train"] for item in values])
    frobenius_test = np.vstack([item["frobenius_test"] for item in values])
    l0 = np.vstack([item["l0"] for item in values])
    l1 = np.vstack([item["l1"] for item in values])
    l2 = np.vstack([item["l2"] for item in values])
    tv = np.vstack([item["tv"] for item in values])
    evr_train = np.vstack([item["evr_train"] for item in values])
    evr_test = np.vstack([item["evr_test"] for item in values])
    times = [item["time"] for item in values]

    # Average each metric across folds, per component
    av_frobenius_train = frobenius_train.mean(axis=0)
    av_frobenius_test = frobenius_test.mean(axis=0)
    av_evr_train = evr_train.mean(axis=0)
    av_evr_test = evr_test.mean(axis=0)
    av_l0 = l0.mean(axis=0)
    av_l1 = l1.mean(axis=0)
    av_l2 = l2.mean(axis=0)
    av_tv = tv.mean(axis=0)

    # Compute correlations of components between all folds
    # Number of fold pairs; integer division so it can be used as an array shape
    n_corr = N_FOLDS * (N_FOLDS - 1) // 2
    correlations = np.zeros((N_COMP, n_corr))
    for k in range(N_COMP):
        R = np.corrcoef(np.abs(components[:, k, :].T))
        # Extract interesting coefficients (upper-triangle)
        correlations[k] = R[np.triu_indices_from(R, 1)]

    # Fisher z-transform of the correlations
    Z = 1. / 2. * np.log((1 + correlations) / (1 - correlations))
    # Average for each component
    z_bar = np.mean(Z, axis=1)
    # Transform back to average correlation for each component
    r_bar = (np.exp(2 * z_bar) - 1) / (np.exp(2 * z_bar) + 1)

    # Align sign of loading vectors to the first fold for each component
    aligned_thresh_comp = np.copy(thresh_components)
    REF_FOLD_NUMBER = 0
    for k in range(N_COMP):
        for i in range(N_FOLDS):
            ref = thresh_components[:, k, REF_FOLD_NUMBER].T
            if i != REF_FOLD_NUMBER:
                r = np.corrcoef(thresh_components[:, k, i].T, ref)
                if r[0, 1] < 0:
                    #print "Reverting comp {k} of fold {i} for model {key}".format(i=i+1, k=k, key=key)
                    aligned_thresh_comp[:, k, i] *= -1
    # Save aligned comp
    for l, oc in zip(range(N_FOLDS), output_collectors[1:]):
        filename = os.path.join(oc.output_dir, "aligned_thresh_comp.npz")
        np.savez(filename, aligned_thresh_comp[:, :, l])

    # Compute fleiss_kappa and DICE on thresholded components
    fleiss_kappas = np.empty(N_COMP)
    dice_bars = np.empty(N_COMP)
    for k in range(N_COMP):
        # One component across folds
        thresh_comp = aligned_thresh_comp[:, k, :]
        fleiss_kappas[k] = metrics.fleiss_kappa(thresh_comp)
        dice_bars[k] = metrics.dice_bar(thresh_comp)

    scores = OrderedDict((
        ('model', key[0]),
        ('global_pen', key[1]),
        ('tv_ratio', key[2]),
        ('l1_ratio', key[3]),
        ('frobenius_train', av_frobenius_train[0]),
        ('frobenius_test', av_frobenius_test[0]),
        ('correlation_0', r_bar[0]),
        ('correlation_1', r_bar[1]),
        ('correlation_2', r_bar[2]),
        ('correlation_mean', np.mean(r_bar)),
        ('kappa_0', fleiss_kappas[0]),
        ('kappa_1', fleiss_kappas[1]),
        ('kappa_2', fleiss_kappas[2]),
        ('kappa_mean', np.mean(fleiss_kappas)),
        ('dice_bar_0', dice_bars[0]),
        ('dice_bar_1', dice_bars[1]),
        ('dice_bar_2', dice_bars[2]),
        ('dice_bar_mean', np.mean(dice_bars)),
        ('evr_train_0', av_evr_train[0]),
        ('evr_train_1', av_evr_train[1]),
        ('evr_train_2', av_evr_train[2]),
        ('evr_test_0', av_evr_test[0]),
        ('evr_test_1', av_evr_test[1]),
        ('evr_test_2', av_evr_test[2]),
        ('l0_0', av_l0[0]), ('l0_1', av_l0[1]), ('l0_2', av_l0[2]),
        ('l1_0', av_l1[0]), ('l1_1', av_l1[1]), ('l1_2', av_l1[2]),
        ('l2_0', av_l2[0]), ('l2_1', av_l2[1]), ('l2_2', av_l2[2]),
        ('tv_0', av_tv[0]), ('tv_1', av_tv[1]), ('tv_2', av_tv[2]),
        ('time', np.mean(times))))

    return scores
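
Both reducers threshold each loading vector with array_utils.arr_threshold_from_norm2_ratio(component, .99), whose implementation is not shown here. A plausible reading, consistent with how the result is used, is that it zeroes out the smallest-magnitude entries while keeping at least 99% of the vector's l2 norm, and also returns the magnitude threshold it applied. A minimal sketch under that assumption (threshold_from_norm2_ratio is a hypothetical stand-in, not the project's actual function):

import numpy as np


def threshold_from_norm2_ratio(v, ratio=.99):
    """Zero out small-magnitude entries of v while keeping at least `ratio`
    of its l2 norm; return (thresholded_vector, threshold).
    Sketch only -- assumed behaviour, not the project's implementation."""
    v = np.asarray(v, dtype=float)
    order = np.argsort(np.abs(v))[::-1]            # largest magnitudes first
    running_norm = np.sqrt(np.cumsum(v[order] ** 2))
    target = ratio * np.linalg.norm(v)
    # Smallest number of entries whose partial l2 norm reaches the target
    n_keep = min(int(np.searchsorted(running_norm, target)) + 1, v.size)
    threshold = np.abs(v[order[n_keep - 1]])
    return np.where(np.abs(v) >= threshold, v, 0.0), threshold


v = np.array([0.9, -0.3, 0.05, 0.01])
thresh_v, t = threshold_from_norm2_ratio(v, .99)
print(thresh_v, t)  # [ 0.9 -0.3  0.   0. ] 0.3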