def reducer(key, values):
    """Aggregate per-fold mapper outputs for one parameter key into scores.

    key    -- tuple (model, global_pen, tv_ratio, l1_ratio) identifying the model.
    values -- mapping of output collectors, one per fold; collector 0 (the
              full-sample "fold 0") is deliberately never loaded.
    Returns an OrderedDict of averaged frobenius / kappa / dice / evr metrics.
    """
    output_collectors = values
    global N_COMP
    import mapreduce as GLOBAL
    # N_FOLDS is the number of true folds (not the number of resamplings).
    N_FOLDS = GLOBAL.N_FOLDS
    # Pre-allocate per-fold containers: 67665 voxels x N_COMP components x 5 folds.
    # NOTE(review): fold count 5 and voxel count 67665 are hard-coded here even
    # though N_FOLDS is read from GLOBAL — confirm they always agree.
    components = np.zeros((67665, N_COMP, 5))
    frobenius_train = np.zeros((N_COMP, 5))
    frobenius_test = np.zeros((1, 5))
    evr_train = np.zeros((N_COMP, 5))
    evr_test = np.zeros((N_COMP, 5))
    l0 = np.zeros((N_COMP, 5))
    l1 = np.zeros((N_COMP, 5))
    l2 = np.zeros((N_COMP, 5))
    tv = np.zeros((N_COMP, 5))
    # Mapper outputs need to be loaded. Collector idx + 1 is read for slot idx,
    # so collector 0 (fold 0) is skipped, and the key equal to N_FOLDS is
    # ignored to stay in range.
    # NOTE(review): the l0/l1/l2/tv/evr fills are disabled upstream, so those
    # arrays stay all-zero; evr_test_* entries in the returned scores are 0.
    for idx in output_collectors:
        if idx != N_FOLDS:
            fold_data = output_collectors[idx + 1].load()
            components[:, :, idx] = fold_data["components"]
            frobenius_train[:, idx] = fold_data["frobenius_train"]
            frobenius_test[:, idx] = fold_data["frobenius_test"]
    # Solve non-identifiability (baseline = first fold): components may come
    # out reordered fold-to-fold; swap adjacent component pairs when the
    # absolute correlation with the baseline says the later one matches better.
    for i in range(1, 5):
        r_same_0 = np.abs(np.corrcoef(components[:, 0, 0],
                                      components[:, 0, i])[0, 1])
        r_next_0 = np.abs(np.corrcoef(components[:, 0, 0],
                                      components[:, 1, i])[0, 1])
        if r_same_0 < r_next_0:
            print("components inverted")
            print(i)
            saved = np.copy(components[:, 1, i])
            components[:, 1, i] = components[:, 0, i]
            components[:, 0, i] = saved
        r_same_1 = np.abs(np.corrcoef(components[:, 1, 0],
                                      components[:, 1, i])[0, 1])
        r_next_1 = np.abs(np.corrcoef(components[:, 1, 0],
                                      components[:, 2, i])[0, 1])
        if r_same_1 < r_next_1:
            print("components inverted")
            print(i)
            saved = np.copy(components[:, 2, i])
            components[:, 2, i] = components[:, 1, i]
            components[:, 1, i] = saved
    # Thresholded components: keep the part of each loading vector holding
    # 99% of its l2 norm.
    thresh_components = np.empty(components.shape)
    thresholds = np.empty((N_COMP, N_FOLDS))
    for fold in range(N_FOLDS):
        for comp in range(N_COMP):
            kept, cut = array_utils.arr_threshold_from_norm2_ratio(
                components[:, comp, fold], .99)
            thresh_components[:, comp, fold] = kept
            thresholds[comp, fold] = cut
    # Average frobenius norms across folds for each component.
    av_frobenius_train = frobenius_train.mean(axis=1)
    av_frobenius_test = frobenius_test.mean(axis=1)
    # Align sign of loading vectors to the reference fold for each component
    # (a negative correlation with the reference means the vector is flipped).
    aligned_thresh_comp = np.copy(thresh_components)
    REF_FOLD_NUMBER = 0
    for comp in range(N_COMP):
        for fold in range(N_FOLDS):
            ref = thresh_components[:, comp, REF_FOLD_NUMBER].T
            if fold != REF_FOLD_NUMBER:
                corr = np.corrcoef(thresh_components[:, comp, fold].T, ref)
                if corr[0, 1] < 0:
                    aligned_thresh_comp[:, comp, fold] *= -1
    # Cross-fold stability of the thresholded supports: Fleiss kappa and DICE.
    fleiss_kappas = np.empty(N_COMP)
    dice_bars = np.empty(N_COMP)
    dices = np.zeros((10, N_COMP))
    for comp in range(N_COMP):
        across_folds = aligned_thresh_comp[:, comp, :]
        fleiss_kappas[comp] = metrics.fleiss_kappa(across_folds)
        dice_bars[comp], dices[:, comp] = metrics.dice_bar(across_folds)
    print(key)
    # evr_test is averaged once and reused (it is all-zero, see NOTE above).
    mean_evr_test = evr_test.mean(axis=1)
    scores = OrderedDict(
        (('model', key[0]),
         ('global_pen', key[1]),
         ('tv_ratio', key[2]),
         ('l1_ratio', key[3]),
         ('frobenius_train', av_frobenius_train[0]),
         ('frobenius_test', av_frobenius_test[0]),
         ('kappa_0', fleiss_kappas[0]),
         ('kappa_1', fleiss_kappas[1]),
         ('kappa_2', fleiss_kappas[2]),
         ('kappa_mean', np.mean(fleiss_kappas)),
         ('dice_bar_0', dice_bars[0]),
         ('dice_bar_1', dice_bars[1]),
         ('dice_bar_2', dice_bars[2]),
         ('dices_mean', dice_bars.mean()),
         ('evr_test_0', mean_evr_test[0]),
         ('evr_test_1', mean_evr_test[1]),
         ('evr_test_2', mean_evr_test[2]),
         ('evr_test_sum',
          mean_evr_test[0] + mean_evr_test[1] + mean_evr_test[2]),
         ('frob_test_fold1', frobenius_test[0][0]),
         ('frob_test_fold2', frobenius_test[0][1]),
         ('frob_test_fold3', frobenius_test[0][2]),
         ('frob_test_fold4', frobenius_test[0][3]),
         ('frob_test_fold5', frobenius_test[0][4])))
    return scores
def reducer(key, values):
    """Aggregate per-fold mapper outputs for one parameter key into scores.

    key    -- tuple (model, global_pen, tv_ratio, l1_ratio) identifying the model.
    values -- sequence of output collectors, one per resampling; when
              GLOBAL.FULL_RESAMPLE is set the first one (fold 0, full sample)
              is excluded from the aggregation.
    Side effects: saves "thresh_comp.npz" and "aligned_thresh_comp.npz" into
    each per-fold collector's output_dir.
    Returns an OrderedDict of averaged frobenius / correlation / kappa / dice /
    evr / sparsity metrics and mean run time.
    """
    output_collectors = values
    global N_COMP
    import mapreduce as GLOBAL
    # N_FOLDS is the number of true folds (not the number of resamplings).
    N_FOLDS = GLOBAL.N_FOLDS
    # Load mapper outputs; avoid taking into account the fold 0.
    if GLOBAL.FULL_RESAMPLE:
        values = [item.load() for item in values[1:]]
    else:
        values = [item.load() for item in values]
    # Load components: each file holds an n_voxels x N_COMP matrix.
    # Stack them on the third dimension (folds).
    components = np.dstack([item["components"] for item in values])
    # Thresholded components: keep the part of each loading vector holding
    # 99% of its l2 norm.
    thresh_components = np.empty(components.shape)
    thresholds = np.empty((N_COMP, N_FOLDS))
    for l in range(N_FOLDS):
        for k in range(N_COMP):
            thresh_comp, t = array_utils.arr_threshold_from_norm2_ratio(
                components[:, k, l], .99)
            thresh_components[:, k, l] = thresh_comp
            thresholds[k, l] = t
    # Save thresholded components into each fold's output directory.
    for l, oc in zip(range(N_FOLDS), output_collectors[1:]):
        filename = os.path.join(oc.output_dir, "thresh_comp.npz")
        np.savez(filename, thresh_components[:, :, l])
    # Stack per-fold metric rows (folds on axis 0).
    frobenius_train = np.vstack([item["frobenius_train"] for item in values])
    frobenius_test = np.vstack([item["frobenius_test"] for item in values])
    l0 = np.vstack([item["l0"] for item in values])
    l1 = np.vstack([item["l1"] for item in values])
    l2 = np.vstack([item["l2"] for item in values])
    tv = np.vstack([item["tv"] for item in values])
    evr_train = np.vstack([item["evr_train"] for item in values])
    evr_test = np.vstack([item["evr_test"] for item in values])
    times = [item["time"] for item in values]
    # Average precision/recall across folds for each component.
    av_frobenius_train = frobenius_train.mean(axis=0)
    av_frobenius_test = frobenius_test.mean(axis=0)
    av_evr_train = evr_train.mean(axis=0)
    av_evr_test = evr_test.mean(axis=0)
    av_l0 = l0.mean(axis=0)
    av_l1 = l1.mean(axis=0)
    av_l2 = l2.mean(axis=0)
    av_tv = tv.mean(axis=0)
    # Compute correlations of components between all folds.
    # FIX: use floor division — N_FOLDS * (N_FOLDS - 1) / 2 is a float under
    # Python 3 and np.zeros raises TypeError on a non-integer dimension.
    # (// is the same value under Python 2 integer arithmetic.)
    n_corr = N_FOLDS * (N_FOLDS - 1) // 2
    correlations = np.zeros((N_COMP, n_corr))
    for k in range(N_COMP):
        R = np.corrcoef(np.abs(components[:, k, :].T))
        # Extract interesting coefficients (upper-triangle).
        correlations[k] = R[np.triu_indices_from(R, 1)]
    # Fisher transform to z-scores, average per component, transform back so
    # the mean correlation is computed in z-space.
    Z = 1. / 2. * np.log((1 + correlations) / (1 - correlations))
    z_bar = np.mean(Z, axis=1)
    r_bar = (np.exp(2 * z_bar) - 1) / (np.exp(2 * z_bar) + 1)
    # Align sign of loading vectors to the first fold for each component
    # (negative correlation with the reference fold means a flipped vector).
    aligned_thresh_comp = np.copy(thresh_components)
    REF_FOLD_NUMBER = 0
    for k in range(N_COMP):
        for i in range(N_FOLDS):
            ref = thresh_components[:, k, REF_FOLD_NUMBER].T
            if i != REF_FOLD_NUMBER:
                r = np.corrcoef(thresh_components[:, k, i].T, ref)
                if r[0, 1] < 0:
                    aligned_thresh_comp[:, k, i] *= -1
    # Save aligned components into each fold's output directory.
    for l, oc in zip(range(N_FOLDS), output_collectors[1:]):
        filename = os.path.join(oc.output_dir, "aligned_thresh_comp.npz")
        np.savez(filename, aligned_thresh_comp[:, :, l])
    # Compute fleiss_kappa and DICE on thresholded components.
    fleiss_kappas = np.empty(N_COMP)
    dice_bars = np.empty(N_COMP)
    for k in range(N_COMP):
        # One component across folds.
        thresh_comp = aligned_thresh_comp[:, k, :]
        fleiss_kappas[k] = metrics.fleiss_kappa(thresh_comp)
        # FIX: metrics.dice_bar returns a (mean, pairwise-dices) pair — the
        # sibling reducer in this file unpacks two values — so keep only the
        # mean; assigning the pair into a scalar slot would raise.
        dice_bars[k] = metrics.dice_bar(thresh_comp)[0]
    scores = OrderedDict(
        (('model', key[0]),
         ('global_pen', key[1]),
         ('tv_ratio', key[2]),
         ('l1_ratio', key[3]),
         ('frobenius_train', av_frobenius_train[0]),
         ('frobenius_test', av_frobenius_test[0]),
         ('correlation_0', r_bar[0]),
         ('correlation_1', r_bar[1]),
         ('correlation_2', r_bar[2]),
         ('correlation_mean', np.mean(r_bar)),
         ('kappa_0', fleiss_kappas[0]),
         ('kappa_1', fleiss_kappas[1]),
         ('kappa_2', fleiss_kappas[2]),
         ('kappa_mean', np.mean(fleiss_kappas)),
         ('dice_bar_0', dice_bars[0]),
         ('dice_bar_1', dice_bars[1]),
         ('dice_bar_2', dice_bars[2]),
         ('dice_bar_mean', np.mean(dice_bars)),
         ('evr_train_0', av_evr_train[0]),
         ('evr_train_1', av_evr_train[1]),
         ('evr_train_2', av_evr_train[2]),
         ('evr_test_0', av_evr_test[0]),
         ('evr_test_1', av_evr_test[1]),
         ('evr_test_2', av_evr_test[2]),
         ('l0_0', av_l0[0]),
         ('l0_1', av_l0[1]),
         ('l0_2', av_l0[2]),
         ('l1_0', av_l1[0]),
         ('l1_1', av_l1[1]),
         ('l1_2', av_l1[2]),
         ('l2_0', av_l2[0]),
         ('l2_1', av_l2[1]),
         ('l2_2', av_l2[2]),
         ('tv_0', av_tv[0]),
         ('tv_1', av_tv[1]),
         ('tv_2', av_tv[2]),
         ('time', np.mean(times))))
    return scores