Exemple #1
0
def dir_rating_correlate(run,
                         post,
                         epochs,
                         rating_norm='none',
                         clustered_rating_distance=True,
                         n_groups=5,
                         USE_CACHE=False,
                         DUMP=False):
    """Plot embedding-vs-rating correlation per epoch, averaged over configs.

    For every configuration ``run + 'c<i>'`` (``i`` in ``range(n_groups)``)
    and every epoch, the 'dirR' rating predictions are wrapped in a
    ``RatingCorrelator`` and the Pearson and Kendall results of
    ``correlate_retrieval('embed', 'rating')`` are collected. The results
    are averaged across configurations, smoothed with ``smooth`` and drawn
    on a matplotlib figure, each curve with dashed "+/-" bands.

    Args:
        run: Run identifier; the configuration suffix ``c<i>`` is appended.
        post: Forwarded as ``dset`` to the prediction file; also used in
            the figure name and title.
        epochs: Epochs to evaluate. Iterated once per configuration and
            used as the x axis, so it must be a re-iterable sequence.
        rating_norm: Forwarded as ``norm`` to ``evaluate_rating_space``.
            The value ``'Round'`` additionally turns on ``round=True`` for
            the embedding distance and the retrieval correlation.
        clustered_rating_distance: Forwarded to
            ``evaluate_rating_distance_matrix``.
        n_groups: Number of configurations to average over.
        USE_CACHE: When true, try to load previously dumped results from
            ``./Plots/Data/`` instead of re-evaluating. Defaults to
            ``False``, which is the behaviour that used to be hard-wired.
        DUMP: When true, pickle freshly evaluated results to the same
            file. Defaults to ``False`` (previously hard-wired).

    Returns:
        None. The function only prints progress and draws on the figure.

    Note:
        ``alpha`` (line transparency of the bands) is not defined in this
        function and has to be provided by the enclosing module.
    """
    plot_data_filename = './Plots/Data/rating_correlation_{}{}.p'.format(
        'dirR', run)

    loaded = False
    if USE_CACHE:
        try:
            # NOTE: pickle can execute arbitrary code while loading; only
            # read cache files that were produced by this project.
            with open(plot_data_filename, 'rb') as cache_file:
                pear_corr, kend_corr = pickle.load(cache_file)
            loaded = True
        except Exception:
            # Best-effort cache: any failure to read it falls back to a
            # fresh evaluation, without hiding KeyboardInterrupt/SystemExit.
            loaded = False
    else:
        print('SKIPING')

    if loaded:
        print("Loaded results for {}".format(run))
    else:
        print("Evaluating Rating Correlation for {}".format(run))
        round_ratings = (rating_norm == 'Round')
        pear_corr, kend_corr = [], []
        for config in range(n_groups):
            run_config = run + 'c{}'.format(config)
            PredFile = FileManager.Pred(type='rating', pre='dirR')
            Reg = RatingCorrelator(PredFile(run=run_config, dset=post),
                                   multi_epoch=True)
            pear_config, kend_config = [], []
            for e in epochs:
                Reg.evaluate_embed_distance_matrix(method='euclidean',
                                                   epoch=e,
                                                   round=round_ratings)
                Reg.evaluate_rating_space(norm=rating_norm)
                Reg.evaluate_rating_distance_matrix(
                    method='euclidean',
                    clustered_rating_distance=clustered_rating_distance)

                Reg.linear_regression()
                # The middle result of correlate_retrieval is not plotted.
                p, _, k = Reg.correlate_retrieval('embed',
                                                  'rating',
                                                  round=round_ratings,
                                                  verbose=False)
                pear_config.append(p)
                kend_config.append(k)

            pear_corr.append(np.array(pear_config))
            kend_corr.append(np.array(kend_config))

        # Average across configurations; the epoch axis is kept.
        pear_corr = np.mean(pear_corr, axis=0)
        kend_corr = np.mean(kend_corr, axis=0)
        if DUMP:
            with open(plot_data_filename, 'wb') as cache_file:
                pickle.dump((pear_corr, kend_corr), cache_file)
        else:
            print('NO DUMP')

    # Column 0 is drawn as the curve and column 1 as the half-width of the
    # dashed band (presumably mean and std -- confirm against
    # RatingCorrelator.correlate_retrieval).
    pear_corr = smooth(pear_corr[:, 0]), smooth(pear_corr[:, 1])
    kend_corr = smooth(kend_corr[:, 0]), smooth(kend_corr[:, 1])
    epochs = np.array(epochs)

    plt.figure('Rating2Rating:' + run + '-' + post)
    # Plot order (curve, upper band, lower band) per measure must match the
    # legend entries below.
    for curve, band in (pear_corr, kend_corr):
        q = plt.plot(epochs, curve)
        for bound in (curve + band, curve - band):
            plt.plot(epochs,
                     bound,
                     color=q[0].get_color(),
                     ls='--',
                     alpha=alpha)

    plt.grid(which='major', axis='y')
    plt.title('rating_' + run + '_' + post)
    plt.xlabel('epochs')
    plt.ylabel('correlation')
    plt.legend(['pearson', '', '', 'kendall', '', ''])
Exemple #2
0
def embed_correlate(network_type, run, post, epochs, rating_norm='none'):
    """Plot Pearson and Kendall embedding-vs-rating correlation per epoch.

    For each epoch the embedding file of ``run`` is wrapped in a
    ``RatingCorrelator``, euclidean distance matrices are evaluated for the
    embedding and the rating space, and the first and third results of
    ``correlate_retrieval('embed', 'rating')`` are recorded. Both series
    are then drawn on a new matplotlib figure.

    Args:
        network_type: Passed to ``FileManager.Embed``.
        run: Run identifier; used for the file lookup and the plot title.
        post: Forwarded as ``dset`` to the file lookup; also in the title.
        epochs: Epochs to evaluate; also used as the x axis.
        rating_norm: Forwarded as ``norm`` to ``evaluate_rating_space``.
            ``'Round'`` additionally enables ``round=True`` for the
            embedding distance matrix.

    Returns:
        None. The function only draws on a new figure.
    """
    pearson_series = []
    kendall_series = []
    for epoch in epochs:
        embed_file = FileManager.Embed(network_type)
        correlator = RatingCorrelator(
            embed_file.name(run=run, epoch=epoch, dset=post))

        use_rounding = (rating_norm == 'Round')
        correlator.evaluate_embed_distance_matrix(method='euclidean',
                                                  round=use_rounding)

        correlator.evaluate_rating_space(norm=rating_norm)
        correlator.evaluate_rating_distance_matrix(method='euclidean')

        correlator.linear_regression()
        # The middle result of correlate_retrieval is not plotted.
        pearson, _, kendall = correlator.correlate_retrieval('embed',
                                                             'rating')
        pearson_series.append(pearson)
        kendall_series.append(kendall)

    x_axis = np.array(epochs)
    pearson_series = np.array(pearson_series)
    kendall_series = np.array(kendall_series)

    plt.figure()
    for series in (pearson_series, kendall_series):
        plt.plot(x_axis, series)
    plt.grid(which='major', axis='y')
    plt.title('embed_' + run + '_' + post)
    plt.xlabel('epochs')
    plt.ylabel('correlation')
    plt.legend(['pearson', 'kendall'])
def eval_correlation(embed_source,
                     metric,
                     rating_metric,
                     epochs,
                     objective='rating',
                     rating_norm='none',
                     local_scaling=False,
                     seq=False):
    """Correlate embedding distances with rating/malignancy/size distances.

    One ``RatingCorrelator`` is built per entry of ``embed_source``. For
    each of them the rating distance matrix is evaluated (and dumped to a
    cache file), then for every epoch the embedding distance matrix is
    evaluated and correlated against a secondary target ('malig' when
    ``objective == 'rating'``, otherwise 'size') and against 'rating'.
    Results are merged across sources with ``merge_epochs``,
    ``mean_cross_validated_index`` and ``std_cross_validated_index``.

    Args:
        embed_source: Sequence of embedding file paths, one per
            configuration. Each entry must be a string: its last path
            component is sliced to build the cache filename.
        metric: Distance method for the embedding space.
        rating_metric: Distance method for the rating space.
        epochs: Epochs to evaluate; iterated once per source, so it must
            be a re-iterable sequence.
        objective: ``'rating'`` or ``'size'``. ``'size'`` additionally
            evaluates the size distance matrix.
        rating_norm: Forwarded as ``norm`` to ``evaluate_rating_space``.
        local_scaling: Forwarded to ``evaluate_rating_distance_matrix``.
        seq: Forwarded to ``RatingCorrelator``.

    Returns:
        A 9-tuple ``(Pm, PmStd, Km, KmStd, Pr, PrStd, Kr, KrStd,
        merged_epochs)``. The first eight are ``np.squeeze``-d merge
        results, the last is ``np.array(merged_epochs)``. ``P``/``K`` hold
        the first/third result of ``correlate_retrieval``; ``m``/``r``
        denote the secondary target and 'rating'. Index 0 of each result
        goes to the plain series and index 1 to the ``*Std`` series
        (presumably mean and std -- confirm against RatingCorrelator).
    """
    n_configs = len(embed_source)
    valid_epochs = [[] for _ in range(n_configs)]
    Pm, Km, Pr, Kr = [[[] for _ in range(n_configs)] for _ in range(4)]
    PmStd, KmStd, PrStd, KrStd = [[[] for _ in range(n_configs)]
                                  for _ in range(4)]
    secondary_target = 'malig' if objective == 'rating' else 'size'

    for c_idx, source in enumerate(embed_source):
        Reg = RatingCorrelator(source, conf=c_idx, multi_epoch=True, seq=seq)

        # Cache name: last path component of the source with its first 6
        # and last 2 characters dropped, plus objective and config index.
        cache_filename = 'output/cached_{}_{}_{}.p'.format(
            objective,
            source.split('/')[-1][6:-2], c_idx)

        # Loading from the cache is deliberately disabled here: the rating
        # distance matrix is always re-evaluated and then dumped.
        print('evaluating rating distance matrix...')
        Reg.evaluate_rating_space(norm=rating_norm, ignore_labels=False)
        Reg.evaluate_rating_distance_matrix(method=rating_metric,
                                            clustered_rating_distance=True,
                                            weighted=True,
                                            local_scaling=local_scaling)
        Reg.dump_rating_distance_to_cache(cache_filename)

        if objective == 'size':
            print('evaluating size distance matrix...')
            Reg.evaluate_size_distance_matrix()

        for E in epochs:
            try:
                Reg.evaluate_embed_distance_matrix(method=metric, epoch=E)
            except Exception:
                # Epochs whose embedding cannot be evaluated are skipped.
                # Exception (not a bare except) keeps Ctrl-C working.
                continue

            pm, _, km = Reg.correlate_retrieval('embed',
                                                secondary_target,
                                                verbose=False)
            pr, _, kr = Reg.correlate_retrieval('embed',
                                                'rating',
                                                verbose=False)
            valid_epochs[c_idx].append(E)

            Pm[c_idx].append(pm[0])
            Km[c_idx].append(km[0])
            Pr[c_idx].append(pr[0])
            Kr[c_idx].append(kr[0])
            PmStd[c_idx].append(pm[1])
            KmStd[c_idx].append(km[1])
            PrStd[c_idx].append(pr[1])
            KrStd[c_idx].append(kr[1])

        # Give every per-config series a leading axis of length 1 before
        # it is handed to the cross-validation merge helpers.
        for series in (Pm, Km, Pr, Kr, PmStd, KmStd, PrStd, KrStd):
            series[c_idx] = np.expand_dims(series[c_idx], axis=0)

    merged_epochs = merge_epochs(valid_epochs,
                                 min_element=max(n_configs - 1, 1))
    Pm = mean_cross_validated_index(Pm, valid_epochs, merged_epochs)
    Km = mean_cross_validated_index(Km, valid_epochs, merged_epochs)
    Pr = mean_cross_validated_index(Pr, valid_epochs, merged_epochs)
    Kr = mean_cross_validated_index(Kr, valid_epochs, merged_epochs)
    PmStd = std_cross_validated_index(PmStd, valid_epochs, merged_epochs)
    KmStd = std_cross_validated_index(KmStd, valid_epochs, merged_epochs)
    PrStd = std_cross_validated_index(PrStd, valid_epochs, merged_epochs)
    KrStd = std_cross_validated_index(KrStd, valid_epochs, merged_epochs)

    return (np.squeeze(Pm), np.squeeze(PmStd), np.squeeze(Km),
            np.squeeze(KmStd), np.squeeze(Pr), np.squeeze(PrStd),
            np.squeeze(Kr), np.squeeze(KrStd), np.array(merged_epochs))
Exemple #4
0
def dir_rating_params_correlate(run,
                                post,
                                epochs,
                                net_type,
                                rating_norm='none',
                                configurations=None,
                                USE_CACHE=True,
                                DUMP=True):
    """Print and plot per-property rating correlation over epochs.

    For every configuration ``run + 'c<id>'`` and every epoch,
    ``RatingCorrelator.correlate_to_ratings`` is evaluated and filtered
    with a fixed boolean mask that keeps seven properties. The results
    are averaged across configurations, printed per epoch, smoothed and
    plotted against hard-coded reference values (dashed lines).

    Args:
        run: Run identifier; the configuration suffix is appended to it.
        post: Forwarded as ``dset`` to the prediction file; also used in
            the figure name and title.
        epochs: Epochs to evaluate; iterated once per configuration and
            used as the x axis, so it must be a re-iterable sequence.
        net_type: Forwarded as ``pre`` to ``FileManager.Pred`` and used in
            the cache filename.
        rating_norm: Forwarded as ``norm`` to ``evaluate_rating_space``;
            ``'Round'`` enables ``round=True`` in ``correlate_to_ratings``.
        configurations: Iterable of configuration ids. ``None`` (default)
            means ``range(5)``.
        USE_CACHE: When true, try to load previously dumped results and
            only re-evaluate if loading fails.
        DUMP: When true, pickle freshly evaluated results.

    Returns:
        None. The function prints a table and draws on a figure.

    Note:
        The cache file name depends only on ``net_type`` and ``run``. A
        cache written for different ``epochs``, ``post`` or
        ``rating_norm`` is loaded without any check.
    """
    # Materialize once: the ids are needed for the run names and the
    # argument may be a one-shot iterable.
    configurations = (list(range(5))
                      if configurations is None else list(configurations))

    reference = [0.7567, 0.5945, 0.7394, 0.5777, 0.6155, 0.7445,
                 0.6481]  # 0, 0,
    rating_property = [
        'Subtlety', 'Sphericity', 'Margin', 'Lobulation', 'Spiculation',
        'Texture', 'Malignancy'
    ]  # 'Internalstructure', 'Calcification',
    # Drops entries 1 and 2 of the nine-element correlation result; these
    # presumably are 'Internalstructure' and 'Calcification' (see the
    # trailing comment above) -- confirm against correlate_to_ratings.
    mask = [True, False, False, True, True, True, True, True, True]

    plot_data_filename = './Plots/Data/rating_params_correlation_{}{}.p'.format(
        net_type, run)

    loaded = False
    if USE_CACHE:
        try:
            # NOTE: pickle can execute arbitrary code while loading; only
            # read cache files that were produced by this project.
            with open(plot_data_filename, 'rb') as cache_file:
                pear_corr = pickle.load(cache_file)
            loaded = True
        except Exception:
            # Best-effort cache: any failure to read it falls back to a
            # fresh evaluation, without hiding KeyboardInterrupt/SystemExit.
            loaded = False
    else:
        print('SKIPPING')

    if loaded:
        print("Loaded results for {}".format(run))
    else:
        print("Evaluating Rating Correlation for {}".format(run))
        pear_corr = []
        for c, config in enumerate(configurations):
            run_config = run + 'c{}'.format(config)
            PredFile = FileManager.Pred(type='rating', pre=net_type)
            # conf receives the position in `configurations`, not the id.
            Reg = RatingCorrelator(PredFile(run=run_config, dset=post),
                                   multi_epoch=True,
                                   conf=c)
            Reg.evaluate_rating_space(norm=rating_norm)
            config_corr = []
            for e in epochs:
                p = Reg.correlate_to_ratings(epoch=e,
                                             round=(rating_norm == 'Round'))
                selected = p[mask]
                if not np.all(np.isfinite(selected)):
                    print('nan at: conf={}, epoch={}'.format(c, e))
                config_corr.append(selected)

            pear_corr.append(np.array(config_corr))

        # Average across configurations; rows stay epochs, columns stay
        # the masked properties.
        pear_corr = np.mean(pear_corr, axis=0)
        if DUMP:
            with open(plot_data_filename, 'wb') as cache_file:
                pickle.dump(pear_corr, cache_file)
        else:
            print('NO DUMP')

    for i, e in enumerate(epochs):
        print("=" * 20)
        print(" Epoch {}:".format(e))
        print("-" * 20)
        for p, property_name in enumerate(rating_property):
            print("\t{}: \t{:.2f}".format(property_name, pear_corr[i, p]))

    # Smooth each property curve in place; this happens after the dump, so
    # the cache always holds the unsmoothed values.
    for p in range(pear_corr.shape[1]):
        pear_corr[:, p] = smooth(pear_corr[:, p], window_length=5, polyorder=2)
    epochs = np.array(epochs)

    plt.figure('RatingParams2Rating:' + run + '-' + post)
    q = plt.plot(epochs, pear_corr, linewidth=2.5)
    # One dashed horizontal reference line per property, in matching color.
    for line, ref in zip(q, reference):
        plt.plot(epochs,
                 ref * np.ones_like(epochs),
                 color=line.get_color(),
                 ls='--',
                 linewidth=4,
                 alpha=0.6)

    plt.grid(which='major', axis='y')
    plt.title('rating_' + run + '_' + post)
    plt.xlabel('epochs')
    plt.ylabel('correlation')
    plt.legend(rating_property)
Exemple #5
0
def eval_correlation(run,
                     net_type,
                     metric,
                     rating_metric,
                     epochs,
                     dset,
                     objective='rating',
                     rating_norm='none',
                     cross_validation=False,
                     n_groups=5,
                     seq=False):
    """Correlate embedding distances with rating/malignancy/size distances.

    Embedding sources are resolved through ``FileManager.Embed(net_type)``
    for the configurations ``run + 'c<i>'``. One ``RatingCorrelator`` is
    built per source; its rating distance matrix is loaded from a cache
    file or evaluated and dumped. For every epoch the embedding distance
    matrix is evaluated and correlated against a secondary target
    ('malig' when ``objective == 'rating'``, otherwise 'size') and against
    'rating'. Results are merged across configurations with
    ``merge_epochs``, ``mean_cross_validated_index`` and
    ``std_cross_validated_index``.

    Args:
        run: Run identifier; the configuration suffix is appended to it.
        net_type: Passed to ``FileManager.Embed``.
        metric: Distance method for the embedding space.
        rating_metric: Distance method for the rating space.
        epochs: Epochs to evaluate; iterated once per configuration, so
            it must be a re-iterable sequence.
        dset: Passed as second argument to the embedding file lookup.
        objective: ``'rating'`` or ``'size'``. ``'size'`` additionally
            evaluates the size distance matrix.
        rating_norm: Forwarded as ``norm`` to ``evaluate_rating_space``.
        cross_validation: Must be true. Only the cross-validated path is
            implemented, so the default ``False`` always raises.
        n_groups: Number of configurations (``c0`` .. ``c<n-1>``). For
            values not greater than 1 the single configuration ``c1`` is
            used.
        seq: Forwarded to ``RatingCorrelator``.

    Returns:
        A 9-tuple ``(Pm, PmStd, Km, KmStd, Pr, PrStd, Kr, KrStd,
        merged_epochs)``. The first eight are ``np.squeeze``-d merge
        results, the last is ``np.array(merged_epochs)``. Index 0 of each
        correlation result goes to the plain series and index 1 to the
        ``*Std`` series (presumably mean and std -- confirm against
        RatingCorrelator).

    Raises:
        AssertionError: If ``cross_validation`` is false.
    """
    Embed = FileManager.Embed(net_type)

    if not cross_validation:
        # Raised explicitly (instead of `assert False`) so the guard is
        # still active under `python -O`. The exception type is unchanged.
        raise AssertionError(
            'eval_correlation is only implemented for cross_validation=True')

    config_ids = range(n_groups) if n_groups > 1 else [1]
    embed_source = [Embed(run + 'c{}'.format(c), dset) for c in config_ids]

    # Size the accumulators from the sources actually used so they always
    # line up with the enumerate() index below.
    n_configs = len(embed_source)
    valid_epochs = [[] for _ in range(n_configs)]
    Pm, Km, Pr, Kr = [[[] for _ in range(n_configs)] for _ in range(4)]
    PmStd, KmStd, PrStd, KrStd = [[[] for _ in range(n_configs)]
                                  for _ in range(4)]
    secondary_target = 'malig' if objective == 'rating' else 'size'

    for c_idx, source in enumerate(embed_source):
        Reg = RatingCorrelator(source,
                               conf=c_idx,
                               multi_epoch=True,
                               seq=seq)

        # Cache name: last path component of the source with its first 6
        # and last 2 characters dropped, plus objective and config index.
        cache_filename = 'output/cached_{}_{}_{}.p'.format(
            objective,
            source.split('/')[-1][6:-2], c_idx)
        if not Reg.load_cached_rating_distance(cache_filename):
            print('evaluating rating distance matrix...')
            Reg.evaluate_rating_space(norm=rating_norm,
                                      ignore_labels=False)
            Reg.evaluate_rating_distance_matrix(
                method=rating_metric,
                clustered_rating_distance=True,
                weighted=True)
            Reg.dump_rating_distance_to_cache(cache_filename)

        if objective == 'size':
            print('evaluating size distance matrix...')
            Reg.evaluate_size_distance_matrix()

        for E in epochs:
            try:
                Reg.evaluate_embed_distance_matrix(method=metric, epoch=E)
            except Exception:
                # Epochs whose embedding cannot be evaluated are skipped.
                # Exception (not a bare except) keeps Ctrl-C working.
                continue

            pm, _, km = Reg.correlate_retrieval('embed',
                                                secondary_target,
                                                verbose=False)
            pr, _, kr = Reg.correlate_retrieval('embed',
                                                'rating',
                                                verbose=False)
            valid_epochs[c_idx].append(E)

            Pm[c_idx].append(pm[0])
            Km[c_idx].append(km[0])
            Pr[c_idx].append(pr[0])
            Kr[c_idx].append(kr[0])
            PmStd[c_idx].append(pm[1])
            KmStd[c_idx].append(km[1])
            PrStd[c_idx].append(pr[1])
            KrStd[c_idx].append(kr[1])

        # Give every per-config series a leading axis of length 1 before
        # it is handed to the cross-validation merge helpers.
        for series in (Pm, Km, Pr, Kr, PmStd, KmStd, PrStd, KrStd):
            series[c_idx] = np.expand_dims(series[c_idx], axis=0)

    merged_epochs = merge_epochs(valid_epochs,
                                 min_element=max(n_groups - 1, 1))
    Pm = mean_cross_validated_index(Pm, valid_epochs, merged_epochs)
    Km = mean_cross_validated_index(Km, valid_epochs, merged_epochs)
    Pr = mean_cross_validated_index(Pr, valid_epochs, merged_epochs)
    Kr = mean_cross_validated_index(Kr, valid_epochs, merged_epochs)
    PmStd = std_cross_validated_index(PmStd, valid_epochs, merged_epochs)
    KmStd = std_cross_validated_index(KmStd, valid_epochs, merged_epochs)
    PrStd = std_cross_validated_index(PrStd, valid_epochs, merged_epochs)
    KrStd = std_cross_validated_index(KrStd, valid_epochs, merged_epochs)

    return (np.squeeze(Pm), np.squeeze(PmStd), np.squeeze(Km),
            np.squeeze(KmStd), np.squeeze(Pr), np.squeeze(PrStd),
            np.squeeze(Kr), np.squeeze(KrStd), np.array(merged_epochs))