def config_filenames(self, net_type, use_core, keep_spatial_dim=False):
        """Create the file managers for a network's weights and its outputs.

        Args:
            net_type: Network identifier. On the non-core path only 'dir',
                'dirR', 'dirS' and 'dirRS' are recognised.
            use_core: When true the second manager is a ``File.Embed``;
                otherwise it is a ``File.Pred`` (a dict holding two of them,
                keyed 'R' and 'S', for 'dirRS').
            keep_spatial_dim: Core path only - prefixes the embedding name
                with 'SP_'.

        Returns:
            Tuple ``(weights manager, embedding/prediction manager)``.

        Raises:
            AssertionError: ``use_core`` is false and ``net_type`` is not one
                of the recognised identifiers.
        """
        # NOTE: `input_dir` and `output_dir` are not parameters of this
        # function; they are looked up in the enclosing/module scope.
        weights_file = File.Weights(net_type, output_dir=input_dir)

        if use_core:
            embed_name = 'SP_' + net_type if keep_spatial_dim else net_type
            return weights_file, File.Embed(embed_name, output_dir=output_dir)

        # Networks with a single prediction target: net type -> target type.
        single_target = {'dir': 'malig', 'dirR': 'rating', 'dirS': 'size'}
        if net_type in single_target:
            outputs = File.Pred(type=single_target[net_type],
                                pre=net_type,
                                output_dir=output_dir)
        elif net_type == 'dirRS':
            # Rating and size predictions are saved in separate files.
            outputs = {
                'R': File.Pred(type='rating', pre='dirRS', output_dir=output_dir),
                'S': File.Pred(type='size', pre='dirRS', output_dir=output_dir),
            }
        else:
            print('{} not recognized'.format(net_type))
            assert False

        return weights_file, outputs
Exemple #2
0
def embed_correlate(network_type, run, post, epochs, rating_norm='none'):
    """Plot embedding-vs-rating correlation against training epoch.

    For every epoch the embedding file of ``run`` is loaded into a
    ``RatingCorrelator``, euclidean distance matrices are evaluated for both
    the embedding and the rating space, and the first and third values
    returned by ``correlate_retrieval('embed', 'rating')`` are collected
    (plotted as 'pearson' and 'kendall' respectively).

    Args:
        network_type: Network identifier passed to ``FileManager.Embed``.
        run: Run identifier (also used in the figure title).
        post: Dataset name, passed as ``dset`` to the embedding file manager.
        epochs: Iterable of epochs to evaluate.
        rating_norm: Rating normalisation passed to ``evaluate_rating_space``;
            the value 'Round' additionally enables ``round`` when evaluating
            the embedding distance matrix.

    Returns:
        None. A new matplotlib figure is created as a side effect.
    """
    pearson_per_epoch, kendall_per_epoch = [], []
    round_embedding = rating_norm == 'Round'

    for epoch in epochs:
        embed_file = FileManager.Embed(network_type)
        embed_path = embed_file.name(run=run, epoch=epoch, dset=post)
        correlator = RatingCorrelator(embed_path)

        correlator.evaluate_embed_distance_matrix(method='euclidean',
                                                  round=round_embedding)
        correlator.evaluate_rating_space(norm=rating_norm)
        correlator.evaluate_rating_distance_matrix(method='euclidean')
        correlator.linear_regression()

        pearson, _, kendall = correlator.correlate_retrieval('embed', 'rating')
        pearson_per_epoch.append(pearson)
        kendall_per_epoch.append(kendall)

    epoch_axis = np.array(epochs)

    plt.figure()
    for series in (pearson_per_epoch, kendall_per_epoch):
        plt.plot(epoch_axis, np.array(series))
    plt.grid(which='major', axis='y')
    plt.title('embed_' + run + '_' + post)
    plt.xlabel('epochs')
    plt.ylabel('correlation')
    plt.legend(['pearson', 'kendall'])
def eval_classification(run,
                        net_type,
                        metric,
                        epochs,
                        dset,
                        NN=(7, 11, 17),
                        cross_validation=False,
                        n_groups=5):
    """Evaluate nearest-neighbour classification on stored embeddings.

    Args:
        run: Run identifier. With cross-validation the per-configuration
            runs are addressed as ``run + 'c<k>'`` for ``k`` in
            ``range(n_groups)``.
        net_type: Network identifier passed to ``FileManager.Embed``.
        metric: Distance metric forwarded to the retriever.
        epochs: Epochs to evaluate.
        dset: Dataset name.
        NN: Neighbourhood sizes to evaluate; results are collected per size.
        cross_validation: Evaluate every configuration with
            ``Retriever.classify_kfold`` and merge the results; otherwise
            evaluate the single run with ``Retriever.classify_leave1out``.
        n_groups: Number of cross-validation configurations.

    Returns:
        ``(P, P_std, combined_epochs)``. With cross-validation these come
        from ``mean_cross_validated_index_with_std`` and ``merge_epochs``.
        Without it, ``P``/``P_std`` are the mean/std over ``NN`` for each
        epoch and ``combined_epochs`` is ``np.array(epochs)``.
    """
    Embed = FileManager.Embed(net_type)

    if cross_validation:
        Pred_L1O = [[] for _ in range(n_groups)]
        valid_epochs = [[] for _ in range(n_groups)]
        # Load
        embed_source = [
            Embed(run + 'c{}'.format(c), dset) for c in range(n_groups)
        ]
        Ret = Retriever(title='{}-{}'.format(net_type, run), dset=dset)
        for i, source in enumerate(embed_source):
            # NOTE(review): the source is wrapped in a list here, while
            # eval_retrieval passes it bare - confirm which form
            # Retriever.load_embedding expects.
            Ret.load_embedding([source], multi_epcch=True)
            for E in epochs:
                # Calc - an epoch is kept only if every N could be evaluated.
                try:
                    scores = np.array([
                        Ret.classify_kfold(epoch=E,
                                           n=N,
                                           k_fold=10,
                                           metric=metric) for N in NN
                    ])
                except Exception:
                    # Best effort: epochs without a stored embedding are
                    # skipped. KeyboardInterrupt/SystemExit now propagate.
                    print("Epoch {} - no calculated embedding".format(E))
                    continue
                Pred_L1O[i].append(scores)
                valid_epochs[i].append(E)
            Pred_L1O[i] = np.array(Pred_L1O[i])
            valid_epochs[i] = np.array(valid_epochs[i])

        combined_epochs = merge_epochs(valid_epochs,
                                       min_element=max(n_groups - 1, 1))
        P, P_std = mean_cross_validated_index_with_std(Pred_L1O, valid_epochs,
                                                       combined_epochs)

    else:
        # Start from an empty list: the per-group placeholders used by the
        # cross-validation branch would make the stacked result ragged.
        Pred_L1O = []
        for E in epochs:
            # Load
            embed_source = Embed(run, E, dset)
            Ret = Retriever(title='{}-{}'.format(net_type, run), dset=dset)
            Ret.load_embedding(embed_source)
            # Calc
            Pred_L1O.append(
                np.array([
                    Ret.classify_leave1out(n=N, metric=metric)[1] for N in NN
                ]))
        P, P_std = np.mean(Pred_L1O, axis=-1), np.std(Pred_L1O, axis=-1)
        # Previously unbound on this path, which made the return raise.
        combined_epochs = np.array(epochs)

    return P, P_std, combined_epochs
Exemple #4
0
    def __init__(self, network='dir', pooling='max', categorize=False):
        """Set up file managers and default data/network settings.

        Args:
            network: Network identifier, stored and passed to the
                ``FileManager`` weight and embedding managers.
            pooling: Stored as ``net_pool``.
            categorize: Stored as ``categorize``.
        """
        self.network = network
        self.Weights = FileManager.Weights(network)
        self.Embed = FileManager.Embed(network)

        # Data settings ('Legacy' is noted in the original as an alternative
        # value for data_res).
        self.data_size, self.data_res, self.data_sample = 144, '0.5I', 'Normal'

        # Network settings: single-channel square input.
        in_size = 128
        self.net_in_size = in_size
        self.net_input_shape = (in_size, in_size, 1)
        self.net_out_size = 128
        self.net_normalize = True
        self.net_pool = pooling
        self.categorize = categorize

        # No model yet.
        self.model = None
Exemple #5
0
if __name__ == "__main__":
    # Metrics currently listed for distance matrices:
    #   'chebyshev', 'euclidean', 'cosine', 'correlation'
    #
    # Ways to evaluate the similarity of two distance matrices:
    #   Kendall tau distance
    #   Spearman's rank correlation
    #   Distance correlation
    from Network import FileManager

    Embed = FileManager.Embed('siam')
    embed_source = Embed(run='064X', epoch=30, dset='Valid')
    Reg = RatingCorrelator(embed_source)

    distance_metric = 'euclidean'

    # Distance matrices of the embedding and rating spaces.
    Reg.evaluate_embed_distance_matrix(method=distance_metric)
    Reg.evaluate_rating_space()
    Reg.evaluate_rating_distance_matrix(method=distance_metric)

    Reg.linear_regression()
    Reg.scatter('embed',
                'rating',
                xMethod=distance_metric,
                yMethod=distance_metric,
                sub=True)
    # Alternative views kept from the original for reference:
    #   Reg.scatter('malig', 'rating', yMethod='euclidean', sub=True)
    #   Reg.scatter('embed', 'malig', sub=True)
    #   Reg.malig_regression(method='euclidean')

    Reg.correlate('malig', 'rating')
from experiments import CrossValidationManager
import numpy as np

# Sanity check: compare the size of each primary dataset group with the size
# of the 'Valid' embedding stored for every cross-validation configuration.
run = '888'

DataGroups = [
    FileManager.Dataset('Primary', i, './Dataset').load(size=160, res=0.5)
    for i in range(5)
]
expected_datast_size = [len(d) for d in DataGroups]
# minlength=3 guarantees a count for labels 0, 1 and 2 even when a group has
# no element of the highest label; without it the report below raised
# IndexError for such a group.
label_stats = [
    np.bincount([element['label'] for element in group], minlength=3)
    for group in DataGroups
]
for i, (total, stats) in enumerate(zip(expected_datast_size, label_stats)):
    print('group id {} => total:{}, benign:{}, malig:{}, unknown:{}'.format(
        i, total, stats[0], stats[1], stats[2]))

cv = CrossValidationManager('RET')

for i in range(10):  # conf in conf_names:
    conf = cv.get_run_id(i)
    dataset_size = len(
        FileManager.Embed(pre='dirRD').load(run='{}c{}'.format(run, conf),
                                            dset='Valid'))
    group_id = cv.get_test(i)
    # Only the first test group is used to look up the expected size.
    print('#{} ({})- expected: {}, actual: {} (group id = {})'.format(
        i, conf, expected_datast_size[group_id[0]], dataset_size, group_id))
Exemple #7
0
            run=run,
            net_type=net_type,
            dset=dset,
            metric=metric,
            epochs=epochs,
            cross_validation=True)

        data[run_id, 0] = acc
        data[run_id, 1] = prec
        data[run_id, 2] = index

        dataStd[run_id, 0] = acc_std
        dataStd[run_id, 1] = prec_std
        dataStd[run_id, 2] = index_std

        Embed = FileManager.Embed(net_type)
        embed_source = [
            Embed(run + 'c{}'.format(c), dset) for c in configurations
        ]

        pm, pm_std, km, km_std, pr, pr_std, kr, kr_std, _ = eval_correlation(
            embed_source,
            metric=metric,
            rating_metric='euclidean',
            rating_norm=rating_norm,
            epochs=epochs)

        data[run_id, 3] = pm
        data[run_id, 4] = pr
        #data[run_id, 5] = km
        #data[run_id, 6] = kr
Exemple #8
0
    wRuns = [
        '813c0'
    ]  # ['512cc0', '251c0']  #['064X', '078X', '026'] #['064X', '071' (is actually 071X), '078X', '081', '082']
    wRunsNet = ['dirR']  # ['dirRS', 'dirR']  #, 'dir']
    run_metrics = ['l2']

    select = 0

    rating_normalizaion = 'None'  # 'None', 'Normal', 'Scale'

    doRatingRet = True
    doPCA = False

    if doRatingRet:
        Embed = FileManager.Embed(wRunsNet[select])
        #N = 5
        #testData, validData, trainData = load_nodule_raw_dataset(size=160, res=0.5, sample='Normal')
        ##if dset is 'Train':  data = trainData
        #if dset is 'Test':   data = testData
        #if dset is 'Valid':  data = validData

        #Ret = Retriever(title='Ratings', dset=set)
        #Ret.load_rating(data)
        #et.fit(N)

        #info, nod_ids = Ret.show_ret(15)
        #info, nod_ids = Ret.show_ret(135)
        #info, nod_ids = Ret.show_ret(135)
        #anns = getAnnotation(info, nodule_ids=nod_ids, return_all=True)
        #pickle.dump(anns, open('tmp.p', 'bw'))
Exemple #9
0
import numpy as np
import matplotlib.pyplot as plt
from Network import FileManager
from Analysis import Retriever
from Analysis.metric_space_indexes import k_occurrences

# Find the strongest hubs (most frequent K-nearest-neighbour occurrences) in
# the epoch-60 embedding of two runs.
net_type = 'dirD'
config = 0
dset = 'Valid'
K = 2

RUN_LABELS = [('821', 'Pearson-loss'), ('822', 'KL-loss')]

res = {}
for run, label in RUN_LABELS:
    print('{}: {}\n{}'.format(run, label, '*' * 20))
    embed_source = FileManager.Embed(net_type)('{}c{}'.format(run, config),
                                               dset)

    Ret = Retriever(title='', dset=dset)
    Ret.load_embedding(embed_source, multi_epcch=True)
    Ret.fit(metric='euclidean', epoch=60)
    indices, distances = Ret.ret_nbrs()

    # The three entries with the highest k-occurrence are taken as hubs.
    k_occ = k_occurrences(indices, K)
    hubs_indices = np.argsort(k_occ)[-3:]
    res[run] = (hubs_indices, indices)
    print(list(zip(hubs_indices, k_occ[hubs_indices])))

for run, label in RUN_LABELS:
    print('{}: {}\n{}'.format(run, label, '*' * 20))
def eval_retrieval(run,
                   net_type,
                   metric,
                   epochs,
                   dset,
                   NN=(7, 11, 17),
                   cross_validation=False,
                   n_groups=5):
    """Evaluate retrieval precision (and a derived F1) on stored embeddings.

    Args:
        run: Run identifier. With cross-validation the per-configuration
            runs are addressed as ``run + 'c<k>'`` for ``k`` in
            ``range(n_groups)``.
        net_type: Network identifier passed to ``FileManager.Embed``.
        metric: Distance metric forwarded to ``Retriever.fit``.
        epochs: Epochs to evaluate.
        dset: Dataset name.
        NN: Neighbourhood sizes; the retriever is fitted with ``max(NN)``
            and precision is evaluated for each size.
        cross_validation: Evaluate every configuration and merge results
            with ``mean_cross_validated_index_with_std``; otherwise evaluate
            the single run per epoch.
        n_groups: Number of cross-validation configurations.

    Returns:
        ``(P, P_std, F1, F1_std, valid_epochs)``. F1 is the harmonic mean of
        the two per-class precisions returned by
        ``Retriever.evaluate_precision``. With cross-validation
        ``valid_epochs`` is a list with one array of evaluated epochs per
        configuration; without it, it is ``np.array(epochs)``.
    """
    Embed = FileManager.Embed(net_type)

    if cross_validation:
        Prec = [[] for _ in range(n_groups)]
        Prec_b = [[] for _ in range(n_groups)]
        Prec_m = [[] for _ in range(n_groups)]
        valid_epochs = [[] for _ in range(n_groups)]
        # Load
        embed_source = [
            Embed(run + 'c{}'.format(c), dset) for c in range(n_groups)
        ]
        Ret = Retriever(title='{}-{}'.format(net_type, run), dset=dset)
        for i, source in enumerate(embed_source):
            Ret.load_embedding(source, multi_epcch=True)
            for E in epochs:
                try:
                    Ret.fit(np.max(NN), metric=metric, epoch=E)
                except Exception:
                    # Best effort: epochs without a stored embedding are
                    # skipped. KeyboardInterrupt/SystemExit now propagate.
                    print("Epoch {} - no calculated embedding".format(E))
                    continue
                # Calc
                prec, prec_b, prec_m = [], [], []
                for N in NN:
                    p, pb, pm = Ret.evaluate_precision(n=N)
                    prec.append(p)
                    prec_b.append(pb)
                    prec_m.append(pm)
                Prec[i].append(np.array(prec))
                Prec_b[i].append(np.array(prec_b))
                Prec_m[i].append(np.array(prec_m))
                valid_epochs[i].append(E)

            Prec[i] = np.array(Prec[i])
            Prec_b[i] = np.array(Prec_b[i])
            Prec_m[i] = np.array(Prec_m[i])
            valid_epochs[i] = np.array(valid_epochs[i])

        combined_epochs = epochs  # merge_epochs(valid_epochs)
        P, P_std = mean_cross_validated_index_with_std(Prec, valid_epochs,
                                                       combined_epochs)
        # Harmonic mean per configuration. Computing it group by group keeps
        # this working when configurations have different numbers of valid
        # epochs (stacking the groups into one array is ragged in that case).
        combined = [
            2 * group_b * group_m / (group_b + group_m)
            for group_b, group_m in zip(Prec_b, Prec_m)
        ]
        F1, F1_std = mean_cross_validated_index_with_std(
            combined, valid_epochs, combined_epochs)

    else:
        # Fresh accumulators: the per-group placeholders used by the
        # cross-validation branch would make the stacked arrays ragged and
        # break the arithmetic below.
        Prec, Prec_b, Prec_m = [], [], []
        for E in epochs:
            Ret = Retriever(title='', dset='')
            Ret.load_embedding(Embed(run, E, dset))
            Ret.fit(np.max(NN), metric=metric)

            prec, prec_b, prec_m = [], [], []
            for N in NN:
                # NOTE(review): this branch originally unpacked the result
                # as (p, pm, pb); it now matches the cross-validation
                # branch. Only the symmetric harmonic mean of the last two
                # values is used, so results are unaffected - confirm the
                # actual order against Retriever.evaluate_precision.
                p, pb, pm = Ret.evaluate_precision(n=N)
                prec.append(p)
                prec_b.append(pb)
                prec_m.append(pm)
            Prec.append(np.array(prec))
            Prec_b.append(np.array(prec_b))
            Prec_m.append(np.array(prec_m))

        Prec = np.array(Prec)
        Prec_m = np.array(Prec_m)
        Prec_b = np.array(Prec_b)
        f1 = 2 * Prec_b * Prec_m / (Prec_b + Prec_m)
        P, P_std = np.mean(Prec, axis=-1), np.std(Prec, axis=-1)
        F1, F1_std = np.mean(f1, axis=-1), np.std(f1, axis=-1)
        valid_epochs = np.array(epochs)

    return P, P_std, F1, F1_std, valid_epochs
Exemple #11
0
def eval_embed_space(run,
                     net_type,
                     metric,
                     rating_metric,
                     epochs,
                     dset,
                     rating_norm='none',
                     cross_validation=False,
                     n_groups=5):
    """Compute metric-space indexes of the embeddings across configurations.

    The embeddings of ``run + 'c<k>'`` for ``k`` in ``range(n_groups)`` are
    loaded and, for each epoch, hubness, symmetry, Kumar, concentration,
    contrast and feature/sample correlation indexes are calculated. Only
    hubness and symmetry are currently merged across configurations; the
    remaining indexes are returned as zero arrays shaped like the hubness
    result.

    Args:
        run: Run identifier (without the configuration suffix).
        net_type: Network identifier passed to ``FileManager.Embed``.
        metric: Distance metric forwarded to ``Retriever.fit``.
        rating_metric: Unused in this function.
        epochs: Epochs to evaluate.
        dset: Dataset name.
        rating_norm: Unused in this function.
        cross_validation: Unused in this function.
        n_groups: Number of configurations to load.

    Returns:
        ``(combined_epochs, idx_hubness, idx_symmetry, idx_concentration,
        idx_contrast, idx_kummar, idx_featCorr, idx_sampCorr)`` where
        ``combined_epochs`` lists the epochs evaluated by more than three
        configurations.
    """

    def per_group():
        return [[] for _ in range(n_groups)]

    # init
    Embed = FileManager.Embed(net_type)
    embed_source = [
        Embed(run + 'c{}'.format(c), dset) for c in range(n_groups)
    ]
    idx_hubness, idx_symmetry = per_group(), per_group()
    idx_concentration, idx_contrast = per_group(), per_group()
    idx_kummar = per_group()
    idx_featCorr, idx_sampCorr = per_group(), per_group()
    valid_epochs = per_group()

    # calculate
    Ret = Retriever(title='{}'.format(run), dset=dset)
    for i, source in enumerate(embed_source):
        embd, epoch_mask = Ret.load_embedding(source, multi_epcch=True)
        for e in epochs:
            # Compute every neighbourhood index before recording any of
            # them, so a failure part-way through cannot leave the series
            # out of step with valid_epochs.
            try:
                epoch_idx = np.argwhere(e == epoch_mask)[0][0]
                Ret.fit(metric=metric, epoch=e)
                indices, distances = Ret.ret_nbrs()
                hubness = calc_hubness(indices)
                symmetry = calc_symmetry(indices)
                tau, _ = kumar(distances, res=0.01)
                concentration_idx = concentration(distances)
                contrast_idx = relative_contrast_imp(distances)
            except Exception:
                # Best effort: epochs that cannot be evaluated are skipped.
                # KeyboardInterrupt/SystemExit now propagate.
                print("Epoch {} - no calculated embedding".format(e))
                continue
            idx_hubness[i].append(hubness)
            idx_symmetry[i].append(symmetry)
            idx_kummar[i].append(tau)
            idx_concentration[i].append(concentration_idx)
            idx_contrast[i].append(contrast_idx)
            valid_epochs[i].append(e)

            # correlation - as before, a failure here does not invalidate
            # the epoch for the indexes recorded above.
            try:
                feat_corr = features_correlation(embd[epoch_idx])
                samp_corr = samples_correlation(embd[epoch_idx])
            except Exception:
                print("Epoch {} - no calculated embedding".format(e))
                continue
            idx_featCorr[i].append(feat_corr)
            idx_sampCorr[i].append(samp_corr)

        valid_epochs[i] = np.array(valid_epochs[i])
        # zip(*...) transposes the per-epoch tuples into per-component rows.
        idx_hubness[i] = np.array(list(zip(*idx_hubness[i])))
        idx_symmetry[i] = np.array(list(zip(*idx_symmetry[i])))
        idx_concentration[i] = np.array(list(zip(*idx_concentration[i])))
        idx_contrast[i] = np.array(list(zip(*idx_contrast[i])))
        idx_kummar[i] = np.array([idx_kummar[i]])
        idx_featCorr[i] = np.array([idx_featCorr[i]])
        idx_sampCorr[i] = np.array([idx_sampCorr[i]])

    # A configuration without any valid epoch contributes an empty float
    # array, which turns the concatenation into floats; np.bincount needs
    # integers, hence the cast.
    # NOTE(review): the threshold of 3 is hard-coded; sibling functions use
    # max(n_groups - 1, 1), which is equivalent only for n_groups == 5.
    epoch_counts = np.bincount(np.concatenate(valid_epochs).astype(int))
    combined_epochs = [
        epoch for epoch, count in enumerate(epoch_counts) if count > 3
    ]

    idx_hubness = mean_cross_validated_index(idx_hubness, valid_epochs,
                                             combined_epochs)
    idx_symmetry = mean_cross_validated_index(idx_symmetry, valid_epochs,
                                              combined_epochs)
    # The remaining indexes are not merged across configurations; zero
    # placeholders shaped like the hubness result are returned instead.
    idx_concentration = np.zeros_like(idx_hubness)
    idx_contrast = np.zeros_like(idx_hubness)
    idx_kummar = np.zeros_like(idx_hubness)
    idx_featCorr = np.zeros_like(idx_hubness)
    idx_sampCorr = np.zeros_like(idx_hubness)

    return combined_epochs, idx_hubness, idx_symmetry, idx_concentration, idx_contrast, idx_kummar, idx_featCorr, idx_sampCorr
Exemple #12
0
def eval_correlation(run,
                     net_type,
                     metric,
                     rating_metric,
                     epochs,
                     dset,
                     objective='rating',
                     rating_norm='none',
                     cross_validation=False,
                     n_groups=5,
                     seq=False):
    """Correlate embedding distances with malignancy/size and rating.

    For every cross-validation configuration a ``RatingCorrelator`` is built
    on the stored embedding, the rating distance matrix is loaded from cache
    (or evaluated and cached), and for each epoch the embedding distance
    matrix is correlated against 'malig' (or 'size' when ``objective`` is
    not 'rating') and against 'rating'.

    Args:
        run: Run identifier (without the configuration suffix).
        net_type: Network identifier passed to ``FileManager.Embed``.
        metric: Distance metric for the embedding distance matrix.
        rating_metric: Distance metric for the rating distance matrix.
        epochs: Epochs to evaluate; epochs that fail to load are skipped.
        dset: Dataset name.
        objective: 'rating' or 'size'; selects the first correlation target
            and is part of the cache file name.
        rating_norm: Normalisation passed to ``evaluate_rating_space``.
        cross_validation: Must be true; the single-run path is not
            implemented.
        n_groups: Number of configurations. For ``n_groups <= 1`` the single
            configuration 'c1' is used.
        seq: Forwarded to ``RatingCorrelator``.

    Returns:
        ``(Pm, PmStd, Km, KmStd, Pr, PrStd, Kr, KrStd, merged_epochs)``,
        each squeezed to drop singleton axes. ``P*``/``K*`` are built from
        the first/third values returned by ``correlate_retrieval``
        (presumably Pearson and Kendall); the ``m`` series is the
        malignancy/size target and the ``r`` series the rating target.

    Raises:
        NotImplementedError: ``cross_validation`` is false.
    """
    Embed = FileManager.Embed(net_type)

    if not cross_validation:
        # This path used to be `assert False` followed by unreachable code
        # that referenced undefined names; fail explicitly instead (an
        # assert is stripped under -O).
        raise NotImplementedError(
            'eval_correlation supports only cross_validation=True')

    # Load
    configurations = range(n_groups) if n_groups > 1 else [1]
    embed_source = [
        Embed(run + 'c{}'.format(c), dset) for c in configurations
    ]

    # One accumulator per loaded source (identical to n_groups whenever
    # n_groups >= 1).
    def per_source():
        return [[] for _ in embed_source]

    valid_epochs = per_source()
    Pm, Km, Pr, Kr = per_source(), per_source(), per_source(), per_source()
    PmStd, KmStd, PrStd, KrStd = (per_source(), per_source(), per_source(),
                                  per_source())

    first_target = 'malig' if objective == 'rating' else 'size'

    for c_idx, source in enumerate(embed_source):
        # NOTE(review): `conf` is the position in embed_source, which is 0
        # for the single-configuration case even though 'c1' is loaded -
        # confirm this is intended.
        Reg = RatingCorrelator(source,
                               conf=c_idx,
                               multi_epoch=True,
                               seq=seq)

        # load rating data
        # The cache name is cut out of the source's file name (characters
        # 6..-2 of its last path component).
        cache_filename = 'output/cached_{}_{}_{}.p'.format(
            objective,
            source.split('/')[-1][6:-2], c_idx)
        if not Reg.load_cached_rating_distance(cache_filename):
            print('evaluating rating distance matrix...')
            Reg.evaluate_rating_space(norm=rating_norm, ignore_labels=False)
            Reg.evaluate_rating_distance_matrix(
                method=rating_metric,
                clustered_rating_distance=True,
                weighted=True)
            Reg.dump_rating_distance_to_cache(cache_filename)

        if objective == 'size':
            print('evaluating size distance matrix...')
            Reg.evaluate_size_distance_matrix()

        for E in epochs:
            # Calc
            try:
                Reg.evaluate_embed_distance_matrix(method=metric, epoch=E)
            except Exception:
                # Best effort: epochs without a stored embedding are skipped
                # silently. KeyboardInterrupt/SystemExit now propagate.
                continue

            pm, _, km = Reg.correlate_retrieval('embed',
                                                first_target,
                                                verbose=False)
            pr, _, kr = Reg.correlate_retrieval('embed',
                                                'rating',
                                                verbose=False)
            valid_epochs[c_idx].append(E)

            # Each result is a (value, std) pair.
            Pm[c_idx].append(pm[0])
            Km[c_idx].append(km[0])
            Pr[c_idx].append(pr[0])
            Kr[c_idx].append(kr[0])
            PmStd[c_idx].append(pm[1])
            KmStd[c_idx].append(km[1])
            PrStd[c_idx].append(pr[1])
            KrStd[c_idx].append(kr[1])

        # Add a leading axis to every per-configuration series.
        for series in (Pm, Km, Pr, Kr, PmStd, KmStd, PrStd, KrStd):
            series[c_idx] = np.expand_dims(series[c_idx], axis=0)

    merged_epochs = merge_epochs(valid_epochs,
                                 min_element=max(n_groups - 1, 1))
    Pm = mean_cross_validated_index(Pm, valid_epochs, merged_epochs)
    Km = mean_cross_validated_index(Km, valid_epochs, merged_epochs)
    Pr = mean_cross_validated_index(Pr, valid_epochs, merged_epochs)
    Kr = mean_cross_validated_index(Kr, valid_epochs, merged_epochs)
    PmStd = std_cross_validated_index(PmStd, valid_epochs, merged_epochs)
    KmStd = std_cross_validated_index(KmStd, valid_epochs, merged_epochs)
    PrStd = std_cross_validated_index(PrStd, valid_epochs, merged_epochs)
    KrStd = std_cross_validated_index(KrStd, valid_epochs, merged_epochs)

    return np.squeeze(Pm), np.squeeze(PmStd), np.squeeze(Km), np.squeeze(
        KmStd), np.squeeze(Pr), np.squeeze(PrStd), np.squeeze(Kr), np.squeeze(
            KrStd), np.array(merged_epochs)