Beispiel #1
0
def eval_embed_space(run,
                     net_type,
                     metric,
                     rating_metric,
                     epochs,
                     dset,
                     rating_norm='none',
                     cross_validation=False,
                     n_groups=5):
    """Compute embedding-space indices per cross-validation group and epoch.

    For every group ``c`` in ``range(n_groups)`` the embedding identified by
    ``run + 'c<c>'`` is loaded through ``Retriever.load_embedding``; for every
    requested epoch the retriever is fitted with ``metric`` and the
    neighbour indices/distances are fed to the index functions (hubness,
    symmetry, Kumar index, concentration, relative contrast, feature and
    sample correlation).

    An epoch is recorded for a group only if *all* of its indices were
    computed successfully, so the per-group index lists always stay aligned
    with that group's list of valid epochs.

    Args:
        run: Run identifier; the group suffix ``'c<k>'`` is appended to it.
        net_type: Passed to ``FileManager.Embed``.
        metric: Passed to ``Retriever.fit`` as ``metric``.
        rating_metric: Not used by this function.
        epochs: Iterable of epoch numbers to evaluate.
        dset: Passed to ``Embed(...)`` and to ``Retriever``.
        rating_norm: Not used by this function.
        cross_validation: Not used by this function.
        n_groups: Number of groups (embedding sources) to evaluate.

    Returns:
        Tuple ``(combined_epochs, idx_hubness, idx_symmetry,
        idx_concentration, idx_contrast, idx_kummar, idx_featCorr,
        idx_sampCorr)``. ``combined_epochs`` lists the epochs that were valid
        in more than 3 groups. ``idx_hubness`` and ``idx_symmetry`` are the
        results of ``mean_cross_validated_index``. The remaining five entries
        are currently zero arrays created with ``np.zeros_like(idx_hubness)``.
    """
    def _empty_groups():
        # One independent list per group.
        return [[] for _ in range(n_groups)]

    # init
    Embed = FileManager.Embed(net_type)
    embed_source = [
        Embed(run + 'c{}'.format(c), dset) for c in range(n_groups)
    ]
    idx_hubness = _empty_groups()
    idx_symmetry = _empty_groups()
    idx_concentration = _empty_groups()
    idx_contrast = _empty_groups()
    idx_kummar = _empty_groups()
    idx_featCorr = _empty_groups()
    idx_sampCorr = _empty_groups()
    valid_epochs = _empty_groups()

    # calculate
    Ret = Retriever(title='{}'.format(run), dset=dset)
    for i, source in enumerate(embed_source):
        # NOTE(review): 'multi_epcch' is spelled as the call site had it;
        # presumably it matches Retriever.load_embedding's signature - confirm.
        embd, epoch_mask = Ret.load_embedding(source, multi_epcch=True)
        for e in epochs:
            # Compute every index first and append afterwards, so that a
            # failure part-way through cannot leave the lists with different
            # lengths than valid_epochs[i].
            try:
                # Raises IndexError when epoch e is not in epoch_mask.
                epoch_idx = np.argwhere(e == epoch_mask)[0][0]
                Ret.fit(metric=metric, epoch=e)
                indices, distances = Ret.ret_nbrs()
                hubness = calc_hubness(indices)
                symmetry = calc_symmetry(indices)
                tau, _ = kumar(distances, res=0.01)
                conc = concentration(distances)
                contrast = relative_contrast_imp(distances)
                feat_corr = features_correlation(embd[epoch_idx])
                samp_corr = samples_correlation(embd[epoch_idx])
            except Exception:
                # Best-effort: skip this epoch, but let KeyboardInterrupt /
                # SystemExit propagate (a bare except would swallow them).
                print("Epoch {} - no calculated embedding".format(e))
                continue

            idx_hubness[i].append(hubness)
            idx_symmetry[i].append(symmetry)
            idx_kummar[i].append(tau)
            idx_concentration[i].append(conc)
            idx_contrast[i].append(contrast)
            idx_featCorr[i].append(feat_corr)
            idx_sampCorr[i].append(samp_corr)
            valid_epochs[i].append(e)

        # Explicit integer dtype: an empty list would otherwise become a
        # float64 array and make np.bincount below raise a TypeError.
        valid_epochs[i] = np.array(valid_epochs[i], dtype=int)
        # zip(*...) transposes the per-epoch tuples into per-component rows.
        idx_hubness[i] = np.array(list(zip(*idx_hubness[i])))
        idx_symmetry[i] = np.array(list(zip(*idx_symmetry[i])))
        idx_concentration[i] = np.array(list(zip(*idx_concentration[i])))
        idx_contrast[i] = np.array(list(zip(*idx_contrast[i])))
        idx_kummar[i] = np.array([idx_kummar[i]])
        idx_featCorr[i] = np.array([idx_featCorr[i]])
        idx_sampCorr[i] = np.array([idx_sampCorr[i]])

    # Keep only epochs that were valid in more than 3 of the groups.
    combined_epochs = [
        epoch
        for epoch, count in enumerate(np.bincount(np.concatenate(valid_epochs)))
        if count > 3
    ]

    idx_hubness = mean_cross_validated_index(idx_hubness, valid_epochs,
                                             combined_epochs)
    idx_symmetry = mean_cross_validated_index(idx_symmetry, valid_epochs,
                                              combined_epochs)
    # NOTE: cross-validated aggregation of the remaining indices is disabled;
    # zero placeholders shaped like idx_hubness are returned in their place.
    idx_concentration = np.zeros_like(idx_hubness)
    idx_contrast = np.zeros_like(idx_hubness)
    idx_kummar = np.zeros_like(idx_hubness)
    idx_featCorr = np.zeros_like(idx_hubness)
    idx_sampCorr = np.zeros_like(idx_hubness)

    return (combined_epochs, idx_hubness, idx_symmetry, idx_concentration,
            idx_contrast, idx_kummar, idx_featCorr, idx_sampCorr)
Beispiel #2
0
        plt.figure("Distances - {}".format(name))
        p = [None] * 9
        for i in range(9):
            p[i] = plt.subplot(3, 3, i + 1)
        # init
        Embed = FileManager.Embed(net_type)
        embed_source = [
            Embed(run + 'c{}'.format(c), dset) for c in range(n_groups)
        ]
        idx_hubness, idx_symmetry, idx_concentration, idx_contrast, idx_kummar, valid_epochs = [], [], [], [], [], []
        # calculate
        Ret = Retriever(title='{}'.format(run), dset=dset)
        embd, epoch_mask = Ret.load_embedding(embed_source, multi_epcch=True)
        for e in [60]:  # epochs:
            # full
            Ret.fit(metric=metric, epoch=e)
            _, distances = Ret.ret_nbrs()
            plot_row(0, distances)

            # benign
            Ret.fit(metric=metric, epoch=e, label=0)
            _, distances = Ret.ret_nbrs()
            plot_row(3, distances, label=0)

            # malignant
            Ret.fit(metric=metric, epoch=e, label=1)
            _, distances = Ret.ret_nbrs()
            plot_row(6, distances, label=1)

#p[-1].legend(run_names)
print('Done distance analysis')
Beispiel #3
0
    #plt.ylabel('Normalized')

    #plt.show()

    for metric, m, in zip(metrics, range(len(metrics))):

        norm = 'None'
        if len(metric) > 5 and metric[-4:] == 'Norm':
            metric = metric[:-5]
            norm = 'Norm'
        elif len(metric) > 6 and metric[-5:] == 'Scale':
            metric = metric[:-6]
            norm = 'Scale'

        #distance_matrix = calc_distance_matrix(rating_normalize(rating, norm), method=metric)
        Ret.fit(len(data) - 1, metric=metric, normalization=norm)
        indices, distances = Ret.ret_nbrs()

        plt.figure()
        plt.hist(distances.flatten(), bins=500)
        plt.title('distance distribution')

        plt.figure()

        #   Hubness
        K = [3, 5, 7, 11, 17]
        #K = 1+np.array(range(20))
        h = np.zeros(len(K))

        plt.figure('k_occ')
        plt.title('k_occ distribution')