Exemple #1
0
def run_experiment():
    """Compare TCAV concept scores at the Mixed_5d and Mixed_7c bottlenecks.

    Loads the pickled TCAV scores, builds one (classes x concepts) score
    matrix per bottleneck, and draws a box plot with the individual scores
    overlaid for each bottleneck, the concepts being ordered by their median
    score. It also gathers the score entries per concept and class.

    Returns:
        dict: ``exp_info`` metadata plus ``exp_result``. ``exp_result`` maps
        every concept to ``{class_id: [score entries]}`` and, under
        ``'total_dist'``, maps every concept to the average over classes of
        the spread (max - min) of that concept's scores within a class.

    Raises:
        KeyError: if a class has no score for some concept at one of the
            two bottlenecks.

    Side effects:
        Adds a ``'class'`` key to the matching score entries of the loaded
        data and calls ``plt.show()``.
    """
    tcav_dict = load_tcavs(
        None,
        None,
        tcav_file_name="/Users/yannick/Documents/Studium/interVis/models/"
        "tensorflow_inception_v3/imagenetinception-tcavscores.pkl",
        absolute_path=True)

    # Concept names are taken from the first class, in order of first
    # appearance; the 'random' entry is excluded from the analysis.
    concepts = []
    for score in list(tcav_dict.values())[0]:
        concept = score['concept']
        if concept not in concepts:
            concepts.append(concept)
    if 'random' in concepts:
        concepts.remove('random')

    # One row per class, one column per concept.
    layer5ds = np.zeros(shape=(len(tcav_dict), len(concepts)))
    layer7cs = np.zeros(shape=(len(tcav_dict), len(concepts)))
    for row, class_id in enumerate(tcav_dict):
        # Index the class's results once instead of rescanning the list for
        # every concept; the first entry per (concept, bottleneck) wins.
        first_scores = {}
        for result in tcav_dict[class_id]:
            key = (result['concept'], result['bottleneck'])
            if key not in first_scores:
                first_scores[key] = result['score']
        for col, concept in enumerate(concepts):
            layer5ds[row, col] = first_scores[(concept, 'Mixed_5d')]
            layer7cs[row, col] = first_scores[(concept, 'Mixed_7c')]

    # Order the columns (and the matching labels) by ascending median score.
    sort_order5ds = np.median(layer5ds, axis=0).argsort()
    sort_order7cs = np.median(layer7cs, axis=0).argsort()
    layer5ds = layer5ds[:, sort_order5ds]
    layer7cs = layer7cs[:, sort_order7cs]
    concepts5ds = list(np.asarray(concepts)[sort_order5ds])
    concepts7cs = list(np.asarray(concepts)[sort_order7cs])

    result_dict = {'total_dist': {}}
    for concept in concepts:
        scores_by_class = {}
        spreads = []
        for class_id, tcav_class in tcav_dict.items():
            class_scores = []
            for score in tcav_class:
                if score['concept'] != concept:
                    continue
                score['class'] = class_id
                scores_by_class.setdefault(class_id, []).append(score)
                class_scores.append(score['score'])
            # A class without any score for this concept has no spread.
            if class_scores:
                spreads.append(max(class_scores) - min(class_scores))
        result_dict[concept] = scores_by_class
        # Every concept stems from the first class, so spreads is non-empty.
        result_dict['total_dist'][concept] = sum(spreads, 0.0) / len(spreads)

    fig1, (ax1, ax2) = plt.subplots(2)
    panels = (
        (ax1, 'TCAV concept scores at layer 5d', layer5ds, concepts5ds),
        (ax2, 'TCAV concept scores at layer 7c', layer7cs, concepts7cs),
    )
    for ax, title, scores, labels in panels:
        ax.set_title(title)
        ax.set_ylabel('concept scores')
        ax.set_xlabel('concepts')
        ax.boxplot(scores, positions=range(len(labels)))
        ax.set_xticklabels(labels)
        # Overlay the raw scores at the same numeric x positions as the
        # boxes, so numeric and categorical units are not mixed on one axis.
        for col in range(len(labels)):
            column = scores[:, col]
            ax.scatter([col] * len(column), column, alpha=0.4)

    plt.show()

    return {
        "exp_info": {
            "name": "layer scores",
            "description": "Evaluate the influence of the choice of bottleneck"
                           "     layer for tcav scores. Returns",
            "nr_of_return_elements": 1
        },
        "exp_result": result_dict
    }
Exemple #2
0
import itertools
import numpy as np
from matplotlib import cm
import matplotlib.pyplot as plt

from project.server.explanations.tcav_explainer import load_tcavs

# Module-level load of the pickled TCAV scores; this runs at import time.
tcav_dict = load_tcavs(
    None,
    None,
    tcav_file_name=(
        "/Users/yannick/Documents/Studium/interVis/"
        "models/tensorflow_inception_v3/"
        "imagenetinception-tcavscores.pkl"
    ),
    absolute_path=True,
)


def run_experiment():
    """Load the TCAV scores and allocate the Mixed_5d score matrix.

    Collects the distinct concept names of the first class (without the
    'random' entry) and creates a zero matrix with one row per class and one
    column per concept. Nothing is returned.

    NOTE(review): this definition looks like a truncated copy of a longer
    experiment; the allocated matrix is never filled or used here.
    """
    scores_path = ("/Users/yannick/Documents/Studium/interVis/models/"
                   "tensorflow_inception_v3/imagenetinception-tcavscores.pkl")
    tcav_dict = load_tcavs(None, None, tcav_file_name=scores_path,
                           absolute_path=True)

    # Distinct concept names of the first class, in order of appearance.
    first_class_scores = list(tcav_dict.values())[0]
    concepts = []
    for entry in first_class_scores:
        if entry['concept'] in concepts:
            continue
        concepts.append(entry['concept'])
    concepts.remove('random')

    n_classes = len(tcav_dict)
    n_concepts = len(concepts)
    layer5ds = np.zeros((n_classes, n_concepts))
def run_experiment():
    """Print the Euclidean distance between model predictions for two labels.

    As written, the function loads the TCAV scores, takes the first dataset
    returned by ``get_dataset_list("../../datasets/")``, builds an
    ``InceptionModel`` and, for dataset labels 65 and 57, predicts on the
    selected files, keeps element ``[0]`` of each prediction result, prints
    ``ssd.euclidean`` of the two and returns ``None``.

    NOTE(review): everything after the bare ``return`` (the WordNet
    similarity / TCAV score correlation and the result dictionary) is
    unreachable. That part also reads ``d``, which is only defined in the
    commented-out block at the top, so removing the ``return`` alone would
    raise ``NameError``.

    NOTE(review): ``os``, ``ssd``, ``wn``, ``random``, ``get_dataset_list``
    and ``tensorflow_models`` are not imported in the visible part of this
    file; ``ssd`` is presumably ``scipy.spatial.distance`` and ``wn`` the
    NLTK WordNet corpus reader -- confirm.
    """

    #d = {}
    #with open("/Users/yannick/Documents/Studium/interVis/project/tcav_score_analyzer/synsets.txt") as f:
    #    for i, line in enumerate(f):
    #       d[i] = line

    tcav_dict = load_tcavs(
        None,
        None,
        tcav_file_name="/Users/yannick/Documents/Studium/interVis/models/"
        "tensorflow_inception_v3/imagenetinception-tcavscores.pkl",
        absolute_path=True)

    dataset = get_dataset_list("../../datasets/")[0]
    the_model = tensorflow_models.InceptionModel(
        0, "", "")  #mode lrp to return logits
    print(dataset.dataset_name, the_model.model_name)

    # NOTE(review): tcav_classes is never used below.
    tcav_classes = [*tcav_dict]
    # Select the files of label 65 whose name part before the extension ends
    # in an 8-character number <= 5000 (presumably the image index -- confirm).
    file_list = []
    for elem in dataset.label_to_elements[65]:
        if int(elem.split('.')[-2][-8:]) <= 5000:
            file_list.append(elem)

    transformed_images = the_model.transform_images(
        [os.path.join(dataset.dataset_path, file) for file in file_list])

    preds0 = the_model.predict_images(transformed_images)[0]

    # Same selection and prediction for label 57.
    file_list = []
    for elem in dataset.label_to_elements[57]:
        if int(elem.split('.')[-2][-8:]) <= 5000:
            file_list.append(elem)

    transformed_images = the_model.transform_images(
        [os.path.join(dataset.dataset_path, file) for file in file_list])

    preds1 = the_model.predict_images(transformed_images)[0]

    sum_diff = ssd.euclidean(preds0, preds1)
    print(sum_diff, np.average(sum_diff))

    # NOTE(review): early exit -- all code below this line is unreachable.
    return

    # Map every class key of the TCAV scores to a WordNet noun synset, using
    # the offset stored in `d` (undefined here, see the docstring).
    synsets = {}
    similarities = {}
    for key in tcav_dict:
        synsets[key] = wn._synset_from_pos_and_offset('n',
                                                      int(d[int(key)][1:]))

    # Concepts that have a Mixed_7c result for the first class.
    concepts = []
    for concept in [
            result['concept'] for result in list(tcav_dict.values())[0]
            if result['bottleneck'] == 'Mixed_7c'
    ]:
        concepts.append(concept)

    # Both arrays are sized for all class pairs, although only the sampled
    # pairs below are filled in; the remaining rows stay zero.
    wordnet_similarities = np.zeros(
        len([e for e in itertools.combinations(tcav_dict.keys(), r=2)]))
    distances = np.zeros(
        shape=(len([e for e in itertools.combinations(tcav_dict.keys(), r=2)]),
               len(concepts)))
    nr_of_pairs = 0

    # LCH similarity of every class pair, divided by the self-similarity of
    # the first class of the pair.
    concept_pairs = {}
    concept_pair_list = []
    for (c1, c2) in itertools.combinations(tcav_dict.keys(), r=2):
        concept_pairs[(c1, c2)] = synsets[c1].lch_similarity(
            synsets[c2]) / synsets[c1].lch_similarity(synsets[c1])

    # Pairs ordered from most to least similar; keep the top 25 plus 40
    # random pairs drawn from the middle of the ranking.
    sorted_dict = [
        k for k in sorted(concept_pairs, key=concept_pairs.get, reverse=True)
    ]
    print("# of concept pairs:", len(sorted_dict))
    concept_pair_list += sorted_dict[:25] + random.sample(
        sorted_dict[25:-25], 40)  #+ sorted_dict[-10:]

    for (c1, c2) in concept_pair_list:
        wordnet_similarities[nr_of_pairs] = synsets[c1].lch_similarity(
            synsets[c2]) / synsets[c1].lch_similarity(synsets[c1])
        # print([result['score'] for result in tcav_dict[c1] if result['bottleneck'] == 'Mixed_7c'])
        # print([result['score'] for result in tcav_dict[c2] if result['bottleneck'] == 'Mixed_7c'])
        # shift input by one (as negative values can occur), then normalize  with X_max = 2, X_min = 0
        class1 = np.zeros(len(concepts))
        class2 = np.zeros(len(concepts))
        for idx, concept in enumerate(concepts):
            class1[idx] = [
                result['score'] for result in tcav_dict[c1]
                if result['concept'] == concept
                and result['bottleneck'] == 'Mixed_7c'
            ][0]
            class2[idx] = [
                result['score'] for result in tcav_dict[c2]
                if result['concept'] == concept
                and result['bottleneck'] == 'Mixed_7c'
            ][0]
        # Per-concept score similarity of the pair: 1 - |score difference|.
        distances[nr_of_pairs, :] = 1.0 - abs(class1 - class2)
        nr_of_pairs += 1

    fig1, ax1 = plt.subplots(1)
    # ax1.set_title('Distances between concept scores/wordnet similarity')
    # ax1.set_ylabel('Wordnet Similarity')
    # ax1.set_xlabel('Concept Score Distance')
    # plt.scatter(distances[:, -1], wordnet_similarities)
    # plt.show()

    # print(distances)

    # Correlation, per concept, between the score similarity and the WordNet
    # similarity across the pair rows.
    corrs = np.zeros(len(concepts))
    for idx, concept in enumerate(concepts):
        corrs[idx] = np.corrcoef(distances[:, idx], wordnet_similarities)[1, 0]

    # Report concepts in ascending order of correlation.
    concepts = np.array(concepts)[corrs.argsort()]
    corrs = corrs[corrs.argsort()]

    result_dict = {}
    for pair in zip(concepts, corrs):
        result_dict[pair[0]] = pair[1]
        print(pair[0], ": concept correlation: ", pair[1])

    return {
        "exp_info": {
            "name": "wordnet correlation scores",
            "description": "Determine the correlation between\
     wordnet similarity scores and tcav score similarity",
            "nr_of_return_elements": 1
        },
        "exp_result": result_dict
    }
def run_experiment():
    """Collect Mixed_7c TCAV scores of stratified concept variants.

    Loads the stratified-robustness scores and the re-run scores, derives the
    base concept names (the part of a concept name before the first ``'_'``)
    from the first class of the robustness file, and gathers for every base
    concept and class:

    * the Mixed_7c robustness entries whose concept name contains both the
      base concept and an underscore, and
    * the Mixed_7c re-run entries whose concept name equals the base concept.

    Class id 674 is skipped.

    Returns:
        dict: ``exp_info`` metadata plus ``exp_result``. ``exp_result`` maps
        every base concept to ``{class_id: [score entries]}`` and, under
        ``'total_dist'``, maps every base concept to the summed absolute
        difference between ``main_concept_val`` and the variant scores.

    Raises:
        KeyError: if a class of the robustness file is missing from the
            re-run file.

    Side effects:
        Adds a ``'class'`` key to the collected score entries.
    """
    rb_score_dict = load_tcavs(
        None,
        None,
        tcav_file_name=
        "/Users/yannick/Documents/Studium/interVis/project/tcav_score_analyzer/ILSVRC2012inception_v3-tcavscores-stratified-robustness.pkl",
        absolute_path=True)

    rerun_score_dict = load_tcavs(
        None,
        None,
        tcav_file_name=
        "/Users/yannick/Documents/Studium/interVis/project/tcav_score_analyzer/ILSVRC2012inception_v3-tcavscore_rerun_stratified_full_concepts.pkl",
        absolute_path=True)

    # Base concept names, in order of first appearance.
    concepts = []
    for score in list(rb_score_dict.values())[0]:
        concept = score['concept'].split('_')[0]
        if concept not in concepts:
            concepts.append(concept)

    result_dict = {'total_dist': {}}
    for concept in concepts:
        scores_by_class = result_dict[concept] = {}
        main_concept_val = 0.0
        other_concept_val = []
        for class_id, tcav_class in rb_score_dict.items():
            if class_id == 674:
                continue
            for score in tcav_class:
                name = score['concept']
                if not (concept in name and '_' in name
                        and score['bottleneck'] == 'Mixed_7c'):
                    continue
                score['class'] = class_id
                # NOTE(review): names reaching this point contain '_', while
                # `concepts` only holds '_'-free prefixes, so this branch can
                # never fire and main_concept_val stays 0.0. The base
                # concept's own score was probably meant to come from the
                # re-run entries below -- confirm before relying on
                # 'total_dist'.
                if name in concepts:
                    main_concept_val = score['score']
                else:
                    other_concept_val.append(score['score'])
                scores_by_class.setdefault(class_id, []).append(score)
            for score in rerun_score_dict[class_id]:
                if (score['concept'] == concept
                        and score['bottleneck'] == 'Mixed_7c'):
                    score['class'] = class_id
                    # setdefault: the class may have had no matching variant
                    # entry above, in which case no list exists yet.
                    scores_by_class.setdefault(class_id, []).append(score)
        result_dict['total_dist'][concept] = sum(
            abs(main_concept_val - val) for val in other_concept_val)

    return {
        "exp_info": {
            "name": "stratified concept sampling",
            "description": "Evaluate how the choice of the concept"
                           "    dataset influences the scores. Returns",
            "nr_of_return_elements": 2
        },
        "exp_result": result_dict
    }
Exemple #5
0
def run_experiment():
    """Plot how often fewer random experiments give the same significance.

    Every file in the ``tcav_iter_runs`` directory holds the TCAV scores of
    one run; the number of random experiments is parsed from the file name
    (last ``'_'``-separated token, up to the first ``'.'``). The run with 50
    random experiments is the reference. For every other run, every Mixed_7c
    score is compared with the reference score of the same class and concept:
    the two agree when both or neither have ``p_val <= 0.05``. The agreement
    rate is averaged per concept and then over concepts, plotted against the
    number of random experiments, saved to ``test.png`` and shown.

    Raises:
        ValueError: if the directory holds no score file for the reference
            run.

    Side effects:
        Adds ``'class'`` and ``'nr_iter'`` keys to the collected score
        entries, writes ``test.png`` and calls ``plt.show()``.
    """
    tcav_iter_path = '/Users/yannick/Documents/Studium/interVis/project/tcav_score_analyzer/tcav_iter_runs'
    reference_iters = 50

    # Keyed by the integer run count so lookups and sorting use one type.
    tcav_scores_dict = {}
    for file_name in os.listdir(tcav_iter_path):
        if file_name == '.DS_Store':
            continue
        nr_iters = int(file_name.split('_')[-1].split('.')[0])
        tcav_scores_dict[nr_iters] = load_tcavs(
            None, None,
            tcav_file_name=os.path.join(tcav_iter_path, file_name),
            absolute_path=True)
    if reference_iters not in tcav_scores_dict:
        raise ValueError(
            'no TCAV score file for the reference run with %d random '
            'experiments found in %s' % (reference_iters, tcav_iter_path))
    iter_run_list = sorted(tcav_scores_dict)

    # Base concept names (prefix before the first '_'), taken from the first
    # class of the run with the fewest random experiments.
    concepts = []
    for score in list(tcav_scores_dict[iter_run_list[0]].values())[0]:
        concept = score['concept'].split('_')[0]
        if concept not in concepts:
            concepts.append(concept)

    # distances[nr_iters][concept] -> list of booleans, True where a score
    # has the same significance as the reference score.
    distances = {
        nr_iters: {}
        for nr_iters in iter_run_list if nr_iters != reference_iters
    }
    for concept in concepts:
        scores_by_class = {}
        for nr_iters, rb_score_dict in tcav_scores_dict.items():
            for class_id, tcav_class in rb_score_dict.items():
                for score in tcav_class:
                    if (concept in score['concept']
                            and score['bottleneck'] == 'Mixed_7c'):
                        score['class'] = class_id
                        score['nr_iter'] = nr_iters
                        scores_by_class.setdefault(class_id, []).append(score)

        for class_scores in scores_by_class.values():
            reference = next(
                (score for score in class_scores
                 if score['nr_iter'] == reference_iters), None)
            if reference is None:
                # Nothing to compare against for this class and concept.
                continue
            reference_significant = reference['p_val'] <= 0.05
            for score in class_scores:
                if score['nr_iter'] == reference_iters:
                    continue
                distances[score['nr_iter']].setdefault(concept, []).append(
                    (score['p_val'] <= 0.05) == reference_significant)

    iterwise_dists = []
    for nr_iters in iter_run_list:
        if nr_iters == reference_iters:
            # The reference run agrees with itself by definition.
            iterwise_dists.append(1)
            continue
        # Lists are only created on first append, so none of them is empty.
        avg_concept_dists = [
            np.count_nonzero(flags) / len(flags)
            for flags in distances[nr_iters].values()
        ]
        iterwise_dists.append(
            statistics.mean(avg_concept_dists)
            if avg_concept_dists else float('nan'))

    fig, ax = plt.subplots()
    # Default integer dtype: int8 cannot represent run counts above 127.
    ax.plot(np.array(iter_run_list), np.array(iterwise_dists), '-bD', markevery=True)

    ax.set_xlim(xmin=0)
    ax.set_ylim(ymin=0.5)
    ax.set(xlabel='#of random experiments', ylabel='% of scores assigned same significance',
           title='Consensus in significance test for differing nr. of random experiments')
    ax.grid()

    fig.savefig("test.png")
    plt.show()