Example #1
def layer_deviations():
    """For each trained model, plot how much the Pereira2018 experiment-3 score drops
    when the layer is chosen on experiment 2 instead of on experiment 3 itself."""
    original_scores = collect_scores(benchmark='Pereira2018-encoding',
                                     models=models)
    experiment2_scores, experiment3_scores = collect_Pereira_experiment_scores(
        best_layer=False)
    # filter trained only
    experiment2_scores = experiment2_scores[
        ~experiment2_scores['model'].str.endswith('-untrained')]
    experiment3_scores = experiment3_scores[
        ~experiment3_scores['model'].str.endswith('-untrained')]
    # compute deviation between the exp3 score of the layer chosen on exp2 and the max exp3 score (layer chosen on exp3)
    deviations = []
    assert (experiment2_scores['model'].values ==
            experiment3_scores['model'].values).all()
    for model in set(experiment2_scores['model']):
        model_data2 = experiment2_scores[experiment2_scores['model'] == model]
        model_data3 = experiment3_scores[experiment3_scores['model'] == model]
        best_layer2 = model_data2['layer'][
            model_data2['score'] == max(model_data2['score'])].values[0]
        best_layer3 = model_data3['layer'][
            model_data3['score'] == max(model_data3['score'])].values[0]
        best3 = model_data3[model_data3['layer'] == best_layer3]
        chosen = model_data3[model_data3['layer'] == best_layer2]
        deviations.append({
            'model': model,
            'best_layer': best_layer3,
            'chosen_layer': best_layer2,
            'max_score': best3['score'].values[0],
            'error1': best3['error'].values[0],
            'chosen_score': chosen['score'].values[0],
            'error2': chosen['error'].values[0],
            'reference_error': original_scores['error']
        })
    deviations = pd.DataFrame(deviations)
    deviations['deviation'] = deviations['max_score'] - deviations['chosen_score']
    deviations['avg_error'] = deviations[["error1", "error2"]].mean(axis=1)

    # plot
    fig, ax = pyplot.subplots()
    width = 0.5
    step = (len(models) + 1) * width
    offset = len(models) / 2
    for model_iter, model in enumerate(models):
        model_score = deviations[deviations['model'] == model]
        y, yerr = model_score['deviation'], model_score['avg_error']
        x = np.arange(start=0, stop=len(y) * step, step=step)
        model_x = x - offset * width + model_iter * width
        ax.bar(model_x,
               height=y,
               yerr=yerr,
               width=width,
               edgecolor='none',
               color=model_colors[model],
               ecolor='gray',
               error_kw=dict(elinewidth=1, alpha=.5))
        for xpos in model_x:
            ax.text(x=xpos + .6 * width / 2,
                    y=.005,
                    s=model_label_replace[model],
                    rotation=90,
                    rotation_mode='anchor',
                    fontdict=dict(fontsize=6.5),
                    color='gray')
    ax.set_xticks([])
    ax.set_ylim([-.15, 1])
    ax.set_ylabel('train/test deviation of layer choice')
    savefig(fig, savename=Path(__file__).parent / 'layer_deviations')
Example #2
def metric_generalizations():
    """For each neural dataset, compare encoding-metric scores against the comparison
    metrics (currently only RDM) for all / trained-only / untrained-only models, and
    summarize the resulting correlations as horizontal bars."""
    data_identifiers = ['Pereira2018', 'Fedorenko2016v3', 'Blank2014fROI']
    base_metric = 'encoding'
    comparison_metrics = ['rdm']  # , 'cka']
    fig = pyplot.figure(figsize=(20, 10 * len(comparison_metrics)),
                        constrained_layout=True)
    gridspec = fig.add_gridspec(nrows=len(data_identifiers) *
                                len(comparison_metrics),
                                ncols=4)
    for benchmark_index, benchmark_prefix in enumerate(data_identifiers):
        settings = dict(xlim=[0, 1.2], ylim=[0, 1.8]) if benchmark_prefix.startswith("Pereira") else \
            dict(xlim=[0, .4], ylim=[0, .4]) if benchmark_prefix.startswith('Blank') else dict()
        for metric_index, comparison_metric in enumerate(comparison_metrics):
            grid_row = (benchmark_index *
                        len(comparison_metrics)) + metric_index
            correlations = []
            # all as well as trained and untrained separately
            for train_index, include_untrained in enumerate(
                [True, False, 'only']):
                gridpos = gridspec[grid_row, 1 + train_index]
                ax = fig.add_subplot(gridpos)
                train_description = 'all' if include_untrained is True \
                    else 'trained' if include_untrained is False \
                    else 'untrained'
                ax.set_title(f"{comparison_metric} {train_description}")
                _, _, info = compare(
                    benchmark1=f"{benchmark_prefix}-{base_metric}",
                    benchmark2=f"{benchmark_prefix}-{comparison_metric}",
                    **settings,
                    plot_significance_p=False,
                    plot_significance_stars=True,
                    include_untrained=include_untrained,
                    ax=ax)
                ax.set_xlabel(ax.get_xlabel() + '-' + base_metric)
                correlations.append({
                    'trained': train_description,
                    'r': info['r'],
                    'p': info['p'],
                    'index': train_index
                })
            # plot bars
            correlations = pd.DataFrame(correlations).sort_values(by='index')
            ax = fig.add_subplot(gridspec[grid_row, 0])
            ticks = list(reversed(correlations['index']))
            bar_width = 0.5
            ax.barh(y=ticks,
                    width=correlations['r'],
                    height=bar_width,
                    color='#ababab')
            ax.set_yticks([])
            for ypos, label, pvalue in zip(ticks, correlations['trained'],
                                           correlations['p']):
                ax.text(y=ypos + .15 * bar_width / 2,
                        x=.01,
                        s=label,
                        verticalalignment='center',
                        fontdict=dict(fontsize=20),
                        color='black')
                ax.text(
                    y=ypos,
                    x=0,
                    s=significance_stars(pvalue) if pvalue < .05 else 'n.s.',
                    rotation=90,
                    rotation_mode='anchor',
                    horizontalalignment='center',
                    fontdict=dict(fontsize=14, fontweight='normal'))
            ax.set_title(comparison_metric)
        fig.text(x=0.007,
                 y=1 - (.33 * benchmark_index + .15),
                 s=benchmark_prefix,
                 rotation=90,
                 horizontalalignment='center',
                 verticalalignment='center',
                 fontdict=dict(fontsize=20, fontweight='bold'))

    savefig(fig, savename=Path(__file__).parent / "metric_generalizations")
Example #3
def untrained_vs_trained(benchmark='Pereira2018-encoding',
                         layer_mode='best',
                         model_selection=None,
                         analyze_only=False,
                         **kwargs):
    """
    :param layer_mode: 'best' to select the best layer per model,
      'group' to keep all layers and color them based on their model,
      'pos' to keep all layers and color them based on their relative position.
    """
    all_models = model_selection or models
    all_models = [[model, f"{model}-untrained"] for model in all_models]
    all_models = [model for model_tuple in all_models for model in model_tuple]
    scores = collect_scores(benchmark=benchmark, models=all_models)
    scores = average_adjacent(scores)  # average experiments & atlases
    # embedding layers in xlnets and t5s have nan scores
    scores = scores.dropna()
    if layer_mode == 'best':
        scores = choose_best_scores(scores)
    elif layer_mode == 'pos':
        scores['layer_position'] = [
            model_layers[model].index(layer) / len(model_layers[model]) for
            model, layer in zip(scores['model'].values, scores['layer'].values)
        ]
    # separate into trained / untrained
    untrained_rows = np.array(
        [model.endswith('-untrained') for model in scores['model']])
    # copies, so relabeling the untrained models below does not warn about chained assignment
    scores_trained = scores[~untrained_rows].copy()
    scores_untrained = scores[untrained_rows].copy()
    # align
    scores_untrained['model'] = [
        model.replace('-untrained', '')
        for model in scores_untrained['model'].values
    ]
    scores_trained, scores_untrained = align_scores(
        scores_trained,
        scores_untrained,
        identifier_set=('model', ) if layer_mode == 'best' else
        ('model', 'layer'))
    if layer_mode != 'best':
        assert (scores_trained['layer'].values ==
                scores_untrained['layer'].values).all()
    # analyze
    average_trained = np.mean(scores_trained['score'])
    average_untrained = np.mean(scores_untrained['score'])
    # p-value of the correlation between trained and untrained scores
    _, p_correlation = pearsonr(scores_trained['score'], scores_untrained['score'])
    logger.info(
        f"Trained/untrained on {benchmark}: "
        f"score trained={average_trained:.2f}, untrained={average_untrained:.2f} | "
        f"diff {average_trained - average_untrained:.2f} "
        f"({average_trained / average_untrained * 100:.0f}%, p={p_correlation})")
    if analyze_only:
        return
    # plot
    if layer_mode in ('best', 'group'):
        colors = [model_colors[model] for model in scores_trained['model']]
        colors = [to_rgba(named_color) for named_color in colors]
    else:
        cmap = matplotlib.cm.get_cmap('binary')
        colors = cmap(scores_trained['layer_position'].values)
    fig, ax = pyplot.subplots(figsize=(6, 6))
    _plot_scores1_2(scores_untrained,
                    scores_trained,
                    alpha=None if layer_mode == 'best' else 0.4,
                    color=colors,
                    xlabel="architecture (no training)",
                    ylabel="architecture + training",
                    plot_significance_stars=False,
                    ax=ax,
                    **kwargs)
    lims = [-.05, 1.1] if benchmark.startswith('Fedorenko') else [-.05, 1.2] if benchmark.startswith('Pereira') \
        else [8, 4] if benchmark.startswith('wikitext-2') else [0, 1.335] if benchmark.startswith('Futrell') \
        else [-.05, 1.]
    ax.set_xlim(lims)
    ax.set_ylim(lims)
    ax.plot(ax.get_xlim(), ax.get_xlim(), linestyle='dashed', color='darkgray')
    ax.set_title(benchmark_label_replace[benchmark])
    savefig(fig,
            savename=Path(__file__).parent / f"untrained_trained-{benchmark}")
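
A minimal usage sketch (added for illustration, not part of the original analyses); it assumes this function and the helpers it references are importable together, and reuses the default benchmark identifier from the signature above:

if __name__ == '__main__':
    # one point per architecture, using each model's best layer
    untrained_vs_trained(benchmark='Pereira2018-encoding', layer_mode='best')
    # one point per layer, shaded by its relative depth within the model
    untrained_vs_trained(benchmark='Pereira2018-encoding', layer_mode='pos')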
Example #4
def num_features_vs_score(benchmark='Pereira2018-encoding',
                          per_layer=True,
                          include_untrained=True):
    """Relate each model's (or, with `per_layer`, each layer's) number of stored features
    to its score on `benchmark`; feature counts are cached in num_features.csv next to
    this file. See the usage sketch after this function."""
    if include_untrained:
        all_models = [(model, f"{model}-untrained") for model in models]
        all_models = [
            model for model_tuple in all_models for model in model_tuple
        ]
    else:
        all_models = models
    scores = collect_scores(benchmark=benchmark, models=all_models)
    scores = average_adjacent(scores)
    scores = scores.dropna()
    if not per_layer:
        scores = choose_best_scores(scores)
    # count number of features
    store_file = Path(__file__).parent / "num_features.csv"
    if store_file.is_file():
        num_features = pd.read_csv(store_file)
    else:
        num_features = []
        for model in tqdm(ordered_set(scores['model'].values), desc='models'):
            # mock-run stimuli that are already stored
            mock_extractor = ActivationsExtractorHelper(get_activations=None,
                                                        reset=None)
            features = mock_extractor._from_sentences_stored(
                layers=model_layers[model.replace('-untrained', '')],
                sentences=None,
                identifier=model.replace('-untrained', ''),
                stimuli_identifier='Pereira2018-243sentences.astronaut')
            if per_layer:
                for layer in scores['layer'].values[scores['model'] == model]:
                    # store the count under 'score' so _plot_scores1_2 below can plot it like a score
                    num_features.append({
                        'model': model,
                        'layer': layer,
                        'score': len(features.sel(layer=layer)['neuroid'])
                    })
            else:
                num_features.append({
                    'model': model,
                    'score': len(features['neuroid'])
                })
        num_features = pd.DataFrame(num_features)
        num_features['error'] = np.nan
        num_features.to_csv(store_file, index=False)
    if per_layer:
        assert (scores['layer'].values == num_features['layer'].values).all()
    # plot
    colors = [
        model_colors[model.replace('-untrained', '')]
        for model in scores['model'].values
    ]
    fig, ax = _plot_scores1_2(num_features,
                              scores,
                              color=colors,
                              xlabel="number of features",
                              ylabel=benchmark)
    savefig(fig,
            savename=Path(__file__).parent /
            (f"num_features-{benchmark}" +
             ("-layerwise" if per_layer else "")))
Example #5
def compare(benchmark1='wikitext-2',
            benchmark2='Blank2014fROI-encoding',
            include_untrained=False,
            best_layer=True,
            normalize=True,
            reference_best=False,
            identity_line=False,
            annotate=False,
            plot_ceiling=False,
            xlim=None,
            ylim=None,
            ax=None,
            **kwargs):
    """Scatter per-model (or per-layer) scores on `benchmark1` against `benchmark2`,
    optionally with ceiling bands and an identity line; the figure is saved unless `ax`
    is passed in. Returns (fig, ax, info) where `info` holds the correlation statistics.
    See the usage sketch after this function."""
    ax_given = ax is not None
    all_models = models
    if include_untrained:
        all_models = [([model] if include_untrained != 'only' else []) +
                      [f"{model}-untrained"] for model in all_models]
        all_models = [
            model for model_tuple in all_models for model in model_tuple
        ]
    scores1 = collect_scores(benchmark=benchmark1,
                             models=all_models,
                             normalize=normalize)
    scores2 = collect_scores(benchmark=benchmark2,
                             models=all_models,
                             normalize=normalize)
    scores1 = average_adjacent(scores1).dropna()
    scores2 = average_adjacent(scores2).dropna()
    if best_layer:
        choose_best = choose_best_scores if not reference_best else reference_best_scores
        scores1, scores2 = choose_best(scores1), choose_best(scores2)
    scores1, scores2 = align_scores(
        scores1,
        scores2,
        identifier_set=['model'] if best_layer else ['model', 'layer'])
    colors = [
        model_colors[model.replace('-untrained', '')]
        for model in scores1['model'].values
    ]
    colors = [to_rgba(named_color) for named_color in colors]
    if not best_layer or not annotate:
        score_annotations = None
    elif annotate is True:
        score_annotations = scores1['model'].values
    else:
        score_annotations = [
            model if model in annotate else None
            for model in scores1['model'].values
        ]
    fig, ax, info = _plot_scores1_2(
        scores1,
        scores2,
        color=colors,
        alpha=None if best_layer else .2,
        score_annotations=score_annotations,
        xlabel=benchmark1,
        ylabel=benchmark2,
        loss_xaxis=benchmark1.startswith('wikitext'),
        ax=ax,
        return_info=True,
        **kwargs)
    xlim_given, ylim_given = xlim is not None, ylim is not None
    xlim, ylim = ax.get_xlim() if not xlim_given else xlim, ax.get_ylim(
    ) if not ylim_given else ylim
    normalize_x = normalize and not any(
        benchmark1.startswith(perf_prefix)
        for perf_prefix in performance_benchmarks)
    normalize_y = normalize and not any(
        benchmark2.startswith(perf_prefix)
        for perf_prefix in performance_benchmarks)
    if normalize_x and not xlim_given:
        xlim = [0, 1.1]
    if normalize_y and not ylim_given:
        ylim = [0, 1.1]
    if normalize_x and plot_ceiling:
        ceiling_err = get_ceiling(benchmark1)
        shaded_errorbar(y=ylim,
                        x=np.array([1, 1]),
                        error=ceiling_err,
                        ax=ax,
                        vertical=True,
                        alpha=0,
                        shaded_kwargs=dict(color='gray', alpha=.5))
    if normalize_y and plot_ceiling:
        ceiling_err = get_ceiling(benchmark2)
        shaded_errorbar(x=xlim,
                        y=np.array([1, 1]),
                        error=ceiling_err,
                        ax=ax,
                        alpha=0,
                        shaded_kwargs=dict(color='gray', alpha=.5))
    if identity_line:
        lim = [min(xlim[0], ylim[0]), max(xlim[1], ylim[1])]
        if not xlim_given:
            xlim = lim
        if not ylim_given:
            ylim = lim
        ax.plot(lim, lim, linestyle='dashed', color='gray')
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)
    if not ax_given:
        savefig(fig,
                savename=Path(__file__).parent /
                (f"{benchmark1}__{benchmark2}" +
                 ('-best' if best_layer else '-layers')))
    return fig, ax, info
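
A usage sketch for compare (illustrative; the benchmark identifiers follow the naming used in the other examples, and the keyword combinations are assumptions rather than prescribed settings):

if __name__ == '__main__':
    # best-layer scores: language-modeling benchmark (x) vs. neural benchmark (y), with a ceiling band on the neural axis
    compare(benchmark1='wikitext-2', benchmark2='Blank2014fROI-encoding', plot_ceiling=True)
    # all layers of trained and untrained models, two neural benchmarks against each other
    _, _, info = compare(benchmark1='Pereira2018-encoding',
                         benchmark2='Fedorenko2016v3-encoding',
                         include_untrained=True,
                         best_layer=False,
                         identity_line=True)
    print(f"layerwise correlation: r={info['r']}, p={info['p']}")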