Ejemplo n.º 1
0
def plot_global_learning_slope(length, user_length, context_answer_limit, with_confidence, bootstrap_samples, balance):
    """Bar chart of the fitted learning-curve slope ('k') per experimental condition.

    When ``balance`` is True the balanced and unbalanced variants are drawn
    side by side (0.4-wide bars); otherwise only the unbalanced variant is
    shown with full-width bars.

    NOTE(review): ``with_confidence`` is unused here — the confidence
    interval is always drawn via ``yerr``. Also, labels are only set when
    ``balance`` is False while the legend is only shown when it is True;
    looks inverted — confirm intended behavior.
    """
    rcParams['figure.figsize'] = 7.5, 5
    data = global_learning_curve(length, user_length, context_answer_limit, bootstrap_samples)
    if not balance:
        # Drop the balanced variant so the groupby below yields one group.
        data = data[~data['balanced']]
    for i, (data_balanced, data) in enumerate(data[data['variable'] == 'slope'].groupby('balanced')):
        # The loop target rebinds `data`; safe because the groupby iterator
        # was already created from the original frame.
        data = data.sort_values(by='experiment_setup_name')
        plt.bar(
            numpy.arange(len(data)) + i * 0.4,
            data['value'], 0.4 if balance else 0.8,
            color=output.palette()[i],
            label=None if balance else ('balanced' if data_balanced else 'not balanced'),
            # Asymmetric error bars from the bootstrap confidence bounds.
            yerr=[data['value'] - data['confidence_min'], data['confidence_max'] - data['value']],
            error_kw={'ecolor': 'black'},
        )
        plt.xticks(
            numpy.arange(len(data)) + 0.4,
            data['experiment_setup_name']
        )
    if balance:
        plt.legend(frameon=True, loc=3)
    plt.xlabel('Condition')
    plt.ylabel('k')
    plt.gca().yaxis.grid(True)
    output.savefig('learning_slope')
Ejemplo n.º 2
0
def execute(bins=10, ylim=False):
    """Scatter predicted error rate against pagerank for filtered anatomy terms.

    Terms are kept only when the primary name (before the first ';') is
    longer than 5 characters and the identifier starts with 'A'; every
    point is annotated with its term name.

    NOTE(review): ``bins`` is only referenced by the commented-out pairplot
    exploration below — kept for interface compatibility.
    """
    data = pandas.merge(load_terms(), load_search_results().rename(columns={'identifier': 'term_id'}), on=['term_id'], how='inner')
    data = data[data['term_name'].apply(lambda x: len(x.split(';')[0]) > 5)]
    data = data[data['term_id'].apply(lambda x: x.startswith('A'))]
    data = pandas.merge(data, load_radiopaedia_terms(), on=['term_id', 'term_name'], how='inner')
    # load_radiopaedia_terms()
    # g = sns.pairplot(data, vars=['search_results_log', 'pagerank', 'difficulty_prob'])
    # for ax in g.axes.flat:
        # if ax.get_xlabel() in ['difficulty_prob', 'pagerank']:
            # ax.set_xlim(0, 1)
        # if ax.get_ylabel() in ['difficulty_prob', 'pagerank']:
            # ax.set_ylim(0, 1)
        # if min(ax.get_xticks()) < 0:
            # ax.set_xlim(0, max(ax.get_xticks()))
        # if min(ax.get_yticks()) < 0:
            # ax.set_ylim(0, max(ax.get_yticks()))
    # output.savefig('importance_pair', tight_layout=False)
    rcParams['figure.figsize'] = 30, 20
    for term_name, difficulty_prob, pagerank in data[['term_name', 'difficulty_prob', 'pagerank']].values:
        plt.plot(1 - difficulty_prob, pagerank, color='red', marker='s', markersize=10)
        plt.text(1 - difficulty_prob, pagerank, term_name)
    # Axis configuration is loop-invariant, so do it once after plotting
    # (previously re-executed for every point).
    if ylim:
        plt.ylim(0, 0.5)
    plt.xlabel('Predicted error rate')
    plt.ylabel('Pagerank')
    output.savefig('importance_pagerank')
Ejemplo n.º 3
0
def plot_metrics_correlation():
    """Heatmap of absolute correlations between engagement metrics, plus a pair grid of the survival metrics."""
    data = load_data_to_correlate().rename(columns={
        'quit_score': 'quit score',
        'survival_answers_10': 'survival (10 ans.)',
        'survival_answers_100': 'survival (100 ans.)',
        'survival_time_60': 'survival (1 min.)',
        'survival_time_600': 'survival (10 min.)',
        'learning_slope_5': 'learning (5)',
        'learning_slope_10': 'learning (10)',
        'learning_slope_20': 'learning (20)',
    })
    # Drop 'region_cz' contexts — presumably treated as outliers; confirm.
    data = data[~data['context'].apply(lambda c: 'region_cz' in c)]
    plt.title('Correlation of different metrics')
    # Absolute values: only correlation strength is of interest here.
    sns.heatmap(data.corr().abs(), annot=True, fmt='.2f')
    output.savefig('abexp_metric_corr')
    g = sns.PairGrid(
        data[[
            # 'quit score',
            'survival (100 ans.)',
            'survival (10 min.)',
            'survival (10 ans.)',
            'survival (1 min.)',
            # 'learning (10)',
            'experiment',
        ]], hue='experiment')
    g = g.map_diag(plt.hist)
    g = g.map_offdiag(plt.scatter)
    g = g.add_legend()
    output.savefig('abexp_metrics', tight_layout=False)
Ejemplo n.º 4
0
def plot_error_by_attempt(length, vertical=False, with_confidence=False):
    """Plot error rate by attempt, one subplot per zoom category, for two zoom dimensions.

    Produces one figure per zoom column (context difficulty, context size),
    each containing three subplots laid out vertically or horizontally.
    """
    if vertical:
        rcParams['figure.figsize'] = 7.5, 15
    else:
        rcParams['figure.figsize'] = 22.5, 5
    for zoom_column in ['context_difficulty_label', 'context_size_label']:
        data = load_error_by_attempt(zoom_column)
        data = data[data['attempt'] < length]
        for i, (zoom_column_value, zoom_data) in enumerate(data.groupby(zoom_column)):
            # Assumes exactly three categories per zoom column — TODO confirm.
            plt.subplot(3, 1, i + 1) if vertical else plt.subplot(1, 3, i + 1)
            for j, (setup, setup_data) in enumerate(zoom_data.groupby('experiment_setup_name')):
                plt.plot(setup_data['attempt'], setup_data['value'], label=setup, color=output.palette()[j])
                if with_confidence:
                    # Shaded confidence band around the curve.
                    plt.fill_between(
                        setup_data['attempt'],
                        setup_data['confidence_min'],
                        setup_data['confidence_max'],
                        color=output.palette()[j], alpha=0.35
                    )
            plt.title(zoom_column_value)
            plt.ylim(0, 70)
            # Only the outer axes get labels, to avoid repetition.
            if vertical or i == 0:
                plt.ylabel('Error rate')
            if not vertical or i == 2:
                plt.xlabel('Attempt (non reference)')
            if i == 1:
                plt.legend(loc=1)
        output.savefig('error_by_attempt_zoom_{}'.format(zoom_column))
Ejemplo n.º 5
0
def plot_number_of_options_by_attempt(length):
    """Heatmap of the distribution of the number of options per attempt, one panel per condition.

    Open questions (options == 0) are re-labelled as ``max_options + 1`` so
    they appear as the last column ('O') of each heatmap.
    """
    data = load_options_by_attempt()
    # Express proportions as percentages.
    data['value'] = data['value'].apply(lambda x: x * 100)
    data = data[(data['attempt'] < length)]
    # Largest real option count; 0 encodes open questions.
    # (.loc replaces the original chained indexing.)
    max_options = data.loc[data['options'] != 0, 'options'].max()
    data['options'] = data['options'].apply(lambda x: max_options + 1 if x == 0 else x)
    cols = len(data['experiment_setup_name'].unique())
    # The last panel is slightly wider to make room for the shared colorbar.
    gs = gridspec.GridSpec(1, cols, width_ratios=[3.5] * (cols - 1) + [4])
    rcParams['figure.figsize'] = cols * 2, int(5 * length / 50)
    rcParams['axes.linewidth'] = 1
    for j, (setup, setup_data) in enumerate(data.groupby('experiment_setup_name')):
        # Pad option counts absent from this condition with zero-valued rows
        # so every heatmap has the same columns. Collect the filler rows and
        # concatenate once: DataFrame.append was removed in pandas 2.0 and
        # the row-by-row append was quadratic anyway.
        present = set(setup_data['options'].unique())
        missing_rows = [
            {'attempt': attempt, 'options': opt, 'value': 0}
            for opt in range(2, max_options + 1)
            if opt not in present
            for attempt in range(0, int(length))
        ]
        if missing_rows:
            setup_data = pandas.concat([setup_data, pandas.DataFrame(missing_rows)], ignore_index=True)
        plt.subplot(gs[j])
        to_plot = setup_data.pivot_table(columns='options', index='attempt', values='value', dropna=False, fill_value=0)
        plt.title(setup)
        sns.heatmap(to_plot, annot=False, cbar=(j == cols - 1), linewidths=0.1, cbar_kws={'format': '%.0f%%'})
        # Relabel the synthetic `max_options + 1` column as 'O' (open).
        plt.xticks(plt.xticks()[0], [lab.get_text() if int(lab.get_text()) <= max_options else 'O' for lab in plt.xticks()[1]])
        if j != 0:
            plt.gca().axes.get_yaxis().set_ticks([])
            plt.ylabel('')
        else:
            # Show only the first and last attempt numbers (1-based).
            pos = plt.yticks()[0]
            lab = plt.yticks()[1]
            plt.yticks([pos[0], pos[-1]], [int(lab[0].get_text()) + 1, int(lab[-1].get_text()) + 1])
    output.savefig('options_by_attempt')
Ejemplo n.º 6
0
def plot_number_of_user_ratings_per_context():
    """Bar chart: number of users by how many ratings they gave per context."""
    ratings = load_ratings_with_contexts()
    # Ratings per (user, context, term type) ...
    per_user = ratings.groupby(['user', 'context_name', 'term_type']).apply(len)
    per_user = per_user.reset_index().rename(columns={0: 'num'})
    # ... then how many user/context pairs share each rating count.
    counts = per_user.groupby('num').apply(len).reset_index().rename(columns={0: 'count'})
    counts = counts.head(n=20)
    sns.barplot(x='num', y='count', data=counts, color=output.palette()[0])
    plt.ylabel('Number of users')
    plt.xlabel('Number of ratings per context')
    output.savefig('number_of_ratings')
Ejemplo n.º 7
0
def execute(answers=60, time=600, vertical=False, with_confidence=False):
    """Plot global survival curves (answer-based and time-based), zoomed by context size and difficulty."""
    for zoom_column in ['context_size_label', 'context_difficulty_label']:
        # Combine both survival definitions into one frame, then plot each
        # variable as its own figure.
        raw_data = load_survival_curve_answers(answers, zoom_column).append(load_survival_curve_time(time, zoom_column))
        for variable in ['survival_time', 'survival_answers']:
            data = raw_data[raw_data['variable'] == variable]
            plot_global_survival_curve(data[data['zoom_column'] == zoom_column], vertical=vertical, with_confidence=with_confidence)
            output.savefig('{}_zoom_{}'.format(variable, zoom_column))
def sobel(img):
    """Show Sobel gradients of `img` after Gaussian blurs of increasing sigma.

    One row per sigma: original image, blurred image, gradient image.
    Saves the figure to 'sobel.png'.
    """
    # Change datatype to float, to match `Sobel` method.
    lenna = skimage.img_as_float(img)

    sigmas = [1, 2, 4, 8]

    fig, axs = plt.subplots(len(sigmas), 3, tight_layout=True)

    # (fixed) removed unused local `prev_img_gradient`.
    for i, sigma in enumerate(sigmas):
        # Kernel size (0, 0) lets OpenCV derive the kernel from sigma.
        lenna_blurred = cv2.GaussianBlur(
            lenna,
            (0, 0),
            sigmaX=sigma,
            sigmaY=sigma
        )
        lenna_gradients = get_sobel(lenna_blurred)

        axs[i, 0].imshow(lenna, cmap='gray')
        axs[i, 0].set_title("Original")

        axs[i, 1].imshow(lenna_blurred, cmap='gray')
        axs[i, 1].set_title("Blur - sigma: %s" % (sigma))

        axs[i, 2].imshow(lenna_gradients, cmap='gray')
        axs[i, 2].set_title("Gradient - default kernel")

    savefig(fig, "sobel.png")
Ejemplo n.º 9
0
def laplace(img):
    """Show the Laplacian of `img`, raw and after Gaussian blurs of increasing sigma.

    First row: original image and its Laplacian; one further row per sigma.
    Saves the figure to 'laplace.png'.
    """
    lenna = skimage.img_as_float(img)
    ksize = 5  # for the internal Sobel operative.
    sigmas = [1, 2, 4, 8]

    fig, axs = plt.subplots(len(sigmas) + 1, 2, tight_layout=True)

    # Unblurred baseline; ddepth=-1 keeps the input depth for the output.
    lenna_laplace = cv2.Laplacian(lenna, ksize=ksize, ddepth=-1)

    axs[0, 0].imshow(lenna, cmap='gray')
    axs[0, 0].set_title("Original")

    axs[0, 1].imshow(lenna_laplace, cmap='gray')
    axs[0, 1].set_title("Laplace")

    for i, sigma in enumerate(sigmas):
        # Kernel size (0, 0) lets OpenCV derive the kernel from sigma.
        lenna_blurred = cv2.GaussianBlur(lenna, (0, 0),
                                         sigmaX=sigma,
                                         sigmaY=sigma)
        lenna_laplace = cv2.Laplacian(lenna_blurred, ksize=ksize, ddepth=-1)

        axs[i + 1, 0].imshow(lenna_blurred, cmap='gray')
        axs[i + 1, 0].set_title("Blur - sigma: %s" % (sigma))

        axs[i + 1, 1].imshow(lenna_laplace, cmap='gray')
        axs[i + 1, 1].set_title("Laplace")

    savefig(fig, "laplace.png")
Ejemplo n.º 10
0
def execute(title=None):
    """Print basic answer statistics and save the experiment summary plot."""
    answers = load_answers()
    print('Number of answers:', len(answers))
    print('Running from:', answers['time'].min())
    print('Running to:', answers['time'].max())
    plot_summary(title=title)
    output.savefig('abexp_summary', tight_layout=False)
Ejemplo n.º 11
0
def execute():
    """Plot survival per chapter part, keeping only parts with more than 10000 answers."""
    data = chapter_part_survival(100)
    data = data[data['size'] > 10000]
    g = sns.FacetGrid(data, col="chapter_part", col_wrap=4, margin_titles=True, ylim=(0, 1))
    g.map(sns.pointplot, "i", "value", markers='')
    g.set(xticks=[1, 50, 100])
    output.savefig('filter')
Ejemplo n.º 12
0
def plot_answer_frequency_all(wrong_only=True, contexts=20, show_names=False, normalize=True, top=5):
    """Stacked bar chart of the most frequent answers per asked term, one subplot per context.

    wrong_only: keep only incorrect answers (asked term != answered term).
    contexts: number of the most-answered contexts to plot.
    normalize: rescale answer frequencies so they sum to one per asked term.
    top: number of most frequent answers stacked per term.

    NOTE(review): ``show_names`` is currently unused.
    """
    plot_cols = 4 if contexts >= 20 else 2
    plot_rows = math.ceil(contexts / plot_cols)
    context_answers = get_context_answers()['count'].to_dict()
    data_all = prepare_answer_frequency_all()
    # The `contexts` most answered contexts, by descending answer count.
    plot_contexts = sorted(data_all['group_name'].unique(), key=lambda c: -context_answers[c])[:contexts]
    data_all = data_all[data_all['group_name'].isin(plot_contexts)]
    if wrong_only:
        data_all = data_all[data_all['term_name_asked'] != data_all['term_name_answered']]
    if normalize:
        def _normalize(group):
            # Turn raw frequencies into proportions within each asked term.
            group['answer_frequency'] = group['answer_frequency'] / group['answer_frequency'].sum()
            return group
        data_all = data_all.groupby(['group_name', 'term_name_asked']).apply(_normalize)
    rcParams['figure.figsize'] = 7.5 * plot_cols, 5 * plot_rows
    for i, (group_name, data) in enumerate(data_all.groupby('group_name')):
        plt.subplot(plot_rows, plot_cols, i + 1)
        to_plot = defaultdict(list)
        for term, term_data in data.groupby('term_name_asked'):
            # Descending cumulative sums make the overlaid bars below render
            # as a stack.
            to_plot[term] = list(term_data['answer_frequency'].head(top).cumsum().sort_values(ascending=False, inplace=False))
        terms, terms_data = zip(*sorted(to_plot.items(), key=lambda x: x[1][-1], reverse=True))
        plt.title(group_name[:30])
        # NOTE: this inner `i` shadows the subplot index; harmless because
        # enumerate reassigns it on the next outer iteration.
        for i in range(top):
            sns.barplot(list(range(len(terms))), list(map(lambda x: ([0] * (top - len(x)) + x)[i], terms_data)), color=output.palette()[i])
        plt.xticks(plt.xticks()[0], terms, rotation=90)
    output.savefig(filename='answer_frequencies_all')
Ejemplo n.º 13
0
def plot_difficulty_zoom():
    """Survival curves for the easiest vs. most difficult contexts, marking where the two conditions swap."""

    def _swap(data):
        # Annotate the first attempt (after attempt 5) where the condition
        # that starts below ends up above the other one.
        if len(data['experiment_setup_name'].unique()) == 2:
            first = data[data['experiment_setup_name'] == data['experiment_setup_name'].unique()[0]].sort_values('attempt').set_index('attempt')
            second = data[data['experiment_setup_name'] == data['experiment_setup_name'].unique()[1]].sort_values('attempt').set_index('attempt')
            # `.loc` replaces the `.ix` indexer, which was removed from pandas.
            if first.loc[5, 'value'] > second.loc[10, 'value']:
                first, second = second, first
            found = first[(first['value'] > second['value'])].reset_index()[['attempt', 'value']]
            found = found[found['attempt'] > 5]
            if len(found) == 0:
                return
            found = found.values[0]
            # Mark the swap point: dot, vertical guide line, and label.
            plt.plot(found[0] + 1, 100 * found[1], '.', markersize=20, color='black')
            plt.axvline(x=found[0] + 1, ymin=0, ymax=found[1], linewidth=2, color='black')
            plt.text(found[0] + 2, 5 + 100 * found[1], 'swap')

    answers = load_survival_curve_answers_zoom(100, 'context_difficulty_label')
    rcParams['figure.figsize'] = 7.5, 8

    plt.subplot(211)
    plt.title('Top 25% easiest contexts')
    easy_data = answers[answers['zoom_column_value'] =='too easy']
    plot_survival_curve_zoom(easy_data, with_confidence=True, legend=True, colorshift=3)
    _swap(easy_data)
    plt.ylabel('Proportion of learners')

    plt.subplot(212)
    plt.title('Top 25% most difficult contexts')
    difficulty_data = answers[answers['zoom_column_value'] =='difficult']
    plot_survival_curve_zoom(difficulty_data, with_confidence=True, legend=False, colorshift=3)
    plt.ylabel('Proportion of learners')
    plt.xlabel('Attempts')

    output.savefig('abexp_survival_zoom_difficulty')
Ejemplo n.º 14
0
def plot_all_learning_curves(length, user_length, context_answer_limit, bootstrap_samples):
    """Plot fitted and raw learning curves per ratings group, then a slope summary bar chart.

    Produces three figures: 'learning_by_ratings_fit',
    'learning_by_ratings_raw' and 'learning_by_ratings_slope'.
    """
    data = learning_curve(length, user_length, context_answer_limit, bootstrap_samples)
    data.sort_values(by=['experiment_setup_name', 'ratings_group'], inplace=True)
    rcParams['figure.figsize'] = 14, 8
    for curve_type in ['fit', 'raw']:
        for i, (ratings_group, ratings_data) in enumerate(data[data['variable'] == curve_type].groupby('ratings_group')):
            plt.subplot(2, 2, i + 1)
            plt.title(ratings_group)
            plot_learning_curve(ratings_data, with_confidence=True)
            plt.ylim(0, 70)
        output.savefig('learning_by_ratings_{}'.format(curve_type))
    rcParams['figure.figsize'] = 7.5, 4
    for i, (setup_name, to_plot) in enumerate(data[data['variable'] == 'slope'].groupby('experiment_setup_name')):
        # (fixed) Removed a stray `plt.title(ratings_group)` here: it
        # referenced the leftover loop variable from the curve loops above
        # (NameError if those loops yielded no groups) and was overwritten
        # by the 'Learning rate' title below anyway.
        plt.bar(
            numpy.arange(len(to_plot)) + 0.4 * i,
            to_plot['value'], 0.4,
            label=setup_name,
            color=output.palette()[i],
            # Asymmetric error bars from the bootstrap confidence bounds.
            yerr=[to_plot['value'] - to_plot['confidence_min'], to_plot['confidence_max'] - to_plot['value']],
            error_kw={'ecolor': 'black'},
        )
        plt.xticks(
            numpy.arange(len(to_plot)) + 0.4,
            to_plot['ratings_group']
        )
    plt.ylim(0.2, plt.ylim()[1])
    # Keep only the outermost y tick labels.
    plt.yticks(
        numpy.linspace(min(plt.yticks()[0]), max(plt.yticks()[0]), 25),
        [plt.yticks()[0][0]] + [''] * 23 + [plt.yticks()[0][-1]]
    )
    plt.gca().yaxis.grid(True)
    plt.legend(loc=0, frameon=True, ncol=2, fontsize='x-small')
    plt.title('Learning rate')
    output.savefig('learning_by_ratings_slope')
Ejemplo n.º 15
0
def plot_global_learning_slope(length, zoom_column, user_length, with_confidence, bootstrap_samples, vertical):
    """Bar chart of learning-curve slopes per zoom-column category, one bar group per condition.

    NOTE(review): ``with_confidence`` and ``vertical`` are currently unused.
    """
    rcParams['figure.figsize'] = 15, 8
    data = global_learning_curve(length, zoom_column=zoom_column, user_length=user_length, bootstrap_samples=bootstrap_samples)
    for i, (experiment_setup_name, data) in enumerate(data[data['variable'] == 'slope'].groupby('experiment_setup_name')):
        # The loop target rebinds `data`; safe because the groupby iterator
        # was created from the original frame.
        data = data.sort_values(by=zoom_column)
        plt.bar(
            numpy.arange(len(data)) + i * 0.3,
            data['value'], 0.3,
            color=output.palette()[i],
            label=experiment_setup_name,
            # Asymmetric error bars from the bootstrap confidence bounds.
            yerr=[data['value'] - data['confidence_min'], data['confidence_max'] - data['value']],
            error_kw={'ecolor': 'black'},
        )
        plt.xticks(
            numpy.arange(len(data)) + 0.3,
            data[zoom_column]
        )
        # Keep only the extreme y tick labels.
        plt.yticks(
            numpy.linspace(min(plt.yticks()[0]), max(plt.yticks()[0]), 21),
            [plt.yticks()[0][0]] + [''] * 19 + [plt.yticks()[0][-1]]
        )
    plt.legend(frameon=True, loc=0)
    plt.xlabel(zoom_column)
    plt.ylabel('k')
    plt.gca().yaxis.grid(True)
    output.savefig('learning_slope_zoom_{}'.format(zoom_column))
Ejemplo n.º 16
0
def execute():
    """Plot the distribution of answers per item, sorted in descending order."""
    answer_counts = load_answers().groupby('item_asked').apply(len).to_dict()
    nums = sorted(answer_counts.values(), reverse=True)
    plt.plot(nums)
    plt.xlabel('Item (sorted according to the number of answers)')
    plt.ylabel('Number of answers')
    plt.title('Distribution of answers')
    output.savefig('answers_distribition')
Ejemplo n.º 17
0
def plot_judge_projection(projection):
    """Scatter each judge's mean projection point, labelled with a short acronym.

    The acronym is the first three letters of the first word of the name
    plus the initial of the second word; assumes every judge name contains
    at least two words — TODO confirm.
    """
    projection = projection[['judge', 'x', 'y']].groupby(
        ['judge']).mean().reset_index().sort_values(by='judge')
    projection['judge_acronym'] = projection['judge'].apply(
        lambda name: '{}{}'.format(name.split()[0][:3],
                                   name.split()[1][0]))
    rcParams['figure.figsize'] = 10, 10
    for judge, acronym, x, y in projection[[
            'judge', 'judge_acronym', 'x', 'y'
    ]].values:
        # Invisible (white) marker only carries the legend entry mapping
        # acronym -> full judge name.
        plt.scatter(x,
                    y,
                    label='{}: {}'.format(acronym, judge),
                    color='white',
                    linewidth=0)
        plt.scatter(x, y, color='black')
        plt.text(
            x,
            y,
            acronym,
            fontsize='small',
        )
    plt.legend(loc='center left',
               fontsize='xx-small',
               bbox_to_anchor=(0.98, 0.5))
    output.savefig('judge_projection')
Ejemplo n.º 18
0
def plot_all_learning(length, user_length, context_answer_limit, bootstrap_samples):
    """Bar chart of learning-curve slopes per context, four subplots per row."""
    data = learning_curve(length, user_length, context_answer_limit, bootstrap_samples)
    data = data[data['variable'] == 'slope']
    data.sort_values(by='experiment_setup_name', inplace=True)
    rows = int(math.ceil(len(data['context'].unique()) / 4))
    rcParams['figure.figsize'] = 20, 4 * rows
    for i, (context, to_plot) in enumerate(data.groupby('context')):
        plt.subplot(rows, 4, i + 1)
        plt.title(context)
        plt.bar(
            numpy.arange(len(to_plot)) + 0.4,
            to_plot['value'], 0.8,
            color=output.palette()[0],
            # Asymmetric error bars from the bootstrap confidence bounds.
            yerr=[to_plot['value'] - to_plot['confidence_min'], to_plot['confidence_max'] - to_plot['value']],
            error_kw={'ecolor': 'black'},
        )
        plt.xticks(
            numpy.arange(len(to_plot)) + 0.8,
            to_plot['experiment_setup_name']
        )
        ylim = plt.ylim()
        # Two passes: first fix the tick positions, then blank out all but
        # the outermost labels.
        plt.yticks(numpy.linspace(ylim[0], ylim[1], 11), [ylim[0]] + [''] * 9 + [ylim[1]])
        plt.yticks(
            plt.yticks()[0],
            [plt.yticks()[0][0]] + [''] * (len(plt.yticks()[0]) - 2) + [plt.yticks()[0][-1]]
        )
        plt.gca().yaxis.grid(True)
    output.savefig('learning_by_context')
Ejemplo n.º 19
0
def save_first_image(I, name):
    """Save the first channel of `I` as a grayscale image '<name>_first_image.png'."""
    plt.imshow(I[:, :, 0], cmap="gray")
    axes = plt.gca()
    axes.set_axis_off()
    current_figure = plt.figure(1)
    savefig(current_figure, "%s_first_image.png" % name)
    remove_previous_plot_windows()
Ejemplo n.º 20
0
def plot_user_success_hist():
    """Histogram of per-user success rates; y tick labels are hidden."""
    rcParams['figure.figsize'] = 3.75, 2.5
    plt.gca().yaxis.set_ticks([])
    success_rates = list(get_user_success().values())
    plt.hist(success_rates)
    plt.xlabel('Success rate')
    plt.ylabel('Number of users')
    # Relabel x ticks as percentages.
    tick_positions = plt.xticks()[0]
    plt.xticks(tick_positions, ['{}%'.format(int(t * 100)) for t in tick_positions])
    output.savefig(filename='user_success_hist')
Ejemplo n.º 21
0
def plot_attempts_vs_time():
    """Stack the answer-based and time-based survival curves in one figure."""
    rcParams['figure.figsize'] = 7.5, 8

    plt.subplot(211)
    plot_survival_curve_answers_orig(100, True, legend=True)

    plt.subplot(212)
    plot_survival_curve_time_orig(600, True)
    output.savefig('abexp_survival_attempt_vs_time')
Ejemplo n.º 22
0
def save_albedo(albedo_for_display, name):
    """Save the albedo image as grayscale to '<name>_albedo.png'."""
    plt.imshow(albedo_for_display, cmap="gray")
    axes = plt.gca()
    axes.set_axis_off()
    current_figure = plt.figure(1)
    savefig(current_figure, "%s_albedo.png" % name)
    remove_previous_plot_windows()
Ejemplo n.º 23
0
def execute():
    """Histogram of difficulty predictions for each of the 20 largest contexts."""
    flashcards = load_flashcards()
    # The 20 contexts with the most flashcards, by descending count.
    contexts = list(list(zip(*sorted(flashcards.groupby('context_id').apply(len).to_dict().items(), key=lambda x: - x[1])))[0])[:20]
    flashcards = flashcards[flashcards['context_id'].isin(contexts)]
    g = sns.FacetGrid(flashcards, col="context_name", col_wrap=2, aspect=2)
    g.map(plt.hist, 'difficulty_prob', bins=numpy.linspace(0, 1, 11)).set_titles('{col_name}').set_xlabels('Prediction')
    for ax in g.axes.flat:
        # Truncate overly long context names in subplot titles.
        if len(ax.get_title()) > 40:
            ax.set_title('{} ...'.format(ax.get_title()[:40]))
    output.savefig('difficulty_hist_per_context')
Ejemplo n.º 24
0
def plot_average_difficulty_by_attempt(n=12, length=10):
    """Plot mean rating value by attempt order, per context, split by school usage."""
    rcParams['figure.figsize'] = 15, 10
    data = load_data(n, length)
    # Mean value per (context, order, school) triple.
    data = data.groupby(['context', 'order', 'school']).apply(lambda g: g['value'].mean()).reset_index().rename(columns={0: 'value'})
    g = sns.FacetGrid(data, col="context", col_wrap=4, hue='school', aspect=1.5)
    bp = g.map(plt.plot, 'order', 'value').set_titles('{col_name}')
    for ax in bp.axes:
        ax.yaxis.grid(True)
    g.add_legend()

    output.savefig('average_ratings_by_attempt', tight_layout=False)
Ejemplo n.º 25
0
def execute(n=10):
    """Horizontal bar chart of the top `n` contexts by number of answers."""
    context_answers = load_answers().groupby('context_name_asked').apply(len)
    context_answers.sort_values(ascending=False, inplace=True)
    context_answers = context_answers.reset_index().rename(columns={0: 'answers'}).head(n=n)
    # Reversed tick positions so the largest context appears at the top.
    ticks = numpy.array(range(len(context_answers)))[::-1]
    plt.barh(ticks, context_answers['answers'])
    plt.yticks(ticks + 0.4, context_answers['context_name_asked'])
    # Show only 0 and the maximum on the x axis.
    plt.xticks([0, max(plt.xticks()[0])], [0, int(max(plt.xticks()[0]))])
    plt.xlabel('Number of answers')
    plt.title('Top {} contexts'.format(n))
    output.savefig('answers_per_context')
Ejemplo n.º 26
0
def plot_distractors(number_of_distractors=10):
    """Bar chart of average usage of the top-N most competitive distractors per condition.

    number_of_distractors: how many of the top-ranked distractors to show.
    """
    rcParams['figure.figsize'] = 7.5, 4
    data = load_distractors_usage().sort_values(by=['confusing_rank'])
    # (fixed) Filter by the parameter instead of a hard-coded 10, so the
    # plotted data always matches the axis label.
    data = data[data['confusing_rank'] < number_of_distractors]
    data['value'] *= 100  # proportions -> percentages
    data['confusing_rank'] += 1  # 1-based ranks for display
    sns.barplot(x='confusing_rank', y='value', hue='Condition', data=data.rename(columns={'experiment_setup_name': 'Condition'}), ci=None)
    plt.ylabel('Average usage (%)')
    plt.legend(title=None)
    plt.xlabel('Top {} most competitive distractors'.format(number_of_distractors))
    output.savefig('distractors_usage')
Ejemplo n.º 27
0
def plot_projection(projection):
    """Scatter the projection per judge; axes clipped to the 1st-99th percentile range."""
    projection = projection.sort_values(by='judge')
    x_bounds = (projection['x'].quantile(0.01), projection['x'].quantile(0.99))
    y_bounds = (projection['y'].quantile(0.01), projection['y'].quantile(0.99))
    grid = sns.FacetGrid(projection,
                         col='judge',
                         col_wrap=5,
                         ylim=y_bounds,
                         xlim=x_bounds)
    grid.map(plt.scatter, 'x', 'y', alpha=0.5).set_titles('{col_name}')
    output.savefig('projection')
Ejemplo n.º 28
0
def plot_grid_search(model_name, model_params, grid_search_params, plot_params):
    """Heatmap of grid-search results over exactly two hyper-parameters.

    Falls back to ``grid_search_params`` when no ``plot_params`` are given;
    refuses to plot when the number of plot parameters is not 2.
    """
    grid_search_result = grid_search(model_name, model_params, grid_search_params)
    print(grid_search_result)
    if len(plot_params) == 0:
        plot_params = grid_search_params
    if len(plot_params) != 2:
        msg.error("Can't plot grid search result, because there are {} parameters to plot (2 required).".format(len(plot_params)))
        return
    to_plot = grid_search_result.pivot(*plot_params)['metric']
    # Descending index so the heatmap's y axis increases upwards.
    to_plot.sort_index(ascending=False, inplace=True)
    sns.heatmap(to_plot)
    plt.title(model_name)
    output.savefig('grid_search')
Ejemplo n.º 29
0
def plot_misanswers(term_name, n):
    """Plot the confusing factor of terms most often confused with `term_name`.

    n: show only the top-n most confused terms (None shows all).
    """
    # Terms sorted by descending confusing factor.
    term_names, confusing_factor = list(zip(*sorted(load_confusing_factor(term_name).items(), reverse=True, key=lambda x: x[1])))
    if n is not None:
        term_names = term_names[:n]
        confusing_factor = confusing_factor[:n]
    plt.plot(confusing_factor, linewidth=2)
    # Shade the area under the curve.
    plt.fill_between(list(range(len(confusing_factor))), [0] * len(confusing_factor), confusing_factor, alpha=0.2)
    plt.xlim(0, len(confusing_factor) - 1)
    plt.ylabel('Misanswers (%)')
    plt.title('{}'.format(term_name))
    plt.xticks(list(range(len(confusing_factor))), term_names, rotation=90)
    plt.tight_layout()
    output.savefig('misanswers')
Ejemplo n.º 30
0
def execute(n=100, bins=10):
    """Scatter search-result counts against predicted difficulty; also dumps the merged data to CSV.

    NOTE(review): ``search_results_bin`` is computed but never used by the
    plot below — presumably a leftover from a binned variant of this chart.
    """
    data = pandas.merge(load_terms(), load_search_results(n=n), on=['identifier'], how='inner')
    print(data['answers'])
    data.to_csv('./anatomy-terms.csv', index=False)
    # Percentile-based binning of search-result counts into `bins` buckets.
    data['search_results_bin'] = pandas.to_numeric(pandas.cut(
        data['search_results'],
        bins=numpy.percentile(data['search_results'], numpy.linspace(0, 100, bins + 1)),
        labels=list(range(1, bins + 1))
    ))
    data.plot(kind='scatter', x='search_results', y='difficulty_prob')
    plt.ylim(0, 1)
    # plt.xlim(0, bins + 1)
    output.savefig('importance')
Ejemplo n.º 31
0
def plot_ratings(user_limit, with_confidence):
    """Plot ratings vs. success rate: overall figure, then a split by in-school / out-of-school usage."""
    plot_ratings_per_success(user_limit, with_confidence=with_confidence)
    output.savefig('ratings_per_success')
    rcParams['figure.figsize'] = 15, 5
    plt.subplot(121)
    plt.title('Out-of-school users')
    plot_ratings_per_success(user_limit, in_school=False, with_confidence=with_confidence)
    plt.ylabel('Ratings (%)')
    plt.xlim(50, 100)
    plt.subplot(122)
    plt.title('In-school users')
    plot_ratings_per_success(user_limit, in_school=True, with_confidence=with_confidence)
    plt.xlim(50, 100)
    output.savefig('ratings_per_success_school_usage')
Ejemplo n.º 32
0
def brier_graphs(model_name):
    """Plot the model's calibration (reliability) curve together with a histogram of its predictions."""
    model = train(model_name)[0]
    brier = test_brier(model)
    plt.figure()
    plt.plot(brier['detail']['bin_prediction_means'], brier['detail']['bin_correct_means'])
    # Diagonal: perfect-calibration reference line.
    plt.plot((0, 1), (0, 1))

    bin_count = brier['detail']['bin_count']
    counts = np.array(brier['detail']['bin_counts'])
    bins = (np.arange(bin_count) + 0.5) / bin_count
    # Histogram of predictions, normalized to fit the [0, 1] plot range.
    plt.bar(bins, counts / max(counts), width=(0.5 / bin_count), alpha=0.5)
    plt.title(model.__class__.__name__)

    output.savefig('brier_detail')
Ejemplo n.º 33
0
def plot_comparison(a, b):
    """Scatter metric `a` against metric `b` per (condition, context); 'region_cz' contexts highlighted in red."""
    data = load_data_to_correlate()
    print(data[['experiment_setup_name', 'context', 'quit_score']].sort_values(by='quit_score', ascending=False))
    rcParams['figure.figsize'] = 15, 15
    plt.scatter(data[a], data[b], color=output.palette()[0])
    # Overlay the 'region_cz' contexts in red on top of the base scatter.
    region_cz_mask = data['context'].apply(lambda c: 'region_cz' in c)
    highlighted = data[region_cz_mask]
    plt.scatter(highlighted[a], highlighted[b], color='red')
    for setup_name, a_value, b_value, context in data[['experiment_setup_name', a, b, 'context']].values:
        plt.annotate('{}:{}'.format(setup_name, context), (a_value, b_value), fontsize=6)
    plt.xlabel(a)
    plt.ylabel(b)
    # Correlation computed without the highlighted contexts.
    print(data[~region_cz_mask][['experiment_setup_name', a, b, 'context']].corr())
    output.savefig('abexp_compare_{}_{}'.format(a, b))
Ejemplo n.º 34
0
def plot_global_learning_curve(length, zoom_column, user_length, with_confidence, bootstrap_samples, vertical):
    """Plot fitted learning curves, one subplot per zoom-column category."""
    if vertical:
        rcParams['figure.figsize'] = 7.5, 15
    else:
        rcParams['figure.figsize'] = 22.5, 5
    data = global_learning_curve(length, zoom_column=zoom_column, user_length=user_length, bootstrap_samples=bootstrap_samples)
    for i, (zoom_column_key, data) in enumerate(data.groupby(zoom_column)):
        # Assumes exactly three categories in the zoom column — TODO confirm.
        plt.subplot(3, 1, i + 1) if vertical else plt.subplot(1, 3, i + 1)
        plt.title(zoom_column_key)
        plot_learning_curve(data[(data['variable'] == 'fit')], with_confidence=with_confidence)
        # Only the first (or every vertical) subplot gets the y label.
        if vertical or i == 0:
            plt.ylabel('Error rate')
        # plt.ylim(0, 60)
    output.savefig('learning_curve_zoom_{}'.format(zoom_column))
Ejemplo n.º 35
0
def plot_quit_score_summary():
    """Plot quit-score panels for the three A/B experiments side by side."""
    rcParams['figure.figsize'] = 9, 6
    LABELS = {
        'slepemapy-ab-random-random': 'Adaptive vs. Random',
        'slepemapy-ab-target-difficulty': 'Question Difficulty',
        'slepemapy-ab-max-options-count': 'Number of Options',
    }
    for i, data_dir in enumerate(['slepemapy-ab-random-random', 'slepemapy-ab-target-difficulty', 'slepemapy-ab-max-options-count']):
        # Point the execution context at each experiment's data directory.
        execution_context().add_global_kwargs(data_dir=os.path.join(main.BASE_DIR, data_dir))
        ax = plt.subplot(1, 3, i + 1)
        plot_quit_score()
        # Only the first panel keeps its y-axis label.
        if i != 0:
            ax.get_yaxis().get_label().set_visible(False)
        plt.title(LABELS[data_dir])
    output.savefig('abexp_quit_score')
Ejemplo n.º 36
0
def execute(group_name, factor=0.01):
    """Compare A-A learning curves with and without attrition bias and balancing.

    group_name: the group to analyse.
    factor: bias factor injected into the A-A experiment; the value 2 below
        is used as the 'pure' (unbiased) baseline — TODO confirm semantics.
    """
    data_biased = A_A_learning_curve(group_name, factor, 10, user_length=None, context_answer_limit=100)
    data_pure = A_A_learning_curve(group_name, 2, 10, user_length=None, context_answer_limit=100)
    plt.gcf().set_size_inches(15, 10)
    plt.subplot(221)
    plt.title('Fitted learning curve')
    plot_learning_curve(data_biased[(data_biased['variable'] == 'fit') & ~data_biased['balanced']], with_confidence=True)
    plt.subplot(222)
    plt.title('Fitted learning curve with balancing')
    plot_learning_curve(data_biased[(data_biased['variable'] == 'fit') & data_biased['balanced']], with_confidence=True)
    plt.subplot(223)
    plt.title('Fitted learning curve\n(pure A-A experiment)')
    plot_learning_curve(data_pure[(data_pure['variable'] == 'fit') & ~data_pure['balanced']], with_confidence=True)
    plt.subplot(224)
    plt.title('Attrition bias')
    plot_attrition_bias(A_A_attrition_bias(group_name, factor, False), with_confidence=True)
    output.savefig('attrition_bias_fix')
Ejemplo n.º 37
0
def main():
    """Run the stereo disparity pipeline for every dataset.

    For each image pair: build Gaussian pyramids, match patches in both
    directions at several patch sizes, keep only two-way consistent
    matches, and save the intermediate and final disparity maps under
    out/.
    """
    if not os.path.exists("out"):
        os.mkdir("out")
    # (fixed) The notes below were a stray triple-quoted string — a no-op
    # expression, not a docstring — now converted to comments:
    # Results and Load Images; load images using OpenCV; the true disparity
    # is kept for comparison.
    for image_left, image_right, true_disp, name in get_datasets():
        print("Handling %s..." % name)
        # Create Pyramids
        left_pyramid = construct_pyramid_cv2(image_left, 3)
        right_pyramid = construct_pyramid_cv2(image_right, 3)

        savefig(create_figure_pyramid(left_pyramid),
                "%s_left_pyramid.png" % name)
        savefig(create_figure_pyramid(right_pyramid),
                "%s_right_pyramid.png" % name)
        save_true_disp(true_disp, name)

        for patch_size in PATCH_SIZES:
            print("Patch size = %s" % patch_size)

            # Find matches using the pyramid, in both directions.
            search_window_size = 10
            left_to_right_matches = pyramid_matching(left_pyramid,
                                                     right_pyramid, patch_size,
                                                     search_window_size)
            right_to_left_matches = pyramid_matching(right_pyramid,
                                                     left_pyramid, patch_size,
                                                     search_window_size)

            ltr_disparity = generate_disparity_map(left_to_right_matches,
                                                   patch_size)

            # Only left-to-right is plotted. This step does NOT remove bad matches.
            save_ltr_initial(ltr_disparity, name, patch_size)

            # Keep only matches that agree in both directions.
            two_way_matches = check_for_two_way_matches(
                left_to_right_matches, right_to_left_matches)
            disparity = generate_disparity_map(two_way_matches, patch_size)

            # Smooth the two-way disparity with a local mean.
            half_patch = math.floor(patch_size / 2)
            mean_disparity_map = local_mean_disparity_value(
                disparity, patch_size, 25, half_patch)
            save_disparity_final(mean_disparity_map, name, patch_size)
Ejemplo n.º 38
0
def generate_totalimg(img_left, img_right, kp_left, kp_right,
                      kp_left_idx_of_matches_in_kp_right,
                      kp_right_idx_of_matches_in_kp_left, filename):
    """Draw both images side by side and connect matched keypoints.

    Parameters
    ----------
    img_left, img_right : 2-D grayscale arrays.
    kp_left, kp_right : sequences of (y, x) keypoint coordinates.
    kp_left_idx_of_matches_in_kp_right : for each left keypoint, the index
        of its match in ``kp_right``, or ``None`` when unmatched.
    kp_right_idx_of_matches_in_kp_left : unused here; kept so the signature
        stays symmetric with the reverse-direction matcher.
    filename : output file name handed to ``savefig``.
    """
    # Horizontal gap (in pixels) between the two images in the montage.
    extra_size = 50

    # Canvas wide enough for both images plus the gap.
    # (The "+ 1" leaves one extra black column on the far right.)
    totalimg_y = img_left.shape[0]
    totalimg_x = img_left.shape[1] + extra_size + 1 + img_right.shape[1]
    totalimg = np.zeros((totalimg_y, totalimg_x), dtype=np.float32)

    totalimg[0:img_left.shape[0], 0:img_left.shape[1]] = img_left
    totalimg[0:img_right.shape[0],
             img_left.shape[1] + extra_size:img_left.shape[1] + extra_size +
             img_right.shape[1]] = img_right

    fig, ax = plt.subplots(frameon=False)
    ax.imshow(totalimg, cmap="gray")
    ax.set_axis_off()

    # One line per match: from the left keypoint to its partner, whose x
    # coordinate is shifted right by the left image width plus the gap.
    # Keypoints are stored as (y, x): index 0 is vertical, index 1 horizontal.
    for left_match_index, right_match_index in enumerate(
            kp_left_idx_of_matches_in_kp_right):
        if right_match_index is None:
            continue
        left_match_pt = kp_left[left_match_index]
        right_match_pt = kp_right[right_match_index]
        y1, x1 = left_match_pt[0], left_match_pt[1]
        y2, x2 = right_match_pt[0], right_match_pt[1]
        ax.plot([x1, x2 + extra_size + img_left.shape[1]], [y1, y2],
                linewidth=0.5)

    savefig(fig, filename)
def gaussian_filter(img):
    """Blur *img* with increasing Gaussian sigmas and show the SSIM diff.

    For each sigma one row of three panels is produced: the original image,
    the blurred image, and the structural-similarity difference map.  The
    resulting figure is written to ``gaussian.png`` via ``savefig``.
    """
    sigmas = [1, 2, 4, 8]

    fig, axs = plt.subplots(len(sigmas), 3, tight_layout=True)

    # Iterate the sigmas list itself; the original repeated the literal
    # [1, 2, 4, 8] here, which could silently drift out of sync.
    for i, sigma in enumerate(sigmas):
        # A kernel size of (0, 0) lets GaussianBlur derive the kernel size
        # from the provided sigma value.
        img_gaussian_blur = cv2.GaussianBlur(img, (0, 0),
                                             sigmaX=sigma,
                                             sigmaY=sigma)

        axs[i, 0].imshow(img, cmap='gray')
        axs[i, 0].set_title("Original")
        axs[i, 1].imshow(img_gaussian_blur, cmap='gray')
        axs[i, 1].set_title("Blur - sigma: %s" % sigma)

        # SSIM against the original; full=True also returns the per-pixel
        # difference image, which we display as the third panel.
        score, diff = compare_ssim(img, img_gaussian_blur, full=True)
        axs[i, 2].imshow(diff, cmap='gray')
        axs[i, 2].set_title("Similarity to original: %s%%" % (score * 100))

    savefig(fig, "gaussian.png")
def _plot_blob_overlays(images, blobs_list, colors, titles, filename):
    """Render each image with its detected blobs circled and save the figure."""
    fig, axes = plt.subplots(1, len(images), figsize=(15, 15),
                             sharex=True, sharey=True,
                             subplot_kw={'adjustable': 'box-forced'})
    ax = axes.ravel()  # flatten so panels can be indexed linearly

    for index, (image, blobs, color, title) in enumerate(
            zip(images, blobs_list, colors, titles)):
        ax[index].set_title(title)
        ax[index].imshow(image, interpolation='nearest', cmap='gray')
        for blob in blobs:
            # blob_log rows are (y, x, radius) after the sqrt(2) scaling.
            y, x, r = blob
            c = plt.Circle((x, y), r, color=color, linewidth=2, fill=False)
            ax[index].add_patch(c)
        ax[index].set_axis_off()

    savefig(fig, filename)


def main():
    """Ex. 1: Laplacian-of-Gaussian blob detection parameter exploration.

    First explores the ``threshold`` parameter on Img001.png, then the
    ``max_sigma``/``num_sigma`` pair, and finally applies the chosen
    parameters to Img002.png and Img009.png.  Figures are written via
    ``savefig`` and three keypoint sets are serialized with ``marshal``.
    """
    print("Running ex. 1...")
    # Explore the threshold parameter on a single image to fix its value.
    # (The original re-read this same file four times; once is enough.)
    image = io.imread('Img001.png')

    # blob_log returns one row per blob: (y, x, sigma).
    # http://scikit-image.org/docs/dev/api/skimage.feature.html
    blobs_log01 = blob_log(image, max_sigma=10, num_sigma=10, threshold=.1)
    blobs_log02 = blob_log(image, max_sigma=10, num_sigma=10, threshold=.2)
    blobs_log03 = blob_log(image, max_sigma=10, num_sigma=10, threshold=.3)

    # The blob radius is roughly sqrt(2) times the detection sigma, so
    # rescale the 3rd column in place for plotting.
    for blobs in (blobs_log01, blobs_log02, blobs_log03):
        blobs[:, 2] = blobs[:, 2] * sqrt(2)

    _plot_blob_overlays(
        [image] * 3,
        [blobs_log01, blobs_log02, blobs_log03],
        ['orange'] * 3,
        ['LoG_01 T:0.1', 'LoG_01 T:0.2', 'LoG_01 T:0.3'],
        "ex1-fig1.png")

    # Now vary max_sigma / num_sigma with the threshold fixed at 0.1.
    blobs_log01a = blob_log(image, max_sigma=20, num_sigma=10, threshold=.1)
    blobs_log01b = blob_log(image, max_sigma=10, num_sigma=10, threshold=.1)
    blobs_log01c = blob_log(image, max_sigma=5, num_sigma=5, threshold=.1)

    for blobs in (blobs_log01a, blobs_log01b, blobs_log01c):
        blobs[:, 2] = blobs[:, 2] * sqrt(2)

    _plot_blob_overlays(
        [image] * 3,
        [blobs_log01a, blobs_log01b, blobs_log01c],
        ['yellow'] * 3,
        ['LoG_01 Max_SD:20 Num_SD:10', 'LoG_01 Max_SD:10 Num_SD:10',
         'LoG_01 Max_SD:05 Num_SD:05'],
        "ex1-fig2.png")

    # Apply the chosen parameters (ms:10, ns:10, t:0.1) to two more images,
    # to compare with the middle panel of the previous figure.
    image2 = io.imread('Img002.png')
    image9 = io.imread('Img009.png')

    blobs_log2 = blob_log(image2, max_sigma=10, num_sigma=10, threshold=.1)
    blobs_log9 = blob_log(image9, max_sigma=10, num_sigma=10, threshold=.1)

    blobs_log2[:, 2] = blobs_log2[:, 2] * sqrt(2)
    blobs_log9[:, 2] = blobs_log9[:, 2] * sqrt(2)

    _plot_blob_overlays(
        [image2, image9],
        [blobs_log2, blobs_log9],
        ['dodgerblue', 'lime'],
        ['LoG_02 Max_SD:10 Num_SD:10', 'LoG_09 Max_SD:10 Num_SD:10'],
        "ex1-fig3.png")

    # Persist the keypoints.  ``with`` guarantees the handles are closed;
    # the original ``marshal.dump(..., open(...))`` one-liners leaked them.
    for blobs, path in ((blobs_log01b, "img1kp.bin"),
                        (blobs_log2, "img2kp.bin"),
                        (blobs_log9, "img9kp.bin")):
        with open(path, "wb") as fh:
            marshal.dump(blobs.tolist(), fh)
Ejemplo n.º 41
0
def _save_transition_heatmap(matrix, tick_labels, cmap, title, path,
                             vmax=None, tick_labelsize=None):
    """Render one 12x12 transition-matrix heatmap and save it to *path*."""
    plt.clf()
    plt.figure(figsize=(12, 12))
    sns.heatmap(matrix, cmap=cmap, vmax=vmax)
    sns.set(font_scale=1.4)
    # Center the tick labels on the heatmap cells.
    ticks = [i + .5 for i in range(len(tick_labels))]
    plt.xticks(ticks, tick_labels)
    plt.yticks(ticks, tick_labels)
    if tick_labelsize is not None:
        plt.tick_params(axis='both', which='major', labelsize=tick_labelsize)
    plt.title(title)
    plt.gcf().savefig(path, bbox_inches='tight')


def main(set1, set2, set1name, set2name, dstfolder):
    """Compare two sample sets on pitch- and rhythm-based metrics.

    For every metric exposed by ``core.metrics`` this computes:

    * absolute per-set statistics (mean / std) for the scalar metrics,
    * intra-set and inter-set distance distributions via exhaustive
      leave-one-out cross-validation,
    * KL divergence and overlap area between those distributions,

    and writes text reports, a summary table, KDE plots, transition-matrix
    heatmaps, and the raw feature arrays into ``dstfolder``.

    NOTE(review): ``num_samples`` is derived from ``set1`` and reused for
    ``set2``, so both sets are assumed to have the same length — verify at
    the call site.
    """
    num_samples = len(set1)

    # Per-sample feature buffers: scalar metrics are (N, 1), histograms
    # (N, 12), transition matrices (N, 12, 12).
    set1_eval = {
        # pitch related
        'total_used_pitch': np.zeros((num_samples, 1)),
        'total_pitch_class_histogram': np.zeros((num_samples, 12)),
        'pitch_range': np.zeros((num_samples, 1)),
        'avg_pitch_shift': np.zeros((num_samples, 1)),
        'pitch_class_transition_matrix': np.zeros((num_samples, 12, 12)),
        # rhythm
        'total_used_note': np.zeros((num_samples, 1)),
        'avg_IOI': np.zeros((num_samples, 1)),
        'note_length_hist': np.zeros((num_samples, 12)),
        'note_length_transition_matrix': np.zeros((num_samples, 12, 12)),
    }

    # Short acronyms used as row labels of the summary table.
    feat_acronyms = {
        'total_used_pitch': 'PC',
        'pitch_range': 'PR',
        'avg_pitch_shift': 'PI',
        'total_pitch_class_histogram': 'PCH',
        'pitch_class_transition_matrix': 'PCTM',
        # rhythm
        'total_used_note': 'NC',
        'avg_IOI': 'IOI',
        'note_length_hist': 'NLH',
        'note_length_transition_matrix': 'NLTM'
    }

    # Human-readable metric names used for plot titles.
    display_names_feats = {
        'total_used_pitch': "total used pitch (PC)",
        'total_pitch_class_histogram': "pitch class histogram (PCH)",
        'pitch_range': "pitch range (PR)",
        'avg_pitch_shift': "avg. pitch interval (PI)",
        'pitch_class_transition_matrix':
        'pitch class transition matrix (PCTM)',
        # rhythm
        'total_used_note': 'note count (NC)',
        'avg_IOI': 'avg. inter-onset interval (IOI)',
        'note_length_hist': 'note length histogram (NLH)',
        'note_length_transition_matrix': 'note length transition matrix (NLTM)'
    }

    metrics_list = list(set1_eval.keys())

    # Summary table skeleton: one row per acronym (the "avg." rows stay
    # '-' placeholders here), ten value columns filled in below.
    table = []
    for metric in [
            'PC', 'PR', 'PI', 'PCH', 'PCTM', 'pitch avg.', 'NC', 'IOI', 'NLH',
            'NLTM', 'rhythm avg.', 'overall avg.'
    ]:
        metric_row = [metric]
        metric_row.extend(['-'] * 10)
        table.append(metric_row)

    table = pd.DataFrame(
        np.array(table, dtype=object),
        columns=[
            "feat.",
            #
            "abs_mean1",  #
            "abs_sd1",  #
            "intra_set_mean1",  #
            "intra_set_sd1",  #
            #
            "abs_mean2",  #
            "abs_sd2",  #
            "intra_set_mean2",  #
            "intra_set_sd2",  #
            #
            "inter_set_KLD",
            "inter_set_OA"
        ]).set_index('feat.')

    # Extract every metric for every sample of the first set.
    for i in range(0, num_samples):
        feature = core.extract_feature(set1[i])

        for metric in metrics_list:
            set1_eval[metric][i] = getattr(core.metrics(), metric)(feature)

    # Same buffer layout for the second set.
    set2_eval = {
        # pitch related
        'total_used_pitch': np.zeros((num_samples, 1)),
        'total_pitch_class_histogram': np.zeros((num_samples, 12)),
        'pitch_range': np.zeros((num_samples, 1)),
        'avg_pitch_shift': np.zeros((num_samples, 1)),
        'pitch_class_transition_matrix': np.zeros((num_samples, 12, 12)),
        # rhythm
        'total_used_note': np.zeros((num_samples, 1)),
        'avg_IOI': np.zeros((num_samples, 1)),
        'note_length_hist': np.zeros((num_samples, 12)),
        'note_length_transition_matrix': np.zeros((num_samples, 12, 12)),
    }

    for i in range(0, num_samples):
        feature = core.extract_feature(set2[i])

        for metric in metrics_list:
            set2_eval[metric][i] = getattr(core.metrics(), metric)(feature)

    # --- Absolute measurement: per-set mean/std of the scalar metrics.
    # Histogram and transition-matrix metrics are skipped: a single
    # mean/std is not meaningful for them.
    absolute_measurement = ""
    for i in range(0, len(metrics_list)):
        if "transition" not in metrics_list[i] \
            and "hist" not in metrics_list[i]:
            absolute_measurement += metrics_list[i] + ':'
            absolute_measurement += "\n" + '------------------------\n'
            absolute_measurement += "\n" + set1name

            abs_mean1 = '%.3f' % np.nanmean(set1_eval[metrics_list[i]],
                                            axis=0)[0]
            abs_sd1 = '%.3f' % np.nanstd(set1_eval[metrics_list[i]], axis=0)[0]

            table.loc[feat_acronyms[metrics_list[i]], 'abs_mean1'] = abs_mean1
            table.loc[feat_acronyms[metrics_list[i]], 'abs_sd1'] = abs_sd1

            absolute_measurement += "\n" + \
                '  mean: %s' % abs_mean1
            absolute_measurement += "\n" + \
                '  std: %s' % abs_sd1

            absolute_measurement += "\n\n" + set2name

            abs_mean2 = '%.3f' % np.nanmean(set2_eval[metrics_list[i]],
                                            axis=0)[0]
            abs_sd2 = '%.3f' % np.nanstd(set2_eval[metrics_list[i]], axis=0)[0]

            absolute_measurement += "\n" + \
                '  mean: %s' % abs_mean2
            absolute_measurement += "\n" + \
                '  std: %s\n\n' % abs_sd2

            table.loc[feat_acronyms[metrics_list[i]], 'abs_mean2'] = abs_mean2
            table.loc[feat_acronyms[metrics_list[i]], 'abs_sd2'] = abs_sd2

    with open(os.path.join(dstfolder, '1absolute_measurement.txt'), 'w') as f:
        # It is one string, so write() — writelines() would iterate chars.
        f.write(absolute_measurement)

    # --- Relative measurement: exhaustive leave-one-out cross-validation
    # for the intra-set distances (held-out sample vs. rest of same set).
    loo = LeaveOneOut()
    loo.get_n_splits(np.arange(num_samples))
    set1_intra = np.zeros((num_samples, len(metrics_list), num_samples - 1))
    set2_intra = np.zeros((num_samples, len(metrics_list), num_samples - 1))
    for i in range(len(metrics_list)):
        for train_index, test_index in loo.split(np.arange(num_samples)):
            # NaN distances are patched with the row mean so downstream
            # statistics stay defined.
            distances = utils.c_dist(set1_eval[metrics_list[i]][test_index],
                                     set1_eval[metrics_list[i]][train_index])
            distances_mean = np.nanmean(distances)
            distances[np.where(np.isnan(distances))] = distances_mean
            set1_intra[test_index[0]][i] = distances
            del distances

            distances = utils.c_dist(set2_eval[metrics_list[i]][test_index],
                                     set2_eval[metrics_list[i]][train_index])
            distances_mean = np.nanmean(distances)
            distances[np.where(np.isnan(distances))] = distances_mean
            set2_intra[test_index[0]][i] = distances

    # Exhaustive cross-validation for the inter-set distances (held-out
    # set1 sample vs. the whole of set2).
    loo = LeaveOneOut()
    loo.get_n_splits(np.arange(num_samples))
    sets_inter = np.zeros((num_samples, len(metrics_list), num_samples))

    for i in range(len(metrics_list)):
        for train_index, test_index in loo.split(np.arange(num_samples)):
            distances = utils.c_dist(set1_eval[metrics_list[i]][test_index],
                                     set2_eval[metrics_list[i]])
            distances_mean = np.nanmean(distances)
            distances[np.where(np.isnan(distances))] = distances_mean
            sets_inter[test_index[0]][i] = distances

    # --- Visualization of intra-set and inter-set distance distributions.
    # Flatten to one row of distances per metric.
    plot_set1_intra = np.transpose(set1_intra,
                                   (1, 0, 2)).reshape(len(metrics_list), -1)
    plot_set2_intra = np.transpose(set2_intra,
                                   (1, 0, 2)).reshape(len(metrics_list), -1)
    plot_sets_inter = np.transpose(sets_inter,
                                   (1, 0, 2)).reshape(len(metrics_list), -1)
    for i in range(0, len(metrics_list)):
        # Replace remaining NaNs with the distribution mean so the KDE fit
        # is defined.
        for s in [plot_set1_intra[i], plot_set2_intra[i], plot_sets_inter[i]]:
            s[np.isnan(s)] = np.nanmean(s)

        sns.kdeplot(plot_set1_intra[i], label='intra %s' % set1name)
        sns.kdeplot(plot_sets_inter[i], label='inter')
        sns.kdeplot(plot_set2_intra[i], label='intra %s' % set2name)

        intra_set_mean1 = '%.3f' % np.nanmean(plot_set1_intra[i], axis=0)
        intra_set_sd1 = '%.3f' % np.nanstd(plot_set1_intra[i], axis=0)

        table.loc[feat_acronyms[metrics_list[i]],
                  'intra_set_mean1'] = intra_set_mean1
        table.loc[feat_acronyms[metrics_list[i]],
                  'intra_set_sd1'] = intra_set_sd1

        intra_set_mean2 = '%.3f' % np.nanmean(plot_set2_intra[i], axis=0)
        intra_set_sd2 = '%.3f' % np.nanstd(plot_set2_intra[i], axis=0)

        table.loc[feat_acronyms[metrics_list[i]],
                  'intra_set_mean2'] = intra_set_mean2
        table.loc[feat_acronyms[metrics_list[i]],
                  'intra_set_sd2'] = intra_set_sd2

        # KLD / overlap area between the two intra-set distributions.
        kl = '%.3f' % utils.kl_dist(plot_set1_intra[i], plot_set2_intra[i])
        oa = '%.3f' % utils.overlap_area(plot_set1_intra[i],
                                         plot_set2_intra[i])
        table.loc[feat_acronyms[metrics_list[i]], 'inter_set_KLD'] = kl
        table.loc[feat_acronyms[metrics_list[i]], 'inter_set_OA'] = oa

        plt.title(display_names_feats[metrics_list[i]])
        plt.xlabel('Euclidean distance')
        # BUG FIX: this was a second xlabel() call, which overwrote the
        # x-axis label and left the y-axis unlabeled.
        plt.ylabel('Density')
        output.savefig(plt.gcf(),
                       os.path.join(dstfolder, '3' + metrics_list[i] + '.png'))
        plt.clf()

    # --- Difference of intra-set and inter-set distance distributions.
    distance_text = ''
    for i in range(0, len(metrics_list)):
        print(metrics_list[i])
        distance_text += metrics_list[i] + ':\n'
        distance_text += '------------------------\n'
        distance_text += "\n" + set1name

        kl = '%.3f' % utils.kl_dist(plot_set1_intra[i], plot_sets_inter[i])
        oa = '%.3f' % utils.overlap_area(plot_set1_intra[i],
                                         plot_sets_inter[i])

        distance_text += "\n" + '  Kullback-Leibler divergence: %s' % kl
        distance_text += "\n" + '  Overlap area: %s' % oa

        distance_text += "\n" + set2name

        # Patch any NaNs that re-appeared before comparing set2's
        # intra-set distribution against the inter-set one.
        plot_set2_intra_i_mean = np.nanmean(plot_set2_intra[i])
        plot_set2_intra[i][np.where(np.isnan(
            plot_set2_intra[i]))] = plot_set2_intra_i_mean

        kl = '%.3f' % utils.kl_dist(plot_set2_intra[i], plot_sets_inter[i])
        oa = '%.3f' % utils.overlap_area(plot_set2_intra[i],
                                         plot_sets_inter[i])

        distance_text += "\n" + '  Kullback-Leibler divergence: %s' % kl
        distance_text += "\n" + '  Overlap area: %s\n\n' % oa

    with open(os.path.join(dstfolder, '4distance_text.txt'), 'w') as f:
        f.write(distance_text)

    # Persist the summary table.
    save_table(table, dstfolder)

    # --- Transition-matrix heatmaps (average over samples per set).
    mpl.rc('font', family='sans-serif', size=20)

    note_names = [
        "C", 'Db', "D", "Eb", "E", "F", "Gb", "G", "Ab", "A", "Bb", "B"
    ]

    for eval_dict, setname in ((set1_eval, set1name), (set2_eval, set2name)):
        _save_transition_heatmap(
            np.mean(eval_dict['pitch_class_transition_matrix'], axis=0),
            note_names, 'Blues',
            "Pitch transition matrix for %s samples" % setname,
            os.path.join(dstfolder, '5pitch_tm_%s.png' % setname))

    # Note lengths: whole..sixteenth, then dotted, then triplet variants.
    note_lens = [
        "$W$",
        "$H$",
        "$Q$",
        "$E$",
        "$S$",
        "$H .$",
        "$Q .$",
        "$E .$",
        "$S .$",
        "$H t$ ",
        "$Q t$",
        "$E t$",
    ]

    for eval_dict, setname in ((set1_eval, set1name), (set2_eval, set2name)):
        _save_transition_heatmap(
            np.mean(eval_dict['note_length_transition_matrix'], axis=0),
            note_lens, "Reds",
            "Note length transition matrix for %s samples" % setname,
            os.path.join(dstfolder, '5notelength_tm_%s.png' % setname),
            vmax=7, tick_labelsize=16)
    plt.clf()

    # Raw per-sample feature arrays for any later offline analysis.
    np.save(os.path.join(dstfolder, 'set1.npy'), set1_eval)
    np.save(os.path.join(dstfolder, 'set2.npy'), set2_eval)

    print('folder : %s' % os.path.abspath(dstfolder))
    return
Ejemplo n.º 42
0
def save_true_disp(true_disp, name):
    """Write the ground-truth disparity map to ``<name>_true_disp.png``."""
    figure, axis = plt.subplots()
    axis.set_axis_off()
    axis.imshow(true_disp, cmap="gray")
    savefig(figure, "%s_true_disp.png" % name)
Ejemplo n.º 43
0
def save_ltr_initial(ltr_disparity, name, patch):
    """Save the initial left-to-right disparity map for one patch size."""
    out_name = "%s_patch_%s_initial_disparity.png" % (name, patch)
    figure, axis = plt.subplots()
    axis.set_axis_off()
    axis.imshow(ltr_disparity, cmap="gray")
    savefig(figure, out_name)
Ejemplo n.º 44
0
def save_disparity_final(mean_disparity_map, name, patch):
    """Save the final (locally-averaged) disparity map for one patch size."""
    out_name = "%s_patch_%s_final_disparity.png" % (name, patch)
    figure, axis = plt.subplots()
    axis.set_axis_off()
    axis.imshow(mean_disparity_map, cmap="gray")
    savefig(figure, out_name)