Esempio n. 1
0
def plt_sheriffs_counties_month_heatmap(
        save=False,
        file_name='plt_sheriffs_counties_month_heatmap.png',
        file_format='png'):
    """ Heatmap marking, for every county, the months in which it occurs in the sheriffs data.

    Rows are the distinct values of ``common.SOURCE_COL`` and columns are the distinct
    ``month_year`` values of the frame returned by ``df_sheriffs()``, both sorted. A cell is
    set to 1 (coloured) when at least one row exists for that county/month pair and stays
    0 (white) otherwise.

    :param save: forwarded to ``save_or_show``
    :param file_name: forwarded to ``save_or_show``
    :param file_format: forwarded to ``save_or_show``
    """
    sheriffs = df_sheriffs()

    # sorted, de-duplicated axis values
    month_axis = sorted(set(sheriffs.month_year.to_list()))
    county_axis = sorted(set(sheriffs[common.SOURCE_COL].to_list()))

    # start from an all-zero grid and flag every county/month pair that occurs
    presence = pd.DataFrame(np.zeros(shape=(len(county_axis), len(month_axis))),
                            columns=month_axis,
                            index=county_axis)
    for month, rows_in_month in sheriffs.groupby(sheriffs.month_year):
        for county, _ in rows_in_month.groupby(common.SOURCE_COL):
            presence.at[county, month] = 1

    # two-colour map: values in [0, 1) are white, values in [1, 3) are blue
    two_tone = mpl.colors.ListedColormap(['#ffffff', '#0099ff'])
    bins = mpl.colors.BoundaryNorm([0, 1, 3], two_tone.N)
    sns.heatmap(presence, cmap=two_tone, norm=bins, cbar=False)

    # axis labels and title
    set_labels_title('Month', 'Shires', 'Months that sheriffs appeared')

    save_or_show(save, file_name, file_format)
Esempio n. 2
0
def plot_radar(categories, values, title, colour, save=False, file_name='radar.png', file_format='png'):
    """ Provide a radar chart. This borrows heavily from https://python-graph-gallery.com/390-basic-radar-chart/

    :param categories: labels, one per spoke of the chart
    :param values: one value per category; the sequence passed in is left unmodified
    :param title: title drawn above the chart
    :param colour: colour used to fill the area enclosed by the values
    :param save: forwarded to ``save_or_show``
    :param file_name: forwarded to ``save_or_show``
    :param file_format: forwarded to ``save_or_show``
    """

    n = len(categories)

    # Close the polygon by repeating the first value. Work on a copy: an in-place
    # `values += ...` would grow the caller's list on every call (and would do
    # element-wise addition if an array were passed).
    values = list(values)
    values += values[:1]

    # one evenly spaced angle per category, then back to the start to close the shape
    angles = [x / float(n) * 2 * pi for x in range(n)]
    angles += angles[:1]

    # Initialise the spider plot
    ax = plt.subplot(111, polar=True)

    # Draw one axis per variable and add the category labels
    plt.xticks(angles[:-1], categories, fontname="Times New Roman", color='black', size=10)

    # Draw ylabels
    ax.set_rlabel_position(0)
    plt.yticks([10, 20, 30, 40], ["10", "20", "30", "40"], fontname="Times New Roman", color="black",
               size=ANNOTATION_FONT_SIZE)
    plt.ylim(0, 50)

    # Plot data
    ax.plot(angles, values, linewidth=2, linestyle='solid')

    # Fill area
    ax.fill(angles, values, colour, alpha=0.1)

    plt.title(title, fontname="Times New Roman", fontsize=TITLE_FONT_SIZE, fontweight='bold')

    save_or_show(save, file_name, file_format)
Esempio n. 3
0
def plt_sheriffs_counties_terms_no_arrears_heatmap(
        save=False,
        file_name='plt_sheriffs_counties_terms_no_arrears_heatmap.png',
        file_format='png'):
    """ Heatmap marking, for every county, the terms in which it occurs in the sheriffs data,
    ignoring rows whose details mention 'arrears'.

    Rows are the sorted distinct values of ``common.SOURCE_COL`` (after the arrears filter),
    columns are ``common.TERMS``. A cell is set to 1 (coloured) when at least one remaining
    row exists for that county/term pair.

    :param save: forwarded to ``save_or_show``
    :param file_name: forwarded to ``save_or_show``
    :param file_format: forwarded to ``save_or_show``
    """
    # just get the sheriffs
    df = df_sheriffs()

    # Drop the arrears rows. na=False treats rows with missing details as "does not
    # mention arrears" so the mask stays strictly boolean; without it a missing value
    # puts NaN into the mask and the negation / indexing fails.
    df = df[~df[common.DETAILS_COL].str.contains('arrears', na=False)]

    # counties
    counties = sorted(set(df[common.SOURCE_COL].to_list()))

    matrix = pd.DataFrame(np.zeros(shape=(len(counties), len(common.TERMS))),
                          columns=common.TERMS,
                          index=counties)
    # flag every county/term pair that has at least one row
    for term, term_group in df.groupby(common.TERM_COL):
        for area, _ in term_group.groupby(common.SOURCE_COL):
            matrix.at[area, term] = 1

    # two-colour map: values in [0, 1) are white, values in [1, 3) are coloured
    cmap = mpl.colors.ListedColormap(['#ffffff', '#c28cb8'])
    norm = mpl.colors.BoundaryNorm([0, 1, 3], cmap.N)
    sns.heatmap(matrix, cmap=cmap, norm=norm, cbar=False)

    # add labels etc.
    set_labels_title('Terms', 'Shires',
                     'Terms that sheriffs appeared (arrears removed)')

    save_or_show(save, file_name, file_format)
Esempio n. 4
0
def plt_trans_count_week_term_pc_term_total(save=True, title='Number of weekly transactions as a % of the term total',
                                            x_label="Week", y_label="Value of receipts as % of term total",
                                            is_long_title=False, file='receipts_weekly_term_pc.png',
                                            fig_size=PLOT_DIMENSIONS):
    """ Line plot, one line per term, of the weekly number of transactions expressed as a
        percentage of all transactions in that term.

        Transactions are counted as non-null entries of ``common.PENCE_COL`` in the frame
        returned by ``roll.roll_with_entities_df()`` after ``filter_out_nothing`` is applied. """

    # plot styling
    sns.set(style=SN_STYLE, font=FONT_NAME)

    # load the roll and drop the 'NOTHING' rows
    transactions = filter_out_nothing(roll.roll_with_entities_df())

    plt.figure(figsize=fig_size)

    # terms are taken in order of first appearance (sort=False)
    for term, term_rows in transactions.groupby(common.TERM_COL, sort=False):
        term_count = term_rows[common.PENCE_COL].count()
        weekly_counts = term_rows.groupby(common.WEEK_COL)[common.PENCE_COL].count()
        weekly_pc = weekly_counts / term_count * 100
        sns.lineplot(y=weekly_pc, x=weekly_pc.index, label=term, marker='o')

    # axis labels and title
    set_labels(x_label, y_label)
    title_text(title, is_long_title)

    # display the figure or write it to file
    save_or_show(save=save, plot_file_name=file)
Esempio n. 5
0
def plt_pc_by_terms(save=True, title='% of the total payments, by term', x_label="Terms", y_label='% of total payments',
                    is_long_title=True, file='terms_total_pc.png', fig_size=PLOT_DIMENSIONS):
    """ Bar graph of each term's 'Term total' as a percentage of the sum over all terms.

    Every bar is annotated with its percentage, formatted to one decimal place. """

    plt.figure(figsize=fig_size)

    # per-term totals from the overview and their share of the grand total
    term_totals = roll.terms_overview_df()['Term total']
    share_pc = term_totals / term_totals.sum() * 100

    ax = sns.barplot(x=share_pc.index, y=share_pc)

    # write the percentage above the centre of each bar
    for bar, pc in zip(ax.patches, share_pc):
        centre_x = bar.get_x() + bar.get_width() / 2
        ax.text(centre_x, bar.get_height(), format(pc, '.1f') + '%', ha="center",
                fontname=FONT_NAME, fontsize=ANNOTATION_FONT_SIZE, linespacing=2.0)
    plt.xticks(fontname=FONT_NAME)

    # axis labels and title
    set_labels(x_label, y_label)
    title_text(title, is_long_title)

    # display the figure or write it to file
    save_or_show(save=save, plot_file_name=file)
Esempio n. 6
0
def plt_payments_days_swarm(save=True, title='Payments per day', x_label="Day of the week", y_label="Payment",
                            is_long_title=False, file='payments_day_swarm.png', fig_size=PLOT_DIMENSIONS, log=True):
    """ Jittered strip plot of the payments made on each day of the week, coloured by term.

    When ``log`` is true the y axis starts at 1 and uses a logarithmic scale. """

    payments = roll.roll_with_entities_df()

    plt.figure(figsize=fig_size)

    # one column of points per weekday, in the order given by common.DAYS_OF_WEEK
    ax = sns.stripplot(data=payments,
                       x=common.DAY_COL,
                       y=common.PENCE_COL,
                       hue=common.TERM_COL,
                       order=common.DAYS_OF_WEEK,
                       jitter=0.3)
    if log:
        ax.set_ylim(bottom=1)
        ax.set_yscale('log')

    plt.legend(loc='lower right', shadow=True, fontsize='small')

    # axis labels and title
    set_labels(x_label, y_label)
    title_text(title, is_long_title)

    # display the figure or write it to file
    save_or_show(save=save, plot_file_name=file)
Esempio n. 7
0
def plt_business_by_day(save=True, title='Total no. of items of business occurring on each day',
                        x_label="Days of the week", y_label="No. of items of business", is_long_title=False,
                        file='business_by_day.png', fig_size=PLOT_DIMENSIONS, filter_nothing=True):
    """ Count plot of the number of roll entries recorded on each day of the week.

    When ``filter_nothing`` is true the data is first passed through ``filter_out_nothing``. """

    entries = roll.roll_with_entities_df()

    # optionally drop the rows that have no receipts
    entries = filter_out_nothing(entries) if filter_nothing else entries

    plt.figure(figsize=fig_size)

    # plot styling
    sns.set(style=SN_STYLE, font=FONT_NAME)

    # one bar per weekday, in the order given by common.DAYS_OF_WEEK
    sns.countplot(data=entries, x=common.DAY_COL, order=common.DAYS_OF_WEEK)

    # axis labels and title
    set_labels(x_label, y_label)
    title_text(title, is_long_title)

    # display the figure or write it to file
    save_or_show(save=save, plot_file_name=file)
Esempio n. 8
0
def plt_total_payments_by_source(save=True, title='Total payments by source',
                                 x_label="Terms", y_label="Source", is_long_title=False,
                                 file='total_payments_source.png', fig_size=PLOT_DIMENSIONS):
    """ Bar plot of the payments (``common.PENCE_COL``) for each source (``common.SOURCE_COL``).

    The y axis ticks are placed at fixed pence values and labelled via ``money.pence_to_psd``. """

    # plot styling
    sns.set(style="darkgrid")

    plt.figure(figsize=fig_size)
    payments = roll.payments_overview_df()

    sns.barplot(x=common.SOURCE_COL, y=common.PENCE_COL, data=payments)

    # tick positions in pence (multiples of 240 pence, i.e. whole pounds)
    tick_positions = np.arange(0, 480000, 120000)

    # label the ticks in £ rather than pennies
    tick_text = [money.pence_to_psd(pence) for pence in np.nditer(tick_positions.T)]
    plt.yticks(tick_positions, tick_text)

    plt.xticks(rotation=90, fontname=FONT_NAME, fontsize=ANNOTATION_FONT_SIZE)

    # axis labels and title
    set_labels(x_label, y_label)
    title_text(title, is_long_title)

    # display the figure or write it to file
    save_or_show(save=save, plot_file_name=file)
Esempio n. 9
0
def plt_scatter_payments_year(save=True, title='Scatter plot of all payments', x_label="Date",
                              y_label="Payment", is_long_title=False, file='scatter_plot_all_payments.png',
                              fig_size=PLOT_DIMENSIONS):
    """ Scatter plot of every payment greater than zero against its date.

    A 'Date Time' column is added to the roll frame by applying ``to_date`` to each row;
    the y axis ticks are placed at fixed pence values and labelled via ``money.pence_to_psd``. """

    plt.figure(figsize=fig_size)

    # load the roll and derive a date for every row
    payments = roll.roll_with_entities_df()
    payments['Date Time'] = payments.apply(to_date, axis=1)

    # keep only the rows that actually carry a payment
    has_payment = payments[common.PENCE_COL] > 0
    paid = payments[has_payment]

    plt.scatter(paid['Date Time'].tolist(), paid['Pence'], s=2)

    # tick positions in pence (multiples of 240 pence, i.e. whole pounds)
    tick_positions = np.arange(0, 84000, 6000)

    # label the ticks in £ rather than pennies
    tick_text = [money.pence_to_psd(pence) for pence in np.nditer(tick_positions.T)]
    plt.yticks(tick_positions, tick_text)

    plt.xticks(rotation=90, fontname=FONT_NAME, fontsize=ANNOTATION_FONT_SIZE)

    # axis labels and title
    set_labels(x_label, y_label)
    title_text(title, is_long_title)

    # display the figure or write it to file
    save_or_show(save=save, plot_file_name=file)
Esempio n. 10
0
def plt_business_by_term(save=True, title='Total number of items of business, per term', x_label="Terms",
                         y_label="No. of items of business", is_long_title=False, file='business_by_term.png',
                         fig_size=PLOT_DIMENSIONS):
    """ Bar plot of the 'Total entries' value of each term, with the value written on top of its bar. """

    plt.figure(figsize=fig_size)

    # per-term entry counts from the overview
    overview = roll.terms_overview_df()
    entries_per_term = overview['Total entries']

    ax = sns.barplot(x=overview.index, y=entries_per_term)

    # write the count above the centre of each bar
    for bar, count in zip(ax.patches, entries_per_term):
        centre_x = bar.get_x() + bar.get_width() / 2
        ax.text(centre_x, bar.get_height(), count, ha="center",
                fontname=FONT_NAME, fontsize=ANNOTATION_FONT_SIZE, linespacing=2.0)

    plt.xticks(fontname=FONT_NAME)
    plt.yticks(fontname=FONT_NAME)

    # axis labels and title
    set_labels(x_label, y_label)
    title_text(title, is_long_title)

    # display the figure or write it to file
    save_or_show(save=save, plot_file_name=file)
Esempio n. 11
0
def plt_keyword_frequency(title='{} most frequent keywords',
                          x_label='Keyword',
                          y_label='Frequency',
                          limit=20,
                          save=False,
                          is_long_title=False,
                          file='plt_keyword_frequency.png',
                          fig_size=PLOT_DIMENSIONS):
    """ Display a bar plot that shows the most frequent keywords.

    Keywords are read from ``common.KEYWORDS_COL`` of ``roll.roll_with_entities_df()``;
    each cell is split on ';' and rows without keywords are skipped.

    :param title: title template; ``{}`` is replaced by ``limit``
    :param x_label: label for the x axis
    :param y_label: label for the y axis
    :param limit: number of most common keywords to plot
    :param save: forwarded to ``save_or_show``
    :param is_long_title: forwarded to ``title_text``
    :param file: forwarded to ``save_or_show`` as the plot file name
    :param fig_size: size of the figure the bars are drawn in
    """

    # set the style
    sns.set(style=SN_STYLE, font=FONT_NAME)

    title = title.format(limit)

    # get data
    df = roll.roll_with_entities_df()

    # filter out rows without keywords
    df_keywords = df[df[common.KEYWORDS_COL].notnull()]

    # flatten the semicolon-delimited cells into one list of keywords
    kw = []
    for cell in df_keywords[common.KEYWORDS_COL].to_list():
        kw.extend(cell.split(';'))

    kw_freq = FreqDist(kw)

    # most common keywords as a data frame indexed by keyword
    kw_df = pd.DataFrame(kw_freq.most_common(limit),
                         columns=['Word', 'Frequency']).set_index('Word')

    # NOTE(review): this resets the seaborn theme to its defaults, overriding the
    # style/font chosen at the top of the function. Kept as-is; confirm it is intended.
    sns.set()

    # Create the figure with the requested size and draw into it explicitly.
    # DataFrame.plot() without `ax` opens a figure of its own, which left the sized
    # figure empty and made `fig_size` have no effect on the plot.
    plt.figure(figsize=fig_size)
    kw_df.plot(kind='bar', legend=None, ax=plt.gca())
    plt.xticks(fontsize=10, fontname=FONT_NAME)
    plt.yticks(fontsize=10, fontname=FONT_NAME)

    # plot labels
    set_labels(x_label, y_label)

    # plot title
    title_text(title, is_long_title)

    # show or save the image to file
    save_or_show(save=save, plot_file_name=file)
Esempio n. 12
0
def plt_total_by_terms(save=True, title='Total payments, per term', x_label="Terms", y_label='Total payments',
                       is_long_title=True, file='terms_total.png', fig_size=PLOT_DIMENSIONS):
    """ Bar graph of the 'Term total' of each term.

    The y axis ticks are placed at fixed pence values and labelled via ``money.pence_to_psd``;
    each bar is annotated with its own total, formatted the same way. """

    plt.figure(figsize=fig_size)

    overview = roll.terms_overview_df()

    # tick positions in pence (multiples of 240 pence, i.e. whole pounds)
    tick_positions = np.arange(0, 600000, 120000)

    # label the ticks in £ rather than pennies
    tick_text = [money.pence_to_psd(pence) for pence in np.nditer(tick_positions.T)]
    plt.yticks(tick_positions, tick_text, fontname=FONT_NAME)

    term_totals = overview['Term total']
    ax = sns.barplot(x=overview.index, y=term_totals)

    plt.xticks(fontname=FONT_NAME)

    # write the £.s.d. amount above the centre of each bar
    for bar, pence in zip(ax.patches, term_totals):
        centre_x = bar.get_x() + bar.get_width() / 2
        ax.text(centre_x, bar.get_height(), money.pence_to_psd(pence), ha="center",
                fontname=FONT_NAME, fontsize=ANNOTATION_FONT_SIZE, linespacing=2.0)

    # axis labels and title
    set_labels(x_label, y_label)
    title_text(title, is_long_title)

    # display the figure or write it to file
    save_or_show(save=save, plot_file_name=file)
Esempio n. 13
0
def plt_source_term_pc_heat_map(save=True, title='Payments per term by source',
                                x_label="Total", y_label="Source", is_long_title=False,
                                file='total_payments_source_heatmap.png', fig_size=(10, 10)):
    """ Annotated heat map of the matrix returned by ``roll.source_term_payments_pc_matrix_df()``.

    Every cell shows its value to one decimal place; no colour bar is drawn. """

    matrix = roll.source_term_payments_pc_matrix_df()

    plt.figure(figsize=fig_size)
    plt.rcParams['font.family'] = FONT_NAME

    annotation_style = {'size': ANNOTATION_FONT_SIZE}
    with sns.axes_style('white'):
        heat = sns.heatmap(matrix, annot=True, fmt='.1f', annot_kws=annotation_style,
                           cmap='Oranges', cbar=False)
        # keep the row labels horizontal
        heat.set_yticklabels(heat.get_yticklabels(), rotation=0)

    # axis labels and title
    set_labels(x_label, y_label)
    title_text(title, is_long_title)

    # display the figure or write it to file
    save_or_show(save=save, plot_file_name=file)
Esempio n. 14
0
def plt_days_by_term(save=True, title='Number of days, per term', x_label="Terms", y_label="Days", is_long_title=False,
                     file='terms_days.png', fig_size=PLOT_DIMENSIONS):
    """ A grouped bar plot, per term, of 'Total Days', 'Days with payments' and 'Days with no payments'.

    :param save: forwarded to ``save_or_show``
    :param title: forwarded to ``title_text``
    :param x_label: label for the x axis
    :param y_label: label for the y axis
    :param is_long_title: forwarded to ``title_text``
    :param file: forwarded to ``save_or_show`` as the plot file name
    :param fig_size: size of the figure the bars are drawn in
    """

    # set a plot size
    plt.figure(figsize=fig_size)

    # get the totals
    terms_df = roll.terms_overview_df()

    # Plot into the figure created above. DataFrame.plot() without `ax` opens a figure
    # of its own, which left the sized figure empty and made `fig_size` have no effect.
    terms_df[['Total Days', 'Days with payments', 'Days with no payments']].plot(
        kind='bar', colormap='tab20b', ax=plt.gca())

    plt.xticks(fontname=FONT_NAME)
    plt.yticks(fontname=FONT_NAME)

    # plot labels
    set_labels(x_label, y_label)

    # plot title
    title_text(title, is_long_title)

    # show or save the image to file
    save_or_show(save=save, plot_file_name=file)