예제 #1
0
def actualPred(y_true, y_pred):
    """Draw the actual and predicted series as two coloured lines.

    The x axis ("date") is simply the positional index ``0 .. n-1`` taken
    from ``y_true.shape[0]``; the finished plot object is printed.

    Parameters
    ----------
    y_true : Series or vector
        The ground truth
    y_pred : Series or vector
        Predicted values
    """
    wide = pd.DataFrame({
        "date": range(y_true.shape[0]),
        "Actual": y_true,
        "Prediction": y_pred,
    })
    # Long format: one row per (date, series) so the series name can drive
    # the line colour through the default 'variable' / 'value' columns.
    long_form = wide.melt(id_vars=['date'],
                          value_vars=['Actual', 'Prediction'])

    chart = ggplot(long_form, aes(x='date'))
    chart = chart + geom_line(aes(y='value', color='variable'))
    chart = chart + labs(x='date', y='Solar Output')
    chart = chart + plotnine.theme_538()
    chart = chart + plotnine.theme(figure_size=(10, 6))
    print(chart)
def plot_score(df, plot_fn):
    """Save a box plot of ``score`` per ``emotion_cat`` to ``plot_fn``.

    ``df`` must provide the ``emotion_cat`` and ``score`` columns; the plot
    background is filled with the module-level ``BG_COLOR``.
    """
    backdrop = p9.element_rect(fill=BG_COLOR, color=BG_COLOR, size=1)
    components = [
        p9.geom_boxplot(),
        p9.labs(x="Model", y="EMOTION FEEL Score"),
        p9.theme_538(),
        p9.theme(legend_position="top",
                 legend_direction="horizontal",
                 figure_size=(10, 5)),
        p9.theme(plot_background=backdrop),
    ]

    fig = p9.ggplot(df, p9.aes(x="emotion_cat", y="score"))
    # Components are applied in list order; later themes override earlier ones.
    for component in components:
        fig = fig + component
    fig.save(plot_fn)
예제 #3
0
    def scatterplot(cls, df):
        """Save faceted scatterplots of ``Mean`` with 95% confidence bars.

        Rows whose ``index`` is ``'Overall'`` or ``'No ROI'`` are dropped, the
        remaining rows are sorted by ``Mean`` within each
        (``config.table_cols``, ``config.table_rows``) group, and one PNG per
        ordering is written to ``Figures/Scatterplots``:

        * ``roi_ordered``  - y positions follow the order in which ``index``
          values first appear in the sorted frame.
        * ``stat_ordered`` - y positions are the running count within each
          group, so each facet is ordered individually.

        ``config.table_row_order`` restricts the output: ``'roi'`` keeps only
        ``roi_ordered``, ``'statorder'`` keeps only ``stat_ordered``; any
        other value produces both.

        Parameters
        ----------
        cls
            Unused here (the decorator is outside this view; presumably a
            classmethod).
        df : DataFrame
            Needs the columns ``index``, ``Mean``, ``Conf_Int_95`` and the
            columns named by ``config.table_cols`` / ``config.table_rows``.
        """
        Utils.check_and_make_dir("Figures/Scatterplots")
        df = df[(df['index'] != 'Overall') &
                (df['index'] != 'No ROI')]  # Remove No ROI and Overall rows

        df = df.groupby([config.table_cols, config.table_rows]).apply(
            lambda x: x.sort_values(['Mean']))  # Group by parameters and sort
        df = df.reset_index(drop=True)  # Reset index to remove grouping

        plot_kinds = ['roi_ordered', 'stat_ordered']
        if config.table_row_order == 'roi':
            # The entry is named 'stat_ordered'; removing the non-existent
            # 'stat' raised ValueError for this setting.
            plot_kinds.remove('stat_ordered')
        elif config.table_row_order == 'statorder':
            plot_kinds.remove('roi_ordered')

        for plot_kind in plot_kinds:
            if config.verbose:
                print(f"Saving {plot_kind} scatterplot!")

            if plot_kind == 'roi_ordered':
                # Order rows based on first facet
                roi_ord = pd.Categorical(df['index'],
                                         categories=df['index'].unique())
            else:
                # Order each facet individually.
                # NOTE(review): the grouping columns are hard-coded while the
                # facets use config.table_cols/table_rows - confirm they are
                # meant to be the same columns.
                roi_ord = pd.Categorical(
                    df.groupby(['MB', 'SENSE']).cumcount())

            figure_table = (
                pltn.ggplot(df, pltn.aes(x="Mean", y=roi_ord)) +
                pltn.geom_point(na_rm=True, size=1) +
                pltn.geom_errorbarh(
                    pltn.aes(xmin="Mean-Conf_Int_95", xmax="Mean+Conf_Int_95"),
                    na_rm=True,
                    height=None) +
                pltn.xlim(0, None) +
                pltn.scale_y_discrete(labels=[]) +  # Hide per-row tick labels
                pltn.ylab(config.table_y_label) +
                pltn.xlab(config.table_x_label) +
                pltn.facet_grid(f"{config.table_rows}~{config.table_cols}",
                                drop=True,
                                labeller="label_both") +
                pltn.theme_538() +  # Set theme
                pltn.theme(
                    panel_grid_major_y=pltn.themes.element_line(alpha=0),
                    panel_grid_major_x=pltn.themes.element_line(alpha=1),
                    panel_background=pltn.element_rect(fill="gray", alpha=0.1),
                    dpi=config.plot_dpi))

            figure_table.save(
                f"Figures/Scatterplots/{plot_kind}_scatterplot.png",
                height=config.plot_scale,
                width=config.plot_scale * 3,
                verbose=False,
                limitsize=False)
def plot_rank_full(df, plot_fn):
    """Save stacked bars of ``ratio`` per ``emotion_cat``, split by rank.

    Bars are filled by ``factor(rank)`` and the plot is wrapped into one
    panel per ``cluster_labels_6`` value. The result is written to
    ``plot_fn``; the background uses the module-level ``BG_COLOR``.
    """
    mapping = p9.aes(x="emotion_cat", y="ratio", fill="factor(rank)")
    backdrop = p9.element_rect(fill=BG_COLOR, color=BG_COLOR, size=1)
    slanted_ticks = p9.element_text(rotation=45, hjust=1)

    components = [
        p9.geom_bar(stat="identity"),
        p9.facet_wrap("cluster_labels_6"),
        p9.labs(x="Model", y="Proportion (%)", fill="Rank"),
        p9.theme_538(),
        p9.theme(legend_position="top",
                 legend_direction="horizontal",
                 figure_size=(10, 5)),
        p9.theme(plot_background=backdrop, axis_text_x=slanted_ticks),
    ]

    fig = p9.ggplot(df, mapping)
    # Components are applied in list order; later themes override earlier ones.
    for component in components:
        fig = fig + component
    fig.save(plot_fn)
예제 #5
0
def scatter(y_true, y_pred):
    """Plot actual vs predicted scatterplot

    Points are drawn in a fixed purple (``#9B59B6``) and the finished plot
    object is printed.

    Parameters
    ----------
    y_true : Series or vector
        The ground truth
    y_pred : Series or vector
        Predicted values
    """
    datum = pd.DataFrame({"Actual": y_true, "Prediction": y_pred})

    p = (
        ggplot(datum, aes(x='Actual', y='Prediction')) +
        # A constant colour is a geom parameter. Inside aes() the quoted
        # literal was mapped as a one-level category, which produced the
        # default palette colour plus a meaningless legend.
        geom_point(color="#9B59B6")
        + labs(x='Actual', y='Prediction') + plotnine.theme_538() +
        plotnine.theme(figure_size=(10, 6)))
    print(p)
예제 #6
0
def get_lag_corr(y_actual, y_pred, num_lags):
    """Compute and plot the Spearman correlation at successive lags.

    For every lag ``c`` in ``range(num_lags)`` the predictions are shifted
    by ``c`` positions and correlated with ``y_actual``; NaNs introduced by
    the shift are omitted. The coefficients are drawn as a line plot, which
    is printed. Nothing is returned.

    Parameters
    ----------
    y_actual : Series or vector
        The ground truth
    y_pred : Series or vector
        Predicted values
    num_lags : int
        Lag to consider - range (0, num_lags)
    """
    coefficients = [
        scipy.stats.spearmanr(pd.Series(y_pred).shift(lag),
                              y_actual,
                              nan_policy='omit')[0]
        for lag in range(num_lags)
    ]

    table = pd.DataFrame({
        "Lags": range(len(coefficients)),
        "Lag-Coefficient": coefficients,
    })

    chart = ggplot(table, aes(x='Lags'))
    chart = chart + geom_line(aes(y='Lag-Coefficient'))
    chart = chart + labs(x='Lag', y='Coefficient')
    chart = chart + plotnine.theme_538()
    chart = chart + plotnine.theme(figure_size=(10, 6))
    print(chart)
예제 #7
0
    def test_theme_538(self):
        """Check the fixture plot styled with ``theme_538``."""
        themed = self.g
        themed = themed + labs(title='Theme 538')
        themed = themed + theme_538()

        # NOTE(review): comparing a plot with a string presumably triggers a
        # baseline-image check supplied by the test harness - confirm.
        assert (themed + _theme) == 'theme_538'
예제 #8
0
    def histogram_make(roi, combined_raw_df, list_rois, config, xlimit,
                       save_function, find_xlim_function):
        """Build a faceted histogram of ``voxel_value`` for one ROI and save it.

        Parameters
        ----------
        roi : int
            Position of the ROI name inside ``list_rois``.
        combined_raw_df : DataFrame
            Data to plot. Needs a ``voxel_value`` column and the facet columns
            named in ``config``; ``stat_value`` and ``Statistic`` are also
            read when mean/median lines are enabled.
        list_rois : sequence of str
            ROI names; the selected name is handed to the save function.
        config
            Settings object supplying the ``histogram_*`` options,
            ``plot_dpi``, ``verbose``, ``use_same_axis_limits`` and
            ``colorblind_friendly_plot_colours``.
        xlimit : number
            Upper x-axis limit shared between figures. ``0`` means "no shared
            limit": the axis is left open-ended.
        save_function : callable
            Called as ``save_function(figure, name, config, folder,
            'histogram')``.
        find_xlim_function : callable
            Called as ``find_xlim_function(name, figure, 'xaxis')`` when a
            shared limit still has to be determined.

        Returns
        -------
        None when ``combined_raw_df`` is empty; otherwise the value returned by
        ``find_xlim_function``, or ``0`` if it was not called. With
        ``use_same_axis_limits == 'Same limits'`` and ``xlimit == 0`` the
        figure is not saved, only the limit is returned.
        """
        if combined_raw_df.empty:
            if config.verbose:
                print(
                    'INFO: Histograms cannot be made for the No ROI category.')
            return

        thisroi = list_rois[roi]

        figure = (
            pltn.ggplot(combined_raw_df, pltn.aes(x="voxel_value")) +
            pltn.theme_538() + pltn.geom_histogram(
                binwidth=config.histogram_binwidth,
                fill=config.histogram_fig_colour,
                boundary=0,
                na_rm=True
            )  # Boundary centers the bars, na_rm cancels error from setting an xlimit
            + pltn.facet_grid(
                f"{config.histogram_fig_y_facet}~{config.histogram_fig_x_facet}",
                drop=True,
                labeller="label_both") +
            pltn.labs(x=config.histogram_fig_label_x,
                      y=config.histogram_fig_label_y) +
            pltn.theme(
                panel_grid_minor_x=pltn.themes.element_line(alpha=0),
                panel_grid_major_x=pltn.themes.element_line(alpha=1),
                panel_grid_major_y=pltn.element_line(alpha=0),
                plot_background=pltn.element_rect(fill="white"),
                panel_background=pltn.element_rect(fill="gray", alpha=0.1),
                axis_title_x=pltn.element_text(
                    weight='bold', color='black', size=20),
                axis_title_y=pltn.element_text(
                    weight='bold', color='black', size=20),
                strip_text_x=pltn.element_text(
                    weight='bold', size=10, color='black'),
                strip_text_y=pltn.element_text(
                    weight='bold', size=10, color='black'),
                axis_text_x=pltn.element_text(size=10, color='black'),
                axis_text_y=pltn.element_text(size=10, color='black'),
                dpi=config.plot_dpi))

        # Display mean or median as vertical lines on plot
        if config.histogram_show_mean or config.histogram_show_median:
            figure += pltn.geom_vline(pltn.aes(xintercept="stat_value",
                                               color="Statistic"),
                                      size=config.histogram_stat_line_size)
            figure += pltn.scale_color_manual(values=[
                config.colorblind_friendly_plot_colours[3],
                config.colorblind_friendly_plot_colours[1]
            ])

        # Hide the legend for mean and median unless it was requested
        if not config.histogram_show_legend:
            figure += pltn.theme(legend_position='none')

        if xlimit:
            # Set x limit of figure (used to make it the same for every histogram)
            figure += pltn.xlim(-1, xlimit)
            thisroi += '_same_xlim'
        else:
            figure += pltn.xlim(-1, None)

        returned_xlim = 0
        if config.use_same_axis_limits in ('Same limits',
                                           'Create both') and xlimit == 0:
            returned_xlim = find_xlim_function(thisroi, figure, 'xaxis')

        if config.use_same_axis_limits == 'Same limits' and xlimit == 0:
            # Only the limit is needed on this pass; nothing is saved.
            return returned_xlim
        elif xlimit != 0:
            folder = 'Same_xaxis'
        else:
            folder = 'Different_xaxis'

        # Suppress Pandas warning about alignment of non-concatenation axis.
        # catch_warnings restores the caller's filters afterwards, even if
        # saving raises, instead of forcing FutureWarning back to 'default'.
        with warnings.catch_warnings():
            warnings.simplefilter(action='ignore', category=FutureWarning)
            save_function(figure, thisroi, config, folder, 'histogram')

        return returned_xlim
예제 #9
0
    def barchart_make(roi, df, list_rois, config, ylimit, save_function,
                      find_ylim_function):
        """Build a dodged bar chart with error bars for one ROI and save it.

        The rows of ``df`` whose ``index`` equals ``list_rois[roi]`` are
        sorted by ``config.single_roi_fig_x_axis``, which is then turned into
        an ordered categorical so the bars keep that order. Bars show ``Mean``
        with ``Mean +/- Conf_Int_95`` error bars.

        ``ylimit`` is the shared upper y limit (``0`` = none). The return
        value is whatever ``find_ylim_function(name, figure, 'yaxis')`` gives
        back, or ``0`` when it is not called. With
        ``use_same_axis_limits == 'Same limits'`` and ``ylimit == 0`` the
        figure is not saved. Otherwise it is handed to
        ``save_function(figure, name, config, folder, 'barchart')``.
        """
        thisroi = list_rois[roi]
        x_col = config.single_roi_fig_x_axis

        roi_df = df.loc[df['index'] == thisroi]
        roi_df = roi_df.sort_values([x_col])
        roi_df = roi_df.reset_index(drop=True)  # Reset index to remove grouping
        # Categories in sorted order of first appearance fix the bar order.
        roi_df[x_col] = pd.Categorical(roi_df[x_col],
                                       categories=roi_df[x_col].unique())

        def dodge():
            # Fresh position object for each geom, as in a literal chain.
            return pltn.position_dodge(preserve='single', width=0.8)

        def axis_title():
            return pltn.element_text(weight='bold', color='black', size=20)

        mapping = pltn.aes(x=x_col,
                           y='Mean',
                           ymin="Mean-Conf_Int_95",
                           ymax="Mean+Conf_Int_95",
                           fill=f'factor({config.single_roi_fig_colour})')

        components = [
            pltn.theme_538(),
            pltn.geom_col(position=dodge(), width=0.8, na_rm=True),
            pltn.geom_errorbar(size=1, position=dodge()),
            pltn.labs(x=config.single_roi_fig_label_x,
                      y=config.single_roi_fig_label_y,
                      fill=config.single_roi_fig_label_fill),
            pltn.scale_x_discrete(labels=[]),
            pltn.theme(panel_grid_major_x=pltn.element_line(alpha=0),
                       axis_title_x=axis_title(),
                       axis_title_y=axis_title(),
                       axis_text_y=pltn.element_text(size=20, color='black'),
                       legend_title=pltn.element_text(size=20, color='black'),
                       legend_text=pltn.element_text(size=18, color='black'),
                       subplots_adjust={'right': 0.85},
                       legend_position=(0.9, 0.8),
                       dpi=config.plot_dpi),
            # Category names are written as text at a fixed y of -0.7.
            pltn.geom_text(pltn.aes(y=-.7, label=x_col),
                           color='black',
                           size=20,
                           va='top'),
            pltn.scale_fill_manual(
                values=config.colorblind_friendly_plot_colours),
        ]

        figure = pltn.ggplot(roi_df, mapping)
        for component in components:
            figure = figure + component

        if ylimit:
            # Set y limit of figure (used to make it the same for every barchart)
            figure += pltn.ylim(None, ylimit)
            thisroi += '_same_ylim'

        limit_unset = ylimit == 0

        returned_ylim = 0
        if config.use_same_axis_limits in ('Same limits',
                                           'Create both') and limit_unset:
            returned_ylim = find_ylim_function(thisroi, figure, 'yaxis')

        if config.use_same_axis_limits == 'Same limits' and limit_unset:
            return returned_ylim

        folder = 'Different_yaxis' if limit_unset else 'Same_yaxis'
        save_function(figure, thisroi, config, folder, 'barchart')

        return returned_ylim
예제 #10
0
    def test_theme_538(self):
        """Check the fixture plot styled with ``theme_538``."""
        themed = self.g
        themed = themed + labs(title='Theme 538')
        themed = themed + theme_538()

        # NOTE(review): comparing a plot with a string presumably triggers a
        # baseline-image check supplied by the test harness - confirm.
        assert (themed + _theme) == 'theme_538'
def plot_performance(df, report_year, eval_period):
    """
    Plot metric-specific performance for a set of stocks over time. Reference:
    https://www.buffettsbooks.com/how-to-invest-in-stocks/intermediate-course/lesson-20/

    Rows older than ``report_year - eval_period`` and rows with missing values
    are dropped; ``roe`` is converted to a percentage. One line-and-point plot
    is produced per metric, coloured by ticker and annotated with a short
    explanation of the metric.

    :param df: DataFrame containing stock tickers and the columns ``symbol``,
        ``year``, ``eps``, ``bookValuePerShare``, ``roe``, ``currentRatio``
        and ``debtToEquity``. The caller's frame is not modified.
    :param report_year: Year of most recent financial report
    :param eval_period: Number of years prior to most recent report to be analyzed
    :return: A list of ggplot objects, one per metric; empty when no rows
        remain after filtering
    :rtype: List
    """

    start_year = report_year - eval_period

    # Take an explicit copy: assigning a column on a chained slice triggers
    # pandas' SettingWithCopyWarning.
    df = df.loc[df['year'] >= start_year, [
        'symbol', 'year', 'eps', 'bookValuePerShare', 'roe', 'currentRatio',
        'debtToEquity'
    ]].copy()

    df['roe'] = df['roe'] * 100.0  # fraction -> percent

    df = df.rename(
        {
            'eps': 'Earnings per Share',
            'roe': 'Return on Equity',
            'currentRatio': 'Current Ratio',
            'debtToEquity': 'Debt to Equity Ratio',
            'bookValuePerShare': 'Book Value per Share'
        },
        axis='columns')

    df = df.sort_values(by=['symbol', 'year'], ascending=True).dropna()

    # Nothing to plot; min()/max() below would raise on an empty frame.
    if df.empty:
        return []

    label_dict = {
        'Earnings per Share':
        'The EPS shows the company\'s profit per share. This chart '
        'should have a positive slope over time. Stable results '
        'here are extremely important for forecasting future cash '
        'flows. Note: if the company\'s book value has increased '
        'over time, the EPS should demonstrate similar growth.',

        # Commented out for now, API no longer returning this col in
        # income-statement response:
        # 'Dividend per Share': 'This chart shows the dividend history of the company. '
        #                       'This should have a flat to positive slope over time. If '
        #                       'you see a drastic drop, it may represent a stock split '
        #                       'for the company. Note: the dividend is taken from a '
        #                       'portion of the EPS, the remainder goes to the book value.',
        'Book Value per Share':
        'The book value represents the liquidation value of the '
        'entire company (per share). It\'s important to see '
        'this number increasing over time. If the company pays a'
        ' high dividend, the book value may grow at a slower '
        'rate. If the company pays no dividend, the book value '
        'should grow with the EPS each year.',
        'Return on Equity':
        'Return on equity is very important because it show the '
        'return that management has received for reinvesting the '
        'profits of the company. If using an intrinsic value '
        'calculator, it\'s very important that this number is flat or'
        ' increasing for accurate results. Find companies with a '
        'consistent ROE above 8%.',
        'Current Ratio':
        'The current ratio helps measure the health of the company in '
        'the short term. As a rule of thumb, the current ratio should be'
        ' above 1.0. A safe current ratio is typically above 1.5. Look '
        'for stability trends within the current ratio to see how the '
        'company manages their short term risk.',
        'Debt to Equity Ratio':
        'The debt to equity ratio helps measure the health of '
        'the company in the long term. As a rule of thumb, the '
        'debt to equity ratio should be lower than 0.5. Look for '
        'stability trends within the debt/equity ratio to see how'
        ' the company manages their long term risk.'
    }

    # Wrap the explanations so the annotation box stays readable.
    wrapper = textwrap.TextWrapper(width=120)
    label_dict = {
        key: wrapper.fill(text=value)
        for key, value in label_dict.items()
    }

    # Identical for every metric, so computed once.
    year_breaks = range(int(df['year'].min()), int(df['year'].max()) + 1, 1)
    label_x = statistics.mean(df['year'])

    plots = []

    # The first two columns are 'symbol' and 'year'; the rest are metrics.
    for metric in df.columns[2:].tolist():
        # Floor the lower bound: plain int truncation rounds toward zero and
        # would leave negative minima below the lowest tick.
        lowest_break = int(df[metric].min() // 1)
        rounded_max = int(df[metric].round().max())

        p = (ggplot(df, aes('year', metric, color='symbol')) +
             geom_line(size=1, alpha=0.8) + geom_point(size=3, alpha=0.8) +
             labs(title=metric, x='Report Year', y='', color='Ticker') +
             theme_538() + theme(legend_position='left',
                                 plot_title=element_text(weight='bold')) +
             scale_x_continuous(breaks=year_breaks) +
             scale_y_continuous(
                 breaks=range(lowest_break, rounded_max + 2, 1)) +
             # Explanation box, centred on the years, just above the data.
             annotate(geom='label',
                      x=label_x,
                      y=rounded_max + 1,
                      label=label_dict[metric],
                      size=8,
                      label_padding=0.8,
                      fill='#F7F7F7'))

        plots.append(p)

    return plots