Example #1
0
    def plot_scores(self):
        """
        Plot the modeller scores stored in ``self.models``.

        Three figures are built, shown and written as HTML files into the
        current working directory:

        * a DOPE vs. molpdf scatter plot, faceted by template, with an OLS
          trendline (``scatter_multiModel_score.html``);
        * a molpdf box plot per template (``boxMol_multiModel_score.html``);
        * a DOPE box plot per template (``boxDop_multiModel_score.html``).
        """

        def shared_options():
            # Built anew for every figure so no mutable argument is shared.
            return dict(
                hover_name='model',
                color='variant',
                category_orders={'template': self.template_order,
                                 'variant': self.variant_order},
                color_discrete_sequence=px.colors.qualitative.G10,
                height=500,
                width=1000,
                template='presentation',
            )

        scatter = px.scatter(self.models, x='dope', y='molpdf',
                             trendline='ols', facet_col='template',
                             range_y=[8000, 16000],
                             **shared_options())
        scatter.update_traces(opacity=0.66)
        scatter.show()
        scatter.write_html('scatter_multiModel_score.html')

        # (score column, fixed y-axis range, output file) for each box plot.
        box_plots = (
            ('molpdf', [8000, 16000], 'boxMol_multiModel_score.html'),
            ('dope', [-237000, -227000], 'boxDop_multiModel_score.html'),
        )
        for score, y_range, out_file in box_plots:
            box = px.box(self.models, x='template', y=score,
                         points='all', range_y=y_range,
                         **shared_options())
            box.show()
            box.write_html(out_file)
Example #2
0
def update_output(club1, club2):
    """Return the height and weight box plots for two clubs.

    Rows of the module-level ``df`` whose ``Club`` equals ``club1`` or
    ``club2`` are plotted.

    Returns:
        A ``(height_figure, weight_figure)`` tuple.
    """
    both_clubs = df[(df['Club'] == club1) | (df['Club'] == club2)]

    def centred_box(measure, heading):
        # One box per club, with all points drawn and a centred title.
        figure = px.box(both_clubs,
                        x="Club",
                        y=measure,
                        color="Club",
                        points="all",
                        color_discrete_sequence=['#2BB3B0', '#B277A7'])
        return figure.update_layout(title={
            'text': heading,
            'y': 0.95,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top'
        })

    height_figure = centred_box("Height", "Height Distribution")
    weight_figure = centred_box("Weight", "Weight Distribution")
    return height_figure, weight_figure
Example #3
0
def corr_notes(value13, value14):
    """Return a Div holding a box plot of ``NOTE_m_sc`` against ``AGE``.

    Args:
        value13: ``"general"`` to use every position, otherwise the value
            that ``POSITION`` must equal.
        value14: exclusive lower bound applied to ``MPG_REAL``.

    Returns:
        An ``html.Div`` with a line break and the graph. The graph id is
        ``'g4'`` for the general view and ``'g5'`` for a single position.
    """
    keep = final_note["MPG_REAL"] > value14
    if value13 == "general":
        graph_id = 'g4'
    else:
        keep = (final_note["POSITION"] == value13) & keep
        graph_id = 'g5'

    players = final_note[keep]
    figure = px.box(x=players["AGE"], y=players["NOTE_m_sc"])
    figure.update_layout(title=dict(text='Evolution of Players notes', x=0.5),
                         xaxis_title='Age',
                         yaxis_title='Notes')

    return html.Div([
        html.Br(),
        dcc.Graph(id=graph_id, figure=figure),
    ])
Example #4
0
def load_plot4(df, mintime, maxtime, sectorno):
    """Render a Streamlit box plot of sector times.

    A radio button lets the user group the boxes by ``NAME`` ("Overall") or
    by ``TYRECOMPOUND`` ("Tyre Compound"); boxes are always coloured by
    ``NAME`` using the ``COLORCODE`` column.

    Args:
        df: frame with ``N``, ``NAME``, ``COLORCODE``, ``TYRECOMPOUND`` and
            the column named by ``sectorno``.
        mintime: lower bound of the y axis.
        maxtime: upper bound of the y axis.
        sectorno: name of the column plotted on the y axis.
    """
    ordered = df.sort_values(['N'], ascending=[1])
    colours_by_name = dict(zip(ordered.NAME, ordered.COLORCODE))
    grouping = st.radio("By Tyre Compound or Overall?",
                        ("Overall", "Tyre Compound"))

    if grouping == "Overall":
        x_column, x_title = "NAME", 'Name'
    elif grouping == "Tyre Compound":
        x_column, x_title = "TYRECOMPOUND", 'Tyre Compounds'

    fig = px.box(ordered,
                 x=x_column,
                 y=sectorno,
                 color="NAME",
                 width=1200,
                 height=600,
                 color_discrete_map=colours_by_name)
    fig.update_xaxes(title_text=x_title)
    fig.update_layout(plot_bgcolor='#eeeeee', legend_bgcolor='#eeeeee')
    fig.update_yaxes(range=[mintime, maxtime], title_text='Total sector time')

    st.plotly_chart(fig)
Example #5
0
def boxplot_describe(df_plot,
                     col_x="weekday",
                     col_y="speed",
                     n_variables=True):
    """
    Show a Plotly box plot and, when grouped, return a description table.

    Args:
        df_plot: dataframe holding the data to plot.
        col_x: factor column used to group the boxes (only read when
            ``n_variables`` is true).
        col_y: numerical column whose distribution is plotted.
        n_variables: if true, draw one box per value of ``col_x``;
            otherwise draw a single box of ``col_y``.

    Returns:
        When ``n_variables`` is true, the transposed ``describe()`` of
        ``col_y`` grouped by ``col_x``. Otherwise ``None``.
    """
    if not n_variables:
        px.box(df_plot, y=col_y).show()
        return None

    # Boxes are coloured per category only when col_x has at most four
    # distinct (non-null) values.
    if df_plot[col_x].nunique() > 4:
        fig = px.box(df_plot, x=col_x, y=col_y)
    else:
        fig = px.box(df_plot, x=col_x, y=col_y, color=col_x)
    fig.show()

    return df_plot[[col_y, col_x]].groupby(col_x).describe().T
Example #6
0
def update_wykres_1(marka, model, lata, marka2, model2):
    """Build a box plot of price ('Cena') per production year ('Rok produkcji').

    Up to two make/model selections are combined with OR. A model is only
    taken into account when its make is given. Rows are then limited to
    production years strictly between ``lata[0]`` and ``lata[1]``.

    Args:
        marka: first make ('Marka pojazdu') or ``None``.
        model: model ('Model pojazdu') for the first make, or ``None``.
        lata: sequence whose first two items are the exclusive year bounds.
        marka2: second make or ``None``.
        model2: model for the second make, or ``None``.

    Returns:
        The figure, coloured by make when exactly two makes remain, by
        model when one make with exactly two models remains, and uncoloured
        otherwise.
    """
    cars = df.copy()

    def selection(make, car_model):
        # Boolean mask for one make/model pair, or None when no make is set.
        if make is None:
            return None
        chosen = cars['Marka pojazdu'] == make
        if car_model is not None:
            chosen = chosen & (cars['Model pojazdu'] == car_model)
        return chosen

    first = selection(marka, model)
    second = selection(marka2, model2)
    if first is not None and second is not None:
        cars = cars[first | second]
    elif second is not None:
        cars = cars[second]
    elif first is not None:
        cars = cars[first]

    year_from, year_to = lata[0], lata[1]
    years = cars['Rok produkcji']
    cars = cars[(years > year_from) & (years < year_to)]

    make_count = cars['Marka pojazdu'].nunique()
    if make_count == 2:
        colour_by = 'Marka pojazdu'
    elif make_count == 1 and cars['Model pojazdu'].nunique() == 2:
        colour_by = 'Model pojazdu'
    else:
        colour_by = None

    fig = px.box(cars,
                 x='Rok produkcji',
                 y='Cena',
                 height=380,
                 color=colour_by)

    transparent = 'rgba(0, 0, 0, 0)'
    fig.update_layout({
        'plot_bgcolor': transparent,
        'paper_bgcolor': transparent,
        'font_color': 'rgb(127,175,223)'
    })

    return fig
def graph_boxplot_artists(value, col):
    """Build a box plot of final sale prices grouped by ``col``.

    Args:
        value: collection of '1 Author' values used to restrict the sale
            statistics; ``None`` or an empty collection means no restriction.
        col: column of the module-level ``df_cml`` to group by.

    Returns:
        The box plot of 'Final Price' for sold artworks belonging to the
        first 50 groups of the sale statistics.
    """
    artworks = df_cml.copy()
    if col == 'Auction Number':
        artworks['Auction Number'] = artworks['Auction Number'].apply(
            lambda number: 'Action ' + str(number))

    # Sale statistics are computed on all artworks, or only on those of the
    # selected authors when a selection is given.
    if value is None or len(value) == 0:
        stats_source = artworks
    else:
        stats_source = artworks[artworks['1 Author'].isin(value)]

    # Sold and unsold artworks per group, most auctioned group first.
    sale_record = stats_source.groupby(col)['Sold'].agg(
        number_of_auctions='count',
        number_of_sold_artworks='sum',
        number_of_unsold_artworks=lambda sold: sold.count() - sold.sum(),
        sale_rate='mean').sort_values('number_of_auctions', ascending=False)
    if col == 'Auction Number':
        sale_record = sale_record.sort_index(ascending=True)

    # Reorder the artworks so the groups follow the order of the statistics.
    ordered = artworks.copy()
    ordered['Temp'] = pd.CategoricalIndex(ordered[col],
                                          ordered=True,
                                          categories=sale_record.index)
    ordered = ordered.sort_values('Temp', ascending=True)

    in_top_groups = ordered[col].isin(sale_record[:50].index)
    was_sold = ordered['Sold'] == 1
    plotted = ordered[in_top_groups & was_sold]

    if col != 'Dominant Colour Name':
        return px.box(plotted,
                      x=col,
                      y="Final Price",
                      title='Final sale prices box plot')

    return px.box(
        plotted,
        x='Dominant Colour Name',
        y="Final Price",
        title=
        'Boxplot of final sale price for dominant colours associated with 0+ artworks',
        color='Dominant Colour Name',
        color_discrete_sequence=list(sale_record[sale_record > 0].index),
        points=False,
        boxmode="overlay")
Example #8
0
def appendBox(clicks, col1, col2, color):
    """Return a box plot of the module-level ``df`` once a click happened.

    Args:
        clicks: number of clicks so far. ``None`` is treated like zero
            instead of failing on the ``None > 0`` comparison.
        col1: column plotted on the x axis.
        col2: column plotted on the y axis.
        color: column used to colour the boxes, or ``None`` for no colouring.

    Returns:
        An empty box figure while there has been no click, otherwise the box
        plot of ``col2`` against ``col1``.
    """
    print(clicks, col1, col2, color)
    if clicks is None or clicks <= 0:
        return px.box()
    # color=None is px.box's own default, so no separate branch is needed.
    return px.box(data_frame=df, x=col1, y=col2, color=color)
Example #9
0
def update_statistics_charts(project, subject, gender, race, age):
    """Build the MAPE and data-availability box charts for a selection.

    Args:
        project: a project value, or ``'all'`` for every project.
        subject: collection of person values; may contain ``'all'``.
        gender: a gender value, or ``'all'`` for every gender.
        race: collection of race values; may contain ``'all'``.
        age: pair ``(min_age, max_age)``, both bounds inclusive.

    Returns:
        ``(mape_figure, da_figure)``. Two empty box figures are returned
        when ``subject`` or ``race`` is empty.
    """
    projects = data.project.unique() if project == 'all' else [project]

    if not subject:
        return px.box(), px.box()
    subjects = data.person.unique() if 'all' in subject else subject

    genders = data.gender.unique() if gender == 'all' else [gender]

    if not race:
        return px.box(), px.box()
    races = data.race.unique() if 'all' in race else race

    in_age_range = (data.age <= age[1]) & (data.age >= age[0])
    selected_rows = (data.project.isin(projects) & data.person.isin(subjects) &
                     data.gender.isin(genders) & data.race.isin(races) &
                     in_age_range)
    selection = data.loc[selected_rows, :]

    def horizontal_box(metric, heading):
        # Figure dict with one horizontal box trace of `metric` per activity.
        return {
            "data": [
                go.Box(x=selection[metric],
                       y=selection['activity'],
                       orientation='h')
            ],
            "layout": go.Layout(title=heading),
        }

    return (horizontal_box('mape', 'MAPE vs Activity'),
            horizontal_box('da', 'Data Availability vs Activity'))
Example #10
0
def _plotly_express(cat_col, color, churn):
    """Box plot of ``cat_col`` per value of ``color`` from the module-level ``df``.

    Args:
        cat_col: column plotted on the y axis.
        color: column plotted on the (categorical) x axis.
        churn: when equal to ``"Churn"`` the boxes are additionally split
            and coloured by the mapped ``Churn_label`` column.

    Returns:
        The configured figure.
    """
    box_options = dict(
        x=color,
        y=cat_col,
        height=450,
        # Categories appear in sorted order of the values found in the column.
        category_orders={
            str(color): df[color].value_counts().sort_index().index
        },
    )

    if churn == "Churn":
        box_options.update(
            color=df['Churn_label'].map({
                'Yes': 'Churn',
                'No': 'NoChurn'
            }),
            color_discrete_map={
                "Churn": "steelblue",
                "NoChurn": "tomato"
            },
        )
        heading = f"{cat_col} distribution <br>by {color} & Churn"
    else:
        box_options.update(color_discrete_sequence=['mediumseagreen'])
        heading = f"Distribution of {cat_col} <br>by {color}"

    fig = px.box(df, **box_options)
    fig.update_layout(title=heading,
                      xaxis_title=dict(),
                      showlegend=True,
                      yaxis_title=f"{cat_col} Distribution",
                      title_x=.5,
                      legend_title='Churn:',
                      xaxis={'type': 'category'},
                      margin=dict(t=100, l=50))
    fig.update_xaxes(title='')

    return fig
Example #11
0
def update_plots(dropdown_x, dropdown_y, button):
    """Build the scatter plot and the two per-species box plots.

    Args:
        dropdown_x: column of the module-level ``data`` for the x variable.
        dropdown_y: column of ``data`` for the y variable.
        button: ``'si'`` colours the scatter points by species; any other
            value leaves them uncoloured.

    Returns:
        ``[scatter_figure, box_figure_x, box_figure_y]``.
    """
    x_label = variables_dict[dropdown_x]
    y_label = variables_dict[dropdown_y]
    background = "#F8F9F9"

    scatter_options = dict(data_frame=data, x=dropdown_x, y=dropdown_y)
    if button == 'si':
        scatter_options['color'] = 'species'
    scatter_fig = px.scatter(**scatter_options)
    # Both variants share one title; the coloured variant used to join the
    # pieces without separators ("Scatterplot<x>vs.<y>").
    scatter_fig.update_layout(title='Scatterplot: ' + x_label + ' vs. ' +
                              y_label,
                              xaxis_title=x_label,
                              yaxis_title=y_label,
                              paper_bgcolor=background)

    def species_box(column, label):
        # Distribution of one variable per species.
        figure = px.box(data_frame=data,
                        x='species',
                        y=column,
                        color='species')
        figure.update_layout(showlegend=False,
                             xaxis_title='Species',
                             title='Boxplot: ' + label,
                             yaxis_title='cm',
                             paper_bgcolor=background)
        return figure

    return [
        scatter_fig,
        species_box(dropdown_x, x_label),
        species_box(dropdown_y, y_label),
    ]
Example #12
0
def _plotly_express(cat_col, color, churn):
    """Box plot of ``cat_col`` per value of ``color`` from ``df_train``.

    Args:
        cat_col: column plotted on the y axis.
        color: column plotted on the (categorical) x axis.
        churn: when equal to ``"Churn"`` the boxes are split and coloured by
            the mapped ``Churn_label`` column and the legend is shown;
            otherwise a single-colour plot without legend is drawn.

    Returns:
        The configured figure.
    """
    # Categories appear in sorted order of the values found in the column.
    ordered_categories = df_train[color].value_counts().sort_index().index

    if churn == "Churn":
        churn_groups = df_train['Churn_label'].map({'Yes': 'Churn',
                                                    'No': 'NoChurn'})
        fig = px.box(df_train,
                     x=color,
                     y=cat_col,
                     color=churn_groups,
                     height=450,
                     color_discrete_map={"Churn": "seagreen",
                                         "NoChurn": "indianred"},
                     category_orders={str(color): ordered_categories})
        chart_title = f"{cat_col} dist by <br>{color} & Churn"
        legend_visible = True
    else:
        fig = px.box(df_train,
                     x=color,
                     y=cat_col,
                     height=450,
                     category_orders={str(color): ordered_categories},
                     color_discrete_sequence=['seagreen'])
        chart_title = f"Distribution of {cat_col} <br>by {color}"
        legend_visible = False

    fig.update_layout(
        title=chart_title,
        xaxis_title=dict(),
        showlegend=legend_visible,
        yaxis_title=f"{cat_col} Distribution",
        title_x=.5,
        legend_title='Churn:',
        xaxis={'type': 'category'},
        margin=dict(t=100, l=50),
    )
    fig.update_xaxes(title='')

    return fig
Example #13
0
def boxplot_duration(df_act, y_scale='norm'):
    """
        Build a notched box plot of the activity durations.

        df_act  : activity dataframe; its START_TIME column is formatted and
                  added to the hover data
        y_scale : 'norm' plots the 'minutes' column, 'log' plots its natural
                  logarithm
    """
    assert y_scale in ['norm', 'log']

    durations = activities_duration_dist(df_act)

    # formatted start time, shown when hovering over a data point
    durations[START_TIME] = df_act[START_TIME].dt.strftime('%c')

    if y_scale == 'log':
        durations['log minutes'] = np.log(durations['minutes'])
        y_column = 'log minutes'
    else:
        y_column = 'minutes'

    return px.box(
        durations,
        x="activity",
        y=y_column,
        notched=True,
        labels={'minutes': y_column},
        points='all',  # draw every data point next to its box
        title="Activity durations",
        hover_data=[START_TIME, 'minutes'],
    )
Example #14
0
def update_figure(Val):
    """Build the univariate and bivariate charts for column ``Val`` of ``df_HF``.

    * ``Val == "Death Event"``: pie chart only; the bivariate chart is an
      empty bar figure.
    * string-valued column: pie chart plus a bar chart split by
      "Death Event".
    * any other column: histogram plus a box plot per "Death Event".

    Args:
        Val: name of the column to chart.

    Returns:
        ``(uni, bi)``, both styled with the dark theme colours.
    """
    if Val == "Death Event":
        uni = px.pie(data_frame=df_HF.groupby([Val]).count().reset_index(),
                     names=Val,
                     values="Count",
                     title=f"Pie chart of {Val}")
        bi = px.bar()
    # The value stored under index label 0 decides whether the column is
    # treated as categorical.
    elif isinstance(df_HF[Val][0], str):
        uni = px.pie(data_frame=df_HF.groupby([Val]).count().reset_index(),
                     names=Val,
                     values="Count",
                     title=f"Pie chart of {Val}")
        bi = px.bar(df_HF.groupby([Val, "Death Event"]).count().reset_index(),
                    x=Val,
                    y="Count",
                    color="Death Event",
                    title=f"Bar chart of {Val} vs death")
    else:
        uni = px.histogram(df_HF, x=Val, title=f"Histogram of {Val}")
        bi = px.box(df_HF,
                    x="Death Event",
                    y=Val,
                    title=f"Box plot of {Val}")

    for figure in (uni, bi):
        figure.update_layout(font_color="rgb(255, 255, 255)",
                             paper_bgcolor='rgb(43, 63, 82)',
                             plot_bgcolor='rgb(43, 63, 82)')
    return uni, bi
Example #15
0
def box_categorical(df,
                    y,
                    title='Box',
                    out_path=None,
                    max_col=2,
                    layout_kwargs=None,
                    to_image=False):
    """Draw one box plot of ``y`` per object-dtype column of ``df``.

    For every object-dtype column other than ``y`` the rows are sorted by the
    median of ``y`` within their category, a box plot is built, and its
    traces are handed to ``datagroups_subplots`` together with the layout
    arguments.

    Args:
        df: source dataframe.
        y: name of the column plotted on the y axis.
        title: forwarded to ``datagroups_subplots``.
        out_path: forwarded to ``datagroups_subplots``.
        max_col: forwarded to ``datagroups_subplots``; also decides which
            subplots get a y-axis title (every ``max_col``-th one).
        layout_kwargs: forwarded to ``datagroups_subplots``; ``None`` means
            an empty dict.
        to_image: forwarded to ``datagroups_subplots``.
    """
    # A fresh dict per call: a shared `{}` default would leak any mutation
    # made downstream into later calls.
    if layout_kwargs is None:
        layout_kwargs = {}

    columns = [
        name for name in df.select_dtypes(include='object') if name != y
    ]

    data_groups = []
    for column in columns:
        median_df = df.groupby(column).agg(
            BOX_CATEGORICAL_median=(y, 'median')).reset_index().sort_values(
                by='BOX_CATEGORICAL_median')

        # Attach each row's category median so rows can be ordered by it.
        tmp_df = df[[column, y]].copy()
        tmp_df = tmp_df.merge(median_df, on=column, how='left')
        tmp_df = tmp_df.sort_values(by='BOX_CATEGORICAL_median')

        fig = px.box(tmp_df, x=column, y=y)
        data_groups.append(fig['data'])

    datagroups_subplots(data_groups,
                        max_col=max_col,
                        title=title,
                        out_path=out_path,
                        xaxis_titles=columns,
                        yaxis_titles=[
                            y if i % max_col == 0 else None
                            for i, _ in enumerate(columns)
                        ],
                        layout_kwargs=layout_kwargs,
                        to_image=to_image)
def plot_score_boxplot(dfe: DataFrame, per: config.Per) -> Figure:
    """
    Plot score boxplot.

    The precision, recall and F1-score columns of ``dfe`` are stacked into
    a single ``value`` column tagged by a ``score`` column, and plotted
    against ``-log10(e-value)``.

    Parameters
    ----------
    dfe
        DataFrame holding the three score columns named by ``config.label``
        and the ``-log10(e-value)`` column.
    per
        Grouping the scores refer to; its ``name`` is used for the title
        and the hover name.

    Returns
    -------
    Figure
        Box plot with its y-axis range set to ``YRANGE``.
    """
    # (source column, value written to the "score" column)
    score_columns = (
        (config.label.precision, "precision"),
        (config.label.recall, "recall"),
        (config.label.f1score, config.label.f1score),
    )

    frames = []
    for column, score_name in score_columns:
        frame = dfe.copy()
        frame["value"] = frame[column]
        del frame[column]
        frame["score"] = score_name
        frames.append(frame)

    if per == config.Per.organism:
        hover_data = [
            "organism",
            "domain",
            config.label.auc,
            config.label.hmmer_hits,
        ]
    else:
        # dict.fromkeys drops duplicates (per.name may equal "clan") while
        # keeping a deterministic order, unlike a set.
        hover_data = list(
            dict.fromkeys([
                per.name,
                "clan",
                config.label.auc,
                config.label.hmmer_hits,
            ]))

    stacked = pd.concat(frames)

    title = f"Score boxplot, {per.name}-wise"
    fig = px.box(
        stacked,
        x="-log10(e-value)",
        color="score",
        y="value",
        title=title,
        hover_name=per.name,
        hover_data=hover_data,
    )
    fig.update_yaxes(range=YRANGE)

    return fig
Example #17
0
    def _plot_sparse(self, data_long_format) -> ReportOutput:
        """Write a box plot of the non-zero values to an HTML file.

        Rows whose ``value`` is zero are dropped. The remaining values are
        plotted per ``self.x``, optionally split by ``self.color``,
        ``self.facet_row`` and ``self.facet_column`` when those are set.

        Args:
            data_long_format: long-format frame with a ``value`` column, the
                ``self.x`` column and any configured optional columns.

        Returns:
            ReportOutput pointing at ``<result_path>/<result_name>.html``.
        """
        group_columns = [self.x] + [
            column
            for column in (self.color, self.facet_row, self.facet_column)
            if column is not None
        ]

        nonzero = data_long_format.loc[data_long_format.value != 0,
                                       [self.x, "value"] + group_columns[1:]]
        total_counts = nonzero.groupby(group_columns, as_index=False).agg(
            {"value": 'sum'})
        # Merge on every grouping column: joining on self.x alone would
        # repeat each row once per colour/facet combination of that x value.
        plot_data = nonzero.merge(total_counts,
                                  on=group_columns,
                                  how="left",
                                  suffixes=('', '_sum')) \
            .fillna(0) \
            .sort_values(by=self.x) \
            .reset_index(drop=True)

        figure = px.box(plot_data, x=self.x, y="value", color=self.color,
                        facet_row=self.facet_row, facet_col=self.facet_column,
                        labels={
                            # Keyed by the plotted column so the configured
                            # y title is actually applied.
                            "value": self.y_title,
                            self.x: self.x_title,
                        }, template='plotly_white',
                        color_discrete_sequence=px.colors.diverging.Tealrose)

        file_path = self.result_path / f"{self.result_name}.html"

        figure.write_html(str(file_path))

        return ReportOutput(path=file_path, name="feature boxplots")
Example #18
0
def choosing_the_algorithm(df, x, y, color, max_y):
    """Return a compact box plot using the "exclusive" quartile method.

    Args:
        df: data to plot.
        x: column for the x axis.
        y: column for the y axis.
        color: column used to colour the boxes.
        max_y: upper bound of the y axis (the lower bound is 0).

    Returns:
        The figure, without legend and without margins.
    """
    figure = px.box(df, x=x, y=y, color=color)
    # Other accepted values are "inclusive" and "linear" (the default).
    figure.update_traces(quartilemethod="exclusive")
    figure.update_layout(
        yaxis=dict(range=[0, max_y]),
        showlegend=False,
        margin=dict(l=0, r=0, t=0, b=0),
    )
    return figure
Example #19
0
def plot10(df):
    """Box plot of pace per age, one viridis-shaded colour per age.

    Args:
        df: frame with the ``age`` and ``pace`` columns.

    Returns:
        The figure; the axis labels and title are in Portuguese.
    """
    df = df.sort_values("age")
    ages = df['age']

    # Min-max scale the ages to [0, 1] to pick colours from the colormap.
    # With a single distinct age the span is zero, so every row maps to 0
    # rather than dividing by zero.
    span = ages.max() - ages.min()
    scaled = (ages - ages.min()) / span if span else ages * 0.0
    age_colors = {
        age: mpl.colors.rgb2hex(rgba)
        for age, rgba in zip(ages, matplotlib.cm.viridis(scaled))
    }

    fig = px.box(df,
                 x="age",
                 y="pace",
                 color="age",
                 # Keyed by the plotted column; any other key is ignored.
                 category_orders={'age': ages.to_list()[::-1]},
                 color_discrete_map=age_colors,
                 labels={
                     'pace': 'Ritmo',
                     'age': 'Idade'
                 })

    fig.update_yaxes(rangemode="tozero")

    fig.update_layout(
        height=750,
        title="<b>Influência da idade no ritmo do atleta</b>",
        title_font_size=20
    )

    return fig
Example #20
0
def plot_compare_stocks(stocks=None,
                        strategy=bollinger_bands,
                        date_from=None,
                        date_to=None,
                        period=365):
    """Plot hold vs. strategy ROI box plots for several stocks.

    For every stock the return on holding and the return on ``strategy``
    are computed over ``period``, and one box per stock and type is drawn.
    A horizontal "profit line" at ROI = 1 is added. The figure is written
    to the ``plots`` directory two levels above this file and opened.

    Args:
        stocks: iterable of stock objects exposing ``data``, ``fullname``
            and ``interval``.
        strategy: callable applied to ``stock.data`` to get strategy data.
        date_from: optional lower slice bound applied to both data sets.
        date_to: optional upper slice bound applied to both data sets.
        period: forwarded to ``return_on_hold`` / ``return_on_strategy``.

    Returns:
        Path of the written HTML file, as a string.

    Raises:
        ValueError: if no stocks are given.
    """
    stocks = [] if stocks is None else list(stocks)
    if not stocks:
        # pd.concat([]) would raise the same exception type further down,
        # but with a message that does not point at the real cause.
        raise ValueError(
            "plot_compare_stocks requires at least one stock to compare")

    list_of_comparisons = []

    for stock in stocks:
        logger.info("adding %s to comparison", stock.fullname)
        data = stock.data
        strategy_data = strategy(data)

        if date_from:
            strategy_data = strategy_data[date_from:]
            data = data[date_from:]
        if date_to:
            strategy_data = strategy_data[:date_to]
            data = data[:date_to]

        dict_hold = return_on_hold(data, period=period)
        dict_str = return_on_strategy(strategy_data, period=period)

        df_hold = pd.DataFrame(dict_hold.items(),
                               columns=["Date", "Hold"]).set_index("Date")
        df_STR = pd.DataFrame(dict_str.items(),
                              columns=["Date", "Strategy"]).set_index("Date")

        df_merged = df_hold.merge(df_STR,
                                  left_index=True,
                                  right_index=True,
                                  how="left")

        # Long format: one row per (date, type) with the ROI as value.
        df_fin = pd.melt(
            df_merged.reset_index(),
            id_vars=["Date"],
            value_vars=["Hold", "Strategy"],
            value_name="ROI",
            var_name="Type",
        ).set_index("Date")
        df_fin["Stock"] = f"{stock.fullname}_{stock.interval}"
        list_of_comparisons.append(df_fin)

    df_compare = pd.concat(list_of_comparisons)
    fig = px.box(df_compare, x="Stock", y="ROI", color="Type")
    x_axis = df_compare["Stock"].unique()
    fig.add_trace(
        go.Scatter(x=x_axis,
                   y=[1] * len(x_axis),
                   mode="lines+markers",
                   name="profit line"))
    time_of_completion = datetime.datetime.now().strftime("%Y_%m_%d__%H_%M")
    plot_path = str(
        Path(__file__).parent.parent / "plots" /
        f"plot_comparison_{time_of_completion}.html")
    plot(fig, filename=plot_path, auto_open=True)
    # NOTE(review): presumably gives the browser time to open the file
    # before the caller continues -- confirm whether this is still needed.
    sleep(1)

    return plot_path
Example #21
0
def plot_boxplot(data, col, parameter, title='Boxplot'):
    """
    Build a boxplot of one column and print a short reading of it.
    Inputs:
    data = The dataframe holding the column to plot
    col = The column of the dataframe that is plotted
    parameter = The name used for the plotted quantity in the printed text
    title = The title of the boxplot
    Returns:
    Returns a plotly figure
    """
    fig = ff.box(data, y=col, title=title)
    print(
        f'From this boxplot we can see that the median {parameter} of this stock {col} is {np.median(data[col])}'
    )

    # The sign of the skewness selects which remarks are printed.
    skewness = data[col].skew()
    if skewness > 0:
        remarks = [
            f'From this boxplot we can see that there are more positive {parameter} in this stock',
            f'This can be seen in the boxplot as there are more {parameter} above the median i.e there is more data above the median than below the median',
        ]
    elif skewness < 0:
        remarks = [
            f'From this boxplot we can see that there are more negative {parameter} in this stock',
            f'This can be seen in the boxplot as there are more {parameter} below the median i.e there is more data below the median than above the median',
        ]
    else:
        remarks = [
            f'The {parameter} are not skewed and the positive returns and the negative returns are equally likely',
        ]
    remarks.append('Please watch out for the outliers')

    for remark in remarks:
        print(remark)
    return fig
Example #22
0
def visualize_elapsed_time_per_ts(df: pd.DataFrame, relative: bool = False) -> "plotly.graph_objects.Figure":
    """Box plot of elapsed run time per timestep.

    Args:
        df: frame with the columns 'simulation', 'run', 'timestep',
            'substep' and 'run_time'.
        relative: when exactly ``True``, plot the difference between
            consecutive per-timestep maxima ('psub_duration'); otherwise
            plot the time since the start ('time_since_start').

    Returns:
        The figure with 'timestep' on the x axis.
    """
    indexes = ['simulation', 'run', 'timestep', 'substep']

    z_df = df.set_index(indexes)
    # run_time of the row with timestep 1 / substep 1; the last two index
    # levels are moved out of the index, leaving (simulation, run).
    first_time = z_df.query('timestep == 1 & substep == 1').reset_index(
        [-1, -2]).run_time
    # NOTE(review): relies on pandas aligning the four-level index with the
    # two-level one on their shared level names -- confirm.
    s = (z_df.run_time - first_time)
    s.name = 'time_since_start'

    z_df = z_df.join(s)
    # Largest elapsed time over the substeps of each timestep.
    s = z_df.groupby(indexes[:-1]).time_since_start.max()

    fig_df = s.reset_index()
    if relative is True:
        # Difference to the previous row within each (simulation, run)
        # group; the first row of every group has no predecessor.
        s = fig_df.groupby(indexes[:-2]).time_since_start.diff()
        s.name = 'psub_duration'
        fig_df = fig_df.join(s)

        y_col = 'psub_duration'
    else:
        y_col = 'time_since_start'

    fig = px.box(fig_df, x='timestep', y=y_col)

    return fig
Example #23
0
    def _plot(self, plotting_data, output_name):
        """Write a box plot of coefficients per maximum seed overlap.

        Args:
            plotting_data: frame with the ``max_seed_overlap`` and
                ``coefficients`` columns.
            output_name: stem of the HTML file created in
                ``self.result_path``.

        Returns:
            A ReportOutput for the written file, or ``None`` (after logging
            a warning) when ``plotting_data`` is empty.
        """
        if plotting_data.empty:
            logging.warning(
                f"Coefficients: empty data subset specified, skipping {output_name} plot..."
            )
            return None

        html_file = self.result_path / f"{output_name}.html"

        import plotly.express as px

        axis_labels = {
            "max_seed_overlap": self._x_axis_title,
            "coefficients": self._y_axis_title
        }
        box = px.box(plotting_data,
                     x="max_seed_overlap",
                     y="coefficients",
                     labels=axis_labels,
                     template='plotly_white',
                     color_discrete_sequence=px.colors.diverging.Tealrose)
        box.write_html(str(html_file))

        description = f"Overlap between implanted motif seeds and features versus {self._y_axis_title.lower()}"
        return ReportOutput(html_file, description)
Example #24
0
def outliers(request):
    """Render the outliers page with one box plot per numeric feature.

    On a POST request the feature named in the ``outlier`` form field is
    passed to ``ppd.remove_feature_outlier_data`` before the plots are
    rebuilt. The template context maps each numeric feature name to its
    box-plot HTML fragment, its outlier count and its missing-value flag.
    """
    global ppd
    print("outliers function")
    if request.method == 'POST':
        print("POST data")
        selected_feature = request.POST['outlier']
        print(selected_feature)
        ppd.remove_feature_outlier_data(selected_feature)

    numeric_data = ppd.get_numeric_data()
    feature_names = ppd.get_numeric_features_name()

    plots_by_feature = {}
    for feature in feature_names:
        # Plot only the rows where this feature has a value.
        present_values = numeric_data.loc[numeric_data[feature].notnull(),
                                          feature]
        figure = px.box(present_values, y=feature, points='all', width=600)
        # Fragment only: the page is expected to load plotly.js itself.
        plot_html = pio.to_html(fig=figure,
                                full_html=False,
                                include_plotlyjs=False)
        plots_by_feature[feature] = {
            'box_plot': plot_html,
            'num_outlier': ppd.get_feature_num_outlier(feature),
            'have_missing': ppd.check_feature_missing(feature),
        }

    return render(request,
                  'data_cleaning_app/outliers.html',
                  context={'feature_box_plot': plots_by_feature})
Example #25
0
def box_plot(data, options):
    """Build a box plot of a resampled time series.

    ``data`` is resampled with the ``sample`` rule and summed, then reshaped
    into a long table with a ``time`` column and a ``value`` column that is
    handed to ``px.box``. (Assumes ``data`` is a time-indexed pandas object
    supporting ``resample`` -- TODO confirm against callers.)

    Args:
        data: Time series to plot.
        options: Mapping that must contain ``xaxis_title``, ``yaxis_title``,
            ``legend_title`` and ``sample``; any remaining entries are
            forwarded to ``px.box`` on top of the project defaults. The
            mapping is not modified.

    Returns:
        The configured figure.

    Raises:
        PlottingError: If ``px.box`` rejects the arguments with a
            ``ValueError``; the message is also flashed to the user.
    """
    import copy

    # Work on a shallow copy: the pops below and the x/y writes through the
    # ChainMap (which always writes to its first mapping) would otherwise
    # modify the caller's mapping.
    plot_options = copy.copy(options)
    xaxis_title = plot_options.pop("xaxis_title") or "Time"
    yaxis_title = plot_options.pop("yaxis_title")
    legend_title = plot_options.pop("legend_title")
    sample = plot_options.pop("sample")
    fig_options = ChainMap(
        plot_options,
        GRAPHS_DEFAULT_OPTIONS["timeseries"]["box"].get_defaults(
            exclude_non_plotly_options=True))
    fig_options["x"] = "time"
    fig_options["y"] = "value"

    # Resample, then flatten to long format: one row per (series, time).
    ts_resampled = data.resample(sample).sum()
    ts_resampled.index.name = "time"
    ts_unstacked = ts_resampled.unstack()
    ts_unstacked.name = "value"
    ts_flattened = ts_unstacked.reset_index()

    try:
        fig = px.box(ts_flattened, points="outliers", **fig_options)
    except ValueError as ve:
        flash(f"Timeseries plot error: {ve}", category="error")
        # Chain explicitly so the original ValueError is kept as the cause.
        raise PlottingError(f"Timeseries plot error: {ve}") from ve

    fig.update_layout(xaxis_title=xaxis_title,
                      yaxis_title=yaxis_title
                      or add_unit_to_label(fig_options["y"], ts_flattened),
                      legend_title=legend_title,
                      template=GRAPHS_DEFAULT_TEMPLATE,
                      **GRAPHS_DEFAULT_LAYOUT)
    fig.update_xaxes(GRAPHS_DEFAULT_XAXES_LAYOUT)
    fig.update_yaxes(GRAPHS_DEFAULT_YAXES_LAYOUT)
    return fig
Example #26
0
 def _generate_box(self):
     """Build the Dash layout for the box-plot section.

     Draws a box plot of the dataframe returned by
     ``self.pp.get_numeric_df`` for ``self.settings['data']`` and returns a
     container holding a centred heading, the graph and the
     ``markdown_text_box`` notes.
     """
     # Column names are used as-is; no truncation is applied.
     numeric_df = self.pp.get_numeric_df(self.settings['data'])
     box_figure = px.box(numeric_df)

     # Heading text is Russian for "box-and-whisker plot".
     heading = html.Div(html.H1(children='Ящик с усами'),
                        style={'text-align': 'center'})
     graph_panel = html.Div(dcc.Graph(id='box', figure=box_figure),
                            style={
                                'width': '78%',
                                'display': 'inline-block',
                                'border-color': 'rgb(220, 220, 220)',
                                'border-style': 'solid',
                                'padding': '5px'
                            })
     notes_panel = html.Div(dcc.Markdown(children=markdown_text_box),
                            style={
                                'width': '18%',
                                'float': 'right',
                                'display': 'inline-block'
                            })
     content_row = html.Div([graph_panel, notes_panel])
     return html.Div([heading, content_row], style={'margin': '100px'})
Example #27
0
 def _generate_box(self):
     """Build the Dash layout for the box-plot section with titled axes.

     Draws a box plot of the dataframe returned by
     ``self.pp.get_numeric_df`` for ``self.settings['data']``, labels both
     axes, and returns a container holding a centred heading, the graph and
     the ``markdown_text_box`` notes.
     """
     numeric_df = self.pp.get_numeric_df(self.settings['data'])
     box_figure = px.box(numeric_df)
     # Axis titles are Russian for "Variables" and "Quantile values".
     box_figure.update_xaxes(title='Переменные')
     box_figure.update_yaxes(title='Значения квантилей')

     graph_style = {
         'width': '78%',
         'display': 'inline-block',
         'border-color': 'rgb(220, 220, 220)',
         'border-style': 'solid',
         'padding': '5px'
     }
     notes_style = {
         'width': '18%',
         'float': 'right',
         'display': 'inline-block'
     }

     # Heading text is Russian for "box-and-whisker plot".
     heading = html.Div(html.H1(children='Ящик с усами'),
                        style={'text-align': 'center'})
     graph_panel = html.Div(dcc.Graph(id='box', figure=box_figure),
                            style=graph_style)
     notes_panel = html.Div(dcc.Markdown(children=markdown_text_box),
                            style=notes_style)
     return html.Div([heading, html.Div([graph_panel, notes_panel])],
                     style={'margin': '100px'})
Example #28
0
def analyze_boxplot(df, col):
    """Render a box plot and a short skewness commentary for every column.

    Args:
        df: Dataframe with one column per stock.
        col: Name of the quantity being analysed; used only in the plot
            title and the written messages.

    For each column the box plot is shown in Streamlit, followed by the
    NaN-ignoring median and two sentences chosen by the sign of the
    column's skewness.
    """
    for stock in df.columns:
        fig = px.box(df, y=stock, title=f'Boxplot for {col} of {stock}')
        st.plotly_chart(fig)
        st.write(
            f'From this boxplot we can see that the median {col} of {stock} is {np.nanmedian(df[stock])}'
        )

        skewness = df[stock].skew()
        if skewness > 0:
            messages = (
                f'From this boxplot we can see that there are more positive {col} in this stock',
                f'This can be seen in the boxplot as there are more {col} above the median i.e there is more data above the median than below the median',
            )
        elif skewness < 0:
            messages = (
                f'From this boxplot we can see that there are more negative {col} in this stock',
                f'This can be seen in the boxplot as there are more {col} below the median i.e there is more data below the median than above the median',
            )
        else:
            # Reached when the skewness is neither positive nor negative.
            messages = (
                'This stock is not skewed at all',
                'This is shown by equal number of observations both above and below the median',
            )

        for message in messages:
            st.write(message)
def get_feature_stats(dataframe):
    """Build one plotly chart per column and return them JSON-encoded.

    Chart selection per column:
      * object dtype with at most 5 distinct values -> histogram;
      * numeric dtype with at most 2 distinct values -> histogram;
      * numeric dtype whose minimum and median are both 0 and which has at
        most 5 distinct values -> histogram;
      * any other numeric dtype -> box plot;
      * everything else -> no chart (encoded as JSON ``null``).

    Args:
        dataframe: The pandas DataFrame whose columns are charted.

    Returns:
        A DataFrame with columns ``feature`` (column name) and ``chart``
        (JSON string). Columns whose chart could not be built are logged
        and left out; the function itself does not raise for them.
    """
    import logging

    list_of_charts = []
    for col in dataframe.keys():
        # Best effort per column: one bad column must not discard the others,
        # but the failure is logged instead of being silently swallowed.
        try:
            column = dataframe[col]
            fig = None
            # ``np.object`` was removed from NumPy; ``np.object_`` is the
            # scalar type for object-dtype columns.
            if np.issubdtype(column.dtype, np.object_):
                if column.nunique() <= 5:
                    fig = px.histogram(dataframe, x=column, color=column)
            elif np.issubdtype(column.dtype, np.number):
                distinct = column.nunique()
                if distinct <= 2 or (distinct <= 5 and column.min() == 0
                                     and column.quantile(0.5) == 0):
                    fig = px.histogram(dataframe, x=column, color=column)
                else:
                    fig = px.box(dataframe, y=column)

            graph_json = json.dumps(fig, cls=plotly.utils.PlotlyJSONEncoder)
        except Exception:
            logging.getLogger(__name__).warning(
                "Skipping chart for feature %r", col, exc_info=True)
            continue
        list_of_charts.append([col, graph_json])

    return pd.DataFrame(list_of_charts, columns=["feature", "chart"])
Example #30
0
def display_boxes(clust, vari):
    """Return a box plot of ``vari`` grouped and coloured by ``clust``.

    Reads ``box_cluster.csv`` from ``path``, renames the three cluster
    columns, adds 1 to each of them, sorts the rows by ``clust`` and plots
    ``vari`` against ``clust`` on a white background with black axis lines.
    """
    renamed_columns = {
        "Food": "Food clusters",
        "Health": "Health clusters",
        "Food & Health": "Food & Health clusters"
    }

    box_cluster = pd.read_csv(path + "box_cluster.csv")
    box_cluster = box_cluster.rename(columns=renamed_columns)
    # Every cluster column is incremented by one.
    for cluster_column in renamed_columns.values():
        box_cluster[cluster_column] = box_cluster[cluster_column] + 1
    box_cluster = box_cluster.sort_values(clust)

    palette = ['#fdca26', '#ed7953', '#bd3786', '#7201a8', '#0d0887']
    fig = px.box(box_cluster,
                 x=clust,
                 y=vari,
                 color=clust,
                 color_discrete_sequence=palette)

    axis_line = dict(showline=True, linewidth=2, linecolor='black')
    fig.update_layout(plot_bgcolor='white')
    fig.update_yaxes(**axis_line)
    fig.update_xaxes(**axis_line)
    fig.update_layout(legend={'yanchor': 'bottom', 'y': 0})

    return fig