def get_target_histogram(targets: np.ndarray, name: str, color_scheme: list = BLUE_SCHEME) -> dict:
    traces = [go.Histogram(x=targets, marker=dict(color=color_scheme[1]))]
    layout = dict(title=name)
    return {'layout': layout, 'data': traces}
def gera_histograma_taxas(dg, ds):
    taxa_municipal_máxima_no_alvo = ds.df_M_Alvo_em_foco['Taxa_por_hab_SUS'].max()
    hist, bin_edges = np.histogram(ds.df_M_Alvo_em_foco['Taxa_por_hab_SUS'])
    mean_count = np.mean(hist) / 2  # sets the height of the vertical marker lines
    taxa_município_em_foco_trace = go.Scatter(
        x=[ds.taxa_anual_M_Res * ds.denominador_populacional] * 2,
        y=[0, mean_count],
        name=wrap(f'Taxa em {ds.M_Res_em_foco}', 30),
        mode='lines',
        line=dict(width=2),
        hoverinfo='skip',
        opacity=0.9)
    taxa_BR_trace = go.Scatter(
        x=[ds.taxa_anual_Br * ds.denominador_populacional] * 2,
        y=[0, mean_count],
        name=wrap('Taxa Brasil', 30),
        mode='lines',
        line=dict(width=1),
        hoverinfo='skip',
        opacity=0.9)
    taxa_capital_trace = go.Scatter(
        x=[ds.taxa_anual_Capital * ds.denominador_populacional] * 2,
        y=[0, mean_count],
        name=wrap(f'Taxa {ds.Capital_em_foco}', 30),
        mode='lines',
        line=dict(width=1),
        hoverinfo='skip',
        opacity=0.9)
    taxa_UF_trace = go.Scatter(
        x=[ds.taxa_anual_UF * ds.denominador_populacional] * 2,
        y=[0, mean_count],
        name=wrap(f'Taxa {ds.UF_M_Res_em_foco}', 30),
        mode='lines',
        line=dict(width=1),
        hoverinfo='skip',
        opacity=0.9)
    taxa_limiar_trace = go.Scatter(
        x=[ds.taxa_anual_limiar * ds.denominador_populacional] * 2,
        y=[0, mean_count],
        name=wrap('Limiar log-normal', 30),
        mode='lines',
        line=dict(width=1),
        hoverinfo='skip',
        opacity=0.9)
    df_M_Alvo_em_foco_qtd_maior_que_zero = ds.df_M_Alvo_em_foco[
        ds.df_M_Alvo_em_foco['Qtd_EMA'] > 0]
    data = [
        go.Histogram(
            x=df_M_Alvo_em_foco_qtd_maior_que_zero['Taxa_por_hab_SUS'] *
            ds.denominador_populacional,
            name='Número de municípios'),
        taxa_BR_trace, taxa_UF_trace, taxa_capital_trace, taxa_limiar_trace,
        taxa_município_em_foco_trace
    ]
    layout = go.Layout(
        title={
            "text": f'Histograma de taxas de Atendimentos em {dg.ano_em_foco} no alvo <br>{ds.alvo_completo}',
            'x': 0.5,
            'xanchor': 'center'
        },
        font=dict(size=10),
        yaxis={'title': 'Número de municípios'},
        xaxis={
            'title': f'Atendimentos / {f_int(ds.denominador_populacional)} Habitantes SUS'
        },
    )
    return go.Figure(data=data, layout=layout)
print('Youngest passenger age:', train.Age.min(), 'years')
print('Average age on the ship:', train.Age.mean(), 'years')

# In[ ]:

Age_female_survived = train[(train.Sex == 'female') & (train.Survived == 1)].Age
Age_female_dead = train[(train.Sex == 'female') & (train.Survived == 0)].Age
Age_male_survived = train[(train.Sex == 'male') & (train.Survived == 1)].Age
Age_male_dead = train[(train.Sex == 'male') & (train.Survived == 0)].Age

fig = tools.make_subplots(rows=1, cols=2, subplot_titles=('Female', 'Male'))

survived_female = go.Histogram(name='Survived_female', x=Age_female_survived)
fig.append_trace(survived_female, 1, 1)

dead_female = go.Histogram(name='Dead_female', x=Age_female_dead)
fig.append_trace(dead_female, 1, 1)
fig.layout.xaxis1.update({'title': 'Age'})

survived_male = go.Histogram(name='Survived_male', x=Age_male_survived)

dead_male = go.Histogram(name='Dead_male', x=Age_male_dead)
layout = go.Layout(showlegend=True,
                   xaxis=dict(title='Male Rating', range=[0, 1]),
                   yaxis=dict(title='Female Rating', range=[0, 1]))
fig = go.Figure(data=trace, layout=layout)
py.iplot(fig, filename='inverse-ratings-by-gender')

###########################################################
# Distribution of women rating movies vs. men
###########################################################
x0 = movie_graph_data['no_of_female_ratings'] / movie_graph_data['gender_ratings']
data = go.Histogram(x=x0)
layout = go.Layout(xaxis=dict(title='Proportion of Female Raters', range=[0, 1]))
fig = go.Figure(data=[data], layout=layout)
py.iplot(fig, filename='proportion-of-female-raters')

###########################################################
# Rating differentials between women and men
###########################################################
test = movie_graph_data[['no_of_female_ratings', 'gender_ratings', 'ratings_differential']].copy()
# bin the female-rater proportion to the nearest 0.05
test['proportion'] = (test['no_of_female_ratings'] / test['gender_ratings'] / 5).round(2) * 5
test = test.groupby(['proportion'])[['proportion', 'ratings_differential']].agg(['mean', 'count'])
test['films'] = ['Number of films: ' + str(test['proportion']['count'].iloc[i])
                 for i in range(0, len(test))]
# Count the number of recordings per label
number_of_recordings = []
for direct in dirs:
    waves = [
        f for f in os.listdir(join(train_audio_path, direct))
        if f.endswith('.wav')
    ]
    number_of_recordings.append(len(waves))

# Plot
trace = go.Bar(
    x=dirs,
    y=number_of_recordings,
    marker=dict(color=number_of_recordings,
                colorscale='Viridis',
                showscale=True),
)
layout = go.Layout(title='Number of recordings in given label',
                   xaxis=dict(title='Words'),
                   yaxis=dict(title='Number of recordings'))
py.iplot(go.Figure(data=[trace], layout=layout))
def f(viz='X-Y', colx='', coly='', colz='', colw=10, na=False, asc='', source=df):
    if viz == 'X-Y':
        print('{}: {:0.1%} null ({:d} out of {:d})'.format(
            colx, source[colx].isnull().sum() / source.shape[0],
            source[colx].isnull().sum(), source.shape[0]))
        print('{}: {:0.1%} null ({:d} out of {:d})'.format(
            colz, source[colz].isnull().sum() / source.shape[0],
            source[colz].isnull().sum(), source.shape[0]))
        temp = source
        if na:
            temp = temp.fillna(-1000)
        if coly == 'unaggregrated':
            grouped = temp.loc[:, [colx, colz]].set_index(colx)
            grouped.columns = pd.MultiIndex.from_product([[colz], [coly]])
        if coly in ['count', 'sum', 'mean', 'std', 'max', 'min']:
            grouped = temp.groupby(colx).agg({colz: [coly]})
        elif coly == 'uniques':
            grouped = temp.groupby(colx).apply(lambda g: pd.Series(
                g[colz].unique().size,
                index=pd.MultiIndex.from_product([[colz], [coly]])))
        # print(grouped.head())
        trace = go.Scattergl(x=grouped.index,
                             y=grouped[colz][coly],
                             name=coly + ' of ' + colz + ' vs ' + colx,
                             mode=colw)
        layout = go.Layout(title=coly + ' of ' + colz + ' vs ' + colx,
                           yaxis=dict(title=coly + ' of ' + colz),
                           xaxis=dict(title=colx))
    elif viz == 'pareto':
        print('{}: {:0.1%} null ({:d} out of {:d})'.format(
            colx, source[colx].isnull().sum() / source.shape[0],
            source[colx].isnull().sum(), source.shape[0]))
        print('{}: {:0.1%} null ({:d} out of {:d})'.format(
            colz, source[colz].isnull().sum() / source.shape[0],
            source[colz].isnull().sum(), source.shape[0]))
        sort_order = (asc == 'Ascending')
        temp = source
        if na:
            temp = temp.fillna(-1000)
        grouped = temp.groupby(colx)
        if coly in ['count', 'sum', 'mean', 'std', 'max', 'min']:
            grouped = grouped.agg({colz: [coly]})
        elif coly == 'uniques':
            grouped = grouped.apply(lambda g: pd.Series(
                g[colz].unique().size,
                index=pd.MultiIndex.from_product([[colz], [coly]])))
        grouped = grouped.reset_index().sort_values(
            [(colz, coly)], ascending=sort_order).head(colw).sort_values(
                [(colz, coly)], ascending=(not sort_order))
        # print(grouped)
        trace = go.Bar(y=grouped[colx],
                       x=grouped[colz][coly],
                       name=colx,
                       marker=dict(color='rgb(49,130,189)'),
                       orientation='h')
        layout = go.Layout(
            title=coly + ' of ' + colz + ' by ' + colx,
            yaxis=dict(
                title=colx,
                type="category",
                # categoryorder = "category descending"
                tickformat=".3f"),
            xaxis=dict(title=coly + ' of ' + colz),
            margin=dict(l=160))
    else:
        print('{}: {:0.1%} null ({:d} out of {:d})'.format(
            colx, source[colx].isnull().sum() / source.shape[0],
            source[colx].isnull().sum(), source.shape[0]))
        temp = source
        if na:
            temp = temp.fillna(-1000)
        trace = go.Histogram(x=temp[colx],
                             name=colx,
                             marker=dict(color='rgb(49,130,189)'))
        layout = go.Layout(title='distribution',
                           yaxis=dict(title='count'),
                           xaxis=dict(title=colx))
    data = [trace]
    fig = go.Figure(data=data, layout=layout)
    plot_url = py.iplot(fig)
# So there are columns about Estimate, Margin of Error, Percent related to Sex, Age, Race, and Total Population. Let's start exploring these variables.
#
# ### Distribution of Total Population across Census Tracts
#
# <br>
#
# **Census Tracts:**
# Census tracts (CTs) are small, relatively stable geographic areas that usually have a population between 2,500 and 8,000 persons. They are located in census metropolitan areas and in census agglomerations that had a core population of 50,000 or more in the previous census.
#

# In[ ]:

total_population = rca_df["HC01_VC03"][1:]
trace = go.Histogram(x=total_population, marker=dict(color='orange', opacity=0.6))
layout = dict(title="Total Population Distribution - Across Census Tracts",
              margin=dict(l=200),
              width=800,
              height=400)
data = [trace]
fig = go.Figure(data=data, layout=layout)
iplot(fig)

male_pop = rca_df["HC01_VC04"][1:]
female_pop = rca_df["HC01_VC05"][1:]
trace1 = go.Histogram(x=male_pop,
                      name="male population",
                      marker=dict(color='blue', opacity=0.6))
trace2 = go.Histogram(x=female_pop,
                      name="female population",
                      marker=dict(opacity=0.6))
def stock(stockval):
    # fetch data for the security
    main_df = stock_data(stockval, 2, 120)
    # determine the ticker for the security
    ticker = get_ticker(stockval[:-4])
    # fetch the order book
    ten_orders = order_book(ticker)
    # fetch company indicators
    indicators = company_indicators(ticker)
    # fetch news
    news = get_news(get_isin(stockval[:-4]))
    # fetch financial data
    financial_data = get_financial_data(get_isin(stockval[:-4]))

    gross_profit = []
    net_profit = []
    sales = []
    debt = []
    capital = []
    dates = [x for x in financial_data[0] if x]
    try:
        for i in range(0, 2):
            for val in financial_data[1][i]:
                if len(val) > 0:
                    if val[0] == "Zysk (strata) brutto":
                        gross_profit.append([
                            int("".join(x.split())) * 1000 for x in val[1] if x
                        ])
                    elif val[0] == "Zysk (strata) netto":
                        net_profit.append([
                            int("".join(x.split())) * 1000 for x in val[1] if x
                        ])
                    elif val[0] == "Przychody netto ze sprzedaży produktów, towarów i materiałów":
                        sales.append([
                            int("".join(x.split())) * 1000 for x in val[1] if x
                        ])
                    elif val[0] == "Zobowiązania i rezerwy na zobowiązania":
                        debt.append([
                            int("".join(x.split())) * 1000 for x in val[1] if x
                        ])
                    elif val[0] == "Kapitał własny":
                        capital.append([
                            int("".join(x.split())) * 1000 for x in val[1] if x
                        ])

        years = dates[4:8]

        # quarterly net/gross profit
        profit_data_net_q = go.Bar(x=dates[0:4], y=net_profit[0], name='net')
        profit_data_gross_q = go.Bar(x=dates[0:4], y=gross_profit[0], name='gross')
        profit_layout = go.Layout(barmode='group')
        profit_data_q = [profit_data_net_q, profit_data_gross_q]
        profit_fig_q = go.Figure(data=profit_data_q, layout=profit_layout)
        pio.write_image(profit_fig_q, 'static/profits_q.png', width=600, height=400)

        # yearly net/gross profit
        profit_data_net_y = go.Bar(x=dates[4:8], y=net_profit[1], name='net')
        profit_data_gross_y = go.Bar(x=years, y=gross_profit[1], name='gross')
        layout_reversed = go.Layout(xaxis=dict(autorange='reversed'), bargap=0.5)
        bars_style = go.Layout(bargap=0.5)
        profit_data_y = [profit_data_net_y, profit_data_gross_y]
        profit_fig_y = go.Figure(data=profit_data_y, layout=layout_reversed)
        pio.write_image(profit_fig_y, 'static/profits_y.png', width=600, height=400)

        if len(sales) > 0:
            sales_status = True
            sales_q = [go.Bar(x=dates[0:4], y=sales[0])]
            fig_sales = go.Figure(data=sales_q, layout=bars_style)
            pio.write_image(fig_sales, 'static/sales_q.png', width=600, height=400)
            sales_y = [go.Bar(x=years, y=sales[1])]
            fig_sales = go.Figure(data=sales_y, layout=layout_reversed)
            pio.write_image(fig_sales, 'static/sales_y.png', width=600, height=400)
        else:
            sales_status = False

        if len(debt) > 0:
            debt_status = True
            debt_q = [go.Bar(x=dates[0:4], y=debt[0])]
            fig_sales = go.Figure(data=debt_q, layout=bars_style)
            pio.write_image(fig_sales, 'static/debt_q.png', width=600, height=400)
            debt_y = [go.Bar(x=years, y=debt[1])]
            fig_sales = go.Figure(data=debt_y, layout=layout_reversed)
            pio.write_image(fig_sales, 'static/debt_y.png', width=600, height=400)
        else:
            debt_status = False

        if len(capital) > 0:
            capital_status = True
            capital_q = [go.Bar(x=dates[0:4], y=capital[0])]
            fig_sales = go.Figure(data=capital_q, layout=bars_style)
            pio.write_image(fig_sales, 'static/capital_q.png', width=600, height=400)
            capital_y = [go.Bar(x=years, y=capital[1])]
            fig_sales = go.Figure(data=capital_y, layout=layout_reversed)
            pio.write_image(fig_sales, 'static/capital_y.png', width=600, height=400)
        else:
            capital_status = False
    except Exception:
        sales_status = False
        debt_status = False
        capital_status = False
        print("no financial data")

    # fetch shareholder data
    shareholders = get_shareholders(ticker)
    # fetch the company profile
    company_details = company_info(ticker)
    # fetch detailed transaction data
    transaction_data(stockval)
    # returns the share volume traded at each price
    stock_prices = analyze_stock_transactions(stockval)
    vol_x, vol_y = zip(*stock_prices)
    vol_data = [go.Bar(x=vol_y, y=vol_x, orientation='h')]
    vol_layout = go.Layout(yaxis=dict(dtick=0.25))
    vol_fig = go.Figure(data=vol_data, layout=vol_layout)
    pio.write_image(vol_fig, 'static/daily_volume.png', width=700, height=500)

    sma_100 = sma(stockval, 100)
    sma_200 = sma(stockval, 200)

    a = daily_return(stockval)
    data = [go.Bar(x=a.index, y=a['<CLOSE>'])]
    fig = go.Figure(data=data)
    pio.write_image(fig, 'static/daily_return.png', width=600, height=400)

    # histogram of daily returns
    histogram = [go.Histogram(x=a['<CLOSE>'])]
    hist_layout = go.Layout(xaxis=dict(tick0=0, dtick=2.0), bargap=0.1)
    histogram_fig = go.Figure(data=histogram, layout=hist_layout)
    pio.write_image(histogram_fig, 'static/histogram.png', width=550, height=350)

    # moving averages
    sma100 = go.Scatter(x=sma_100.index,
                        y=sma_100['<CLOSE>'],
                        line=dict(color='#af211c', dash='dot'),
                        opacity=0.8,
                        name='sma 100')
    sma200 = go.Scatter(x=sma_200.index,
                        y=sma_200['<CLOSE>'],
                        line=dict(color='#bc59ff', dash='dot'),
                        opacity=0.8,
                        name='sma 200')

    # Bollinger bands + candlestick chart
    boll = bollinger(stockval)
    boll_high = go.Scatter(x=boll[0].index,
                           y=boll[0]['<CLOSE>'],
                           line=dict(color='#17BECF'),
                           opacity=0.8,
                           name='bollinger up')
    boll_min = go.Scatter(x=boll[1].index,
                          y=boll[1]['<CLOSE>'],
                          line=dict(color='#17BECF'),
                          opacity=0.8,
                          name='bollinger 65 mean')
    boll_low = go.Scatter(x=boll[2].index,
                          y=boll[2]['<CLOSE>'],
                          line=dict(color='#17BECF'),
                          opacity=0.8,
                          name='bollinger down')
    vol = go.Bar(x=main_df[-90:].index,
                 y=main_df[-90:]['<VOL>'],
                 marker=dict(
                     color='rgb(158,202,225)',
                     line=dict(color='rgb(8,48,107)', width=0.5),
                 ),
                 opacity=0.2,
                 yaxis='y2',
                 name='volume')
    candle_boll = go.Candlestick(x=main_df[:90].index,
                                 open=main_df[:90]['<OPEN>'],
                                 high=main_df[:90]['<HIGH>'],
                                 low=main_df[:90]['<LOW>'],
                                 close=main_df[:90]['<CLOSE>'])
    candle_layout = go.Layout(xaxis=dict(rangeslider=dict(visible=False)),
                              yaxis2=dict(title='Volume',
                                          overlaying='y',
                                          side='right'))
    boll_data = [
        boll_high, boll_min, boll_low, candle_boll, vol, sma100, sma200
    ]
    boll_fig = dict(data=boll_data, layout=candle_layout)
    pio.write_image(boll_fig, 'static/chart.png', width=1920, height=1080)

    return render_template('stock.html',
                           data_list=stock_list,
                           stock_name=stockval[:-4],
                           o_book=ten_orders,
                           close_value=main_df.iloc[-1]['<CLOSE>'],
                           daily_return=round(a.iloc[-1]['<CLOSE>'], 2),
                           indicators=indicators,
                           stock_news=news,
                           shareholder=shareholders,
                           ticker=ticker,
                           fin_data=financial_data,
                           prices=stock_prices,
                           details=company_details,
                           sales_status=sales_status,
                           debt_status=debt_status,
                           capital_status=capital_status)
# ### Plots

ALS_proc_gender_count = ALS_proc_df.groupby('REF').first().Gender.value_counts().to_frame()
data = [go.Pie(labels=ALS_proc_gender_count.index, values=ALS_proc_gender_count.Gender)]
layout = go.Layout(title='Patients Gender Demographics')
fig = go.Figure(data, layout)
fig.show()

ALS_proc_niv_count = ALS_proc_df.NIV.value_counts().to_frame()
data = [go.Pie(labels=ALS_proc_niv_count.index, values=ALS_proc_niv_count.NIV)]
layout = go.Layout(title='Visits where the patient is using NIV')
fig = go.Figure(data, layout)
fig.show()

data = [go.Histogram(x=ALS_proc_df.NIV)]
layout = go.Layout(title='Number of visits where the patient is using NIV')
fig = go.Figure(data, layout)
fig.show()

ALS_proc_patient_niv_count = ALS_proc_df.groupby('subject_id').niv.max().value_counts().to_frame()
data = [go.Pie(labels=ALS_proc_patient_niv_count.index, values=ALS_proc_patient_niv_count.niv)]
layout = go.Layout(title='Patients who eventually use NIV')
fig = go.Figure(data, layout)
fig.show()

data = [go.Scatter(x=ALS_proc_df.FVC, y=ALS_proc_df.NIV, mode='markers')]
    return x


# Step 1: generate samples of the uniform variable U
x_n = constante.SEMILLA
u = []  # array of uniform samples
x = []  # array of inverse-transform samples
for _ in range(constante.CANT_EXPERIMENTOS):
    x_n = gcl_uniforme(x_n)
    u.append(x_n)

# Step 2: apply the inverse transform
for i in range(len(u)):
    x.append(obtenerTransfInversa(u[i]))  # inverse transform

# Show a histogram of the result
data = [go.Histogram(x=x)]
#py.plot(data, filename='histograma-inversa-normal-v1')

# Report sample and theoretical mean, variance and mode
media = np.mean(x)
varianza = np.var(x)
moda = max(set(x), key=x.count)
print("Sample mean: {0} Sample variance: {1} Sample mode: {2}".format(
    media, varianza, moda))
print("Theoretical mean: {0} Theoretical variance: {1} Theoretical mode: {2}".format(
    0, 1, 0))
y="Time Decimal", data=daily_lows, palette='rainbow') # In[ ]: box_tracer = [] for key, day in dayOfWeek.items(): box_tracer.append( go.Box(y=daily_lows[daily_lows['Day of Week'] == day]['Time Decimal'], name=day)) iplot(box_tracer) # ### Tuesday Low Histogram # # Let's take a closer look into the distrobution of the lows on Tuesdays. # In[ ]: sns.distplot(daily_lows[daily_lows['Day of Week'] == 'Tue']['Time Decimal'], bins=24, kde=False) # In[ ]: histo_tracer = [ go.Histogram( x=daily_lows[daily_lows['Day of Week'] == 'Tue']['Time Decimal']) ] iplot(histo_tracer)
def main():
    db = QuestionDatabase()
    question_lookup = db.all_questions()
    questions = list(question_lookup.values())
    guesser_train_questions = [q for q in questions if q.fold == "guesstrain"]
    guesser_train_answers = [q.page for q in guesser_train_questions]
    answer_counts = Counter(guesser_train_answers)
    answer_set = set(answer_counts.keys())
    app = dash.Dash()
    app.layout = html.Div(children=[
        html.H1(children="Quiz Bowl Question Explorer"),
        compute_stats(questions, db.location),
        html.H2("Question Inspector"),
        dcc.Dropdown(
            options=[{"label": q.qnum, "value": q.qnum} for q in questions],
            value=questions[0].qnum,
            id="question-selector",
        ),
        html.Div([html.Div(id="question-display")]),
        dcc.Graph(
            id="answer-count-plot",
            figure=go.Figure(
                data=[
                    go.Histogram(x=list(answer_counts.values()),
                                 name="Answer Counts")
                ],
                layout=go.Layout(title="Answer Count Distribution",
                                 showlegend=True),
            ),
        ),
        dcc.Graph(
            id="answer-count-cum-plot",
            figure=go.Figure(
                data=[
                    go.Histogram(
                        x=list(answer_counts.values()),
                        name="Answer Counts Cumulative",
                        cumulative=dict(enabled=True, direction="decreasing"),
                        histnorm="percent",
                    )
                ],
                layout=go.Layout(title="Answer Count Cumulative Distribution",
                                 showlegend=True),
            ),
        ),
        html.Label("Answer Selection"),
        dcc.Dropdown(
            options=sorted(
                [{"label": a, "value": a} for a in answer_set],
                key=lambda k: k["label"],
            ),
            id="answer-list",
        ),
        html.Div(id="answer-count"),
    ])

    @app.callback(
        Output(component_id="answer-count", component_property="children"),
        [Input(component_id="answer-list", component_property="value")],
    )
    def update_answer_count(answer):
        return f"Answer: {answer} Question Count: {answer_counts[answer]}"

    @app.callback(
        Output(component_id="question-display", component_property="children"),
        [Input(component_id="question-selector", component_property="value")],
    )
    def update_question(qb_id):
        qb_id = int(qb_id)
        question = question_lookup[qb_id]
        sentences, answer, _ = question.to_example()
        return ([
            html.P(f"ID: {qb_id} Fold: {question.fold}"),
            html.H3("Sentences")
        ] + [html.P(f"{i}: {sent}") for i, sent in enumerate(sentences)] +
                [html.H3("Answer"), html.P(answer)])

    app.css.append_css(
        {"external_url": "https://codepen.io/chriddyp/pen/bWLwgP.css"})
    app.run_server(debug=True)
        break
print("***DONE!")

#diff_values = np.asarray(diff_values)
#np.savetxt(result_path + 'xy.txt', np.c_[X, Y], delimiter=',', fmt='%i')
#df = pd.DataFrame({"X-value": np.asarray(X), "Y-value": np.asarray(Y)})
#df.to_csv(result_path + "dist.csv", index=False)

trace1 = go.Histogram(
    x=X,
    marker=dict(color='#FFD7E9'),
    opacity=0.50)
trace2 = go.Histogram(
    x=Y,
    marker=dict(color='#EB89B5',
                line=dict(color='rgb(8,48,107)', width=1.5)),
    opacity=0.50)
data = [trace1, trace2]
layout = go.Layout(barmode='stack')
# tx_max_purchase holds the maximum invoice date in our dataset
tx_max_purchase['Recents'] = (tx_max_purchase['MaxPurchaseDate'].max() -
                              tx_max_purchase['MaxPurchaseDate']).dt.days

# merge this dataframe into our new user dataframe
tx_user = pd.merge(tx_user, tx_max_purchase[['CustomerID', 'Recents']], on='CustomerID')
tx_user.head()

# plot a histogram to check how 'Recents' is distributed across customers;
# once we see its spread, we can use the K-means algorithm to assign recency
# groups and scores
plot_data = [go.Histogram(x=tx_user['Recents'])]
plot_layout = go.Layout(title='Recents')
fig = go.Figure(data=plot_data, layout=plot_layout)
pyoff.iplot(fig)

# In[52]:

from sklearn.cluster import KMeans

# elbow method: compute the sum of squared errors (inertia) for k = 1..9
sse = {}
tx_recency = tx_user[['Recents']].copy()
for k in range(1, 10):
    kmeans = KMeans(n_clusters=k, max_iter=1000).fit(tx_recency)
    tx_recency["clusters"] = kmeans.labels_
    sse[k] = kmeans.inertia_
plt.figure()
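# Once the elbow plot suggests a k, the scoring step might look like the
# sketch below. This is illustrative, not part of the original notebook: it
# assumes k=4 and that a lower 'Recents' value (a more recent purchase) should
# receive a higher cluster id / score; order_cluster is a hypothetical helper.
kmeans = KMeans(n_clusters=4, max_iter=1000).fit(tx_user[['Recents']])
tx_user['RecencyCluster'] = kmeans.labels_


def order_cluster(cluster_field, target_field, df, ascending):
    # relabel cluster ids so they are ordered by the cluster mean of target_field
    grouped = df.groupby(cluster_field)[target_field].mean().reset_index()
    grouped = grouped.sort_values(by=target_field,
                                  ascending=ascending).reset_index(drop=True)
    grouped['index'] = grouped.index
    df = pd.merge(df, grouped[[cluster_field, 'index']], on=cluster_field)
    df = df.drop([cluster_field], axis=1).rename(columns={'index': cluster_field})
    return df


# lower recency gets the higher cluster id
tx_user = order_cluster('RecencyCluster', 'Recents', tx_user, ascending=False)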
                    'title': 'Complete date',
                    'range': [two_months_ago, current_date]
                },
                yaxis={'title': 'Number of cases'},
                # showlegend=True
            )
        }),
    dcc.Graph(
        id='histogram',
        figure={
            'data': [
                go.Histogram(
                    x=waiting_days,
                    # histnorm='probability',
                    xbins=dict(size=5),
                    marker=dict(color='rgb(158,202,225)',
                                line=dict(color='rgb(8,48,107)', width=1.5)),
                ),
            ],
            'layout': go.Layout(
                title='Distribution of waiting days for cases cleared in the past 4 weeks',
                xaxis={
                    'title': 'Waiting days',
                    'range': [0, 90]
                },
                yaxis={'title': 'Number of cases'},
                # showlegend=True
def hypt(accuracy, iv, dv, perms=10000, show_graph=True, name="hyptest",
         print_progress=True, multiprocess=True, save_perm_accuracies=True):
    '''
    Tests whether classifiers are performing significantly better than chance.
    Permutation-based hypothesis testing: the correspondence between the IV and
    the DV is removed via randomization to generate a null distribution.

    :param accuracy: observed classifier accuracy to test against the null distribution
    :param iv: independent variable (features); shuffled on each permutation
    :param dv: dependent variable (labels)
    :param perms: number of permutations used to build the null distribution
    :param show_graph: if True, plot a histogram of the null accuracies
    :param name: base filename for the plot and the saved permutation accuracies
    :param print_progress: if True, report progress in single-process mode
    :param multiprocess: if True, run permutations across all CPU cores
    :param save_perm_accuracies: if True, write the null accuracies to a CSV file
    :return: estimated p-value (fraction of permuted accuracies >= the observed accuracy)
    '''
    import copy
    null_accuracy = []
    if multiprocess:
        import multiprocessing
        async_kwargs = {
            "hyp_test": False,
            "show_graph": False,
            "write_out": False
        }
        print("Instantiating multiprocessing for " + str(perms) +
              " permutations on " + str(multiprocessing.cpu_count()) + " cores.")
        with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
            out_dicts = []
            resps = []
            for i in range(perms):
                civ = copy.deepcopy(iv)
                np.random.shuffle(civ)
                resps.append(
                    pool.apply_async(leave_one_out,
                                     args=(civ, dv),
                                     kwds=async_kwargs,
                                     callback=out_dicts.append))
            for r in resps:
                r.wait()
            null_accuracy = [d['accuracy'] for d in out_dicts]
    else:
        for i in range(perms):
            civ = copy.deepcopy(iv)
            np.random.shuffle(civ)
            null_accuracy.append(
                leave_one_out(civ, dv,
                              show_graph=False,
                              hyp_test=False,
                              write_out=False)['accuracy'])
            if print_progress:
                print("Permutation test iteration #: " + str(i + 1))
                print("Percent complete: " + str(((i + 1) / perms) * 100) + "%")
    if show_graph:
        # todo: add line to show where the classifier's average accuracy was
        import plotly.graph_objs as go
        from plotly.offline import plot
        fig = go.Figure(data=[go.Histogram(x=null_accuracy, opacity=0.9)])
        plot(fig, filename=name + ".html")
    g = [s for s in null_accuracy if s >= accuracy]
    if save_perm_accuracies:
        import csv
        with open(name + '_null_accuracies.csv', "w") as f:
            w = csv.writer(f)
            for a in null_accuracy:
                w.writerow([a])
    return len(g) / len(null_accuracy)  # estimated p-value under the null hypothesis
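# A minimal usage sketch (an assumption, not from the original module): it
# presumes leave_one_out(iv, dv) from this codebase returns a dict with an
# 'accuracy' key, and that iv and dv are numpy arrays of features and labels.
observed = leave_one_out(iv, dv,
                         show_graph=False,
                         hyp_test=False,
                         write_out=False)['accuracy']
p_value = hypt(observed, iv, dv, perms=1000, multiprocess=False)
print("Probability of the observed accuracy under the null: " + str(p_value))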
#######
# Objective: Create a histogram that plots the 'length' field
# from the Abalone dataset (../data/abalone.csv).
# Set the range from 0 to 1, with a bin size of 0.02
######

# Perform imports here:
import plotly.offline as pyo
import plotly.graph_objs as go
import pandas as pd

# create a DataFrame from the .csv file:
df = pd.read_csv('../../Data/abalone.csv')

# create a data variable:
data = [
    go.Histogram(x=df['length'], xbins={'start': 0, 'end': 1, 'size': 0.02})
]

# add a layout
layout = go.Layout(title='Length Histogram')

# create a fig from data & layout, and plot the fig
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig, filename='histogram-solution.html')
py.iplot(h_data_x, filename="histogram consumer good x")
py.iplot(h_data_y, filename="histogram consumer good y")
py.iplot(h_data_k, filename="histogram producer capital")
py.iplot(h_data_l, filename="histogram producer labor")
py.iplot(h_data_s, filename="histogram market supply")
py.iplot(h_data_d, filename="histogram market demand")
"""

# differences between the analytic (class) solution and the simulation results
data = []
data.extend([
    class_x - sim_x for class_x, sim_x in zip(class_consumer_x, sim_consumer_x)
])
data.extend([
    class_y - sim_y for class_y, sim_y in zip(class_consumer_y, sim_consumer_y)
])
data.extend([
    class_k - sim_k for class_k, sim_k in zip(class_producer_k, sim_producer_k)
])
data.extend([
    class_l - sim_l for class_l, sim_l in zip(class_producer_l, sim_producer_l)
])
data.extend(
    [class_s - sim_s for class_s, sim_s in zip(class_market_s, sim_market_s)])
data.extend(
    [class_d - sim_d for class_d, sim_d in zip(class_market_d, sim_market_d)])

h_data = [go.Histogram(x=data)]
py.iplot(h_data, filename="histogram total data")

f.close()
# * y = y axis
# * opacity = opacity of the histogram
# * name = legend name
# * marker = color of the histogram
# * trace2 = second histogram
# * layout = layout
# * barmode = bar mode of the histogram, e.g. *overlay*; you can also change it
#   to *stack* (see the sketch after this cell)

# In[ ]:

# prepare data
x2011 = timesData.student_staff_ratio[timesData.year == 2011]
x2012 = timesData.student_staff_ratio[timesData.year == 2012]

trace1 = go.Histogram(x=x2011,
                      opacity=0.75,
                      name="2011",
                      marker=dict(color='rgba(171, 50, 96, 0.6)'))
trace2 = go.Histogram(x=x2012,
                      opacity=0.75,
                      name="2012",
                      marker=dict(color='rgba(12, 50, 196, 0.6)'))
data = [trace1, trace2]
layout = go.Layout(barmode='overlay',
                   title='students-staff ratio in 2011 and 2012',
                   xaxis=dict(title='students-staff ratio'),
                   yaxis=dict(title='Count'))
fig = go.Figure(data=data, layout=layout)
iplot(fig)
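# To see the *stack* mode mentioned above, only barmode needs to change; this
# short variation reuses the traces defined in the previous cell.
layout = go.Layout(barmode='stack',
                   title='students-staff ratio in 2011 and 2012 (stacked)',
                   xaxis=dict(title='students-staff ratio'),
                   yaxis=dict(title='Count'))
fig = go.Figure(data=[trace1, trace2], layout=layout)
iplot(fig)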
import plotly.graph_objs as go
from plotly import tools
import plotly.plotly as py

external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

df = pd.read_csv(
    'https://raw.githubusercontent.com/dogatekin/Project/master/subsetDF.csv')

x = df['Review Score']
X = x[x > 0.5]

trace0 = go.Histogram(x=df['Sales Rank'])
trace1 = go.Histogram(x=X)

fig = tools.make_subplots(rows=1,
                          cols=2,
                          specs=[[{}, {}]],
                          shared_xaxes=False,
                          shared_yaxes=False,
                          vertical_spacing=0.001)
fig.append_trace(trace0, 1, 1)
fig.append_trace(trace1, 1, 2)

fig['layout'].update(height=400,
                     width=800,
                     title='Histogram of the two possible response variables')
import cauldron as cd
import plotly.graph_objs as go
from cauldron import plotting
import measurement_stats as mstats

df = cd.shared.couplings_df
variations = df['raw_mad'] / df['raw_median']

cd.display.plotly(data=go.Histogram(x=variations),
                  layout=plotting.create_layout(
                      title='Coupling Length Fractional Deviations',
                      x_label='Fractional Deviation',
                      y_label='Frequency (#)'))

dist = mstats.create_distribution(measurements=variations.tolist(),
                                  uncertainties=0.01)
x = mstats.distributions.uniform_range(dist, 3)
y = dist.probabilities_at(x)

cd.display.plotly(data=go.Scatter(x=x, y=y, mode='lines', fill='tozeroy'),
                  layout=plotting.create_layout(
                      title='Coupling Length KDE',
                      x_label='Coupling Lengths (m)',
                      y_label='Expectation Value'))
def update_frequency_graphs(n_clicks, jsonified_data):
    # error checks
    if (n_clicks is None) or (jsonified_data == []):
        print('Returning dash.no_update')
        return dash.no_update

    # get variables from jsonified_data
    datasets = json.loads(jsonified_data)
    df_key_1 = pd.read_json(datasets['df_key_1'], orient='split')
    df_key_2 = pd.read_json(datasets['df_key_2'], orient='split')
    df_key_1_2 = pd.read_json(datasets['df_key_1_2'], orient='split')
    key_1 = datasets['key_1']
    key_2 = datasets['key_2']

    # convert strings to dates
    df_key_1['datee'] = pd.to_datetime(df_key_1['datee']).dt.date
    df_key_2['datee'] = pd.to_datetime(df_key_2['datee']).dt.date
    df_key_1_2['datee'] = pd.to_datetime(df_key_1_2['datee']).dt.date

    # calculate consecutive transaction intervals
    intervals_1 = (df_key_1['datee'] - df_key_1['datee'].shift(1)).dropna().apply(lambda x: x.days)
    intervals_2 = (df_key_2['datee'] - df_key_2['datee'].shift(1)).dropna().apply(lambda x: x.days)
    intervals_1_2 = (df_key_1_2['datee'] - df_key_1_2['datee'].shift(1)).dropna().apply(lambda x: x.days)

    # make subplots
    fig = make_subplots(rows=3, cols=1, vertical_spacing=0.02)

    # key_1
    fig.add_trace(go.Histogram(x=intervals_1, name='Intervals'), row=1, col=1)
    # key_2
    fig.add_trace(go.Histogram(x=intervals_2, name='Intervals'), row=2, col=1)
    # key_1_2
    fig.add_trace(go.Histogram(x=intervals_1_2, name='Intervals'), row=3, col=1)

    # xaxis properties
    fig.update_xaxes(title_text="Interval (days)", row=3, col=1)

    # yaxis properties
    fig.update_yaxes(title_text="Frequency", row=1, col=1)
    fig.update_yaxes(title_text="Frequency", row=2, col=1)
    fig.update_yaxes(title_text="Frequency", row=3, col=1)

    # layout
    fig.update_layout(
        autosize=False,
        # width=800,
        # height=800,
        margin={'l': 0, 'b': 0, 't': 0, 'r': 0, 'autoexpand': True},
        hovermode='closest',
        showlegend=False)

    return fig
def get_data(self, series):
    trace = go.Histogram(x=series, opacity=0.75)
    return [trace]
df2 = pd.read_csv('https://query.data.world/s/jq7lk27hbmlg2t5rf4tqoksxnrs4fl')

targets = df1['Emotion']
corpus = df1['Text']
corpus2 = df2['content']
targets2 = df2['sentiment']

list_emotions = list(df1['Emotion'].unique())
list_emotions.append('all')
#names = list('1st dataset', '2nd dataset', 'global dataset')

"""define the graphs"""

# histogram of emotions in the 1st dataset
fig1 = go.Figure(data=[go.Histogram(x=targets, name='Emotions')],
                 layout={
                     'title': 'Emotions Histogram',
                     'xaxis_title_text': 'Emotions',
                     'yaxis_title_text': 'frequency'
                 })

# histogram of emotions in the 2nd dataset
fig2 = go.Figure(data=[go.Histogram(x=targets2, name='Emotions')],
                 layout={
                     'title': 'Emotions Histogram',
                     'xaxis_title_text': 'Emotions',
                     'yaxis_title_text': 'frequency'
                 })
# Replace data by its standardized values
data[list(ecdf_normalized_df.columns.values)] = ecdf_normalized_df

# Visualisations

# Flows
print(data['flow'].describe())
flows_winsorized = mstats.winsorize(data['flow'], limits=[0.05, 0.05])
layout = go.Layout(title="Basic histogram of flows (winsorized)")
data_hist = [go.Histogram(x=flows_winsorized)]
fig = go.Figure(data=data_hist, layout=layout)
iplot(fig, filename='Basic histogram of flows')

# Correlations
corr = ecdf_normalized_df.corr()
sns.heatmap(corr[(corr >= 0.5) | (corr <= -0.5)],
            cmap='viridis',
            vmax=1.0,
            vmin=-1.0,
            linewidths=0.1,
            annot=True,
            annot_kws={"size": 8},
            square=True)

# Coefficient of variation
def plot_distplot(
    df,
    column,
    hist=True,
    kde=True,
    gauss=False,
    show_box=True,
    points=False,
    x_range=None,
    notched=True,
    show_mean=True,
    kde_resolution=128,
    colors="default",
    n_bins=None,
    x_legend=0.85,
    y_legend=0.8,
    show_legend=True,
    legend="default",
    bargap=0.03,
    transparent=True,
):
    # values to fit
    variable_values = df[column].values
    if x_range is not None:
        variable_values = variable_values[(variable_values >= x_range[0])
                                          & (variable_values <= x_range[1])]
    xaxis_range = [min(variable_values), max(variable_values)]

    # generate values for the x axis
    x_values = np.linspace(min(variable_values), max(variable_values),
                           kde_resolution)

    # mean and standard deviation of the fitted values
    mean, std = stats.norm.fit(variable_values)

    # Gaussian probability density
    gauss_prob_dens = stats.norm.pdf(sorted(df[column].values), loc=mean, scale=std)

    # Kernel Density Estimate of the probability density
    # (named kde_estimate so it does not shadow the `kde` flag argument)
    kde_estimate = stats.gaussian_kde(variable_values)
    kde_values = kde_estimate(x_values)

    if colors == "default":
        colors = ["#191970", "#64b5f6", "#ef6c00", "#03adfc"]
    traces = []
    if show_box:
        box = go.Box(
            x=variable_values,
            marker=dict(color=colors[3]),
            boxpoints=points,
            notched=notched,
            boxmean=show_mean,
            showlegend=False,
        )
    if hist:
        hist_trace = go.Histogram(
            x=variable_values,
            histnorm="probability density",
            marker=dict(color=colors[0], opacity=0.7),
            nbinsx=n_bins,
            name="Histogram",
            showlegend=show_legend,
        )
        traces.append(hist_trace)
    # KDE probability density
    if kde:
        kde_trace = go.Scatter(x=x_values,
                               y=kde_values,
                               name="KDE PDF",
                               showlegend=show_legend)
        traces.append(kde_trace)
    # Gaussian probability density
    if gauss:
        gauss_trace = go.Scatter(
            x=sorted(variable_values),
            y=gauss_prob_dens,
            name="Gauss PDF",
            line=dict(color="#FFA500"),
            showlegend=show_legend,
        )
        traces.append(gauss_trace)
    if show_box:
        fig = make_subplots(rows=2, cols=1)
        fig.add_trace(box, row=1, col=1)
        for trace in traces:
            fig.add_trace(trace, row=2, col=1)
        fig.layout["xaxis2"].update(
            axis_layout(show_grid=False, range_=xaxis_range, ticks=""))
        fig.layout["yaxis2"].update(
            axis_layout(ticks=""),
            domain=[0, 0.75],
            showexponent="last",
            exponentformat="power",
        )
        fig.layout["xaxis"].update(
            axis_layout(
                title="",
                ticks="",
                showticklabels=False,
                range_=xaxis_range,
                show_grid=False,
            ))
        fig.layout["yaxis"].update(
            axis_layout(title="", ticks="", showticklabels=False, show_grid=False),
            domain=[0.78, 1],
        )
    else:
        fig = go.Figure()
        for trace in traces:
            fig.add_trace(trace)
        fig.layout["xaxis"].update(
            axis_layout(title="", range_=xaxis_range, show_grid=False))
        fig.layout["yaxis"].update(
            axis_layout(title="", show_grid=True),
            showexponent="last",
            exponentformat="power",
        )
    legend_font = {"x": x_legend, "y": y_legend}
    if legend == "default":
        font = dict(size=16, family="Times New Roman")
        legend_font.update({"font": font})
    fig.update_layout(legend=legend_font, bargap=bargap)
    if transparent:
        fig.update_layout(
            legend=dict(bgcolor="rgba(0,0,0,0)"),
            paper_bgcolor="rgba(0,0,0,0)",
            plot_bgcolor="rgba(0,0,0,0)",
        )
    return fig
def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping):
    if column_mapping:
        date_column = column_mapping.get('datetime')
        id_column = column_mapping.get('id')
        target_column = column_mapping.get('target')
        prediction_column = column_mapping.get('prediction')
        num_feature_names = column_mapping.get('numerical_features')
        if num_feature_names is None:
            num_feature_names = []
        else:
            num_feature_names = [
                name for name in num_feature_names
                if is_numeric_dtype(reference_data[name])
            ]
        cat_feature_names = column_mapping.get('categorical_features')
        if cat_feature_names is None:
            cat_feature_names = []
        else:
            cat_feature_names = [
                name for name in cat_feature_names
                if is_numeric_dtype(reference_data[name])
            ]
    else:
        date_column = 'datetime' if 'datetime' in reference_data.columns else None
        id_column = None
        target_column = 'target' if 'target' in reference_data.columns else None
        prediction_column = 'prediction' if 'prediction' in reference_data.columns else None
        utility_columns = [
            date_column, id_column, target_column, prediction_column
        ]
        num_feature_names = list(
            set(reference_data.select_dtypes([np.number]).columns) -
            set(utility_columns))
        cat_feature_names = list(
            set(reference_data.select_dtypes([object]).columns) -
            set(utility_columns))

    # set params data
    params_data = []
    drifted_features_count = 0
    # plt.ioff()
    for feature_name in num_feature_names:  # + cat_feature_names: #feature_names:
        prod_small_hist = np.histogram(
            production_data[feature_name][np.isfinite(
                production_data[feature_name])],
            bins=10,
            density=True)
        ref_small_hist = np.histogram(
            reference_data[feature_name][np.isfinite(
                reference_data[feature_name])],
            bins=10,
            density=True)
        feature_type = 'num'
        p_value = ks_2samp(reference_data[feature_name],
                           production_data[feature_name])[1]
        distr_sim_test = "Detected" if p_value < 0.05 else "Not Detected"
        drifted_features_count += 1 if p_value < 0.05 else 0
        params_data.append({
            "details": {
                "parts": [{
                    "title": "Data drift",
                    "id": feature_name + "_drift"
                }, {
                    "title": "Data distribution",
                    "id": feature_name + "_distr"
                }],
                "insights": []
            },
            "f1": feature_name,
            "f6": feature_type,
            "f3": {
                "x": list(ref_small_hist[1]),
                "y": list(ref_small_hist[0])
            },
            "f4": {
                "x": list(prod_small_hist[1]),
                "y": list(prod_small_hist[0])
            },
            "f2": distr_sim_test,
            "f5": round(p_value, 6)
        })

    for feature_name in cat_feature_names:  # feature_names:
        prod_small_hist = np.histogram(
            production_data[feature_name][np.isfinite(
                production_data[feature_name])],
            bins=10,
            density=True)
        ref_small_hist = np.histogram(
            reference_data[feature_name][np.isfinite(
                reference_data[feature_name])],
            bins=10,
            density=True)
        feature_type = 'cat'
        # p_value = ks_2samp(reference_data[feature_name], production_data[feature_name])[1]
        # CHI2 to be implemented for cases with different categories
        ref_feature_vc = reference_data[feature_name][np.isfinite(
            reference_data[feature_name])].value_counts()
        prod_feature_vc = production_data[feature_name][np.isfinite(
            production_data[feature_name])].value_counts()
        keys = set(
            list(reference_data[feature_name][np.isfinite(
                reference_data[feature_name])].unique()) +
            list(production_data[feature_name][np.isfinite(
                production_data[feature_name])].unique()))
        ref_feature_dict = dict.fromkeys(keys, 0)
        for key, item in zip(ref_feature_vc.index, ref_feature_vc.values):
            ref_feature_dict[key] = item
        prod_feature_dict = dict.fromkeys(keys, 0)
        for key, item in zip(prod_feature_vc.index, prod_feature_vc.values):
            prod_feature_dict[key] = item
        f_exp = [value[1] for value in sorted(ref_feature_dict.items())]
        f_obs = [value[1] for value in sorted(prod_feature_dict.items())]
        # scipy's signature is chisquare(f_obs, f_exp): production counts are
        # the observed frequencies, reference counts the expected ones
        p_value = chisquare(f_obs, f_exp)[1]
        distr_sim_test = "Detected" if p_value < 0.05 else "Not Detected"
        drifted_features_count += 1 if p_value < 0.05 else 0
        params_data.append({
            "details": {
                "parts": [{
                    "title": "Data drift",
                    "id": feature_name + "_drift"
                }, {
                    "title": "Data distribution",
                    "id": feature_name + "_distr"
                }],
                "insights": []
            },
            "f1": feature_name,
            "f6": feature_type,
            "f3": {
                "x": list(ref_small_hist[1]),
                "y": list(ref_small_hist[0])
            },
            "f4": {
                "x": list(prod_small_hist[1]),
                "y": list(prod_small_hist[0])
            },
            "f2": distr_sim_test,
            "f5": round(p_value, 6)
        })

    # set additionalGraphs
    additional_graphs_data = []
    for feature_name in num_feature_names + cat_feature_names:  # feature_names:
        # plot distributions
        fig = go.Figure()
        fig.add_trace(
            go.Histogram(x=reference_data[feature_name],
                         marker_color=grey,
                         opacity=0.6,
                         nbinsx=10,
                         name='Reference',
                         histnorm='probability'))
        fig.add_trace(
            go.Histogram(x=production_data[feature_name],
                         marker_color=red,
                         opacity=0.6,
                         nbinsx=10,
                         name='Production',
                         histnorm='probability'))
        fig.update_layout(legend=dict(orientation="h",
                                      yanchor="bottom",
                                      y=1.02,
                                      xanchor="right",
                                      x=1),
                          xaxis_title=feature_name,
                          yaxis_title="Share")
        distr_figure = json.loads(fig.to_json())

        # plot drift
        reference_mean = np.mean(reference_data[feature_name][np.isfinite(
            reference_data[feature_name])])
        reference_std = np.std(reference_data[feature_name][np.isfinite(
            reference_data[feature_name])], ddof=1)
        x_title = "Timestamp" if date_column else "Index"
        fig = go.Figure()
        fig.add_trace(
            go.Scatter(x=production_data[date_column]
                       if date_column else production_data.index,
                       y=production_data[feature_name],
                       mode='markers',
                       name='Production',
                       marker=dict(size=6, color=grey)))
        fig.update_layout(
            xaxis_title=x_title,
            yaxis_title=feature_name,
            showlegend=True,
            legend=dict(orientation="h",
                        yanchor="bottom",
                        y=1.02,
                        xanchor="right",
                        x=1),
            shapes=[
                dict(
                    type="rect",
                    # x-reference is assigned to the x-values
                    xref="paper",
                    # y-reference is assigned to the plot paper [0,1]
                    yref="y",
                    x0=0,
                    y0=reference_mean - reference_std,
                    x1=1,
                    y1=reference_mean + reference_std,
                    fillcolor="LightGreen",
                    opacity=0.5,
                    layer="below",
                    line_width=0,
                ),
                dict(type="line",
                     name='Reference',
                     xref="paper",
                     yref="y",
                     x0=0,  # min(testset_agg_by_date.index),
                     y0=reference_mean,
                     x1=1,  # max(testset_agg_by_date.index),
                     y1=reference_mean,
                     line=dict(color="Green", width=3)),
            ])
        drift_figure = json.loads(fig.to_json())

        # add distributions data
        additional_graphs_data.append(
            AdditionalGraphInfo(feature_name + '_distr', {
                "data": distr_figure['data'],
                "layout": distr_figure['layout']
            }))

        # add drift data
        additional_graphs_data.append(
            AdditionalGraphInfo(feature_name + '_drift', {
                "data": drift_figure['data'],
                "layout": drift_figure['layout']
            }))

    self.wi = BaseWidgetInfo(
        title="Data Drift: drift detected for " + str(drifted_features_count) +
        " out of " + str(len(num_feature_names) + len(cat_feature_names)) +
        " features",
        type="big_table",
        details="",
        alertStats=AlertStats(),
        alerts=[],
        alertsPosition="row",
        insights=[],
        size=2,
        params={
            "rowsPerPage": min(len(num_feature_names) + len(cat_feature_names), 10),
            "columns": [{
                "title": "Feature",
                "field": "f1"
            }, {
                "title": "Type",
                "field": "f6"
            }, {
                "title": "Reference Distribution",
                "field": "f3",
                "type": "histogram",
                "options": {
                    "xField": "x",
                    "yField": "y"
                }
            }, {
                "title": "Production Distribution",
                "field": "f4",
                "type": "histogram",
                "options": {
                    "xField": "x",
                    "yField": "y"
                }
            }, {
                "title": "Data drift",
drift", "field": "f2" }, { "title": "P-Value for Similarity Test", "field": "f5", "sort": "asc" }], "data": params_data }, additionalGraphs=additional_graphs_data)
)

textbox = widgets.Dropdown(
    description='City: ',
    value='SOUTHINGTON',
    options=drug_info_dataframe['city'].unique().tolist())

state = widgets.Dropdown(
    options=list(drug_info_dataframe['state'].unique()),
    value='CONNECTICUT',
    description='State:')

# Assign an empty figure widget with two traces
trace1 = go.Histogram(x=drug_info_dataframe['avg_drug_score'],
                      opacity=0.75,
                      name='Average Drug Score')
trace2 = go.Histogram(x=drug_info_dataframe['count'],
                      opacity=0.75,
                      name='Death Count')
g = go.FigureWidget(data=[trace1, trace2],
                    layout=go.Layout(title=dict(text='US Drug Death'),
                                     barmode='overlay'))


def validate():
    return (state.value in drug_info_dataframe['state'].unique()
            and textbox.value in drug_info_dataframe['city'].unique()
            and use_age_group.value in drug_info_dataframe['age_bin'].unique())
fig11 = go.Figure(data=data, layout=layout)
#iplot(fig11, filename='basic-bar')
graphJSON3 = json.dumps(fig11, cls=plotly.utils.PlotlyJSONEncoder)
#plt.savefig('static/marital_count.png')
#-----------------------------

# In[16]:

# Distribution of balances by marital status
single = df['balance'].loc[df['marital'] == 'single'].values
married = df['balance'].loc[df['marital'] == 'married'].values
divorced = df['balance'].loc[df['marital'] == 'divorced'].values

single_dist = go.Histogram(x=single,
                           histnorm='density',
                           name='single',
                           marker=dict(color='#6E6E6E'))
married_dist = go.Histogram(x=married,
                            histnorm='density',
                            name='married',
                            marker=dict(color='#2E9AFE'))
divorced_dist = go.Histogram(x=divorced,
                             histnorm='density',
                             name='divorced',
                             marker=dict(color='#FA5858'))

fig4 = tools.make_subplots(rows=3, print_grid=False)
fig4.append_trace(single_dist, 1, 1)
def update_output_div(graph_type, box_type):
    if graph_type == 'cars_by_fuel_type':
        values = []
        for ftype in obbey["Fuel Type"].unique():
            count = obbey[(obbey["Fuel Type"] == ftype)]["Fuel Type"].count()
            values.append(count)
        fig = {
            "data": [{
                "values": values,
                "labels": obbey["Fuel Type"].unique(),
                "hoverinfo": "label+percent",
                "type": "pie"
            }],
            "layout": {
                "title": "Cars by Fuel Type",
            }
        }
    elif graph_type == 'miles_per_gallon_city_highway':
        if box_type == 'miles_per_gallon_highway':
            data = []
            for mtype in obbey.Manufacturer.unique():
                trace = go.Box(
                    x=obbey[(obbey.Manufacturer == mtype)].Manufacturer,
                    y=obbey[(obbey.Manufacturer == mtype)]["Miles-per-gallon in highway"],
                    name=mtype,
                )
                data.append(trace)
            layout = go.Layout(
                title='Miles per gallon in Highway',
                showlegend=True,
                yaxis=dict(title="Miles per gallon"),
                xaxis=dict(title=""),
            )
            fig = dict(data=data, layout=layout)
        else:
            data = []
            for mtype in obbey.Manufacturer.unique():
                trace = go.Box(
                    x=obbey[(obbey.Manufacturer == mtype)].Manufacturer,
                    y=obbey[(obbey.Manufacturer == mtype)]["Miles-per-gallon in city"],
                    name=mtype,
                )
                data.append(trace)
            layout = go.Layout(
                title='Miles per gallon in City',
                showlegend=True,
                yaxis=dict(title="Miles per gallon"),
                xaxis=dict(title=""),
            )
            fig = dict(data=data, layout=layout)
    else:
        trace1 = go.Histogram(
            x=obbey[(obbey.Driveline == "All-wheel drive")].Horsepower,
            name='All-wheel drive')
        trace2 = go.Histogram(
            x=obbey[(obbey.Driveline == "Front-wheel drive")].Horsepower,
            name='Front-wheel drive')
        trace3 = go.Histogram(
            x=obbey[(obbey.Driveline == "Rear-wheel drive")].Horsepower,
            name='Rear-wheel drive')
        trace4 = go.Histogram(
            x=obbey[(obbey.Driveline == "Four-wheel drive")].Horsepower,
            name='Four-wheel drive')
        data = [trace1, trace2, trace3, trace4]
        layout = go.Layout(title='Horsepower by driveline')
        fig = go.Figure(data=data, layout=layout)
    return fig