예제 #1
0
def kamervragen_reply_time_per_year(years, kamervraag_durations):
    """Render the reply-time distribution of kamervragen, one KDE curve per year.

    Args:
        years: group labels handed to ``ff.create_distplot``.
        kamervraag_durations: the data sets handed to ``ff.create_distplot``,
            in the same order as ``years``.

    Returns:
        Whatever ``Plot.create_plot_html_default`` returns for the figure.
    """
    figure = ff.create_distplot(
        kamervraag_durations,
        years,
        bin_size=1,
        show_curve=True,
        show_hist=False,
        show_rug=False,
    )
    layout = figure['layout']

    # The x axis is limited to 0..60 and labelled in two separate updates.
    layout.update(xaxis=XAxis(range=[0, 60]))
    layout.update(xaxis=dict(title='Antwoordtijd [dagen]'))

    layout.update(height=700)
    layout.update(margin=Margin(t=20))
    layout.update(
        legend=dict(
            bordercolor='#E2E2E2',
            bgcolor='#FFFFFF',
            borderwidth=2,
        )
    )

    return Plot.create_plot_html_default(figure_or_data=figure)
def plot_1d_coupling_profile(couplings_per_bin, plot_dir, ab):
    """Plot the KDE of the coupling values of every bin and save it as HTML.

    Args:
        couplings_per_bin: mapping of bin name to a dict with the keys
            'lower' and 'upper' (used for the legend label) and 'couplings'
            (the values whose distribution is drawn).
        plot_dir: directory the HTML file is written to.
        ab: identifier used in the axis titles and in the file name.
    """
    # Sort once (descending by bin name) so that labels and data are
    # guaranteed to line up.  dict.items() is used because dict.iteritems()
    # only exists on Python 2.
    ordered_bins = [
        bindict
        for _, bindict in sorted(
            couplings_per_bin.items(), key=lambda item: item[0], reverse=True
        )
    ]

    group_labels = [
        str(bindict['lower']) + "Å < ΔCβ  < " + str(bindict['upper']) + "Å"
        for bindict in ordered_bins
    ]
    hist_data = [bindict['couplings'] for bindict in ordered_bins]

    # Average number of values per bin, rounded to the nearest hundred; it
    # only ends up in the file name.
    nr_datapoints = int(np.round(np.mean([len(x) for x in hist_data]), decimals=-2))

    fig = ff.create_distplot(hist_data, group_labels, show_hist=False, show_rug=False)

    for trace in fig['data']:
        trace['line']['width'] = 2

    fig['layout']['font'] = dict(size=16)
    fig['layout']['xaxis']['title'] = "couplings w_ij(" + ab + ")"
    fig['layout']['xaxis']['range'] = [-1, 1]
    fig['layout']['yaxis']['title'] = "Distribution of couplings for " + ab
    fig['layout']['margin']['t'] = 10

    plot_name = plot_dir + "/1d_coupling_profile_" + ab + "_avgdatapoints" + str(nr_datapoints) + ".html"
    plotly_plot(fig, filename=plot_name, auto_open=False)
def plot_density(protein, bqij_data, plot_dir):
    """Plot the q_ij values of every group as a KDE and as an overlaid histogram.

    Two HTML files are written to ``plot_dir``:
    ``<protein>_distribution_qijab.html`` and ``<protein>_histogram_qijab.html``.

    Args:
        protein: name used as prefix of both output files.
        bqij_data: mapping with the key "L" plus one entry per group; every
            group entry is passed to ``io.read_qij`` together with
            ``bqij_data['L']``.
        plot_dir: directory both files are written to.
    """
    group_labels = [key for key in sorted(bqij_data.keys()) if key != "L"]
    L = bqij_data['L']

    # The index set is the same for every group, so build it only once.
    upper_triangle = np.triu_indices(n=L, k=1)

    hist_data = []
    histograms = []
    for group in group_labels:
        # Only the second value returned by read_qij is used here.
        _, qij = io.read_qij(bqij_data[group], L)

        data_group = qij[upper_triangle].flatten()
        hist_data.append(data_group)

        histograms.append(
            go.Histogram(
                x=data_group,
                histnorm='probability',
                name=group,
                xbins=dict(
                    start=-0.1,
                    end=1,
                    size=0.005
                ),
                opacity=0.75
            )
        )

    # KDE plot
    fig = ff.create_distplot(hist_data, group_labels, show_hist=False, show_rug=False)
    fig['layout']['font'] = dict(size=18)
    fig['layout']['xaxis']['title'] = "q_ijab"
    plot_file = plot_dir + "/" + protein + "_distribution_qijab" + ".html"
    plotly_plot(fig, filename=plot_file, auto_open=False)

    # Histogram plot
    plot_file = plot_dir + "/" + protein + "_histogram_qijab" + ".html"
    layout = go.Layout(
        barmode='overlay',
        xaxis=dict(
            title="q_ijab",
            exponentformat="e",
            # plotly's enumerated values are lower case; 'All' is rejected by
            # validating plotly versions.
            showexponent='all'
        ),
        yaxis=dict(
            exponentformat="e",
            showexponent='all'
        ),
        font=dict(size=18)
    )
    fig = go.Figure(data=histograms, layout=layout)
    plotly_plot(fig, filename=plot_file, auto_open=False)
예제 #4
0
파일: app.py 프로젝트: Hex-Liu/Put_Trading
def set_display_children(slct_row, data, columns):
    """Build the histogram graph for the rows selected in the table.

    Args:
        slct_row: positions of the selected rows.
        data: table rows, turned into a DataFrame.
        columns: column descriptors; each one's 'id' becomes a column name.

    Returns:
        A ``dcc.Graph`` with id 'my-graph_rec' showing one histogram per
        selected 'idx' value, taken from the module-level ``dt_ni``.
    """
    column_ids = [col['id'] for col in columns]
    table = pd.DataFrame(data, columns=column_ids)
    selected_idx = table.iloc[list(slct_row)]['idx']

    series = [dt_ni[i] for i in selected_idx]
    labels = [str(i) for i in selected_idx]

    fig = ff.create_distplot(
        series,
        labels,
        bin_size=.5,
        show_rug=False,
        show_curve=False,
    )
    fig.layout.update(title='Histogram of Mean Return')
    fig.layout.xaxis.update({'title': 'Mean Return (In Dollar$) '})

    return dcc.Graph(figure=fig, id='my-graph_rec')
def plot_distance_distribution(distances_ab, ab, distance_definition, log, plot_dir):
    """Plot the KDE of distances per sequence separation and save it as HTML.

    Args:
        distances_ab: mapping of sequence separation to a mapping that holds
            the distance values under the key ``ab``.
        ab: key selecting the values; 'all' is labelled "residue pair",
            otherwise characters 0 and 2 of ``ab`` are used in the titles.
        distance_definition: name used in the axis titles and the file name.
        log: if true, the distribution is computed on log distances and the
            x axis gets hand-picked ticks.
        plot_dir: directory the HTML file is written to.
    """
    # dict.items() instead of the Python-2-only iteritems(); sorted once so
    # labels and data share the same order.
    ordered = sorted(distances_ab.items(), key=lambda item: item[0])

    group_labels = ["sequence separation " + str(seq_sep) for seq_sep, _ in ordered]

    hist_data = []
    for _, values in ordered:
        distances = np.array(values[ab])
        not_nan = ~np.isnan(distances)
        hist_data.append(np.log(distances)[not_nan] if log else distances[not_nan])

    fig = ff.create_distplot(hist_data, group_labels, show_hist=False, show_rug=False)

    for trace in fig['data']:
        trace['line']['width'] = 2
        # In log mode the hover text shows the back-transformed value.
        hover_values = np.exp(trace['x']) if log else trace['x']
        trace['text'] = ['Cb distance: ' + str(x) for x in hover_values]
        trace['hoverinfo'] = "text"

    residues = "residue pair" if ab == 'all' else ab[0] + " and " + ab[2]

    fig['layout']['font'] = dict(size=16)
    fig['layout']['xaxis']['title'] = distance_definition + " distance between " + residues
    fig['layout']['xaxis']['showspikes'] = True
    fig['layout']['yaxis']['title'] = "Distribution of " + residues + " distances (" + distance_definition + ")"
    fig['layout']['yaxis']['showspikes'] = True
    fig['layout']['xaxis']['range'] = [3, 100]
    fig['layout']['xaxis']['tickangle'] = 0
    fig['layout']['margin']['t'] = 10

    avg_datapoints = int(np.mean([len(h) for h in hist_data]))
    plot_file = plot_dir + "/" + distance_definition + "_distribution_" + ab + "_data" + str(avg_datapoints) + ".html"

    if log:
        # Tick labels stay in the original unit while positions are in log space.
        fig['layout']['xaxis']['tickmode'] = "array"
        fig['layout']['xaxis']['ticktext'] = [3, 4, 5, 6, 8, 10, 12, 15, 20, 30, 40, 50, 70, 80]
        fig['layout']['xaxis']['tickvals'] = np.log(fig['layout']['xaxis']['ticktext'])
        fig['layout']['xaxis']['range'] = np.log([3, 100])
        plot_file = plot_file.replace(".html", "_log.html")

    plotly_plot(fig, filename=plot_file, auto_open=False)
def plot_density(samples, PI=None):
    """Build a figure with the KDE of ``samples`` and an optional shaded interval.

    Args:
        samples: values passed to ``ff.create_distplot`` as the only group.
        PI: optional pair ``(lower, upper)``.  When given, the area under the
            curve strictly between both bounds is filled.  A falsy value
            (``None`` or an empty sequence) disables the shading.

    Returns:
        A ``go.Figure`` holding the 'PDF' line and, if ``PI`` is given, a
        second trace named ``'PI [lower,upper]'``.
    """
    # Only the computed curve of the distplot is reused; the figure itself
    # is rebuilt with a custom layout.
    kde = ff.create_distplot([samples], ['group1'], show_hist=False, show_rug=False)
    curve_x = kde['data'][0]['x']
    curve_y = kde['data'][0]['y']

    layout = go.Layout(
        autosize=False,
        width=500,
        height=500,
        margin=go.Margin(
            l=50,
            r=50,
            b=100,
            t=100,
            pad=4
        ),
        xaxis=dict(title='x'),
        yaxis=dict(title='PDF(x)')
    )

    data = [
        go.Scatter(
            y=curve_y,
            x=curve_x,
            mode='lines',
            line=dict(width=2),
            name='PDF',
            fill=None,
        )
    ]
    if not PI:
        return go.Figure(data=data, layout=layout)

    # Keep only the part of the curve that lies inside the interval.
    df = pd.DataFrame(curve_y, curve_x, columns=['Y'])
    df.index.name = 'X'
    df = df.loc[(df.index > PI[0]) & (df.index < PI[1]), :]

    data.append(
        go.Scatter(
            y=df.Y.values,
            x=df.index,
            # 'lines' is the valid scatter mode; 'line' is not.
            mode='lines',
            fill='tozeroy',
            line=dict(width=0.0),
            name='PI' + ' [' + str(PI[0]) + ',' + str(PI[1]) + ']',
        )
    )
    return go.Figure(data=data, layout=layout)
def plot_1d_coupling_profile(couplings_per_pair, lower_cb_distance, upper_cb_distance, plot_file):
    """Plot the KDE of the coupling values of every pair and save it as HTML.

    Args:
        couplings_per_pair: mapping of pair name to its coupling values; the
            number of values is appended to the legend label.
        lower_cb_distance: lower distance bound, shown in the x-axis title.
        upper_cb_distance: upper distance bound, shown in the x-axis title.
        plot_file: path of the HTML file to write.
    """
    group_labels = [
        key + "(" + str(len(values)) + ")"
        for key, values in couplings_per_pair.items()
    ]
    # create_distplot indexes hist_data, which a Python 3 dict view does not
    # support, so materialise the values as a list.
    hist_data = list(couplings_per_pair.values())

    fig = ff.create_distplot(hist_data, group_labels, show_hist=False, show_rug=False)

    for trace in fig['data']:
        trace['line']['width'] = 2

    fig['layout']['font'] = dict(size=16)
    fig['layout']['xaxis']['title'] = "couplings w_ijab for residue pairs ij at {0}Å < ΔCβ  < {1}Å".format(lower_cb_distance, upper_cb_distance)
    fig['layout']['xaxis']['range'] = [-1, 1]
    fig['layout']['yaxis']['title'] = "Distribution of couplings "
    fig['layout']['margin']['t'] = 10

    plotly_plot(fig, filename=plot_file, auto_open=False)
import plotly.figure_factory as ff
import plotly.graph_objects as go
import statistics
import random
import pandas as pd
import csv

# Load the "average" column of data.csv as a plain list.
df = pd.read_csv("data.csv")
data = df["average"].tolist()

# Show the distribution curve only (histogram bars are switched off).
fig = ff.create_distplot([data],["average"],show_hist = False)
fig.show()

# Sample mean and sample standard deviation of the column.
mean = statistics.mean(data)
std_deviation = statistics.stdev(data)

print("mean of samples: ", mean)
print("standard deviation of sample",std_deviation)
예제 #9
0
파일: tools.py 프로젝트: plotly/plotly.py
 def create_distplot(*args, **kwargs):
     """Deprecated entry point that forwards to plotly.figure_factory.create_distplot."""
     FigureFactory._deprecated('create_distplot')
     # Imported at call time, exactly like the original wrapper.
     from plotly.figure_factory import create_distplot as _create_distplot
     return _create_distplot(*args, **kwargs)
예제 #10
0
import plotly.figure_factory as pf
import statistics
import random
import pandas as pd
import csv

# Load the "reading_time" column of medium_data.csv as a plain list.
df = pd.read_csv('medium_data.csv')
data = df['reading_time'].tolist()
# Show the distribution curve of the full data set (no histogram bars).
fig = pf.create_distplot([data], ["Reading Time"], show_hist=False)
fig.show()

print("Population Mean ", statistics.mean(data))


def randomSetOfMeans(counter):
    """Return the mean of ``counter`` values drawn at random from ``data``.

    Values are drawn with replacement from the module-level ``data`` list.

    Args:
        counter: number of values to draw.

    Returns:
        ``statistics.mean`` of the drawn values.
    """
    # random.randint includes both end points, so the largest valid index is
    # len(data) - 1; using len(data) could raise IndexError.
    last_index = len(data) - 1
    dataSet = [data[random.randint(0, last_index)] for _ in range(counter)]
    return statistics.mean(dataSet)


def showFig(meanList):
    """Show the distribution curve of ``meanList`` (no histogram bars)."""
    figure = pf.create_distplot([meanList], ["Reading Time"], show_hist=False)
    figure.show()
예제 #11
0
def show_fig(mean_list):
    """Show the distribution curve of ``mean_list`` with a vertical line at its mean."""
    average = statistics.mean(mean_list)
    figure = ff.create_distplot([mean_list], ["average"], show_hist=False)
    # Vertical marker from y=0 to y=1 at the mean.
    mean_marker = go.Scatter(
        x=[average, average],
        y=[0, 1],
        mode="lines",
        name="MEAN",
    )
    figure.add_trace(mean_marker)
    figure.show()
예제 #12
0
    dev = st.stdev(arr)
    return dev


def percent(arr, mean, std):
    """Return the percentage of values in ``arr`` within two deviations of the mean.

    Both bounds (``mean - 2*std`` and ``mean + 2*std``) are rounded to
    integers, and every value is rounded before it is compared.  The lower
    bound is inclusive, the upper bound is exclusive.
    """
    lower = int(round(mean - std * 2, 0))
    upper = int(round(mean + std * 2, 0))
    inside = sum(1 for value in arr if lower <= int(round(value)) < upper)
    return (inside / len(arr)) * 100


# Summary statistics of `arr`, computed with helpers defined earlier in the file.
mean = Mean(arr)
median = Median(arr)
mode = Mode(arr)
stdev = standardDev(arr)
# Share of values that fall within two standard deviations of the mean.
per = percent(arr, mean, stdev)

print(f"Mean is {mean}")
print(f"Median is {median}")
print(f"Mode is {mode}")
print(f"Standard Deviation is {stdev}")
print(f"Percentage is {per}")

# Show the distribution curve only (histogram bars are switched off).
fig = ff.create_distplot([arr], ["Data"], show_hist=False)
fig.show()
예제 #13
0
파일: app.py 프로젝트: piosienk/2021L-WB-ML
from sklearn.model_selection import train_test_split
import xgboost as xgb
import plotly.figure_factory as ff
from functions import *

# Stylesheet passed to the Dash application below.
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

# Keep only the three measurement columns and the outcome column.
dt = pd.read_csv('all_data.csv')
dt = dt[[
    '(%)lymphocyte', 'High sensitivity C-reactive protein',
    'Lactate dehydrogenase', 'outcome'
]]

# One distribution plot per measurement; missing values are dropped first.
fig = ff.create_distplot([dt['(%)lymphocyte'].dropna().to_numpy()],
                         ['lymphocytes'])
fig2 = ff.create_distplot(
    [dt['High sensitivity C-reactive protein'].dropna().to_numpy()],
    ['hs-CRP'])
fig3 = ff.create_distplot([dt['Lactate dehydrogenase'].dropna().to_numpy()],
                          ['LDH'])

app.layout = html.Div(children=[
    html.H1(children='Interactive survival prediction model'),
    html.Div(children=[
        html.Label("Choose your patient's nationality:"),
        dcc.RadioItems(
            id='nation',
            options=[{
                'label': i,
                'value': i
예제 #14
0
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import streamlit as st
import numpy as np
from plotly import figure_factory

# Three samples of 200 standard-normal values, shifted by -2, 0 and +2.
x1 = np.random.randn(200) - 2
x2 = np.random.randn(200)
x3 = np.random.randn(200) + 2

# Group data together; labels and bin sizes are given in the same order.
hist_data = [x1, x2, x3]
group_labels = ["Group 1", "Group 2", "Group 3"]
bin_size = [0.1, 0.25, 0.5]

# Create the distribution plot; bin_size is passed as the third positional argument.
chart = figure_factory.create_distplot(hist_data, group_labels, bin_size)

# Render the figure in the Streamlit app.
st.plotly_chart(chart)
예제 #15
0
def _distplot(df):
    """Return the (data, layout) pair of a distplot with one group per column of ``df``."""
    column_values = df.T.values
    column_labels = [str(name) for name in df.columns]
    figure = ff.create_distplot(column_values, column_labels)
    return figure["data"], figure["layout"]
예제 #16
0
파일: C110.py 프로젝트: VISHVADHARMAN/C-110
def show_fig(mean_list):
    """Show the distribution curve of ``mean_list`` (no histogram bars)."""
    figure = ff.create_distplot([mean_list], ["average"], show_hist=False)
    figure.show()
예제 #17
0
#Finding patterns between the sum of dice results
import random
import plotly.figure_factory as ff

# `count` collects the roll numbers, `diceResult` the sum of both dice per roll.
count = []
diceResult = []

# Roll two six-sided dice 100 times.
for i in range(0, 100):
    dice1 = random.randint(1, 6)
    dice2 = random.randint(1, 6)
    diceResult.append(dice1 + dice2)
    count.append(i)
# Only diceResult is plotted; `count` is not used in the lines shown here.
fig = ff.create_distplot([diceResult], ["result"])
fig.show()
예제 #18
0
def main():
    """Render a Streamlit demo page that walks through many chart and widget types.

    The sections are emitted top to bottom in the order written here: map,
    graphviz graph, dataframes and tables, built-in charts, matplotlib,
    Altair, Plotly, Bokeh, pydeck layers, images and an echoed code block.
    The page ends with a call to ``show_footer``.  ``st``, ``pd``, ``np`` and
    ``show_footer`` come from the enclosing module.
    """
    st.button("Re-run")

    # Set up the page header.
    st.title("Welcome to the pag3")
    st.markdown("Coming soon ... Sign up [here]() to get notified.")

    #### MAP OPEN-STREET MAP #####
    # 1000 random points scattered around (37.76, -122.4).
    df = pd.DataFrame(np.random.randn(1000, 2) / [50, 50] + [37.76, -122.4],
                      columns=['lat', 'lon'])
    st.map(df)

    # #### GRAPH-VISUAL #####
    import graphviz as graphviz
    # Create a graphlib graph object
    graph = graphviz.Digraph()
    graph.edge('run', 'intr')
    graph.edge('intr', 'runbl')
    graph.edge('runbl', 'run')
    graph.edge('run', 'kernel')
    graph.edge('kernel', 'zombie')
    graph.edge('kernel', 'sleep')
    graph.edge('kernel', 'runmem')
    graph.edge('sleep', 'swap')
    graph.edge('swap', 'runswap')
    graph.edge('runswap', 'new')
    graph.edge('runswap', 'runmem')
    graph.edge('new', 'runmem')
    graph.edge('sleep', 'runmem')
    st.graphviz_chart(graph)

    # ### SINGLE-TABLE ####
    st.subheader('DATAFRAME')
    df = pd.DataFrame(np.random.randn(50, 20),
                      columns=('col %d' % i for i in range(20)))
    st.dataframe(df)  # Same as st.write(df)

    ### SINGLE-TABLE - yellowMAX ####
    # Same kind of table, with the maximum of every column highlighted.
    st.subheader('DATAFRAME WITH MAX UNDERLIANED')
    df = pd.DataFrame(np.random.randn(10, 20),
                      columns=('col %d' % i for i in range(20)))
    st.dataframe(df.style.highlight_max(axis=0))

    ### STATIC TABLE ####
    # Display a static table.
    # NOTE(review): the string below is a bare expression statement, not a
    # comment; Streamlit's "magic" feature may render it on the page -
    # confirm whether that is intended.
    ''' This differs from st.dataframe in that the table in this case is static: 
    its entire contents are laid out directly on the page.'''
    st.subheader('TABLE')
    df = pd.DataFrame(np.random.randn(10, 5),
                      columns=('col %d' % i for i in range(5)))
    st.table(df)

    ### LINE CHART ####
    st.subheader('LINE CHART')
    chart_data = pd.DataFrame(np.random.randn(20, 3), columns=['a', 'b', 'c'])

    st.line_chart(chart_data)

    ### LINE CHART AREA####
    st.subheader('LINE CHART AREA')
    chart_data = pd.DataFrame(np.random.randn(20, 3), columns=['a', 'b', 'c'])

    st.area_chart(chart_data)

    #### BAR-CHART #####
    st.subheader('BAR CHART')
    chart_data = pd.DataFrame(np.random.randn(50, 3), columns=["a", "b", "c"])

    st.bar_chart(chart_data)

    #### HISTOGRAM-MATPLOTLIB #####
    import matplotlib.pyplot as plt
    st.subheader('HISTOGRAM MATPLOTLIB')

    arr = np.random.normal(1, 1, size=100)
    fig, ax = plt.subplots()
    ax.hist(arr, bins=20)
    st.pyplot(fig)

    #### scatter-ALTAIR #####
    st.subheader('SCATTER ALTAIR')
    import altair as alt
    df = pd.DataFrame(np.random.randn(200, 3), columns=['a', 'b', 'c'])

    # Column 'c' drives both the size and the color of the circles.
    c = alt.Chart(df).mark_circle().encode(x='a',
                                           y='b',
                                           size='c',
                                           color='c',
                                           tooltip=['a', 'b', 'c'])
    st.altair_chart(c, use_container_width=True)

    #### PLOTLY #########
    st.subheader('PLOTLY DIAGRAM')
    import plotly.figure_factory as ff
    # Add histogram data
    x1 = np.random.randn(200) - 2
    x2 = np.random.randn(200)
    x3 = np.random.randn(200) + 2

    # Group data together
    hist_data = [x1, x2, x3]

    group_labels = ['Group 1', 'Group 2', 'Group 3']

    # Create distplot with custom bin_size
    fig = ff.create_distplot(hist_data, group_labels, bin_size=[.1, .25, .5])
    # Plot!
    st.plotly_chart(fig, use_container_width=True)

    #### BOKEH #########
    st.subheader('BOKEH')
    from bokeh.plotting import figure
    x = [1, 2, 3, 4, 5]
    y = [6, 7, 2, 4, 5]

    p = figure(title='simple line example', x_axis_label='x', y_axis_label='y')
    p.line(x, y, line_width=2)
    st.bokeh_chart(p, use_container_width=True)

    #### Plotly 2 ################
    # NOTE(review): `px` and `random` are imported here but not used in this function.
    import plotly.express as px
    from plotly.subplots import make_subplots
    import plotly.graph_objects as go
    from numpy import random

    pts = 50
    x1 = np.arange(pts)
    y1 = np.random.rand(pts)
    y2 = np.random.rand(pts)
    y3 = (x1 / pts)**2

    # One row, two columns: markers on the left, markers plus a line on the right.
    fig = make_subplots(rows=1, cols=2)

    fig.add_trace(go.Scatter(x=x1, y=y1, mode='markers', name='markers'),
                  row=1,
                  col=1)
    fig.add_trace(go.Scatter(x=x1, y=y2, mode='markers', name='markers2'),
                  row=1,
                  col=2)
    fig.add_trace(go.Scatter(x=x1, y=y3, mode='lines', name='lines'),
                  row=1,
                  col=2)

    fig.update_layout(height=300,
                      width=800,
                      title_text="Side By Side Subplots")
    st.plotly_chart(fig)

    #### hexagon bar-3d ###########################################
    st.subheader('exagon bar-3d')
    import pydeck as pdk
    df = pd.DataFrame(np.random.randn(1000, 2) / [50, 50] + [37.76, -122.4],
                      columns=['lat', 'lon'])

    # Two layers over the same random points: extruded hexagons and a scatter plot.
    st.pydeck_chart(
        pdk.Deck(
            map_style='mapbox://styles/mapbox/light-v9',
            initial_view_state=pdk.ViewState(
                latitude=37.76,
                longitude=-122.4,
                zoom=11,
                pitch=50,
            ),
            layers=[
                pdk.Layer(
                    'HexagonLayer',
                    data=df,
                    get_position='[lon, lat]',
                    radius=200,
                    elevation_scale=4,
                    elevation_range=[0, 1000],
                    pickable=True,
                    extruded=True,
                ),
                pdk.Layer(
                    'ScatterplotLayer',
                    data=df,
                    get_position='[lon, lat]',
                    get_color='[200, 30, 0, 160]',
                    get_radius=200,
                ),
            ],
        ))

    #### hexagon bar-3d ACCIDENT ###########################################

    # The layer receives this CSV URL as its data argument.
    UK_ACCIDENTS_DATA = (
        'https://raw.githubusercontent.com/uber-common/'
        'deck.gl-data/master/examples/3d-heatmap/heatmap-data.csv')

    # Define a layer to display on a map
    layer = pdk.Layer('HexagonLayer',
                      UK_ACCIDENTS_DATA,
                      get_position=['lng', 'lat'],
                      auto_highlight=True,
                      elevation_scale=50,
                      pickable=True,
                      elevation_range=[0, 3000],
                      extruded=True,
                      coverage=1)

    # Set the viewport location
    view_state = pdk.ViewState(longitude=-1.415,
                               latitude=52.2323,
                               zoom=6,
                               min_zoom=5,
                               max_zoom=15,
                               pitch=40.5,
                               bearing=-27.36)
    # Render
    st.pydeck_chart(pdk.Deck(layers=[layer], initial_view_state=view_state))

    #### one-image ###########################################
    st.subheader('one-image from URL')
    # NOTE(review): Image is only needed for the commented-out local-file variant.
    from PIL import Image
    #image = Image.open('images/cat.jpg')
    image = 'https://static.streamlit.io/examples/cat.jpg'
    st.image(image, use_column_width=True)

    #### more-images###########################################
    st.subheader('more-images')
    # Three columns, one image each.
    col1, col2, col3 = st.beta_columns(3)
    with col1:
        st.header("A cat")
        st.image("https://static.streamlit.io/examples/cat.jpg",
                 use_column_width=True)

    with col2:
        st.header("A dog")
        st.image("https://static.streamlit.io/examples/dog.jpg",
                 use_column_width=True)

    with col3:
        st.header("An owl")
        st.image("https://static.streamlit.io/examples/owl.jpg",
                 use_column_width=True)

    #### display -code ###########################################
    st.subheader('display code')
    with st.echo():
        st.write('This code will be printed')

    show_footer()
예제 #19
0
import plotly.figure_factory as ff
import pandas as pd
import csv

# Plot the distribution of the "Avg Rating" column of data.csv.
df = pd.read_csv("data.csv")
fig = ff.create_distplot([df["Avg Rating"].tolist()], ["Avg Rating"])
fig.show()
예제 #20
0
import statistics
import random
import plotly.graph_objects as go

# Load the "temp" column of temp.csv as a plain list.
reader = pd.read_csv("temp.csv")
data = reader["temp"].tolist()

# Statistics of the full data set.
mean = statistics.mean(data)
std = statistics.stdev(data)

print(mean)
print(std)

# Draw 1000 values at random (with replacement) from the data set.
data1 = []
for i in range(0, 1000):
    # random.randint includes both end points, so the largest valid index is
    # len(data) - 1; using len(data) could raise IndexError.
    index = random.randint(0, len(data) - 1)
    value = data[index]
    data1.append(value)
print(data1)

# Statistics of the random sample.
mean1 = statistics.mean(data1)
std1 = statistics.stdev(data1)

print("mean of sample data = ", mean1)
print("std of smaple data = ", std1)

# Distribution curve of the sample plus a line from (mean, 0) to (mean1, 1).
graph = ff.create_distplot([data1], ["temp"], show_hist=False)
graph.add_trace(
    go.Scatter(x=[mean, mean1], y=[0, 1], mode="lines", name="mean"))
graph.show()
예제 #21
0
import statistics
import plotly.figure_factory as ff

# Load both measurement columns of height-weight.csv as plain lists.
df = pd.read_csv("height-weight.csv")
heightlist = df["Height(Inches)"].to_list()
weightlist = df["Weight(Pounds)"].to_list()
# Mean, mode and median of each column.
heightmean = statistics.mean(heightlist)
weightmean = statistics.mean(weightlist)
heightmode = statistics.mode(heightlist)
weightmode = statistics.mode(weightlist)
heightmedian = statistics.median(heightlist)
weightmedian = statistics.median(weightlist)
print("mean,median,mode of height is: {} ,{} ,{} respectively".format(heightmean,heightmedian,heightmode) )
print("mean,median,mode of weight is: {} ,{} ,{} respectively".format(weightmean,weightmedian,weightmode)  )

# One distribution curve per column (histogram bars are switched off).
fig = ff.create_distplot([heightlist],["result"],show_hist = False)
fig1 = ff.create_distplot([weightlist],["result"],show_hist = False)

fig.show()
fig1.show()

stdht = statistics.stdev(heightlist)
stdwt = statistics.stdev(weightlist)

print("the standard deviation of height and weight is: {},{} respectively".format(stdht,stdwt))
# Bounds of the intervals one, two and three standard deviations around the height mean.
height1ststart,height1stend = heightmean - stdht,heightmean + stdht
height2ststart,height2stend = heightmean - (2*stdht),heightmean + (2*stdht)
height3ststart,height3stend = heightmean - (3*stdht),heightmean + (3*stdht)

# Bounds of the intervals one and two standard deviations around the weight mean.
weight1ststart,weight1stend = weightmean - stdwt,weightmean + stdwt
weight2ststart,weight2stend = weightmean - (2*stdwt),weightmean + (2*stdwt)
예제 #22
0
def update_graph(chart):
    """Return the figure that belongs to the selected chart type.

    Every figure is built from the module-level ``df`` on each call; ``chart``
    only decides which one is returned.  Recognised values are "Box Plot",
    "Error Bar", "Histogram", "2D Histogram" and "Distplot"; any other value
    yields the grouped violin plot.

    Returns:
        A dict with "data" and "layout" keys, or, for "Distplot", the figure
        created by ``ff.create_distplot``.
    """
    # --- Box plot: purchase per age group ---------------------------------
    age_groups = ['0-17', '18-25', '26-35', '36-45', '46-50', '51-55', '55+']
    box_traces = [
        go.Box(y=df[df["Age Group"] == age]['Purchase'], name=age)
        for age in age_groups
    ]
    box_layout = go.Layout(
        title="Purchase vs Age group",
        xaxis={"title": "Age Group"},
        yaxis={"title": "Sales ($)"},
        colorway=['#b2182b', '#ef8a62', '#fddbc7', '#E7E7E7', '#d1e5f0', '#67a9cf', '#2166ac'],
    )

    # --- Bar chart with error bars: total purchase per occupation ----------
    purchase_by_occupation = df.groupby(["Occupation"])['Purchase'].sum().reset_index()
    bar_traces = [
        go.Bar(
            x=purchase_by_occupation["Occupation"],
            y=purchase_by_occupation["Purchase"],
            marker={'color': '#ef8a62', 'line': {'color': "#b2182b", 'width': 0.5}},
            opacity=0.6,
            error_y={'type': 'percent', 'value': 10},
        )
    ]
    bar_layout = go.Layout(
        title="Purchase vs Occupation Types",
        xaxis={"title": "Occupation Category (20 Types)"},
        yaxis={"title": "Sales ($)"},
    )

    # --- Overlaid histograms: one per product column -----------------------
    histogram_traces = [
        go.Histogram(x=df[product], name=product, xbins={"size": 3}, opacity=0.8)
        for product in ['Product 1', 'Product 2', 'Product 3']
    ]
    histogram_layout = go.Layout(
        title="Product Category Distribution",
        xaxis={"title": "Products"},
        yaxis={"title": "Frequency"},
        barmode='overlay',
        colorway=['#e9a3c9', '#ffffbf', '#a1d76a'],
    )

    # --- 2D histogram: purchase against age group --------------------------
    heat_colorscale = [
        [0, 'rgb(12,51,131)'],
        [0.25, 'rgb(10,136,186)'],
        [0.5, 'rgb(242,211,56)'],
        [0.75, 'rgb(242,143,56)'],
        [1, 'rgb(217,30,30)'],
    ]
    histogram2d_traces = [
        go.Histogram2d(
            x=df["Age Group"].sort_values(),
            y=df['Purchase'],
            histnorm='probability',
            autobinx=False,
            xbins={"size": 1},
            autobiny=False,
            ybins={"size": 1000},
            colorscale=heat_colorscale,
        )
    ]
    histogram2d_layout = go.Layout(
        title="Sales vs Age Group distribution",
        xaxis={"title": "Age Group"},
        yaxis={"title": "Sales ($)"},
    )

    # --- Distplot: purchase distribution per city --------------------------
    city_purchases = [df[df["City"] == city]["Purchase"] for city in ("A", "B", "C")]
    city_labels = ["City A", "City B", "City C"]
    distplot_figure = ff.create_distplot(
        city_purchases,
        city_labels,
        bin_size=1000,
        colors=['#a6cee3', '#1f78b4', '#b2df8a'],
    )
    distplot_figure['layout'].update(
        title='Sales Distribution Over Cities',
        yaxis={"title": " Probability Density of Sales"},
    )

    # --- Violin plot: purchase per duration of stay, split by gender -------
    violin_traces = []
    for gender_code, gender_name, line_color in (
        ('M', 'Male', '#C1EC00'),
        ('F', 'Female', '#EC7899'),
    ):
        rows = df[df['Gender'] == gender_code]
        violin_traces.append({
            "type": 'violin',
            "x": rows["Resident of the current city"],
            "y": rows["Purchase"],
            "legendgroup": gender_code,
            "scalegroup": gender_code,
            "name": gender_name,
            "box": {"visible": True},
            "meanline": {"visible": True},
            "line": {"color": line_color},
        })
    violin_layout = go.Layout(
        title="Sales Distribution Over Duration of Stay",
        xaxis={"title": " Duration of Stay (years)"},
        yaxis={"title": "Sales ($)"},
        violinmode="group",
    )

    # --- Pick the figure that was asked for --------------------------------
    if chart == "Box Plot":
        return {"data": box_traces, "layout": box_layout}
    if chart == "Error Bar":
        return {"data": bar_traces, "layout": bar_layout}
    if chart == "Histogram":
        return {"data": histogram_traces, "layout": histogram_layout}
    if chart == "2D Histogram":
        return {"data": histogram2d_traces, "layout": histogram2d_layout}
    if chart == "Distplot":
        return distplot_figure
    return {"data": violin_traces, "layout": violin_layout}
예제 #23
0
파일: dataset.py 프로젝트: Shreesha-28/c110
import plotly.figure_factory as ff
import plotly.graph_objects as go
import statistics
import random
import pandas as pd
import csv

# Load the "temp" column of data.csv as a plain list.
df = pd.read_csv("data.csv")
data = df["temp"].tolist()

# Show the distribution curve only (histogram bars are switched off).
fig = ff.create_distplot([data], ["temp"], show_hist=False)

fig.show()

# Sample mean and sample standard deviation of the column.
mean = statistics.mean(data)
std_deviation = statistics.stdev(data)

print(mean, std_deviation)
예제 #24
0
import pandas as pd
import plotly.figure_factory as ff

# Plot the "Avg Rating" column of data.csv; the column is passed as a pandas
# Series and the histogram bars are explicitly enabled.
df = pd.read_csv("data.csv")
fig = ff.create_distplot([df["Avg Rating"]], ["Average Rating"],
                         show_hist=True)
fig.show()
예제 #25
0
import random
import plotly.express as px
import plotly.figure_factory as ff

# `count` collects the roll numbers, `diceResult` the sum of both dice per roll.
count = []
diceResult = []

# Roll two six-sided dice 100 times.
for i in range(0,100):
    dice1 = random.randint(1,6)
    dice2 = random.randint(1,6)
    diceResult.append(dice1+dice2)
    count.append(i)

# Earlier bar-chart variant, kept for reference:
#fig = px.bar(x = diceResult, y = count)
# Show the distribution curve of the sums (histogram bars are switched off).
fig = ff.create_distplot([diceResult],["Result"],show_hist = False)
fig.show()
예제 #26
0
import plotly.figure_factory as ff
import csv 
import pandas as pd 

# Plot the distribution curve of the "Weight(Pounds)" column of data.csv
# (histogram bars are switched off).
df = pd.read_csv("data.csv") 
fig = ff.create_distplot([df["Weight(Pounds)"].tolist()], ["Weight"], show_hist= False)

fig.show()
예제 #27
0
def update_density(selected_group):
    """Return the density plot of the selected group, or None when the group is 0.

    ``selected_group`` is 1-based: group ``n`` reads column ``n - 1`` of the
    module-level ``vectors``.
    """
    if selected_group == 0:
        return None

    column = vectors.iloc[:, selected_group - 1]
    figure = ff.create_distplot([column], [str(selected_group)])
    figure['layout'].update(title='<b>likelihood density</b>')
    return figure
예제 #28
0
# `mean_list` and `std_deviation` are defined earlier in the file.
mean = statistics.mean(mean_list)
print("mean of sampling distribution:- ", mean)
print("Standard deviation of sampling distribution:- ", std_deviation)

# Start and end values of the intervals one, two and three standard
# deviations around the mean.
first_std_deviation_start, first_std_deviation_end = mean - std_deviation, mean + std_deviation
second_std_deviation_start, second_std_deviation_end = mean - (
    2 * std_deviation), mean + (2 * std_deviation)
third_std_deviation_start, third_std_deviation_end = mean - (
    3 * std_deviation), mean + (3 * std_deviation)

# Mean of the "average" column of data.csv, drawn below as a second marker.
df = pd.read_csv("data.csv")
data = df["average"].tolist()
mean_of_sample1 = statistics.mean(data)
print("Mean of sample1:- ", mean_of_sample1)
# Distribution curve of mean_list; every marker below is a vertical line
# from y=0 to y=0.17.
fig = ff.create_distplot([mean_list], ["average"], show_hist=False)
fig.add_trace(
    go.Scatter(x=[mean, mean], y=[0, 0.17], mode="lines", name="MEAN"))
fig.add_trace(
    go.Scatter(x=[mean_of_sample1, mean_of_sample1],
               y=[0, 0.17],
               mode="lines",
               name="Average of population"))
fig.add_trace(
    go.Scatter(x=[first_std_deviation_end, first_std_deviation_end],
               y=[0, 0.17],
               mode="lines",
               name="STANDARD DEVIATION 1 END"))
fig.add_trace(
    go.Scatter(x=[second_std_deviation_end, second_std_deviation_end],
               y=[0, 0.17],
예제 #29
0
import pandas as pd
import csv
import plotly.figure_factory as ff
# Plot the distribution curve of the "Avg Rating" column of bell.csv
# (histogram bars are switched off).
df = pd.read_csv("bell.csv")
fig = ff.create_distplot([df["Avg Rating"].tolist()], ["Avg Rating"],
                         show_hist=False)
fig.show()
예제 #30
0
import os
import pandas as pd
import plotly.offline as pyo
import plotly.figure_factory as ff
import plotly.graph_objs as go

# iris.csv is looked up in data_samples/Data, two directory levels above this file.
data_folder = os.path.join(os.path.dirname(os.path.dirname(__file__)),
                           'data_samples/Data')
data_file = os.path.join(data_folder, 'iris.csv')
df = pd.read_csv(data_file)

print(df['class'].unique())

# Petal length per class.
x1 = df[df['class'] == 'Iris-setosa']['petal_length']
x2 = df[df['class'] == 'Iris-versicolor']['petal_length']
x3 = df[df['class'] == 'Iris-virginica']['petal_length']

hist_data = [x1, x2, x3]

# Labels are listed in the same order as hist_data.
group_labels = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']

fig = ff.create_distplot(hist_data, group_labels)
pyo.plot(fig)
예제 #31
0
# Add labels
# NOTE(review): the axis labels here are the reverse of the histogram below
# ('Number of words' on x there) - confirm which plot is labelled correctly.
plt.title('Density plot and Histogram of number of words of abstract')
plt.xlabel('Count of documents')
plt.ylabel('Number of words')
plt.show()

# Histogram of the document lengths; `doc_length` and `size` are defined
# earlier in the file.
plt.hist(doc_length, color='blue', edgecolor='black', bins=size)
# Add labels
plt.title('Histogram of number of words of abstract')
plt.xlabel('Number of words')
plt.ylabel('Count of documents')
plt.show()

# Same data as a plotly distplot, written to an HTML file.
import plotly.figure_factory as ff
group_labels = ['distplot']  # name of the dataset
fig = ff.create_distplot([doc_length], group_labels)
py.plot(fig, filename='../schemas/preprocess_distplot_word_count.html')

# ======================================================================================================================
# Find the maximum length of abstract in the whole dataset
# ======================================================================================================================

# len is applied to every entry of the 'abstract' and 'title' columns.
print("Maximum length of abstract in the whole dataset",
      max(data['abstract'].apply(len)))  # 3011
print("Maximum length of title in the whole dataset",
      max(data['title'].apply(len)))
# Maximum length of abstract in the whole dataset 1059
# Maximum length of title in the whole dataset 34
예제 #32
0
 def create_distplot(*args, **kwargs):
     """Deprecated entry point that forwards to plotly.figure_factory.create_distplot.

     Calls FigureFactory._deprecated('create_distplot') first (presumably to
     emit a deprecation notice -- confirm against its definition), then
     passes every positional and keyword argument through unchanged and
     returns the result of the real implementation.
     """
     FigureFactory._deprecated('create_distplot')
     # Imported inside the function rather than at module level.
     from plotly.figure_factory import create_distplot
     return create_distplot(*args, **kwargs)
#######
# This distplot demonstrates that random samples
# seldom fit a "normal" distribution.
######
import plotly.offline as pyo
import plotly.figure_factory as ff
import numpy as np

# Four samples of 200 standard-normal draws each, shifted by -2, 0, +2 and +4.
centers = (-2, 0, 2, 4)
hist_data = [np.random.randn(200) + center for center in centers]
x1, x2, x3, x4 = hist_data

# Legend labels 'Group1' .. 'Group4', one per sample.
group_labels = ['Group{}'.format(i) for i in range(1, len(hist_data) + 1)]

fig = ff.create_distplot(hist_data, group_labels)
pyo.plot(fig, filename='multiset_distplot.html')
예제 #34
0
def updateGraph(dataFields:list, filterIndex:int, graphType:int,
				binSize:int):
	"""
	Build the figure for the chosen data fields, data filter, graph type
	and bin size (the latter only matters when a histogram is selected).

	Args:
		dataFields: names of the record fields to plot, one trace per field.
		filterIndex: position of the filter selector (0, 1 or 2); any other
			value selects no filter fields.
		graphType: index into GRAPHTYPE_CHOICES naming the kind of graph.
		binSize: histogram bin size, used only for the 'Histogram' type.

	Returns:
		The figure for the selection. An empty, titled figure is returned
		when no fields are selected or no records are left after filtering;
		a figure whose title carries an error message is returned when a
		value cannot be converted to a number or the graph type name is not
		one of the supported ones.
	"""

	# fixed title; the record filter below does not exclude anything yet
	title = 'Without filter'

	if not dataFields:
		return go.Figure(layout=dict(title=title)) # empty graph

	# fields the selected filter is based on, looked up by value (the
	# selector is an int, so it must not be compared with `is`); unknown
	# selector positions select no filter fields instead of leaving fList
	# undefined
	fList = {
		0: ['isMale'],
		1: ['analyticMajor'],
		2: ['nativeEnglish'],
		}.get(filterIndex, [])

	# function which filters a piece of data
	# depending on the filters the user selected
	def dataFilter(data:dict) -> bool:
		# TODO(review): no test against the fields in fList is implemented;
		# exactly as before, every record is kept
		return True

	filteredDataSet = tuple(filter(dataFilter, dataSet))

	if len(filteredDataSet) == 0:
		return go.Figure(layout=dict(title=title)) # empty graph

	# convert the data being plotted into numbers
	try:
		traceValues = [
			[ float(d[field]) for d in filteredDataSet ]
			for field in dataFields
			]
	except (TypeError, ValueError):
		# float() raises ValueError for unparsable text and TypeError for
		# values such as None; both mean the field is not numeric
		return go.Figure(layout=dict(title="Error: Can't plot non-numeric data on a numeric axis."))

	# turn the position on the graph type slider into a graph type name
	graphName = GRAPHTYPE_CHOICES[graphType]

	if graphName == 'Histogram':

		out = ff.create_distplot(
			traceValues, dataFields,
			show_curve=False, show_rug=False, bin_size=binSize,
			)
		out.layout['title'] = title
		return out

	if graphName == 'Density Plot':

		out = ff.create_distplot(
			traceValues, dataFields,
			show_hist=False, show_rug=False,
			)
		out.layout['title'] = title
		return out

	layout = dict(title=title) # layout used by all of the graph types below

	# x axis shared by the per-record graphs (dot and bar plots), which put
	# the record identifier on a categorical axis
	def idAxis() -> dict:
		return dict(
			title=DATA_IDFIELD,
			type='category',
			titlefont=dict(
				size=12,
				),
			)

	if graphName == 'Violin Plot':

		traces = [
			dict(
				type='violin',
				name=field,
				y=values,
				)
			for field,values in zip(dataFields,traceValues)
			]

	elif graphName == 'Box Plot':

		traces = [
			go.Box(
				name=field,
				y=values,
				)
			for field,values in zip(dataFields,traceValues)
			]

	elif graphName == 'Dot Plot':

		# note: the raw field values are plotted here, not the float
		# conversions in traceValues
		traces = [
			dict(
				type='scatter',
				name=field,
				y=[d[field] for d in filteredDataSet],
				x=[d[DATA_IDFIELD] for d in filteredDataSet],
				mode='markers',
				)
			for field in dataFields
			]

		layout['xaxis'] = idAxis()

	elif graphName == 'Bar Plot':

		traces = [
			go.Bar(
				name=field,
				y=[d[field] for d in filteredDataSet],
				x=[d[DATA_IDFIELD] for d in filteredDataSet],
				)
			for field in dataFields
			]

		layout['xaxis'] = idAxis()

	else:
		# previously fell through to an UnboundLocalError on `traces`
		return go.Figure(layout=dict(title="Error: Unknown graph type '{}'.".format(graphName)))

	return go.Figure(data=traces, layout=layout)
#######
# This distplot looks back at the Mark Twain/
# Quintus Curtius Snodgrass data and tries
# to compare them.
######
import plotly.offline as pyo
import plotly.figure_factory as ff

# Samples for the two authors, keyed by the legend label of each trace.
samples = {
    'Snodgrass': [.209, .205, .196, .210, .202, .207, .224, .223, .220, .201],
    'Twain': [.225, .262, .217, .240, .230, .229, .235, .217],
}
snodgrass = samples['Snodgrass']
twain = samples['Twain']

group_labels = list(samples)
hist_data = [samples[label] for label in group_labels]

# Every trace uses the same bin width.
fig = ff.create_distplot(
    hist_data,
    group_labels,
    bin_size=[.005] * len(hist_data),
)
pyo.plot(fig, filename='SnodgrassTwainDistplot.html')
예제 #36
0
import statistics as st
import csv
import plotly_express as px
import plotly.figure_factory as ff
import pandas as pd

# Plot the distribution of the "reading score" column and report how much of
# the data lies within one standard deviation of the mean.
df = pd.read_csv("StudentsPerformance.csv")
reading_score = df["reading score"].tolist()
fig = ff.create_distplot([reading_score], ["reading score"], show_hist=False)
fig.show()
mean = st.mean(reading_score)
print("mean:", mean)
std = st.stdev(reading_score)
print("standard deviation:", std)
# Each band is symmetric about the mean: (mean - k*std, mean + k*std).
# The 2-SD and 3-SD upper bounds previously subtracted instead of added,
# which made those bands empty.
SD1_START, SD1_END = mean - std, mean + std
SD2_START, SD2_END = mean - (2 * std), mean + (2 * std)
SD3_START, SD3_END = mean - (3 * std), mean + (3 * std)
# Scores strictly inside each band.
listdata_1SD = [
    result for result in reading_score
    if SD1_START < result < SD1_END
]
listdata_2SD = [
    result for result in reading_score
    if SD2_START < result < SD2_END
]
listdata_3SD = [
    result for result in reading_score
    if SD3_START < result < SD3_END
]
print("{}% of data lies between 1SD".format(
    len(listdata_1SD) * 100.0 / len(reading_score)))
예제 #37
0
def genFigure(xCfgAirlines, xCfgLocations, xCfgAircraft, normalized):
    """Build the per-aircraft distribution plot of route distances.

    Args:
        xCfgAirlines: mapping read with ``.get`` for the 'airports' and
            'airlines' selections; missing keys fall back to
            ``dataModule.Airports`` / ``dataModule.Airlines``.
        xCfgLocations: not used by this function.
        xCfgAircraft: aircraft selection, passed to ``dataModule.filterData``
            unchanged.
        normalized: container or string; when it contains 'yes', each
            group's distances are divided by that group's mean distance.

    Returns:
        dict: the figure as a plain dict with 'data' and 'layout' entries.
        When no aircraft group has at least three routes the 'data' list is
        empty and only the layout is populated.
    """
    selectedAirports = xCfgAirlines.get('airports', dataModule.Airports)
    selectedAirlines = xCfgAirlines.get('airlines', dataModule.Airlines)
    selectedAircraft = xCfgAircraft

    routes = dataModule.filterData(selectedAirports, selectedAirlines, selectedAircraft)

    # Manufacturer prefixes shortened (or dropped) in the legend labels,
    # applied in this order.
    abbreviations = (
        ('Boeing ', 'B'),
        ('Airbus ', ''),
        ('McDonnell Douglas ', ''),
        ('Embraer ', 'E'),
        ('Aerospatiale/Alenia ', ''),
    )
    normalize = 'yes' in normalized

    rangeData = []
    groupLabels = []
    for ac, df in routes.groupby('aircraft'):
        distances = df['distance'].values
        # Groups with fewer than three routes are left out of the plot.
        if len(distances) < 3:
            continue
        for prefix, short in abbreviations:
            ac = ac.replace(prefix, short)
        groupLabels.append(ac[:20])  # max 20 characters
        m = np.mean(distances) if normalize else 1
        rangeData.append(distances / m)

    if rangeData:
        fig = ff.create_distplot(
            rangeData, groupLabels,
            bin_size=100, show_hist=False, show_rug=False,
            histnorm='probability',
        ).to_dict()
    else:
        # create_distplot indexes hist_data[0] and fails on an empty list,
        # so return an empty figure that still carries the layout below.
        fig = {'data': [], 'layout': {}}

    for i, d in enumerate(fig['data']):
        d['opacity'] = 0.6
        d['selectgroup'] = i
        d['selectedpoints'] = [0]

    fig['layout'].update(dict(
        title='Distances Flown by Aircraft Type',
        titlefont={
            'size': 16,
            'color': '#a8a8a8',
            'family': 'Open Sans',
        },
        font={'color': '#fff'},
        xaxis=dict(
            type='log',
            showgrid=True,
            gridcolor='rgba(255,255,255,.2)',
            tickfont={'color': 'white'},
            title='Normalized Distance',
            titlefont={'color': '#a8a8a8'},
        ),
        yaxis=dict(
            showgrid=False,
            showticklabels=True,
            ticks='',
            tickfont={'color': 'white'},
            visible=True,  # was misspelled 'visibile'
            title='Prob. Density',
            titlefont={'color': '#a8a8a8'},
        ),
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)',
        margin={'t': 40, 'b': 40, 'r': 0, 'l': 50, 'pad': 1},
        legend={'orientation': 'v', 'xanchor': 'left', 'x': 1},
    ))

    return fig
예제 #38
0

def normalize_series(dframe, series_col_index):
    """Replace extreme outliers in one column of *dframe* with a neighbour.

    A value counts as an outlier when its natural logarithm deviates from
    the mean of the logs by more than four population standard deviations
    of the logs. An outlier at position > 5 is replaced by the value just
    before it; an outlier at position <= 5 by the value just after it.
    Outliers are processed in positional order, so a replacement may copy a
    neighbour that was itself already replaced.

    Args:
        dframe: pandas DataFrame holding the column to clean.
        series_col_index: label of the column to clean.

    Returns:
        The cleaned column as a pandas Series. The same values are also
        written back to ``dframe[series_col_index]``.
    """
    # Work on an explicit copy and assign it back at the end, instead of
    # mutating the frame through a column view (which is not guaranteed to
    # reach the frame, e.g. under pandas Copy-on-Write).
    y = dframe[series_col_index].copy()

    # Positional numpy arrays, so the row index labels play no role.
    log_y = np.log(y.to_numpy(dtype=float))
    deviation = np.abs(log_y - np.average(log_y))
    threshold = 4 * np.sqrt(np.mean(deviation ** 2))

    # A point can only exceed four population standard deviations when the
    # series has at least 18 values, so position i + 1 always exists for the
    # early (i <= 5) outliers.
    for i in np.flatnonzero(deviation > threshold):
        neighbour = i - 1 if i > 5 else i + 1
        y.iloc[i] = y.iloc[neighbour]

    dframe[series_col_index] = y
    return y


# Read the timing table named on the command line; it is expected to have
# one column per thread count, '1thread' .. '8thread'.
dframe = pd.read_csv(sys.argv[1])

thread_counts = range(1, 9)
hist_data = [dframe['{}thread'.format(n)] for n in thread_counts]
# Legend labels: the single-thread run gets its own wording.
group_labels = ['Single_Threaded'] + [
    '{}_Threads'.format(n) for n in thread_counts if n > 1
]

# Curves only (no histogram bars), emitted as an HTML <div> on stdout
# without bundling plotly.js.
fig = ff.create_distplot(hist_data, group_labels, show_hist=False)
div1 = plotly.offline.plot(fig, include_plotlyjs=False, output_type='div')
print(div1)
예제 #39
0
def show_fig(mean_list):
    """Display a distplot of *mean_list* as a single group labelled "reading_time"."""
    figure = ff.create_distplot(
        hist_data=[mean_list],
        group_labels=["reading_time"],
    )
    figure.show()
예제 #40
0
def showFig(meanList):
    """Display a distplot of *meanList* labelled "Reading Time", without histogram bars."""
    samples = [meanList]
    labels = ["Reading Time"]
    figure = pf.create_distplot(samples, labels, show_hist=False)
    figure.show()