def kamervragen_reply_time_per_year(years, kamervraag_durations):
    """Return the HTML of a KDE plot of kamervraag reply times.

    ``kamervraag_durations`` is handed to ``ff.create_distplot`` as the
    histogram data and ``years`` as the matching group labels. Only the
    density curves are drawn (no histogram bars, no rug).
    """
    figure = ff.create_distplot(
        kamervraag_durations,
        years,
        bin_size=1,
        show_curve=True,
        show_hist=False,
        show_rug=False,
    )

    layout = figure['layout']
    # Restrict the x axis to 0-60, then label it.
    layout.update(xaxis=XAxis(range=[0, 60]))
    layout.update(xaxis=dict(title='Antwoordtijd [dagen]'))
    layout.update(height=700)
    layout.update(margin=Margin(t=20))
    layout.update(
        legend=dict(
            bordercolor='#E2E2E2',
            bgcolor='#FFFFFF',
            borderwidth=2,
        )
    )
    return Plot.create_plot_html_default(figure_or_data=figure)
def plot_1d_coupling_profile(couplings_per_bin, plot_dir, ab):
    """Write a KDE plot of coupling values per distance bin to an HTML file.

    Args:
        couplings_per_bin: dict mapping a bin name to a dict with the keys
            'lower', 'upper' and 'couplings'.
        plot_dir: directory the HTML file is written to.
        ab: label used in the axis titles and in the file name.
    """
    # dict.items() works on Python 2 and 3; iteritems() exists on Python 2
    # only. Sorting once keeps labels and data in the same order.
    bins = sorted(couplings_per_bin.items(), reverse=True)

    group_labels = [
        str(bindict['lower']) + "Å < ΔCβ < " + str(bindict['upper']) + "Å"
        for _, bindict in bins
    ]
    hist_data = [bindict['couplings'] for _, bindict in bins]

    # Average number of couplings per bin, rounded to the nearest hundred;
    # it only ends up in the output file name.
    nr_datapoints = int(np.round(np.mean([len(x) for x in hist_data]), decimals=-2))

    fig = ff.create_distplot(hist_data, group_labels, show_hist=False, show_rug=False)

    for trace in fig['data']:
        trace['line']['width'] = 2

    fig['layout']['font'] = dict(size=16)
    fig['layout']['xaxis']['title'] = "couplings w_ij(" + ab + ")"
    fig['layout']['xaxis']['range'] = [-1, 1]
    fig['layout']['yaxis']['title'] = "Distribution of couplings for " + ab
    fig['layout']['margin']['t'] = 10

    plot_name = plot_dir + "/1d_coupling_profile_" + ab + "_avgdatapoints" + str(nr_datapoints) + ".html"
    plotly_plot(fig, filename=plot_name, auto_open=False)
def plot_density(protein, bqij_data, plot_dir):
    """Write a KDE plot and an overlaid histogram of q_ijab values to HTML.

    Every key of ``bqij_data`` except "L" names a group; its value is passed
    to ``io.read_qij`` together with ``bqij_data['L']``. For each group the
    strict upper triangle of the returned ``qij`` is plotted. Two files are
    written into ``plot_dir``:
    ``<protein>_distribution_qijab.html`` and ``<protein>_histogram_qijab.html``.
    """
    group_labels = [name for name in sorted(bqij_data.keys()) if name != "L"]
    L = bqij_data['L']
    upper_triangle = np.triu_indices(n=L, k=1)

    hist_data = []
    for name in group_labels:
        # Only the second return value of read_qij is used.
        _, qij = io.read_qij(bqij_data[name], bqij_data['L'])
        hist_data.append(qij[upper_triangle].flatten())

    histogram_traces = [
        go.Histogram(
            x=values,
            histnorm='probability',
            name=name,
            xbins=dict(start=-0.1, end=1, size=0.005),
            opacity=0.75,
        )
        for name, values in zip(group_labels, hist_data)
    ]

    # Density (KDE) plot.
    density_fig = ff.create_distplot(hist_data, group_labels, show_hist=False, show_rug=False)
    density_fig['layout']['font'] = dict(size=18)
    density_fig['layout']['xaxis']['title'] = "q_ijab"
    density_file = plot_dir + "/" + protein + "_distribution_qijab" + ".html"
    plotly_plot(density_fig, filename=density_file, auto_open=False)

    # Overlaid histogram plot.
    histogram_file = plot_dir + "/" + protein + "_histogram_qijab" + ".html"
    histogram_layout = go.Layout(
        barmode='overlay',
        xaxis=dict(title="q_ijab", exponentformat="e", showexponent='All'),
        yaxis=dict(exponentformat="e", showexponent='All'),
        font=dict(size=18),
    )
    histogram_fig = go.Figure(data=histogram_traces, layout=histogram_layout)
    plotly_plot(histogram_fig, filename=histogram_file, auto_open=False)
def set_display_children(slct_row, data, columns):
    """Return a ``dcc.Graph`` with a histogram for the selected table rows.

    The table is rebuilt from ``data``/``columns``; the 'idx' values of the
    rows at positions ``slct_row`` select the series of the module-level
    ``dt_ni`` that are plotted.
    """
    column_ids = [col['id'] for col in columns]
    table = pd.DataFrame(data, columns=column_ids)
    selected_idx = table.iloc[list(slct_row)]['idx']

    series = [dt_ni[i] for i in selected_idx]
    labels = [str(i) for i in selected_idx]

    figure = ff.create_distplot(
        series,
        labels,
        bin_size=.5,
        show_rug=False,
        show_curve=False,
    )
    figure.layout.update(title='Histogram of Mean Return')
    figure.layout.xaxis.update({'title': 'Mean Return (In Dollar$) '})

    return dcc.Graph(figure=figure, id='my-graph_rec')
def plot_distance_distribution(distances_ab, ab, distance_definition, log, plot_dir):
    """Write a KDE plot of distances per sequence separation to an HTML file.

    Args:
        distances_ab: dict mapping a sequence separation to a dict whose
            entry ``ab`` holds the distances (NaN entries are dropped).
        ab: key selecting the distances; also used in titles and file name.
        distance_definition: label used in axis titles and the file name.
        log: if true, plot log-distances and label the ticks with the
            untransformed values.
        plot_dir: directory the HTML file is written to.
    """
    # dict.items() works on Python 2 and 3; iteritems() exists on Python 2
    # only. Sorting once keeps labels and data aligned.
    entries = sorted(distances_ab.items())

    group_labels = ["sequence separation " + str(seq_sep) for seq_sep, _ in entries]

    hist_data = []
    for _, values in entries:
        distances = np.array(values[ab])
        not_nan = ~np.isnan(values[ab])
        # Transform first, then mask, so NaN positions are dropped either way.
        hist_data.append(np.log(distances)[not_nan] if log else distances[not_nan])

    fig = ff.create_distplot(hist_data, group_labels, show_hist=False, show_rug=False)

    for trace in fig['data']:
        trace['line']['width'] = 2
        if log:
            # Hover text shows the distance on the original scale.
            trace['text'] = ['Cb distance: ' + str(x) for x in np.exp(trace['x'])]
        else:
            trace['text'] = ['Cb distance: ' + str(x) for x in trace['x']]
        trace['hoverinfo'] = "text"

    residues = ab[0] + " and " + ab[2]
    if ab == 'all':
        residues = "residue pair"

    fig['layout']['font'] = dict(size=16)
    fig['layout']['xaxis']['title'] = distance_definition + " distance between " + residues
    fig['layout']['xaxis']['showspikes'] = True
    fig['layout']['yaxis']['title'] = "Distribution of " + residues + " distances (" + distance_definition + ")"
    fig['layout']['yaxis']['showspikes'] = True
    fig['layout']['xaxis']['range'] = [3, 100]
    fig['layout']['xaxis']['tickangle'] = 0
    fig['layout']['margin']['t'] = 10

    plot_file = (
        plot_dir + "/" + distance_definition + "_distribution_" + ab
        + "_data" + str(int(np.mean([len(h) for h in hist_data]))) + ".html"
    )

    if log:
        # Ticks sit at log positions but display the untransformed distance.
        fig['layout']['xaxis']['tickmode'] = "array"
        fig['layout']['xaxis']['ticktext'] = [3, 4, 5, 6, 8, 10, 12, 15, 20, 30, 40, 50, 70, 80]
        fig['layout']['xaxis']['tickvals'] = np.log(fig['layout']['xaxis']['ticktext'])
        fig['layout']['xaxis']['range'] = np.log([3, 100])
        plot_file = plot_file.replace(".html", "_log.html")

    plotly_plot(fig, filename=plot_file, auto_open=False)
def plot_density(samples, PI=[]):
    """Return a figure with the KDE curve of ``samples``.

    Args:
        samples: one-dimensional data passed to ``ff.create_distplot``.
        PI: optional ``[lower, upper]`` pair. When given, the area under the
            curve strictly between the two bounds is filled as a second
            trace. The default list is only read, never mutated.

    Returns:
        A ``go.Figure`` with the 'PDF' trace and, if ``PI`` is given, the
        shaded 'PI [lower,upper]' trace.
    """
    kde = ff.create_distplot([samples], ['group1'], show_hist=False, show_rug=False)
    curve_x = kde['data'][0]['x']
    curve_y = kde['data'][0]['y']

    layout = go.Layout(
        autosize=False,
        width=500,
        height=500,
        # A plain dict avoids the deprecated go.Margin class.
        margin=dict(l=50, r=50, b=100, t=100, pad=4),
        xaxis=dict(title='x'),
        yaxis=dict(title='PDF(x)'),
    )

    data = [
        go.Scatter(
            x=curve_x,
            y=curve_y,
            mode='lines',
            line=dict(width=2),
            name='PDF',
            fill=None,
        )
    ]

    if not PI:
        return go.Figure(data=data, layout=layout)

    # Keep only the part of the curve strictly inside the interval.
    df = pd.DataFrame(curve_y, curve_x, columns=['Y'])
    df.index.name = 'X'
    df = df.loc[(df.index > PI[0]) & (df.index < PI[1]), :]

    data.append(
        go.Scatter(
            x=df.index,
            y=df.Y.values,
            # 'lines' is the valid scatter mode; the previous 'line' is not.
            mode='lines',
            fill='tozeroy',
            line=dict(width=0.0),
            name='PI' + ' [' + str(PI[0]) + ',' + str(PI[1]) + ']',
        )
    )
    return go.Figure(data=data, layout=layout)
def plot_1d_coupling_profile(couplings_per_pair, lower_cb_distance, upper_cb_distance, plot_file):
    """Write a KDE plot of coupling values per residue pair to ``plot_file``.

    Args:
        couplings_per_pair: dict mapping a pair label to its coupling values.
        lower_cb_distance: lower distance bound, shown in the x-axis title.
        upper_cb_distance: upper distance bound, shown in the x-axis title.
        plot_file: path of the HTML file to write.
    """
    # Each legend entry is the pair label followed by its number of values.
    group_labels = [
        key + "(" + str(len(values)) + ")"
        for key, values in couplings_per_pair.items()
    ]
    # On Python 3 dict.values() is a view that cannot be indexed, but
    # create_distplot indexes hist_data; materialise it as a list.
    hist_data = list(couplings_per_pair.values())

    fig = ff.create_distplot(hist_data, group_labels, show_hist=False, show_rug=False)

    for trace in fig['data']:
        trace['line']['width'] = 2

    fig['layout']['font'] = dict(size=16)
    fig['layout']['xaxis']['title'] = "couplings w_ijab for residue pairs ij at {0}Å < ΔCβ < {1}Å".format(lower_cb_distance, upper_cb_distance)
    fig['layout']['xaxis']['range'] = [-1, 1]
    fig['layout']['yaxis']['title'] = "Distribution of couplings "
    fig['layout']['margin']['t'] = 10

    plotly_plot(fig, filename=plot_file, auto_open=False)
import plotly.figure_factory as ff
import plotly.graph_objects as go
import statistics
import random
import pandas as pd
import csv

# Read the "average" column of data.csv into a plain list.
df = pd.read_csv("data.csv")
data = df["average"].tolist()

# Show its density curve (no histogram bars).
fig = ff.create_distplot(hist_data=[data], group_labels=["average"], show_hist=False)
fig.show()

# Summary statistics of the same data.
mean = statistics.mean(data)
std_deviation = statistics.stdev(data)
print("mean of samples: ", mean)
print("standard deviation of sample", std_deviation)
def create_distplot(*args, **kwargs):
    """Forward all arguments to ``plotly.figure_factory.create_distplot``.

    ``FigureFactory._deprecated`` is called first with this function's name.
    """
    FigureFactory._deprecated('create_distplot')
    # Imported inside the function, under an alias so the imported function
    # does not shadow this wrapper's own name.
    from plotly.figure_factory import create_distplot as _create_distplot
    return _create_distplot(*args, **kwargs)
import plotly.figure_factory as pf
import statistics
import random
import pandas as pd
import csv

# Load the reading times and plot their distribution.
df = pd.read_csv('medium_data.csv')
data = df['reading_time'].tolist()

fig = pf.create_distplot([data], ["Reading Time"], show_hist=False)
fig.show()
print("Population Mean ", statistics.mean(data))


def randomSetOfMeans(counter):
    """Return the mean of ``counter`` values drawn with replacement from ``data``."""
    dataSet = []
    for _ in range(counter):
        # randrange excludes the upper bound. The previous
        # random.randint(0, len(data)) could return len(data) and raise
        # IndexError.
        randomIndex = random.randrange(len(data))
        dataSet.append(data[randomIndex])
    return statistics.mean(dataSet)


def showFig(meanList):
    """Show the density curve of ``meanList``."""
    df = meanList
    fig = pf.create_distplot([df], ["Reading Time"], show_hist=False)
    fig.show()
def show_fig(mean_list):
    """Show the density curve of ``mean_list`` with a line at its mean."""
    average = statistics.mean(mean_list)
    figure = ff.create_distplot([mean_list], ["average"], show_hist=False)
    # Line from y=0 to y=1 at the mean.
    mean_marker = go.Scatter(
        x=[average, average],
        y=[0, 1],
        mode="lines",
        name="MEAN",
    )
    figure.add_trace(mean_marker)
    figure.show()
dev = st.stdev(arr) return dev def percent(arr, mean, std): m1 = int(round(mean - std * 2, 0)) m2 = int(round(mean + std * 2, 0)) count = 0 for i in arr: if int(round(i)) in range(m1, m2): count += 1 percent = (count / len(arr)) * 100 return percent mean = Mean(arr) median = Median(arr) mode = Mode(arr) stdev = standardDev(arr) per = percent(arr, mean, stdev) print(f"Mean is {mean}") print(f"Median is {median}") print(f"Mode is {mode}") print(f"Standard Deviation is {stdev}") print(f"Percentage is {per}") fig = ff.create_distplot([arr], ["Data"], show_hist=False) fig.show()
from sklearn.model_selection import train_test_split import xgboost as xgb import plotly.figure_factory as ff from functions import * external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css'] app = dash.Dash(__name__, external_stylesheets=external_stylesheets) dt = pd.read_csv('all_data.csv') dt = dt[[ '(%)lymphocyte', 'High sensitivity C-reactive protein', 'Lactate dehydrogenase', 'outcome' ]] fig = ff.create_distplot([dt['(%)lymphocyte'].dropna().to_numpy()], ['lymphocytes']) fig2 = ff.create_distplot( [dt['High sensitivity C-reactive protein'].dropna().to_numpy()], ['hs-CRP']) fig3 = ff.create_distplot([dt['Lactate dehydrogenase'].dropna().to_numpy()], ['LDH']) app.layout = html.Div(children=[ html.H1(children='Interactive survival prediction model'), html.Div(children=[ html.Label("Choose your patient's nationality:"), dcc.RadioItems( id='nation', options=[{ 'label': i, 'value': i
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import streamlit as st
import numpy as np
from plotly import figure_factory

# Three samples of 200 standard-normal draws, shifted by -2, 0 and +2.
x1 = np.random.randn(200) - 2
x2 = np.random.randn(200)
x3 = np.random.randn(200) + 2

# One bin size per group, in the same order as the data.
hist_data = [x1, x2, x3]
group_labels = ["Group 1", "Group 2", "Group 3"]
bin_size = [0.1, 0.25, 0.5]

# Build the distribution plot and render it in the Streamlit app.
chart = figure_factory.create_distplot(
    hist_data,
    group_labels,
    bin_size=bin_size,
)
st.plotly_chart(chart)
def _distplot(df):
    """Return the ``(data, layout)`` of a distplot with one group per column.

    The transposed values of ``df`` are the histogram data; the stringified
    column names are the group labels.
    """
    per_column_values = df.T.values
    column_labels = [str(column) for column in df.columns]
    figure = ff.create_distplot(per_column_values, column_labels)
    return figure["data"], figure["layout"]
def show_fig(mean_list):
    """Show the density curve (no histogram bars) of ``mean_list``."""
    figure = ff.create_distplot(
        hist_data=[mean_list],
        group_labels=["average"],
        show_hist=False,
    )
    figure.show()
# Finding patterns between the sum of dice results
import random
import plotly.figure_factory as ff

count = []
diceResult = []

# Roll two dice 100 times and record each sum with its roll number.
for i in range(100):
    dice1, dice2 = random.randint(1, 6), random.randint(1, 6)
    diceResult.append(dice1 + dice2)
    count.append(i)

fig = ff.create_distplot(hist_data=[diceResult], group_labels=["result"])
fig.show()
def main():
    """Render a Streamlit demo page showing one example of many widgets.

    In order: a re-run button, title, map, graphviz graph, dataframes,
    static table, line/area/bar charts, a matplotlib histogram, an Altair
    scatter, a plotly distplot, a Bokeh line, plotly subplots, two pydeck
    maps, images, an echoed code block, and finally ``show_footer()``.
    Most data is random, so the page changes on every run.
    """
    st.button("Re-run")
    # Set up the layout.
    st.title("Welcome to the pag3")
    st.markdown("Coming soon ... Sign up [here]() to get notified.")

    # --- Map (OpenStreetMap): 1000 random points around (37.76, -122.4) ---
    df = pd.DataFrame(np.random.randn(1000, 2) / [50, 50] + [37.76, -122.4], columns=['lat', 'lon'])
    st.map(df)

    # --- Graphviz graph ---
    import graphviz as graphviz
    # Create a directed graph object and add its edges.
    graph = graphviz.Digraph()
    graph.edge('run', 'intr')
    graph.edge('intr', 'runbl')
    graph.edge('runbl', 'run')
    graph.edge('run', 'kernel')
    graph.edge('kernel', 'zombie')
    graph.edge('kernel', 'sleep')
    graph.edge('kernel', 'runmem')
    graph.edge('sleep', 'swap')
    graph.edge('swap', 'runswap')
    graph.edge('runswap', 'new')
    graph.edge('runswap', 'runmem')
    graph.edge('new', 'runmem')
    graph.edge('sleep', 'runmem')
    st.graphviz_chart(graph)

    # --- Dataframe ---
    st.subheader('DATAFRAME')
    df = pd.DataFrame(np.random.randn(50, 20), columns=('col %d' % i for i in range(20)))
    st.dataframe(df)  # Same as st.write(df)

    # --- Dataframe with the per-column maximum highlighted ---
    st.subheader('DATAFRAME WITH MAX UNDERLIANED')
    df = pd.DataFrame(np.random.randn(10, 20), columns=('col %d' % i for i in range(20)))
    st.dataframe(df.style.highlight_max(axis=0))

    # --- Static table ---
    # Display a static table.
    ''' This differs from st.dataframe in that the table in this case is static: its entire contents are laid out directly on the page.'''
    st.subheader('TABLE')
    df = pd.DataFrame(np.random.randn(10, 5), columns=('col %d' % i for i in range(5)))
    st.table(df)

    # --- Line chart ---
    st.subheader('LINE CHART')
    chart_data = pd.DataFrame(np.random.randn(20, 3), columns=['a', 'b', 'c'])
    st.line_chart(chart_data)

    # --- Area chart ---
    st.subheader('LINE CHART AREA')
    chart_data = pd.DataFrame(np.random.randn(20, 3), columns=['a', 'b', 'c'])
    st.area_chart(chart_data)

    # --- Bar chart ---
    st.subheader('BAR CHART')
    chart_data = pd.DataFrame(np.random.randn(50, 3), columns=["a", "b", "c"])
    st.bar_chart(chart_data)

    # --- Histogram (matplotlib) ---
    import matplotlib.pyplot as plt
    st.subheader('HISTOGRAM MATPLOTLIB')
    arr = np.random.normal(1, 1, size=100)
    fig, ax = plt.subplots()
    ax.hist(arr, bins=20)
    st.pyplot(fig)

    # --- Scatter plot (Altair) ---
    st.subheader('SCATTER ALTAIR')
    import altair as alt
    df = pd.DataFrame(np.random.randn(200, 3), columns=['a', 'b', 'c'])
    c = alt.Chart(df).mark_circle().encode(x='a', y='b', size='c', color='c', tooltip=['a', 'b', 'c'])
    st.altair_chart(c, use_container_width=True)

    # --- Plotly distplot ---
    st.subheader('PLOTLY DIAGRAM')
    import plotly.figure_factory as ff
    # Three random samples shifted by -2, 0 and +2.
    x1 = np.random.randn(200) - 2
    x2 = np.random.randn(200)
    x3 = np.random.randn(200) + 2
    # Group data together.
    hist_data = [x1, x2, x3]
    group_labels = ['Group 1', 'Group 2', 'Group 3']
    # Create the distplot with one bin size per group.
    fig = ff.create_distplot(hist_data, group_labels, bin_size=[.1, .25, .5])
    # Plot it.
    st.plotly_chart(fig, use_container_width=True)

    # --- Bokeh line chart ---
    st.subheader('BOKEH')
    from bokeh.plotting import figure
    x = [1, 2, 3, 4, 5]
    y = [6, 7, 2, 4, 5]
    p = figure(title='simple line example', x_axis_label='x', y_axis_label='y')
    p.line(x, y, line_width=2)
    st.bokeh_chart(p, use_container_width=True)

    # --- Plotly side-by-side subplots ---
    # NOTE(review): `px` and `random` are imported here but not used in the
    # visible part of this function.
    import plotly.express as px
    from plotly.subplots import make_subplots
    import plotly.graph_objects as go
    from numpy import random
    pts = 50
    x1 = np.arange(pts)
    y1 = np.random.rand(pts)
    y2 = np.random.rand(pts)
    y3 = (x1 / pts)**2
    fig = make_subplots(rows=1, cols=2)
    fig.add_trace(go.Scatter(x=x1, y=y1, mode='markers', name='markers'), row=1, col=1)
    fig.add_trace(go.Scatter(x=x1, y=y2, mode='markers', name='markers2'), row=1, col=2)
    fig.add_trace(go.Scatter(x=x1, y=y3, mode='lines', name='lines'), row=1, col=2)
    fig.update_layout(height=300, width=800, title_text="Side By Side Subplots")
    st.plotly_chart(fig)

    # --- pydeck: hexagon + scatter layers over random points ---
    st.subheader('exagon bar-3d')
    import pydeck as pdk
    df = pd.DataFrame(np.random.randn(1000, 2) / [50, 50] + [37.76, -122.4], columns=['lat', 'lon'])
    st.pydeck_chart(
        pdk.Deck(
            map_style='mapbox://styles/mapbox/light-v9',
            initial_view_state=pdk.ViewState(
                latitude=37.76,
                longitude=-122.4,
                zoom=11,
                pitch=50,
            ),
            layers=[
                pdk.Layer(
                    'HexagonLayer',
                    data=df,
                    get_position='[lon, lat]',
                    radius=200,
                    elevation_scale=4,
                    elevation_range=[0, 1000],
                    pickable=True,
                    extruded=True,
                ),
                pdk.Layer(
                    'ScatterplotLayer',
                    data=df,
                    get_position='[lon, lat]',
                    get_color='[200, 30, 0, 160]',
                    get_radius=200,
                ),
            ],
        ))

    # --- pydeck: hexagon layer fed from a remote CSV ---
    UK_ACCIDENTS_DATA = (
        'https://raw.githubusercontent.com/uber-common/'
        'deck.gl-data/master/examples/3d-heatmap/heatmap-data.csv')
    # Define a layer to display on a map.
    layer = pdk.Layer('HexagonLayer',
                      UK_ACCIDENTS_DATA,
                      get_position=['lng', 'lat'],
                      auto_highlight=True,
                      elevation_scale=50,
                      pickable=True,
                      elevation_range=[0, 3000],
                      extruded=True,
                      coverage=1)
    # Set the viewport location.
    view_state = pdk.ViewState(longitude=-1.415,
                               latitude=52.2323,
                               zoom=6,
                               min_zoom=5,
                               max_zoom=15,
                               pitch=40.5,
                               bearing=-27.36)
    # Render.
    st.pydeck_chart(pdk.Deck(layers=[layer], initial_view_state=view_state))

    # --- Single image loaded from a URL ---
    st.subheader('one-image from URL')
    from PIL import Image
    # A local file could be opened instead: Image.open('images/cat.jpg')
    image = 'https://static.streamlit.io/examples/cat.jpg'
    st.image(image, use_column_width=True)

    # --- Three images in three columns ---
    st.subheader('more-images')
    col1, col2, col3 = st.beta_columns(3)
    with col1:
        st.header("A cat")
        st.image("https://static.streamlit.io/examples/cat.jpg", use_column_width=True)
    with col2:
        st.header("A dog")
        st.image("https://static.streamlit.io/examples/dog.jpg", use_column_width=True)
    with col3:
        st.header("An owl")
        st.image("https://static.streamlit.io/examples/owl.jpg", use_column_width=True)

    # --- Echo a code block ---
    st.subheader('display code')
    with st.echo():
        st.write('This code will be printed')

    # show_footer is defined outside this function.
    show_footer()
import plotly.figure_factory as ff
import pandas as pd
import csv

# Plot the distribution of the "Avg Rating" column of data.csv.
df = pd.read_csv("data.csv")
fig = ff.create_distplot(
    hist_data=[df["Avg Rating"].tolist()],
    group_labels=["Avg Rating"],
)
fig.show()
import statistics
import random
import plotly.graph_objects as go

# Population: the "temp" column of temp.csv.
reader = pd.read_csv("temp.csv")
data = reader["temp"].tolist()

mean = statistics.mean(data)
std = statistics.stdev(data)
print(mean)
print(std)

# Draw 1000 values with replacement from the population.
data1 = []
for i in range(0, 1000):
    # randrange excludes the upper bound. The previous
    # random.randint(0, len(data)) could return len(data) and raise
    # IndexError.
    index = random.randrange(len(data))
    value = data[index]
    data1.append(value)
print(data1)

mean1 = statistics.mean(data1)
std1 = statistics.stdev(data1)
print("mean of sample data = ", mean1)
print("std of smaple data = ", std1)

graph = ff.create_distplot([data1], ["temp"], show_hist=False)
# NOTE(review): this line runs from the population mean at y=0 to the sample
# mean at y=1, so it is slanted unless both means agree - confirm this is
# intended rather than a vertical line at one mean.
graph.add_trace(
    go.Scatter(x=[mean, mean1], y=[0, 1], mode="lines", name="mean"))
graph.show()
import statistics
import plotly.figure_factory as ff

# Load both columns of height-weight.csv as plain lists.
df = pd.read_csv("height-weight.csv")
heightlist = df["Height(Inches)"].to_list()
weightlist = df["Weight(Pounds)"].to_list()

# Mean, mode and median of each column.
heightmean = statistics.mean(heightlist)
weightmean = statistics.mean(weightlist)
heightmode = statistics.mode(heightlist)
weightmode = statistics.mode(weightlist)
heightmedian = statistics.median(heightlist)
weightmedian = statistics.median(weightlist)

height_summary = "mean,median,mode of height is: {} ,{} ,{} respectively"
weight_summary = "mean,median,mode of weight is: {} ,{} ,{} respectively"
print(height_summary.format(heightmean, heightmedian, heightmode))
print(weight_summary.format(weightmean, weightmedian, weightmode))

# One density curve per column.
fig = ff.create_distplot([heightlist], ["result"], show_hist=False)
fig1 = ff.create_distplot([weightlist], ["result"], show_hist=False)
fig.show()
fig1.show()

stdht = statistics.stdev(heightlist)
stdwt = statistics.stdev(weightlist)
print("the standard deviation of height and weight is: {},{} respectively".format(stdht, stdwt))

# Bounds of the one/two/three standard-deviation ranges around each mean.
height1ststart = heightmean - stdht
height1stend = heightmean + stdht
height2ststart = heightmean - (2 * stdht)
height2stend = heightmean + (2 * stdht)
height3ststart = heightmean - (3 * stdht)
height3stend = heightmean + (3 * stdht)
weight1ststart = weightmean - stdwt
weight1stend = weightmean + stdwt
weight2ststart = weightmean - (2 * stdwt)
weight2stend = weightmean + (2 * stdwt)
def update_graph(chart):
    """Return the figure for the selected chart type.

    Only the requested chart is built; previously all six (including a KDE
    distplot) were computed on every call and five were discarded.

    Args:
        chart: one of "Box Plot", "Error Bar", "Histogram", "2D Histogram",
            "Distplot"; any other value selects the violin plot.

    Returns:
        A ``{"data": ..., "layout": ...}`` dict, or for "Distplot" the
        figure returned by ``ff.create_distplot``.
    """
    if chart == "Box Plot":
        traces = [
            go.Box(y=df[df["Age Group"] == age]['Purchase'], name=age)
            for age in ['0-17', '18-25', '26-35', '36-45', '46-50', '51-55', '55+']
        ]
        layout = go.Layout(
            title="Purchase vs Age group",
            xaxis={"title": "Age Group"},
            yaxis={"title": "Sales ($)"},
            colorway=['#b2182b', '#ef8a62', '#fddbc7', '#E7E7E7', '#d1e5f0', '#67a9cf', '#2166ac'],
        )
        return {"data": traces, "layout": layout}

    if chart == "Error Bar":
        # Total purchase per occupation, drawn with a 10% error bar.
        df_bar = df.groupby(["Occupation"])['Purchase'].sum().reset_index()
        traces = [go.Bar(
            x=df_bar["Occupation"],
            y=df_bar["Purchase"],
            marker={'color': '#ef8a62', 'line': {'color': "#b2182b", 'width': 0.5}},
            opacity=0.6,
            error_y={'type': 'percent', 'value': 10},
        )]
        layout = go.Layout(
            title="Purchase vs Occupation Types",
            xaxis={"title": "Occupation Category (20 Types)"},
            yaxis={"title": "Sales ($)"},
        )
        return {"data": traces, "layout": layout}

    if chart == "Histogram":
        traces = [
            go.Histogram(x=df[product], name=product, xbins={"size": 3}, opacity=0.8)
            for product in ['Product 1', 'Product 2', 'Product 3']
        ]
        layout = go.Layout(
            title="Product Category Distribution",
            xaxis={"title": "Products"},
            yaxis={"title": "Frequency"},
            barmode='overlay',
            colorway=['#e9a3c9', '#ffffbf', '#a1d76a'],
        )
        return {"data": traces, "layout": layout}

    if chart == "2D Histogram":
        traces = [go.Histogram2d(
            x=df["Age Group"].sort_values(),
            y=df['Purchase'],
            histnorm='probability',
            autobinx=False,
            xbins={"size": 1},
            autobiny=False,
            ybins={"size": 1000},
            colorscale=[[0, 'rgb(12,51,131)'], [0.25, 'rgb(10,136,186)'],
                        [0.5, 'rgb(242,211,56)'], [0.75, 'rgb(242,143,56)'],
                        [1, 'rgb(217,30,30)']],
        )]
        layout = go.Layout(
            title="Sales vs Age Group distribution",
            xaxis={"title": "Age Group"},
            yaxis={"title": "Sales ($)"},
        )
        return {"data": traces, "layout": layout}

    if chart == "Distplot":
        # One purchase distribution per city.
        hist_data = [df[df["City"] == city]["Purchase"] for city in ("A", "B", "C")]
        group_labels = ["City A", "City B", "City C"]
        figure = ff.create_distplot(hist_data, group_labels, bin_size=1000,
                                    colors=['#a6cee3', '#1f78b4', '#b2df8a'])
        figure['layout'].update(title='Sales Distribution Over Cities',
                                yaxis={"title": " Probability Density of Sales"})
        return figure

    # Fallback for every other value: grouped violin plot by gender.
    males = df[df['Gender'] == 'M']
    females = df[df['Gender'] == 'F']
    traces = [
        {"type": 'violin',
         "x": males["Resident of the current city"],
         "y": males["Purchase"],
         "legendgroup": 'M',
         "scalegroup": 'M',
         "name": 'Male',
         "box": {"visible": True},
         "meanline": {"visible": True},
         "line": {"color": '#C1EC00'}},
        {"type": 'violin',
         "x": females["Resident of the current city"],
         "y": females["Purchase"],
         "legendgroup": 'F',
         "scalegroup": 'F',
         "name": 'Female',
         "box": {"visible": True},
         "meanline": {"visible": True},
         "line": {"color": '#EC7899'}},
    ]
    layout = go.Layout(
        title="Sales Distribution Over Duration of Stay",
        xaxis={"title": " Duration of Stay (years)"},
        yaxis={"title": "Sales ($)"},
        violinmode="group",
    )
    return {"data": traces, "layout": layout}
import plotly.figure_factory as ff
import plotly.graph_objects as go
import statistics
import random
import pandas as pd
import csv

# Read the "temp" column of data.csv into a plain list.
df = pd.read_csv("data.csv")
data = df["temp"].tolist()

# Show its density curve (no histogram bars).
fig = ff.create_distplot(hist_data=[data], group_labels=["temp"], show_hist=False)
fig.show()

# Print mean and sample standard deviation on one line.
mean = statistics.mean(data)
std_deviation = statistics.stdev(data)
print(mean, std_deviation)
import pandas as pd
import plotly.figure_factory as ff

# Plot histogram and density curve of the "Avg Rating" column.
df = pd.read_csv("data.csv")
fig = ff.create_distplot(
    hist_data=[df["Avg Rating"]],
    group_labels=["Average Rating"],
    show_hist=True,
)
fig.show()
import random
import plotly.express as px
import plotly.figure_factory as ff

count = []
diceResult = []

# Roll two dice 100 times and record each sum with its roll number.
for i in range(100):
    dice1, dice2 = random.randint(1, 6), random.randint(1, 6)
    diceResult.append(dice1 + dice2)
    count.append(i)

# Density curve of the sums (a bar chart via px.bar was tried earlier).
fig = ff.create_distplot(hist_data=[diceResult], group_labels=["Result"], show_hist=False)
fig.show()
import plotly.figure_factory as ff
import csv
import pandas as pd

# Density curve of the "Weight(Pounds)" column of data.csv.
df = pd.read_csv("data.csv")
fig = ff.create_distplot(
    hist_data=[df["Weight(Pounds)"].tolist()],
    group_labels=["Weight"],
    show_hist=False,
)
fig.show()
def update_density(selected_group):
    """Return a distplot for column ``selected_group - 1`` of ``vectors``.

    Returns ``None`` when ``selected_group`` equals 0.
    """
    if selected_group == 0:
        return None

    # Group numbers are 1-based; column positions are 0-based.
    column = vectors.iloc[:, selected_group - 1]
    dense_plot = ff.create_distplot([column], [str(selected_group)])
    dense_plot['layout'].update(title='<b>likelihood density</b>')
    return dense_plot
mean = statistics.mean(mean_list) print("mean of sampling distribution:- ", mean) print("Standard deviation of sampling distribution:- ", std_deviation) ## findig the standard deviation starting and ending values first_std_deviation_start, first_std_deviation_end = mean - std_deviation, mean + std_deviation second_std_deviation_start, second_std_deviation_end = mean - ( 2 * std_deviation), mean + (2 * std_deviation) third_std_deviation_start, third_std_deviation_end = mean - ( 3 * std_deviation), mean + (3 * std_deviation) df = pd.read_csv("data.csv") data = df["average"].tolist() mean_of_sample1 = statistics.mean(data) print("Mean of sample1:- ", mean_of_sample1) fig = ff.create_distplot([mean_list], ["average"], show_hist=False) fig.add_trace( go.Scatter(x=[mean, mean], y=[0, 0.17], mode="lines", name="MEAN")) fig.add_trace( go.Scatter(x=[mean_of_sample1, mean_of_sample1], y=[0, 0.17], mode="lines", name="Average of population")) fig.add_trace( go.Scatter(x=[first_std_deviation_end, first_std_deviation_end], y=[0, 0.17], mode="lines", name="STANDARD DEVIATION 1 END")) fig.add_trace( go.Scatter(x=[second_std_deviation_end, second_std_deviation_end], y=[0, 0.17],
import pandas as pd
import csv
import plotly.figure_factory as ff

# Density curve of the "Avg Rating" column of bell.csv.
df = pd.read_csv("bell.csv")
fig = ff.create_distplot(
    hist_data=[df["Avg Rating"].tolist()],
    group_labels=["Avg Rating"],
    show_hist=False,
)
fig.show()
import os
import pandas as pd
import plotly.offline as pyo
import plotly.figure_factory as ff
import plotly.graph_objs as go

# iris.csv lives in data_samples/Data, one directory above this file's folder.
data_folder = os.path.join(
    os.path.dirname(os.path.dirname(__file__)),
    'data_samples/Data',
)
data_file = os.path.join(data_folder, 'iris.csv')

df = pd.read_csv(data_file)
print(df['class'].unique())

# Petal lengths, one series per iris class.
x1 = df[df['class'] == 'Iris-setosa']['petal_length']
x2 = df[df['class'] == 'Iris-versicolor']['petal_length']
x3 = df[df['class'] == 'Iris-virginica']['petal_length']

hist_data = [x1, x2, x3]
group_labels = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']

fig = ff.create_distplot(hist_data=hist_data, group_labels=group_labels)
pyo.plot(fig)
# Add labels plt.title('Density plot and Histogram of number of words of abstract') plt.xlabel('Count of documents') plt.ylabel('Number of words') plt.show() # histogram plt.hist(doc_length, color='blue', edgecolor='black', bins=size) # Add labels plt.title('Histogram of number of words of abstract') plt.xlabel('Number of words') plt.ylabel('Count of documents') plt.show() import plotly.figure_factory as ff group_labels = ['distplot'] # name of the dataset fig = ff.create_distplot([doc_length], group_labels) py.plot(fig, filename='../schemas/preprocess_distplot_word_count.html') # ====================================================================================================================== # Find the maximum length of abstract in the whole dataset # ====================================================================================================================== print("Maximum length of abstract in the whole dataset", max(data['abstract'].apply(len))) # 3011 print("Maximum length of title in the whole dataset", max(data['title'].apply(len))) # Maximum length of abstract in the whole dataset 1059 # Maximum length of title in the whole dataset 34
#######
# This distplot demonstrates that random samples
# seldom fit a "normal" distribution.
######
import plotly.offline as pyo
import plotly.figure_factory as ff
import numpy as np

# Four samples of 200 standard-normal draws, shifted by -2, 0, +2 and +4.
x1 = np.random.randn(200) - 2
x2 = np.random.randn(200)
x3 = np.random.randn(200) + 2
x4 = np.random.randn(200) + 4

hist_data = [x1, x2, x3, x4]
group_labels = ['Group1', 'Group2', 'Group3', 'Group4']

fig = ff.create_distplot(hist_data=hist_data, group_labels=group_labels)
pyo.plot(fig, filename='multiset_distplot.html')
def updateGraph(dataFields:list, filterIndex:int, graphType:int, binSize:int):
    """
    Update the graph based on the chosen data fields, data filter, graph
    type, and bin size (the latter only for the histogram).

    Args:
        dataFields: names of the fields to plot, one trace per field.
        filterIndex: 0, 1 or 2, selecting the filter field list.
        graphType: index into ``GRAPHTYPE_CHOICES``.
        binSize: bin size used by the 'Histogram' graph type.

    Returns:
        A plotly figure. An empty titled figure is returned when there is
        nothing to plot, or an error-titled one when a value is not numeric.
    """
    # title of the graph, set to the filename for now
    title = 'Without filter'

    # `not` also covers None, which len() would reject.
    if not dataFields:
        return go.Figure(layout=dict(title=title))  # empty graph

    # Compare by value: `is` tests object identity, which only happens to
    # work for small ints on CPython and fails for e.g. 0.0.
    if filterIndex == 0:
        fList = ['isMale']
    elif filterIndex == 1:
        fList = ['analyticMajor']
    elif filterIndex == 2:
        fList = ['nativeEnglish']

    # function which filters a piece of data
    # depending on the filters the user selected
    # NOTE(review): as written this accepts every record whenever fList is
    # non-empty - the filter fields are never actually checked.
    def dataFilter(data:dict) -> bool:
        for name in fList:
            return True

    filteredDataSet = tuple(filter(dataFilter, dataSet))
    if len(filteredDataSet) == 0:
        return go.Figure(layout=dict(title=title))  # empty graph

    # convert the data being plotted into numbers
    try:
        traceValues = [
            [float(d[field]) for d in filteredDataSet]
            for field in dataFields
        ]
    except ValueError:
        return go.Figure(layout=dict(title="Error: Can't plot non-numeric data on a numeric axis."))

    # turn the position on the graph type slider into a graph type name
    graphType = GRAPHTYPE_CHOICES[graphType]

    if graphType == 'Histogram':
        out = ff.create_distplot(
            traceValues,
            dataFields,
            show_curve=False,
            show_rug=False,
            bin_size=binSize,
        )
        out.layout['title'] = title
        return out

    if graphType == 'Density Plot':
        out = ff.create_distplot(
            traceValues,
            dataFields,
            show_hist=False,
            show_rug=False,
        )
        out.layout['title'] = title
        return out

    layout = dict(title=title)  # layout used by all of the graph types below

    if graphType == 'Violin Plot':
        traces = [
            dict(
                type='violin',
                name=field,
                y=values,
            ) for field, values in zip(dataFields, traceValues)
        ]
    elif graphType == 'Box Plot':
        traces = [
            go.Box(
                name=field,
                y=values,
            ) for field, values in zip(dataFields, traceValues)
        ]
    elif graphType == 'Dot Plot':
        # Dot and bar plots use the raw field values against the record id.
        traces = [
            dict(
                type='scatter',
                name=field,
                y=[d[field] for d in filteredDataSet],
                x=[d[DATA_IDFIELD] for d in filteredDataSet],
                mode='markers',
            ) for field in dataFields
        ]
        layout['xaxis'] = dict(
            title=DATA_IDFIELD,
            type='category',
            titlefont=dict(
                size=12,
            ),
        )
    elif graphType == 'Bar Plot':
        traces = [
            go.Bar(
                name=field,
                y=[d[field] for d in filteredDataSet],
                x=[d[DATA_IDFIELD] for d in filteredDataSet],
            ) for field in dataFields
        ]
        layout['xaxis'] = dict(
            title=DATA_IDFIELD,
            type='category',
            titlefont=dict(
                size=12,
            ),
        )

    return go.Figure(data=traces, layout=layout)
#######
# This distplot looks back at the Mark Twain/
# Quintus Curtius Snodgrass data and tries
# to compare them.
######
import plotly.offline as pyo
import plotly.figure_factory as ff

snodgrass = [.209, .205, .196, .210, .202, .207, .224, .223, .220, .201]
twain = [.225, .262, .217, .240, .230, .229, .235, .217]

hist_data = [snodgrass, twain]
group_labels = ['Snodgrass', 'Twain']

# Same bin size for both groups.
fig = ff.create_distplot(
    hist_data=hist_data,
    group_labels=group_labels,
    bin_size=[.005, .005],
)
pyo.plot(fig, filename='SnodgrassTwainDistplot.html')
import statistics as st
import csv
import plotly_express as px
import plotly.figure_factory as ff
import pandas as pd

# Load the reading scores and show their density curve.
df = pd.read_csv("StudentsPerformance.csv")
reading_score = df["reading score"].tolist()

fig = ff.create_distplot([reading_score], ["reading score"], show_hist=False)
fig.show()

mean = st.mean(reading_score)
print("mean:", mean)
std = st.stdev(reading_score)
print("standard deviation:", std)

# Bounds of the 1, 2 and 3 standard-deviation ranges around the mean.
# The upper bounds add the deviation; SD2_END and SD3_END previously
# subtracted it, which made those two ranges empty.
SD1_START, SD1_END = mean - std, mean + std
SD2_START, SD2_END = mean - (2 * std), mean + (2 * std)
SD3_START, SD3_END = mean - (3 * std), mean + (3 * std)

# Scores strictly inside each range.
listdata_1SD = [
    result for result in reading_score
    if result > SD1_START and result < SD1_END
]
listdata_2SD = [
    result for result in reading_score
    if result > SD2_START and result < SD2_END
]
listdata_3SD = [
    result for result in reading_score
    if result > SD3_START and result < SD3_END
]

print("{}% of data lies between 1SD".format(
    len(listdata_1SD) * 100.0 / len(reading_score)))
def genFigure(xCfgAirlines, xCfgLocations, xCfgAircraft, normalized):
    """Return a figure dict with the flown-distance density per aircraft type.

    Args:
        xCfgAirlines: mapping read for the 'airports' and 'airlines'
            selections (falling back to ``dataModule.Airports`` /
            ``dataModule.Airlines``).
        xCfgLocations: not used by this function.
        xCfgAircraft: passed to ``dataModule.filterData`` as the aircraft
            selection.
        normalized: if it contains 'yes', each aircraft's distances are
            divided by their mean.

    Returns:
        The distplot converted to a plain dict, with styling applied.
    """
    # NOTE(review): 'airports' is read from xCfgAirlines while xCfgLocations
    # is never used - confirm this is intended.
    selectedAirports = xCfgAirlines.get('airports', dataModule.Airports)
    selectedAirlines = xCfgAirlines.get('airlines', dataModule.Airlines)
    selectedAircraft = xCfgAircraft

    routes = dataModule.filterData(selectedAirports, selectedAirlines, selectedAircraft)

    rangeData = []
    groupLabels = []
    for ac, df in routes.groupby('aircraft'):
        distances = df['distance'].values
        # Groups with fewer than three routes are left out.
        if len(distances) < 3:
            continue
        # Shorten manufacturer names, then cap the label at 20 characters.
        ac = (ac.replace('Boeing ', 'B')
                .replace('Airbus ', '')
                .replace('McDonnell Douglas ', '')
                .replace('Embraer ', 'E')
                .replace('Aerospatiale/Alenia ', ''))
        groupLabels.append(ac[0:20])
        m = np.mean(distances) if 'yes' in normalized else 1
        rangeData.append(distances / m)

    fig = ff.create_distplot(rangeData, groupLabels, bin_size=100,
                             show_hist=False, show_rug=False,
                             histnorm='probability')
    fig = fig.to_dict()

    for i, d in enumerate(fig['data']):
        d['opacity'] = 0.6
        d['selectgroup'] = i
        d['selectedpoints'] = [0]

    fig['layout'].update(
        dict(
            title='Distances Flown by Aircraft Type',
            titlefont={
                'size': 16,
                'color': '#a8a8a8',
                'family': 'Open Sans'
            },
            font={'color': '#fff'},
            xaxis=dict(
                type='log',
                showgrid=True,
                gridcolor='rgba(255,255,255,.2)',
                tickfont={'color': 'white'},
                title='Normalized Distance',
                titlefont={'color': '#a8a8a8'}),
            yaxis=dict(
                showgrid=False,
                showticklabels=True,
                ticks='',
                tickfont={'color': 'white'},
                # Was misspelled 'visibile', which is not a layout attribute.
                visible=True,
                title='Prob. Density',
                titlefont={'color': '#a8a8a8'}
            ),
            paper_bgcolor='rgba(0,0,0,0)',
            plot_bgcolor='rgba(0,0,0,0)',
            margin={'t': 40, 'b': 40, 'r': 0, 'l': 50, 'pad': 1},
            legend={'orientation': 'v', 'xanchor': 'left', 'x': 1},
        ))
    return fig
def normalize_series(dframe, series_col_index):
    """Overwrite extreme outliers of one column with a neighbouring value.

    A value is an outlier when its log differs from the mean log by more
    than four standard deviations. Outliers at positions above 5 take the
    previous value, the others take the next one. The column is modified
    through ``.iloc`` assignment and nothing is returned.

    Args:
        dframe: data frame holding the column.
        series_col_index: key of the column to clean.
    """
    y = dframe[series_col_index]
    log_y = np.log(y)
    diff_log_y = log_y - np.average(log_y)
    var_log_y = sum(diff_log_y**2) / len(y)
    stdev_log_y = np.sqrt(var_log_y)
    cnt = 0  # number of replaced values
    for i in range(0, len(log_y)):
        # Positional access throughout: the previous diff_log_y[i] was a
        # label lookup and only matched .iloc[i] for a default 0..n-1 index.
        if abs(diff_log_y.iloc[i]) > (4 * stdev_log_y):
            cnt += 1
            if i > 5:
                y.iloc[i] = y.iloc[i - 1]
            else:
                y.iloc[i] = y.iloc[i + 1]
    #print('cleaned: ', cnt)


# Read the CSV named on the command line and plot one curve per thread count.
dframe = pd.read_csv(sys.argv[1])  #index_col=0

hist_data = [
    dframe['1thread'], dframe['2thread'], dframe['3thread'],
    dframe['4thread'], dframe['5thread'], dframe['6thread'],
    dframe['7thread'], dframe['8thread']
]
group_labels = [
    'Single_Threaded', '2_Threads', '3_Threads', '4_Threads', '5_Threads',
    '6_Threads', '7_Threads', '8_Threads'
]

fig = ff.create_distplot(hist_data, group_labels, show_hist=False)
# Emit the plot as an HTML <div> on stdout, without bundling plotly.js.
div1 = plotly.offline.plot(fig, include_plotlyjs=False, output_type='div')
print(div1)
def show_fig(mean_list):
    """Show histogram, density curve and rug of ``mean_list``."""
    figure = ff.create_distplot(
        hist_data=[mean_list],
        group_labels=["reading_time"],
    )
    figure.show()
def showFig(meanList):
    """Show the density curve (no histogram bars) of ``meanList``."""
    figure = pf.create_distplot(
        hist_data=[meanList],
        group_labels=["Reading Time"],
        show_hist=False,
    )
    figure.show()