go.Scatter({ 'x': df1['A'], 'y': df1['B'], 'mode': 'markers', 'marker': { 'size': abs(df1['C']) * 50 } }) ] offl.plot(traces) # Scatter Matrix traces = [ go.Splom( {'dimensions': [{ 'label': col, 'values': df1[col] } for col in df1]}) ] offl.plot(traces) # Geographic (Choropleth) Maps ----------------------------------------------- import plotly.offline as offl import plotly.graph_objs as go import pandas as pd str_inDir = 'C:/Users/robbi/Dropbox/Work & Learning/Language - Python/Udemy - Python for Data Science and Machine Learning/Refactored_Py_DS_ML_Bootcamp-master/09-Geographical-Plotting/' # USA State Example Plot data = [{ 'type': 'choropleth',
# In[60]: trace6 = go.Scatter(x=suicide_data['State'], y=suicide_data['Suicide Rate (per 1 lakh) 2015[4]']) # In[61]: trace11 = go.Splom(dimensions=[ dict(label='Unemployment_Total', values=df1['Unemployment_Total']), dict(label='Unemployment_Urban', values=df1['Unemployment_Urban']), dict(label='Unemployment_Rural', values=df1['Unemployment_Rural']), dict(label='Crime 2014', values=df1['2014']), dict(label='Crime 2015', values=df1['2015']), dict(label='Crime 2016', values=df1['2016']), dict(label='Suicide Rate (per 1 lakh) 2015[4]', values=df1['Suicide Rate (per 1 lakh) 2015[4]']) ], text=cols, marker=dict(color=[ 'mistyrose', 'moccasin', 'navajowhite', 'navy', 'oldlace', 'olive', 'olivedrab' ], showscale=False, line_color='white', line_width=0.5)) # In[62]: var_exp = var cum_var_exp = np.cumsum(var_exp) trace12 = dict(type='bar',
yaxis4=dict(axis), yaxis5=dict(axis), yaxis6=dict(axis), yaxis7=dict(axis), yaxis8=dict(axis), yaxis9=dict(axis), yaxis10=dict(axis), yaxis11=dict(axis), yaxis12=dict(axis)) trace1 = go.Splom(dimensions=[ dict(label='meanfreq', values=data['meanfreq']), dict(label='sd', values=data['sd']), dict(label='median', values=data['median']), dict(label='Q25', values=data['Q25']), dict(label='Q75', values=data['Q75']), dict(label='IQR', values=data['IQR']), dict(label='skew', values=data['skew']), dict(label='kurt', values=data['kurt']), dict(label='sp.ent', values=data['sp.ent']), dict(label='sfm', values=data['sfm']), dict(label='mode', values=data['mode']), dict(label='centroid', values=data['centroid']) ]) fig1 = dict(data=[trace1], layout=layout) plotly.offline.plot(fig1, filename="Corelation-Pair-Plot.html") # Split To Training And Testing DataSet print("######### Splitting DataSet To Training And Testing #########") x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=74)
dataset = pd.read_csv('stir.csv') X = dataset.iloc[:, 0:3].values y1 = dataset.iloc[:, 3].values y2 = dataset.iloc[:, 4].values y1 = np.reshape(y1, (-1, 1)) y2 = np.reshape(y2, (-1, 1)) dataset = dataset.drop(axis=1, columns=["Unnamed: 5"]) #scattermatrix data1 = go.Splom(dimensions=[ dict(label='Rotational speed', values=dataset['Rotational speed(RPM)']), dict(label='Wield speed', values=dataset['Weilding speed(mm/min)']), dict(label='Axial-load', values=dataset['Axial load(kN)']), dict(label='Tensile elongation (%)', values=dataset['Tensile elongation (%)']), dict(label='tensile strength(MPa)', values=dataset['ultimate tensile strength(MPa)']) ], marker=dict(color='rgb(255, 8, 0)', size=7, showscale=False, line=dict(width=0.5, color='rgb(280,180,230)'))) axis = dict(showline=True, zeroline=False, gridcolor='#fff', ticklen=4) layout = go.Layout(title='Friction stir Weilding data set', dragmode='select', width=1000, height=1000, autosize=False, plot_bgcolor='rgba(240,240,240, 0.95)',
# Average the numeric columns for every (professor, lecture) pair.  The
# grouping keys stay as ordinary columns because of as_index=False.
grouped = data.groupby(['professor', 'lecture'], as_index=False).mean()

# Hover text: the professor each aggregated row belongs to.
text = list(grouped['professor'].values)

# Give every distinct professor an integer code (order of first appearance)
# so the marker colour can be driven by a numeric colourscale.
professors = grouped['professor'].drop_duplicates()
profColor = {name: code for code, name in enumerate(professors.values)}
color_vals = [profColor[name] for name in grouped['professor']]

# (axis label, source column) for each dimension of the scatter-plot matrix.
splom_axes = [
    ('Lecture', 'lecture'),
    ('Parti', 'participants'),
    ('Exp', 'professional expertise'),
    ('Motive', 'motivation'),
    ('Present', 'clear presentation'),
    ('Imp', 'overall impression'),
]
marker_style = dict(
    color=color_vals,
    size=6,
    colorscale='Jet',
    showscale=True,
    line=dict(width=0.5, color='rgb(230,230,230)'),
)
result = go.Splom(
    dimensions=[
        dict(label=axis_label, values=grouped[column])
        for axis_label, column in splom_axes
    ],
    text=text,
    marker=marker_style,
)
# Hide the diagonal cells (each variable plotted against itself).
result['diagonal'].update(visible=False)

layout = go.Layout(
    showlegend=True,
    title=go.layout.Title(text='Scatterplot matrix'),
)
fig1 = dict(data=[result], layout=layout)
# py.plot(fig1, filename = "scatterplot matrix")

#converting lecture names to lecture numbers
# `fig` is created above this chunk: remove its margins, save it, show it.
fig.update_layout(margin={'t': 0, 'l': 0, 'r': 0, 'b': 0})
pyo.plot(fig, filename="sunburst.html")
fig.show()

# In[98]:

# Scatter-plot matrix of the four iris measurements, coloured by class.
df = pd.read_csv(
    'https://raw.githubusercontent.com/plotly/datasets/master/iris-data.csv')
# Integer code per class so the categorical column can drive marker colour.
index_vals = df['class'].astype('category').cat.codes

iris_dimensions = [
    dict(label=column, values=df[column])
    for column in ('sepal length', 'sepal width', 'petal length',
                   'petal width')
]
iris_splom = go.Splom(
    dimensions=iris_dimensions,
    diagonal_visible=False,
    text=df['class'],
    marker=dict(color=index_vals,
                showscale=False,
                line_color='white',
                line_width=0.5),
)
fig = go.Figure(data=iris_splom)
fig.update_layout(title='Iris Data set', width=600, height=600)
pyo.plot(fig, filename="scatter_matrix.html")
fig.show()

# In[99]:

# 2-D histogram contour of 500 points drawn uniformly from [-1, 1) x [-1, 1).
x = np.random.uniform(-1, 1, size=500)
y = np.random.uniform(-1, 1, size=500)
contour_trace = go.Histogram2dContour(x=x, y=y, colorscale='Blues')
fig = go.Figure(contour_trace)
pyo.plot(fig, filename="hist_contour_plot.html")
size=16, color="white"), paper_bgcolor='rgba(0,0,0,0.65)', plot_bgcolor='rgba(0,0,0,1)') heat_lay1 = go.Figure(data = trace_heat,layout=heat_layout) #------------------------------------------------------FIGURE2----------------------------------------------------------------------------# scat_matrix = go.Splom( dimensions=[dict(label='teaching', values=df2016['teaching']), dict(label='research', values=df2016['research']), dict(label='citations', values=df2016['citations']), dict(label='income', values=df2016['income']), dict(label='total_score', values=df2016['total_score']) ], text=df2016['world_rank'], marker=dict(showscale=False, # colors encode categorical variables # line_color='white', line_width=0.5 ) ) scat_matrix_layout = go.Layout( title="Les columns teaching, research, citations, income total_score sont-elles corrélées?", height=800, font=dict( family="sans serif", size=14,
def create_plot_5():
    """Build the ``mortality_us`` scatter-plot matrix and return it as JSON.

    Reads the ``Value``, ``Category`` and ``Date`` columns of the
    ``mortality_us`` table, pivots them into one column per category indexed
    by date, and plots every category against every other one in a single
    ``go.Splom`` trace coloured by the (integer) date.

    Returns:
        str: the figure serialised with ``plotly.utils.PlotlyJSONEncoder``.
    """
    session = Session(engine)
    try:
        df = pd.read_sql("select Value, Category, Date from mortality_us",
                         con=session.connection())
    finally:
        # Release the connection even when the query fails.
        session.close()

    df = df.pivot(index='Date', columns='Category', values='Value')

    # df.max() alone is a per-column Series; an axis range needs one number,
    # so take the largest value found anywhere in the pivoted table.
    ymax = df.max().max()

    fig = go.Figure(data=go.Splom(
        dimensions=[dict(label=c, values=df[c]) for c in df.columns],
        text=df.index,
        marker=dict(color=df.index.astype('int'),
                    size=5,
                    colorscale='Bluered',
                    line=dict(width=0.5, color='rgb(230,230,230)'))))

    # One slider step per trace; each step shows only its own trace.
    steps = []
    for i in range(len(fig.data)):
        step = dict(method="restyle",
                    args=["visible", [False] * len(fig.data)],
                    label=fig.data[i]['name'])
        step["args"][1][i] = True  # Toggle i'th trace to "visible"
        steps.append(step)

    sliders = [
        dict(
            # Keep the initially active step inside the list of steps: the
            # figure holds a single trace, so a fixed index of 10 pointed
            # past the end.
            active=max(0, min(10, len(steps) - 1)),
            currentvalue={"prefix": "Year: "},
            pad={"t": 50},
            steps=steps)
    ]

    fig.update_layout(sliders=sliders)
    fig.update_yaxes(range=[0, ymax])
    fig.update_layout(title='Scatter Plot Matrix',
                      dragmode='select',
                      width=1000,
                      height=1000,
                      hovermode='closest')

    graphJSON = json.dumps(fig, cls=plotly.utils.PlotlyJSONEncoder)
    return graphJSON
# line=dict( # color='rgba(217, 217, 217, 0.14)', # width=0.5 # ),opacity=0.8)) # data = [trace1] # fig = go.Figure(data=data) pl_colorscale = [[0.0, '#19d3f3'], [0.333, '#19d3f3'], [0.333, '#e763fa'], [0.666, '#e763fa'], [0.666, '#636efa'], [1, '#636efa']] trace1 = go.Splom(dimensions=[ dict(label='sepal length', values=df1["A"]), dict(label='sepal width', values=df1["B"]), dict(label='petal ngth', values=df1["C"]), dict(label='petal len', values=df1["D"]) ], marker=dict(color="green", size=7, colorscale=pl_colorscale, showscale=False, line=dict(width=0.5, color='red'))) axis = dict(showline=True, zeroline=False, gridcolor='yellow', ticklen=4) layout = go.Layout(title='Iris Data set', dragmode='select', width=600, height=600, autosize=False, hovermode='closest', plot_bgcolor='lightgrey',
def parse_contents(contents, filename, date):
    """Turn one uploaded file into a Dash layout of tables and plots.

    Args:
        contents: upload payload of the form ``"<content type>,<base64 data>"``.
        filename: name of the uploaded file; a name containing ``csv`` is read
            as CSV, one containing ``xls`` as an Excel workbook.
        date: accepted for the callback signature; not used here.

    Returns:
        ``html.Div`` holding the raw-data table, a describe/missing/dtype
        summary table, the correlation table, a longitude/latitude scatter
        plot and a scatter-plot matrix coloured by ``ocean_proximity``.  When
        the file cannot be read or summarised (or its type is unsupported) a
        ``html.Div`` containing an error message is returned instead.
    """
    content_type, content_string = contents.split(',')
    decoded = base64.b64decode(content_string)

    try:
        if 'csv' in filename:
            # Assume that the user uploaded a CSV file
            df = pd.read_csv(io.StringIO(decoded.decode('utf-8')))
        elif 'xls' in filename:
            # Assume that the user uploaded an excel file
            df = pd.read_excel(io.BytesIO(decoded))
        else:
            # Neither branch would bind `df`; report it instead of failing
            # further down with a NameError.
            return html.Div(['There was an error processing this file.'])

        # The summaries are computed for every supported file type; they used
        # to exist only on the CSV path, which broke Excel uploads.
        df_desc = pd.concat([
            df.describe(include='all').fillna(0).round(),
            df.isnull().sum().to_frame(name='missing').T,
            df.dtypes.to_frame(name='dtype').T.astype(str)
        ]).reset_index()

        # Correlate numeric/bool columns only: recent pandas raises when
        # asked to correlate a text column such as `ocean_proximity`.
        corr_matrix = df.select_dtypes(
            include=['number', 'bool']).corr().reset_index()

        # Integer code per category, used as the marker colour below.
        index_vals = df['ocean_proximity'].astype('category').cat.codes
    except Exception as e:
        print(e)
        return html.Div(['There was an error processing this file.'])

    # Styling shared by the two wide summary tables.
    summary_table_style = dict(
        style_as_list_view=True,
        style_cell={'padding': '15px'},
        fixed_rows={
            'headers': True,
            'data': 0
        },
        fixed_columns={
            'headers': True,
            'data': 1
        },
        style_header={
            'backgroundColor': 'white',
            'fontWeight': 'bold'
        },
        style_table={
            'maxHeight': '300px',
            'maxWidth': '1500px',
            'overflowX': 'scroll',
            'overflowY': 'scroll',
            'border': 'thin lightgrey solid'
        },
    )

    return html.Div([
        html.H5(filename),
        dash_table.DataTable(
            data=df.to_dict('records'),
            columns=[{
                'name': i,
                'id': i
            } for i in df.columns],
            style_as_list_view=True,
            fixed_rows={
                'headers': True,
                'data': 0
            },
            style_cell={'width': '150px'},
            style_header={
                'backgroundColor': 'white',
                'fontWeight': 'bold'
            },
            style_table={
                'maxHeight': '300px',
                'border': 'thin lightgrey solid'
            }),
        html.H5(
            "Data Describe : This method shows a summary of the numerical attributes"
        ),
        dash_table.DataTable(
            data=df_desc.to_dict('records'),
            columns=[{
                'name': i,
                'id': i
            } for i in df_desc.columns],
            **summary_table_style),
        html.H5("Data Correlation"),
        dash_table.DataTable(
            data=corr_matrix.to_dict('records'),
            columns=[{
                'name': i,
                'id': i
            } for i in corr_matrix.columns],
            **summary_table_style),
        dcc.Graph(
            id='SPloM',
            config={
                'showSendToCloud': True,
            },
            figure={
                'data': [
                    go.Scatter(
                        x=df["longitude"],
                        y=df["latitude"],
                        mode='markers',
                    )
                ]
            }),
        dcc.Graph(
            id='SPloM-selectedPoints',
            config={
                'showSendToCloud': True,
            },
            figure={
                'data': [
                    go.Splom(
                        dimensions=[
                            dict(label='median_house_value',
                                 values=df['median_house_value']),
                            dict(label='median_income',
                                 values=df['median_income']),
                            dict(label='total_rooms',
                                 values=df['total_rooms']),
                            dict(label='housing_median_age',
                                 values=df['housing_median_age'])
                        ],
                        text=None,
                        marker=dict(
                            color=index_vals,
                            # colors encode categorical variables
                            showscale=False,
                            line_color='white',
                            line_width=0.5))
                ],
            },
        ),
        html.Hr()  # horizontal line
    ])
def plot_scatter_matrix(df):
    """
    Display a scatter-plot matrix of ten columns of ``df``.

    Points are coloured by ``SeriousDlqin2yrs`` and carry a hover label that
    spells the target value out; the diagonal cells are hidden.

    Args:
        - df (DataFrame object): Dataframe to be shown
    """
    hover_labels = []
    for target in df["SeriousDlqin2yrs"]:
        if target == 0:
            hover_labels.append("Responsible (target=0)")
        else:
            hover_labels.append("Delinquent (target=1)")

    # (short axis label, source column) for every dimension, in display order.
    axis_columns = (
        ("RevUtilOfUnsecLines", "RevolvingUtilizationOfUnsecuredLines"),
        ("age", "age"),
        ("NTime30-59Days", "NumberOfTime30-59DaysPastDueNotWorse"),
        ("DebtRatio", "DebtRatio"),
        ("MonthlyIncome", "MonthlyIncome"),
        ("NOpenCreditLinesLoans", "NumberOfOpenCreditLinesAndLoans"),
        ("NTimes90DaysLate", "NumberOfTimes90DaysLate"),
        ("NRealEstateLoansLines", "NumberRealEstateLoansOrLines"),
        ("NTime60-89Days", "NumberOfTime60-89DaysPastDueNotWorse"),
        ("NDepend", "NumberOfDependents"),
    )
    dimensions = [
        dict(label=short_label, values=df[column])
        for short_label, column in axis_columns
    ]

    marker_style = dict(
        color=df["SeriousDlqin2yrs"],
        size=5,
        colorscale="Bluered",
        line=dict(width=0.5, color="rgb(230,230,230)"),
    )
    matrix_trace = go.Splom(
        dimensions=dimensions,
        marker=marker_style,
        text=hover_labels,
        diagonal=dict(visible=False),
    )

    fig = go.Figure(data=matrix_trace)
    fig.update_layout(
        title=dict(
            text="Scatterplot Matrix of Dataset",
            x=0.5,
            xanchor="center",
            yanchor="top",
        ),
        dragmode="select",
        width=1000,
        height=1000,
        hovermode="closest",
        font=dict(size=7, color="#7f7f7f"),
    )
    fig.show()
# Scatter plot with bubbles
scatter_trace = go.Scatter(x=df['A'], y=df['B'], mode='markers')
py.offline.plot([scatter_trace], filename='data/six.html', auto_open=False)

# Scatter plot matrix.
# One dimension per column, labelled with the column name; the diagonal
# cells are hidden.
splom_trace = go.Splom(
    dimensions=[{'label': column, 'values': df[column]}
                for column in ('A', 'B', 'C', 'D')],
    diagonal=dict(visible=False),
)
py.offline.plot([splom_trace], filename='data/seven.html', auto_open=False)
def correlation_plot_ly(df, title='mytitle', saveto='./myfile', ylabel='ylabel', xlabel='xlabel'):
    """
    Write a scatter-plot matrix of ``df`` annotated with pairwise R² values.

    Every column of ``df`` becomes one dimension of a ``go.Splom`` trace.  For
    each pair of distinct columns a linear regression is fitted and its
    coefficient of determination is listed as an annotation at the right edge
    of the figure.  The figure is saved as ``<saveto>.html`` without being
    opened.

    :param df: DataFrame whose columns are plotted against each other
    :param title: figure title
    :param saveto: output path without the ``.html`` extension
    :param ylabel: unused; kept so existing callers keep working
    :param xlabel: unused; kept so existing callers keep working
    :return: None
    """
    columns = list(df.columns)
    dimensions = [{'label': col, 'values': df[col]} for col in columns]

    trace1 = go.Splom(dimensions=dimensions, diagonal=dict(visible=False))
    t_len = len(trace1['dimensions'])
    if t_len > 1:
        trace1['dimensions'][1].update(visible=True)
    if t_len > 2:
        trace1['showupperhalf'] = False

    annotation_list = []
    yaxis_val = 1
    # Visit each unordered pair once: col1 is paired only with the columns
    # that come before it.
    for position, col1 in enumerate(columns):
        for col2 in columns[:position]:
            r_value = stats.linregress(df[col1], df[col2])[2]
            # The label says R², so report the squared correlation
            # coefficient rather than the raw r.
            format_r_value = "<i>{}_vs_{}: R<sup>2</sup>={:02.2f}</i>".format(
                col1, col2, r_value ** 2)
            annotation_list.append(
                dict(x=1,
                     y=yaxis_val,
                     xref='paper',
                     yref='paper',
                     text=format_r_value,
                     showarrow=False,
                     font=dict(size=10)))
            # Stack the annotations downwards, one line per pair.
            yaxis_val -= 0.05

    layout = go.Layout(
        title=title,
        dragmode='select',
        width=600,
        height=600,
        autosize=False,
        hovermode='closest',
        plot_bgcolor='rgba(240,240,240, 0.95)',
        annotations=annotation_list
    )

    figure = dict(data=[trace1], layout=layout)
    py.plot(figure, filename=saveto + '.html', auto_open=False, config=PlotData.plotly_conf())
    return None
No_Of_Bedrooms class_code = {No_Of_Bedrooms[k]: k for k in range(len(No_Of_Bedrooms))} class_code color_vals = [class_code[cl] for cl in housingData['bedrooms']] text = [housingData.loc[k, 'bedrooms'] for k in range(len(housingData))] trace1 = go.Splom(dimensions=[ dict(label='sqft_lot', values=housingData['sqft_lot']), dict(label='sqft_above', values=housingData['sqft_above']), dict(label='sqft_basement', values=housingData['sqft_basement']), dict(label='price', values=housingData['price']), dict(label='sqft_living', values=housingData['sqft_living']), dict(label='bedrooms', values=housingData['bedrooms']) ], text=text, marker=dict(color=color_vals, size=7, colorscale='Viridis', showscale=False, line=dict(width=0.5, color='rgb(230,230,230)'))) axis = dict(showline=True, zeroline=False, gridcolor='#fff', ticklen=4) layout = go.Layout( title='House Price Data', dragmode='select', width=900, height=900, autosize=False,
def _bold_centered_title(text):
    """Return the bold, centred title dict shared by the bar, line and matrix figures."""
    return dict(text='<b>' + text + '</b>',
                font=dict(family="Verdana,verdana,sans-serif",
                          color='#f6f6f6',
                          size=23),
                x=0.5,
                y=0.9,
                xanchor='center',
                yanchor='top')


def _ranked_bar_traces(year_rows, continents, indicator, largest):
    """Return one bar trace per continent with its five highest (or lowest) countries."""
    traces = []
    for continent in continents:
        # Always start from the full set of rows for the year: filtering the
        # previous continent's result would leave nothing for the next one.
        rows = year_rows.loc[year_rows['Continent'] == continent]
        if largest:
            rows = rows.nlargest(5, [indicator])
        else:
            rows = rows.nsmallest(5, [indicator])
        traces.append(
            dict(type='bar',
                 x=rows['Country Name'],
                 y=rows[indicator],
                 name=str(continent),
                 marker_color='#ffff99',
                 marker_line_width=1.5,
                 marker_line_color='#ffff33'))
    return traces


def _transparent_xy_layout(title_text, xaxis_title, indicator):
    """Return the transparent-background layout used by the bar and line charts."""
    return dict(title=_bold_centered_title(title_text),
                yaxis=dict(title=indicator, type='linear'),
                xaxis=dict(title=xaxis_title),
                font=dict(family="Verdana,verdana,sans-serif",
                          color='#f6f6f6'),
                paper_bgcolor='rgba(0,0,0,0)',
                plot_bgcolor='rgba(0,0,0,0)')


def plots(year, countries, indicator, projection, continents):
    """Build the five dashboard figures for the current selection.

    Args:
        year: value matched against the ``Time`` column.
        countries: country names drawn in the line chart.
        indicator: name of the column being visualised.
        projection: 0 for an orthographic map, 1 for an equirectangular one.
        continents: continents shown in the two bar charts.

    Returns:
        tuple of five ``go.Figure`` objects, in this order: top-5 bar chart,
        choropleth map, bottom-5 bar chart, country line chart and
        scatter-plot matrix.
    """
    # Zeros are treated like missing values so they cannot enter a ranking.
    ranked_source = df.fillna(0.0).replace(0.0, np.nan)
    year_rows = ranked_source.loc[ranked_source['Time'] == year]

    # Name every selected continent in the title (the same text as before
    # when a single continent is selected; no failure for an empty list).
    continent_label = ', '.join(str(continent) for continent in continents)

    ############################################First Bar Plot##########################################################
    data_bar1 = _ranked_bar_traces(year_rows, continents, indicator,
                                   largest=True)
    layout_bar1 = _transparent_xy_layout(
        'Top 5 countries for continent ' + continent_label, 'Countries',
        indicator)

    ############################################Second Bar Plot##########################################################
    data_bar2 = _ranked_bar_traces(year_rows, continents, indicator,
                                   largest=False)
    layout_bar2 = _transparent_xy_layout(
        'Bottom 5 countries for continent ' + continent_label, 'Countries',
        indicator)

    #############################################Choropleth######################################################
    df_emission_0 = df.loc[df['Time'] == year]

    data_choropleth = dict(
        type='choropleth',
        locations=df_emission_0['Country Name'],
        # There are three ways to 'merge' your data with the data pre embedded in the map
        locationmode='country names',
        z=df_emission_0[indicator],
        text=df_emission_0['Country Name'],
        colorscale='sunset',
        # `title.font` replaces the deprecated `titlefont` attribute.
        colorbar=dict(title=dict(text='Scale', font=dict(color='#f6f6f6')),
                      tickfont=dict(color='#f6f6f6')),
        hovertemplate='Country: %{text} <br>' + str(indicator) + ': %{z}',
        name='')

    layout_choropleth = dict(
        geo=dict(
            scope='world',  # default
            projection=dict(
                type=['orthographic', 'equirectangular'][projection]),
            landcolor='LightGrey',
            lakecolor='GhostWhite',
            showocean=True,  # default = False
            oceancolor='#cde7e7',
            bgcolor='rgba(0,0,0,0)',
        ),
        title=dict(text='World ' + str(indicator) +
                   '<br>Choropleth Map on the year ' + str(year),
                   font=dict(family="Verdana,verdana,sans-serif",
                             color='#f6f6f6',
                             size=20),
                   x=0),
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)')

    ############################################## Line Graph ##########################################################
    data_line = []
    for country in countries:
        df_line = df.loc[df['Country Name'] == country]
        data_line.append(
            dict(type='scatter',
                 x=df_line['Time'],
                 y=df_line[indicator],
                 name=country,
                 connectgaps=True))

    layout_line = _transparent_xy_layout('Country Evolution', 'Years',
                                         indicator)

    ############################################### Matrix #############################################################
    # Integer code per continent, used as the marker colour.
    index_vals = df['Continent'].astype('category').cat.codes
    matrix_columns = (
        ('Ind.1', 'Government expenditure on education, total (% of GDP)'),
        ('Ind.2', 'Labor force, female (% of total labor force)'),
        ('Ind.3',
         'Literacy rate, adult female (% of females ages 15 and above)'),
        ('Ind.4', 'Unemployment, total (% of total labor force)'),
        ('Ind.5', 'Literacy rate, adult male (% of males ages 15 and above)'),
        ('Ind.6', 'GDP per capita (current US$)'),
    )
    data_matrix = go.Splom(dimensions=[
        dict(label=short_label, values=df[column])
        for short_label, column in matrix_columns
    ],
                           text=df['Continent'],
                           marker=dict(
                               color=index_vals,
                               colorscale='sunset',
                               size=5,
                           ))

    layout_matrix = dict(
        title=_bold_centered_title('Correlation Matrix'),
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(176,176,176,1)',
        font=dict(family="Verdana,verdana,sans-serif", color='#f6f6f6'),
    )

    ################################################ Return ############################################################
    return go.Figure(data=data_bar1, layout=layout_bar1), \
           go.Figure(data=data_choropleth, layout=layout_choropleth), \
           go.Figure(data=data_bar2, layout=layout_bar2), \
           go.Figure(data=data_line, layout=layout_line), \
           go.Figure(data=data_matrix, layout=layout_matrix)
dict(label="num_timesteps", values=[result['params']['num_timesteps'] for result in results]), #dict(label="num_iterations_after_valid", # values=[result['params']['num_iterations_after_valid'] for result in results]), dict(label="num_iterations", values=num_iterations), dict(label="cost", values=[0 if len(result['cost']) == 0 else result['cost'][-1] for result in results]), dict(label="time", values=time)] trace1 = go.Splom(dimensions=dimensions, text=text, marker=dict(#color=time, size=7, # [(30-iteration)/3 for iteration in num_iterations], # colorscale=pl_colorscale, showscale=False, line=dict(width=0.5, color='rgb(230,230,230)')), showupperhalf=False) axis = dict(showline=True, zeroline=False, gridcolor="#fff", ticklen=4) layout = go.Layout( title='stomp std dev evaluation', dragmode='select', width=1500, height=1500, autosize=True,
def _prompt_int(prompt):
    """Read an integer from the user; return None when the reply is not a number."""
    try:
        return int(input(prompt))
    except ValueError:
        return None


def _prompt_axis(prompt, axis_count):
    """Ask until the user types one legend letter that names an existing axis."""
    while True:
        reply = input(prompt).strip().upper()
        # Exactly one character whose offset from 'A' is a valid axis index.
        if len(reply) == 1 and 0 <= ord(reply) - 65 < axis_count:
            return ord(reply) - 65
        print ("Invalid Entry")


def _marker_trace(points, axes, size, color, colorscale):
    """Build a 2D or 3D marker-only scatter trace of `points` over the chosen axes."""
    coords = {name: points[axis_index] for name, axis_index in zip("xyz", axes)}
    make_trace = graph.Scatter3d if len(axes) == 3 else graph.Scatter
    return make_trace(
        mode = "markers",
        marker = dict(size = size, color = color, colorscale = colorscale),
        **coords)


def Graph (option,k,k_cluster,k_centroid,pl_colorscale,color3,color2,d_cluster,color4,pl_colorscale2,titles):
    """Plot the clustering results and, on request, a 2D/3D scatter plot.

    Writes and opens 'Parallel Coordinates.html' and 'Scatter Plot Matrix.html',
    prints a legend mapping letter identifiers to column titles, then asks
    the user whether to also draw 'Scatter Plot.html' for two or three chosen
    axes.

    Args:
        option: 1 to show the K-Means results; any other value shows the
            density-clustering results.
        k: unused; kept so existing callers keep working.
        k_cluster: K-Means data, indexed by axis.
        k_centroid: K-Means centroids, indexed by axis.
        pl_colorscale: colorscale for the K-Means points and centroids.
        color3: per-point color identifiers for the K-Means data.
        color2: color identifiers for the K-Means centroids.
        d_cluster: density-clustering data, indexed by axis.
        color4: per-point color identifiers for the density data.
        pl_colorscale2: colorscale for the density data.
        titles: titles[0][m] is the title of axis m.
    """
    #Pick the data set, its color identifiers and its colorscale according to the clustering method.
    if option == 1:
        cluster_data, point_color, point_scale = k_cluster, color3, pl_colorscale
    else:
        cluster_data, point_color, point_scale = d_cluster, color4, pl_colorscale2
    ilen = len(cluster_data)                #Number of axes/columns in the chosen data set

    #make_dimension builds the axis list for the Parallel Coordinates graph and the dimension list for the Scatterplot Matrix.
    axis, Scatter_dimensions = make_dimension(ilen,cluster_data,[],[])

    #-----------------Scatterplot Matrix------------------------------------------------------
    #The plotting call takes a list of traces, hence the one-element list.
    trace = [graph.Splom(
        dimensions = Scatter_dimensions,
        marker = dict(
            color = 'lightsteelblue',
            size = 5,
            showscale = False,
            line = dict(width = 0.5, color = 'blue')))]

    #-----------------Parallel Coordinates Graph-----------------------------------
    #Each line is colored through its color identifier and the matching colorscale.
    parallel = [graph.Parcoords(
        line = dict(color = point_color, colorscale = point_scale),
        dimensions = axis)]

    #Save both figures as local HTML files and open them right away.
    plotly.offline.plot(parallel, filename = 'Parallel Coordinates.html',auto_open = True)
    plotly.offline.plot(trace, filename = 'Scatter Plot Matrix.html', auto_open = True)

    #The axes are identified by capital letters on the graphs; print which title each letter stands for.
    print ("Dimension Legend")
    for m in range (0,ilen):
        print (chr(m + 65), ":", titles[0][m])

    #---------------Scatter Plots------------------------------------------------
    #Ask whether a scatterplot of selected axes is wanted. dim_option ends up as 1 (2D), 2 (3D) or 3 (no plot).
    dim_option = None
    while dim_option is None:
        option2 = _prompt_int("Would you like to see a scatterplot of certain dimensions of the data? \n (1)Yes (2) No\n")
        if option2 == 1:
            while dim_option not in (1, 2):
                print ("(1) 2D (2)3D")
                dim_option = _prompt_int(" ")
                print ("**Please use the character identifier from the legend above, not the actual axis title")
                if dim_option not in (1, 2):
                    print ("Invalid Entry")
        elif option2 == 2:
            dim_option = 3
        else:
            print ("Invalid Entry\n")

    if dim_option == 3:                     #The user declined the extra scatterplot.
        return

    #Two axes for a 2D plot, three for a 3D plot; every reply is validated against the legend.
    prompts = ("Enter the x axis: ", "Enter the y axis: ", "Enter the z axis: ")
    axes = [_prompt_axis(prompt, ilen) for prompt in prompts[:dim_option + 1]]

    trace = [_marker_trace(cluster_data, axes, 10, point_color, point_scale)]
    if option == 1:
        #Centroids exist only for K-Means; they are drawn larger to stand out from the datapoints.
        trace.append(_marker_trace(k_centroid, axes, 13, color2, pl_colorscale))

    #Label every plotted axis with its title. 3D axes are configured inside a scene.
    axis_titles = {name + "axis": dict(title = titles[0][axis_index]) for name, axis_index in zip("xyz", axes)}
    if dim_option == 1:
        layout = graph.Layout(**axis_titles)
    else:
        layout = graph.Layout(scene = axis_titles)

    fig = graph.Figure(trace,layout)
    plotly.offline.plot(fig, filename = "Scatter Plot.html", auto_open = True)    #Graph the 2D/3D Scatterplot
'''
Scatter-plot matrix of the diabetes data set.

Reference: https://plotly.com/python/splom/#
'''
import plotly as py
import plotly.graph_objs as go
import pandas as pd

# ============================DATA==========================================
df = pd.read_csv("data/diabetes.csv")

# Readable label per row: Outcome 0 -> 'non-diabetic', anything else -> 'diabetic'
# (the comparison must be `==`; `=` is not valid inside an expression).
text_d = ['non-diabetic' if cl == 0 else 'diabetic' for cl in df['Outcome']]

# One dimension per measurement, labelled with its own column name.  The
# 'Glucose' axis used to show the BloodPressure values; each label now maps
# to its own column and BloodPressure gets its own axis.
feature_columns = [
    'Pregnancies',
    'Glucose',
    'BloodPressure',
    'SkinThickness',
    'Insulin',
    'BMI',
    'DiabetesPedigreeFunction',
    'Age',
]
fig = go.Figure(data=go.Splom(
    dimensions=[dict(label=column, values=df[column])
                for column in feature_columns],
))
[0.666, '#636efa'], [1, '#636efa']] text=[df.loc[ k, 'class'] for k in range(len(df))] trace1 = go.Splom(dimensions=[dict(label='sepal length', values=df['sepal length']), dict(label='sepal width', values=df['sepal width']), dict(label='petal length', values=df['petal length']), dict(label='petal width', values=df['petal width'])], text=text, #default axes name assignment : #xaxes= ['x1','x2', 'x3'], #yaxes= ['y1', 'y2', 'y3'], marker=dict(color=color_vals, size=7, colorscale=pl_colorscale, showscale=False, line=dict(width=0.5, color='rgb(230,230,230)')) ) axis = dict(showline=True, zeroline=False, gridcolor='#fff', ticklen=4)
[0.888, '#0dea0b'], [0.888, '#0dea0b'], [0.999, '#00ff68'], [1, '#00ff68'] ] # 6 text = [df.loc[k, 'score'] for k in range(len(df))] # 7 trace1 = go.Splom(dimensions=[ dict(label='new', values=df['new']), dict(label='resolved', values=df['resolved']), dict(label='unresolved', values=df['unresolved']), dict(label='insertions', values=df['insertions']) ], text=text, marker=dict(color=color_vals, size=7, colorscale=pl_colorscale, showscale=False, line=dict(width=0.5, color='rgb(230,230,230)'))) # 8 axis = dict(showline=True, zeroline=False, gridcolor='#fff', ticklen=4) layout = go.Layout(title='Iris Data set', dragmode='select', width=600, height=600, autosize=False, hovermode='closest',