go.Scatter({
        'x': df1['A'],
        'y': df1['B'],
        'mode': 'markers',
        'marker': {
            'size': abs(df1['C']) * 50
        }
    })
]
offl.plot(traces)

# Scatter matrix: every column of df1 becomes one Splom dimension, labelled
# with its own column name.
traces = [
    go.Splom(dimensions=[
        dict(label=column_name, values=df1[column_name])
        for column_name in df1
    ])
]
offl.plot(traces)

# Geographic (Choropleth) Maps -----------------------------------------------

import plotly.offline as offl
import plotly.graph_objs as go
import pandas as pd
str_inDir = 'C:/Users/robbi/Dropbox/Work & Learning/Language - Python/Udemy - Python for Data Science and Machine Learning/Refactored_Py_DS_ML_Bootcamp-master/09-Geographical-Plotting/'

# USA State Example Plot
data = [{
    'type': 'choropleth',
# In[60]:

# Scatter trace of the 2015 suicide-rate column against the state column.
trace6 = go.Scatter(
    x=suicide_data['State'],
    y=suicide_data['Suicide Rate (per 1 lakh) 2015[4]'],
)

# In[61]:

# Scatter-plot matrix over seven df1 columns, given as
# (axis label, df1 column) pairs.
trace11 = go.Splom(
    dimensions=[
        dict(label=label, values=df1[column])
        for label, column in (
            ('Unemployment_Total', 'Unemployment_Total'),
            ('Unemployment_Urban', 'Unemployment_Urban'),
            ('Unemployment_Rural', 'Unemployment_Rural'),
            ('Crime 2014', '2014'),
            ('Crime 2015', '2015'),
            ('Crime 2016', '2016'),
            ('Suicide Rate (per 1 lakh) 2015[4]',
             'Suicide Rate (per 1 lakh) 2015[4]'),
        )
    ],
    text=cols,
    marker={
        'color': [
            'mistyrose', 'moccasin', 'navajowhite', 'navy', 'oldlace',
            'olive', 'olivedrab'
        ],
        'showscale': False,
        'line_color': 'white',
        'line_width': 0.5,
    },
)

# In[62]:

var_exp = var
cum_var_exp = np.cumsum(var_exp)

trace12 = dict(type='bar',
Esempio n. 3
0
                   yaxis4=dict(axis),
                   yaxis5=dict(axis),
                   yaxis6=dict(axis),
                   yaxis7=dict(axis),
                   yaxis8=dict(axis),
                   yaxis9=dict(axis),
                   yaxis10=dict(axis),
                   yaxis11=dict(axis),
                   yaxis12=dict(axis))
# Scatter-plot matrix of twelve columns of `data`; every axis label is the
# column name itself.
trace1 = go.Splom(dimensions=[
    dict(label=feature, values=data[feature])
    for feature in ('meanfreq', 'sd', 'median', 'Q25', 'Q75', 'IQR', 'skew',
                    'kurt', 'sp.ent', 'sfm', 'mode', 'centroid')
])
fig1 = {'data': [trace1], 'layout': layout}
plotly.offline.plot(fig1, filename="Corelation-Pair-Plot.html")

# Hold out 20% of the samples for testing, with a fixed random_state.
print("######### Splitting DataSet To Training And Testing #########")
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=74)
Esempio n. 4
0
# Load the data: the first three columns are the inputs, the fourth and
# fifth columns are the two targets (each reshaped to a column vector).
dataset = pd.read_csv('stir.csv')
X = dataset.iloc[:, :3].values
y1 = np.reshape(dataset.iloc[:, 3].values, (-1, 1))
y2 = np.reshape(dataset.iloc[:, 4].values, (-1, 1))
dataset = dataset.drop(columns=["Unnamed: 5"])

# Scatter matrix, given as (axis label, dataset column) pairs.
data1 = go.Splom(
    dimensions=[
        dict(label=label, values=dataset[column])
        for label, column in (
            ('Rotational speed', 'Rotational speed(RPM)'),
            ('Wield speed', 'Weilding speed(mm/min)'),
            ('Axial-load', 'Axial load(kN)'),
            ('Tensile elongation (%)', 'Tensile elongation (%)'),
            ('tensile strength(MPa)', 'ultimate tensile strength(MPa)'),
        )
    ],
    marker={
        'color': 'rgb(255, 8, 0)',
        'size': 7,
        'showscale': False,
        'line': {'width': 0.5, 'color': 'rgb(280,180,230)'},
    },
)

axis = dict(showline=True, zeroline=False, gridcolor='#fff', ticklen=4)

layout = go.Layout(title='Friction stir Weilding data set',
                   dragmode='select',
                   width=1000,
                   height=1000,
                   autosize=False,
                   plot_bgcolor='rgba(240,240,240, 0.95)',
Esempio n. 5
0
# Mean of the remaining columns for every (professor, lecture) pair.
grouped = data.groupby(['professor', 'lecture'], as_index=False).mean()

# Hover text: the professor of each aggregated row.
text = [grouped.loc[row, 'professor'] for row in range(len(grouped))]

professors = grouped['professor'].drop_duplicates()

# Each distinct professor is mapped to its position among the distinct values.
profColor = {
    name: position
    for position, name in enumerate(professors.values)
}

color_vals = [profColor[name] for name in grouped['professor']]

# Scatter matrix, given as (axis label, grouped column) pairs.
result = go.Splom(
    dimensions=[
        dict(label=label, values=grouped[column])
        for label, column in (
            ('Lecture', 'lecture'),
            ('Parti', 'participants'),
            ('Exp', 'professional expertise'),
            ('Motive', 'motivation'),
            ('Present', 'clear presentation'),
            ('Imp', 'overall impression'),
        )
    ],
    text=text,
    marker={
        'color': color_vals,
        'size': 6,
        'colorscale': 'Jet',
        'showscale': True,
        'line': {'width': 0.5, 'color': 'rgb(230,230,230)'},
    },
)

# Hide the diagonal panels.
result.update(diagonal=dict(visible=False))

layout = go.Layout(showlegend=True, title={'text': 'Scatterplot matrix'})
fig1 = {'data': [result], 'layout': layout}
# py.plot(fig1, filename =  "scatterplot matrix")

#converting lecture names to lecture numbers
Esempio n. 6
0
fig.update_layout(margin=dict(t=0, l=0, r=0, b=0))
pyo.plot(fig, filename="sunburst.html")
fig.show()

# In[98]:

# Iris measurements; the class label is encoded as an integer category code
# that drives the marker colour.
df = pd.read_csv(
    'https://raw.githubusercontent.com/plotly/datasets/master/iris-data.csv')
index_vals = df['class'].astype('category').cat.codes

fig = go.Figure(data=go.Splom(
    dimensions=[
        dict(label=measurement, values=df[measurement])
        for measurement in ('sepal length', 'sepal width', 'petal length',
                            'petal width')
    ],
    diagonal_visible=False,
    text=df['class'],
    marker={
        'color': index_vals,
        'showscale': False,
        'line_color': 'white',
        'line_width': 0.5,
    }))
fig.update_layout(dict(title='Iris Data set', width=600, height=600))
pyo.plot(fig, filename="scatter_matrix.html")
fig.show()

# In[99]:

# 2D histogram contour of 500 points drawn uniformly from [-1, 1) on each axis.
x = np.random.uniform(low=-1, high=1, size=500)
y = np.random.uniform(low=-1, high=1, size=500)
fig = go.Figure(data=go.Histogram2dContour(x=x, y=y, colorscale='Blues'))
pyo.plot(fig, filename="hist_contour_plot.html")
Esempio n. 7
0
                        size=16,
                        color="white"),
                    paper_bgcolor='rgba(0,0,0,0.65)',
                    plot_bgcolor='rgba(0,0,0,1)')

heat_lay1 = go.Figure(layout=heat_layout, data=trace_heat)
# ---------------------------------- FIGURE 2 ----------------------------------
# Scatter matrix of five df2016 columns (labels equal the column names);
# hover text is the world rank.
scat_matrix = go.Splom(
    dimensions=[
        dict(label=score, values=df2016[score])
        for score in ('teaching', 'research', 'citations', 'income',
                      'total_score')
    ],
    text=df2016['world_rank'],
    # No colour bar; a white marker outline (line_color / line_width) is
    # intentionally left disabled.
    marker={'showscale': False},
)

scat_matrix_layout = go.Layout(
                    title="Les columns teaching, research, citations, income total_score sont-elles corrélées?",
                    height=800,
                    font=dict(
                        family="sans serif",
                        size=14,
Esempio n. 8
0
def create_plot_5():
    """Build the scatter-plot matrix of the `mortality_us` table as Plotly JSON.

    Reads the `Value`, `Category` and `Date` columns of `mortality_us` through
    a short-lived session on the module-level `engine`, pivots them to one
    column per category indexed by date, and plots every category against
    every other one in a `go.Splom`. Marker colour is the date index cast to
    integer.

    Returns:
        str: the figure serialised with `plotly.utils.PlotlyJSONEncoder`.
    """
    session = Session(engine)
    try:
        long_df = pd.read_sql("select Value, Category, Date from mortality_us",
                              con=session.connection())
    finally:
        # Always hand the connection back, even when the query fails.
        session.close()

    df = long_df.pivot(index='Date', columns='Category', values='Value')

    # Largest value over all categories. `df.max()` on its own is a
    # per-column Series, which is not a usable axis-range endpoint.
    ymax = df.max().max()

    fig = go.Figure(data=go.Splom(
        dimensions=[dict(label=c, values=df[c]) for c in df.columns],
        text=df.index,
        marker=dict(color=df.index.astype('int'),
                    size=5,
                    colorscale='Bluered',
                    line=dict(width=0.5, color='rgb(230,230,230)'))))

    # One slider step per trace; each step shows only its own trace.
    trace_count = len(fig.data)
    steps = []
    for i in range(trace_count):
        visibility = [False] * trace_count
        visibility[i] = True
        steps.append(
            dict(method="restyle",
                 args=["visible", visibility],
                 label=fig.data[i]['name']))

    sliders = [
        # Keep the initially active step inside the available steps; the
        # preferred position is step 10 when that many steps exist.
        dict(active=min(10, max(len(steps) - 1, 0)),
             currentvalue={"prefix": "Year: "},
             pad={"t": 50},
             steps=steps)
    ]

    fig.update_layout(sliders=sliders)

    # Skip the range when there is no data (the maximum is NaN).
    if pd.notna(ymax):
        fig.update_yaxes(range=[0, ymax])
    fig.update_layout(title='Scatter Plot Matrix',
                      dragmode='select',
                      width=1000,
                      height=1000,
                      hovermode='closest')

    graphJSON = json.dumps(fig, cls=plotly.utils.PlotlyJSONEncoder)
    return graphJSON
Esempio n. 9
0
#         line=dict(
#             color='rgba(217, 217, 217, 0.14)',
#             width=0.5
#         ),opacity=0.8))
# data = [trace1]
# fig = go.Figure(data=data)

# Piecewise-constant colour scale: three flat bands with breaks at 0.333
# and 0.666.
pl_colorscale = [
    [0.0, '#19d3f3'],
    [0.333, '#19d3f3'],
    [0.333, '#e763fa'],
    [0.666, '#e763fa'],
    [0.666, '#636efa'],
    [1, '#636efa'],
]

# Scatter matrix of df1 columns A-D, given as (axis label, column) pairs.
trace1 = go.Splom(
    dimensions=[
        dict(label=label, values=df1[column])
        for label, column in (
            ('sepal length', "A"),
            ('sepal width', "B"),
            ('petal ngth', "C"),
            ('petal len', "D"),
        )
    ],
    marker={
        'color': "green",
        'size': 7,
        'colorscale': pl_colorscale,
        'showscale': False,
        'line': {'width': 0.5, 'color': 'red'},
    },
)

axis = {
    'showline': True,
    'zeroline': False,
    'gridcolor': 'yellow',
    'ticklen': 4,
}

layout = go.Layout(title='Iris Data set',
                   dragmode='select',
                   width=600,
                   height=600,
                   autosize=False,
                   hovermode='closest',
                   plot_bgcolor='lightgrey',
Esempio n. 10
0
def parse_contents(contents, filename, date):
    """Build the summary view for one uploaded data file.

    Args:
        contents: upload payload of the form "<prefix>,<base64 data>".
        filename: name of the uploaded file. A name containing 'csv' is read
            as UTF-8 CSV, one containing 'xls' as an Excel workbook.
        date: unused; kept so the call signature stays unchanged.

    Returns:
        html.Div: the raw table, a describe/missing/dtype summary table, the
        correlation table, a longitude/latitude scatter and a scatter-plot
        matrix coloured by `ocean_proximity`. When the payload cannot be
        decoded, the file type is not recognised, a required column is
        missing, or any preparation step fails, a Div holding an error
        message is returned instead.
    """
    # Columns the figures and the colour coding below read from the file.
    required_columns = [
        'ocean_proximity', 'longitude', 'latitude', 'median_house_value',
        'median_income', 'total_rooms', 'housing_median_age'
    ]
    try:
        content_type, content_string = contents.split(',')
        decoded = base64.b64decode(content_string)

        if 'csv' in filename:
            # Assume that the user uploaded a CSV file
            df = pd.read_csv(io.StringIO(decoded.decode('utf-8')))
        elif 'xls' in filename:
            # Assume that the user uploaded an excel file
            df = pd.read_excel(io.BytesIO(decoded))
        else:
            raise ValueError('Unsupported file type: {}'.format(filename))

        missing_columns = [
            col for col in required_columns if col not in df.columns
        ]
        if missing_columns:
            raise ValueError(
                'Missing required columns: {}'.format(missing_columns))

        # The summaries are derived for BOTH file types, so the layout below
        # never refers to a name that only the CSV branch defined.
        df_desc = pd.concat([
            df.describe(include='all').fillna(0).round(),
            df.isnull().sum().to_frame(name='missing').T,
            df.dtypes.to_frame(name='dtype').T.astype(str)
        ]).reset_index()
        # Correlate numeric/bool columns only: `corr()` on a frame holding a
        # string column raises on recent pandas versions.
        corr_matrix = df.select_dtypes(
            include=['number', 'bool']).corr().reset_index()
        # Integer code per category, used as the marker colour.
        index_vals = df['ocean_proximity'].astype('category').cat.codes
    except Exception as e:
        print(e)
        return html.Div(['There was an error processing this file.'])

    header_style = {'backgroundColor': 'white', 'fontWeight': 'bold'}
    scrolling_table_style = {
        'maxHeight': '300px',
        'maxWidth': '1500px',
        'overflowX': 'scroll',
        'overflowY': 'scroll',
        'border': 'thin lightgrey solid'
    }

    return html.Div([
        html.H5(filename),
        # Raw uploaded rows.
        dash_table.DataTable(
            data=df.to_dict('records'),
            columns=[{
                'name': i,
                'id': i
            } for i in df.columns],
            style_as_list_view=True,
            fixed_rows={
                'headers': True,
                'data': 0
            },
            style_cell={'width': '150px'},
            style_header=dict(header_style),
            style_table={
                'maxHeight': '300px',
                'border': 'thin lightgrey solid'
            }),
        html.H5(
            "Data Describe : This method shows a summary of the numerical attributes"
        ),
        # describe() output plus one 'missing' row and one 'dtype' row.
        dash_table.DataTable(
            data=df_desc.to_dict('records'),
            columns=[{
                'name': i,
                'id': i
            } for i in df_desc.columns],
            style_as_list_view=True,
            style_cell={'padding': '15px'},
            fixed_rows={
                'headers': True,
                'data': 0
            },
            fixed_columns={
                'headers': True,
                'data': 1
            },
            style_header=dict(header_style),
            style_table=dict(scrolling_table_style)),
        html.H5("Data Correlation"),
        # `corr_matrix` already carries its row labels in the 'index' column,
        # so it is not reset a second time here.
        dash_table.DataTable(
            data=corr_matrix.to_dict('records'),
            columns=[{
                'name': i,
                'id': i
            } for i in corr_matrix.columns],
            style_as_list_view=True,
            style_cell={'padding': '15px'},
            fixed_rows={
                'headers': True,
                'data': 0
            },
            fixed_columns={
                'headers': True,
                'data': 1
            },
            style_header=dict(header_style),
            style_table=dict(scrolling_table_style)),
        dcc.Graph(
            id='SPloM',
            config={
                'showSendToCloud': True,
            },
            figure={
                'data': [
                    go.Scatter(
                        x=df["longitude"],
                        y=df["latitude"],
                        mode='markers',
                    )
                ]
            }),
        dcc.Graph(
            id='SPloM-selectedPoints',
            config={
                'showSendToCloud': True,
            },
            figure={
                'data': [
                    go.Splom(
                        dimensions=[
                            dict(label='median_house_value',
                                 values=df['median_house_value']),
                            dict(label='median_income',
                                 values=df['median_income']),
                            dict(label='total_rooms',
                                 values=df['total_rooms']),
                            dict(label='housing_median_age',
                                 values=df['housing_median_age'])
                        ],
                        text=None,
                        marker=dict(
                            # colors encode categorical variables
                            color=index_vals,
                            showscale=False,
                            line_color='white',
                            line_width=0.5))
                ],
            },
        ),
        html.Hr()  # horizontal line
    ])
def plot_scatter_matrix(df):
    """
    Show a scatter-plot matrix of ten fixed columns of ``df``.

    Points are coloured by the ``SeriousDlqin2yrs`` column and carry a hover
    label derived from it; the diagonal panels are hidden.

    Args:
        - df (DataFrame object): Dataframe to be shown
    """
    # Hover label per row, derived from the target column.
    textd = []
    for target in df["SeriousDlqin2yrs"]:
        if target == 0:
            textd.append("Responsible (target=0)")
        else:
            textd.append("Delinquent (target=1)")

    # (axis label, dataframe column) for every panel, in display order.
    labelled_columns = (
        ("RevUtilOfUnsecLines", "RevolvingUtilizationOfUnsecuredLines"),
        ("age", "age"),
        ("NTime30-59Days", "NumberOfTime30-59DaysPastDueNotWorse"),
        ("DebtRatio", "DebtRatio"),
        ("MonthlyIncome", "MonthlyIncome"),
        ("NOpenCreditLinesLoans", "NumberOfOpenCreditLinesAndLoans"),
        ("NTimes90DaysLate", "NumberOfTimes90DaysLate"),
        ("NRealEstateLoansLines", "NumberRealEstateLoansOrLines"),
        ("NTime60-89Days", "NumberOfTime60-89DaysPastDueNotWorse"),
        ("NDepend", "NumberOfDependents"),
    )

    splom = go.Splom(
        dimensions=[
            {"label": label, "values": df[column]}
            for label, column in labelled_columns
        ],
        marker={
            "color": df["SeriousDlqin2yrs"],
            "size": 5,
            "colorscale": "Bluered",
            "line": {"width": 0.5, "color": "rgb(230,230,230)"},
        },
        text=textd,
        diagonal={"visible": False},
    )
    fig = go.Figure(data=splom)

    centred_title = {
        "text": "Scatterplot Matrix of Dataset",
        "x": 0.5,
        "xanchor": "center",
        "yanchor": "top",
    }
    fig.update_layout(
        title=centred_title,
        dragmode="select",
        width=1000,
        height=1000,
        hovermode="closest",
        font={"size": 7, "color": "#7f7f7f"},
    )

    fig.show()
# Marker-only scatter of column A against column B.
py.offline.plot(
    [go.Scatter({'x': df['A'], 'y': df['B'], 'mode': 'markers'})],
    filename='data/six.html',
    auto_open=False,
)

# Scatter plot matrix of columns A-D with the diagonal panels hidden.
py.offline.plot(
    [
        go.Splom(
            dimensions=[
                {'label': column, 'values': df[column]}
                for column in ('A', 'B', 'C', 'D')
            ],
            diagonal={'visible': False},
        )
    ],
    filename='data/seven.html',
    auto_open=False,
)
Esempio n. 13
0
    def correlation_plot_ly(df, title='mytitle', saveto='./myfile', ylabel='ylabel', xlabel='xlabel'):
        """
        Write a scatter-plot matrix of ``df`` annotated with pairwise R-squared values.

        Every column of ``df`` becomes one Splom dimension (diagonal hidden;
        the upper half is hidden too when there are more than two columns).
        For each pair of distinct columns a linear regression is fitted with
        ``stats.linregress`` and the squared correlation coefficient is added
        as a text annotation. The figure is written to ``saveto + '.html'``
        without opening it.

        :param df: DataFrame whose columns are plotted against each other
        :param title: figure title
        :param saveto: output path without extension; '.html' is appended
        :param ylabel: unused; kept for interface compatibility
        :param xlabel: unused; kept for interface compatibility
        :return: None
        """
        column_names = list(df.columns)
        dimensions = [{'label': col, 'values': df[col]} for col in column_names]

        trace1 = go.Splom(dimensions=dimensions, diagonal=dict(visible=False))

        t_len = len(trace1['dimensions'])

        if t_len > 1:
            trace1['dimensions'][1].update(visible=True)
        if t_len > 2:
            trace1['showupperhalf'] = False

        annotation_list = []
        yaxis_val = 1  # paper-coordinate y of the next annotation, top down
        for position, col1 in enumerate(column_names):
            # Pair col1 only with the columns that precede it, so each
            # unordered pair is annotated once and never with itself.
            for col2 in column_names[:position]:
                _slope, _intercept, r_value, _p_value, _std_err = stats.linregress(df[col1], df[col2])
                # linregress returns the correlation coefficient r; the label
                # promises R squared, so square it before formatting.
                format_r_value = "<i>{}_vs_{}: R<sup>2</sup>={:02.2f}</i>".format(col1, col2, r_value ** 2)
                annot_dict = dict(x=1,
                                  y=yaxis_val,
                                  xref='paper',
                                  yref='paper',
                                  text=format_r_value,
                                  showarrow=False,
                                  font=dict(size=10)
                                  )
                annotation_list.append(annot_dict)
                yaxis_val -= 0.05

        layout = go.Layout(
            title=title,
            dragmode='select',
            width=600,
            height=600,
            autosize=False,
            hovermode='closest',
            plot_bgcolor='rgba(240,240,240, 0.95)',
            annotations=annotation_list
        )

        figure = dict(data=[trace1], layout=layout)
        py.plot(figure, filename=saveto + '.html', auto_open=False, config=PlotData.plotly_conf())

        return None
Esempio n. 14
0
No_Of_Bedrooms

# Map every entry of No_Of_Bedrooms to the position it is stored at.
class_code = {
    No_Of_Bedrooms[position]: position
    for position in range(len(No_Of_Bedrooms))
}
class_code

# Colour code and hover text per row, both taken from the 'bedrooms' column.
color_vals = [class_code[bedrooms] for bedrooms in housingData['bedrooms']]

text = [housingData.loc[row, 'bedrooms'] for row in range(len(housingData))]

# Scatter matrix of six housingData columns; labels equal the column names.
trace1 = go.Splom(
    dimensions=[
        dict(label=column, values=housingData[column])
        for column in ('sqft_lot', 'sqft_above', 'sqft_basement', 'price',
                       'sqft_living', 'bedrooms')
    ],
    text=text,
    marker={
        'color': color_vals,
        'size': 7,
        'colorscale': 'Viridis',
        'showscale': False,
        'line': {'width': 0.5, 'color': 'rgb(230,230,230)'},
    },
)

axis = {
    'showline': True,
    'zeroline': False,
    'gridcolor': '#fff',
    'ticklen': 4,
}

layout = go.Layout(
    title='House Price Data',
    dragmode='select',
    width=900,
    height=900,
    autosize=False,
Esempio n. 15
0
def plots(year, countries, indicator, projection, continents):
    """Build the five dashboard figures for the current selection.

    All data comes from the module-level DataFrame `df`.

    Args:
        year: value matched against the `Time` column (bars and choropleth).
        countries: iterable of `Country Name` values; one line trace each.
        indicator: name of the `df` column that is ranked and plotted.
        projection: index into ['orthographic', 'equirectangular'].
        continents: iterable of `Continent` values; one bar trace each in
            both bar charts.

    Returns:
        tuple: five `go.Figure` objects, in this order: top-5 bar chart,
        choropleth, bottom-5 bar chart, line chart, scatter-plot matrix.
    """
    font_family = "Verdana,verdana,sans-serif"
    font_color = '#f6f6f6'
    transparent = 'rgba(0,0,0,0)'

    def centered_title(text):
        # Title block shared by the bar, line and matrix figures.
        return dict(text=text,
                    font=dict(family=font_family, color=font_color, size=23),
                    x=0.5,
                    y=0.9,
                    xanchor='center',
                    yanchor='top')

    def cartesian_layout(title_text, x_title):
        # Layout shared by the two bar charts and the line chart.
        return dict(title=centered_title(title_text),
                    yaxis=dict(title=indicator, type='linear'),
                    xaxis=dict(title=x_title),
                    font=dict(family=font_family, color=font_color),
                    paper_bgcolor=transparent,
                    plot_bgcolor=transparent)

    def bar_trace(frame, continent):
        # One bar trace: countries of `frame` against the indicator.
        return dict(type='bar',
                    x=frame['Country Name'],
                    y=frame[indicator],
                    name=str(continent),
                    marker_color='#ffff99',
                    marker_line_width=1.5,
                    marker_line_color='#ffff33')

    ############################################ Bar Plots #############################################################
    # Missing values and zeros are both turned into NaN before ranking.
    df_ranked = df.fillna(0.0).replace(0.0, np.nan)
    df_year = df_ranked.loc[df_ranked['Time'] == year]

    data_bar1 = []
    data_bar2 = []
    for continent in continents:
        # Filter from the full year slice on every iteration. Re-filtering the
        # previous continent's result would leave every later continent empty.
        df_continent = df_year.loc[df_year['Continent'] == continent]
        data_bar1.append(
            bar_trace(df_continent.nlargest(5, [indicator]), continent))
        data_bar2.append(
            bar_trace(df_continent.nsmallest(5, [indicator]), continent))

    # Name every requested continent in the titles (identical to the previous
    # text for a single continent; well defined for an empty selection).
    continent_label = ', '.join(str(continent) for continent in continents)

    layout_bar1 = cartesian_layout(
        '<b>Top 5 countries for continent ' + continent_label + '</b>',
        'Countries')
    layout_bar2 = cartesian_layout(
        '<b>Bottom 5 countries for continent ' + continent_label + '</b>',
        'Countries')

    ############################################# Choropleth ###########################################################
    df_emission_0 = df.loc[df['Time'] == year]

    data_choropleth = dict(
        type='choropleth',
        locations=df_emission_0['Country Name'],
        # Locations are matched to the built-in map by country name.
        locationmode='country names',
        z=df_emission_0[indicator],
        text=df_emission_0['Country Name'],
        colorscale='sunset',
        # NOTE(review): `titlefont` is the legacy spelling of `title.font`;
        # confirm the installed plotly version still accepts it.
        colorbar=dict(title='Scale',
                      titlefont=dict(color=font_color),
                      tickfont=dict(color=font_color)),
        hovertemplate='Country: %{text} <br>' + str(indicator) + ': %{z}',
        name='')

    layout_choropleth = dict(
        geo=dict(
            scope='world',  # default
            projection=dict(
                type=['orthographic', 'equirectangular'][projection]),
            landcolor='LightGrey',
            lakecolor='GhostWhite',
            showocean=True,  # default = False
            oceancolor='#cde7e7',
            bgcolor=transparent,
        ),
        title=dict(text='World ' + str(indicator) +
                   '<br>Choropleth Map on the year ' + str(year),
                   font=dict(family=font_family, color=font_color, size=20),
                   x=0),
        paper_bgcolor=transparent,
        plot_bgcolor=transparent)

    ############################################## Line Graph ##########################################################
    data_line = []
    for country in countries:
        df_line = df.loc[df['Country Name'] == country]
        data_line.append(
            dict(type='scatter',
                 x=df_line['Time'],
                 y=df_line[indicator],
                 name=country,
                 connectgaps=True))

    layout_line = cartesian_layout('<b>Country Evolution</b>', 'Years')

    ############################################### Matrix #############################################################
    # Integer code per continent, used as the marker colour.
    index_vals = df['Continent'].astype('category').cat.codes

    # (axis label, df column) for every matrix dimension.
    matrix_columns = [
        ('Ind.1', 'Government expenditure on education, total (% of GDP)'),
        ('Ind.2', 'Labor force, female (% of total labor force)'),
        ('Ind.3',
         'Literacy rate, adult female (% of females ages 15 and above)'),
        ('Ind.4', 'Unemployment, total (% of total labor force)'),
        ('Ind.5', 'Literacy rate, adult male (% of males ages 15 and above)'),
        ('Ind.6', 'GDP per capita (current US$)'),
    ]

    data_matrix = go.Splom(
        dimensions=[
            dict(label=label, values=df[column])
            for label, column in matrix_columns
        ],
        text=df['Continent'],
        marker=dict(
            color=index_vals,
            colorscale='sunset',
            size=5,
        ))

    layout_matrix = dict(
        title=centered_title('<b>Correlation Matrix</b>'),
        paper_bgcolor=transparent,
        plot_bgcolor='rgba(176,176,176,1)',
        font=dict(family=font_family, color=font_color),
    )

    ################################################ Return ############################################################
    return (
        go.Figure(data=data_bar1, layout=layout_bar1),
        go.Figure(data=data_choropleth, layout=layout_choropleth),
        go.Figure(data=data_bar2, layout=layout_bar2),
        go.Figure(data=data_line, layout=layout_line),
        go.Figure(data=data_matrix, layout=layout_matrix),
    )
  dict(label="num_timesteps",
       values=[result['params']['num_timesteps'] for result in results]),
  #dict(label="num_iterations_after_valid",
  #     values=[result['params']['num_iterations_after_valid'] for result in results]),
  dict(label="num_iterations",
       values=num_iterations),
  dict(label="cost",
       values=[0 if len(result['cost']) == 0 else result['cost'][-1] for result in results]),
  dict(label="time",
       values=time)]

# Lower-triangle scatter matrix over the prepared `dimensions`.
trace1 = go.Splom(
    dimensions=dimensions,
    text=text,
    showupperhalf=False,
    marker={
        # Fixed marker size. The per-point alternatives (color=time, a size
        # derived from num_iterations, colorscale=pl_colorscale) stay disabled.
        'size': 7,
        'showscale': False,
        'line': {'width': 0.5, 'color': 'rgb(230,230,230)'},
    },
)

axis = {
    'showline': True,
    'zeroline': False,
    'gridcolor': "#fff",
    'ticklen': 4,
}
layout = go.Layout(
  title='stomp std dev evaluation',
  dragmode='select',
  width=1500,
  height=1500,
  autosize=True,
Esempio n. 17
0
def _ask_int(prompt):
    """Prompt on stdin and return the reply as an int, or None if it is not a whole number."""
    try:
        return int(input(prompt))
    except ValueError:
        return None


def _ask_axis(prompt, ilen):
    """Prompt until the reply names one of the first `ilen` legend letters; return its 0-based index.

    The legend labels column m with chr(m + 65), so 'A' is column 0. The reply is
    tried exactly as typed first, then stripped, then stripped and upper-cased, so
    'b' is accepted as 'B' whenever 'b' itself is not a legend entry.
    """
    while True:
        reply = input(prompt)
        trimmed = reply.strip()
        for candidate in (reply, trimmed, trimmed.upper()):
            if len(candidate) == 1 and 0 <= ord(candidate) - 65 < ilen:
                return ord(candidate) - 65
        print("Invalid Entry")


def Graph(option, k, k_cluster, k_centroid, pl_colorscale, color3, color2, d_cluster, color4, pl_colorscale2, titles):
    """Plot clustering results with plotly (offline) and optionally a user-chosen scatter plot.

    Always writes and opens 'Parallel Coordinates.html' and 'Scatter Plot Matrix.html',
    prints a letter legend for the axes, then asks on stdin whether a 2D or 3D
    scatter plot of chosen axes should be written to 'Scatter Plot.html'.

    Parameters:
        option          -- 1 selects the K-Means inputs; any other value selects the Density inputs.
        k               -- unused; kept so existing callers keep working.
        k_cluster       -- K-Means data, indexed by axis (k_cluster[i] holds the values of axis i).
        k_centroid      -- K-Means centroids, indexed by axis like k_cluster.
        pl_colorscale   -- plotly colorscale used for K-Means points and centroids.
        color3          -- per-point color identifiers for the K-Means data.
        color2          -- per-centroid color identifiers.
        d_cluster       -- Density data, indexed by axis.
        color4          -- per-point color identifiers for the Density data.
        pl_colorscale2  -- plotly colorscale used for the Density data.
        titles          -- titles[0][i] is the human-readable title of axis i.

    Returns None. Invalid replies at any prompt print "Invalid Entry" and re-prompt
    instead of raising.
    """
    # Pick the data set, its color identifiers and its colorscale once, so that
    # every plot below is written a single time for both clustering methods.
    use_kmeans = option == 1
    if use_kmeans:
        clusters, point_colors, point_scale = k_cluster, color3, pl_colorscale
    else:
        clusters, point_colors, point_scale = d_cluster, color4, pl_colorscale2
    ilen = len(clusters)

    # make_dimension fills and returns the Parcoords axes and the Splom dimensions.
    axis, Scatter_dimensions = make_dimension(ilen, clusters, [], [])

    #-----------------Scatterplot Matrix------------------------------------------
    # plotly.offline.plot takes a list of traces, hence the one-element lists.
    splom_traces = [
        graph.Splom(
            dimensions=Scatter_dimensions,
            marker=dict(
                color='lightsteelblue',
                size=5,
                showscale=False,
                line=dict(width=0.5, color='blue'),
            ),
        )
    ]

    #-----------------Parallel Coordinates Graph-----------------------------------
    # Each line is colored by mapping its color identifier through the colorscale.
    parcoords_traces = [
        graph.Parcoords(
            line=dict(color=point_colors, colorscale=point_scale),
            dimensions=axis,
        )
    ]

    # Offline mode saves each figure as a local HTML file and opens it in the browser.
    plotly.offline.plot(parcoords_traces, filename='Parallel Coordinates.html', auto_open=True)
    plotly.offline.plot(splom_traces, filename='Scatter Plot Matrix.html', auto_open=True)

    # Axes are labelled with letters on the plots to avoid clutter; print the key.
    print("Dimension Legend")
    for m in range(ilen):
        print(chr(m + 65), ":", titles[0][m])

    # With no axes there is nothing the user could pick for a scatter plot.
    if ilen == 0:
        return

    #---------------Scatter Plots------------------------------------------------
    while True:
        option2 = _ask_int("Would you like to see a scatterplot of certain dimensions of the data? \n (1)Yes         (2) No\n")
        if option2 == 1:
            break
        if option2 == 2:
            return
        print("Invalid Entry\n")

    while True:
        print("(1) 2D  (2)3D")
        dim_option = _ask_int(" ")
        if dim_option in (1, 2):
            break
        print("Invalid Entry")
    three_d = dim_option == 2

    print("**Please use the character identifier from the legend above, not the actual axis title")
    x_option = _ask_axis("Enter the x axis: ", ilen)
    y_option = _ask_axis("Enter the y axis: ", ilen)
    z_option = _ask_axis("Enter the z axis: ", ilen) if three_d else None

    make_trace = graph.Scatter3d if three_d else graph.Scatter

    def coordinates(columns):
        """Return the x/y(/z) keyword arguments for the chosen axes of `columns`."""
        picked = dict(x=columns[x_option], y=columns[y_option])
        if three_d:
            picked['z'] = columns[z_option]
        return picked

    trace = [
        make_trace(
            mode="markers",
            marker=dict(size=10, color=point_colors, colorscale=point_scale),
            **coordinates(clusters)
        )
    ]
    if use_kmeans:
        # Centroids are drawn larger (13 vs 10) so they stand out from the data points.
        trace.append(
            make_trace(
                mode="markers",
                marker=dict(size=13, color=color2, colorscale=pl_colorscale),
                **coordinates(k_centroid)
            )
        )

    axis_titles = dict(
        xaxis=dict(title=titles[0][x_option]),
        yaxis=dict(title=titles[0][y_option]),
    )
    if three_d:
        # 3D axis settings must be nested under `scene` in a plotly layout.
        axis_titles['zaxis'] = dict(title=titles[0][z_option])
        layout = graph.Layout(scene=axis_titles)
    else:
        layout = graph.Layout(**axis_titles)

    fig = graph.Figure(trace, layout)
    plotly.offline.plot(fig, filename="Scatter Plot.html", auto_open=True)
Esempio n. 18
0
'''
        Plot a scatter-plot matrix (SPLOM) of the diabetes dataset.
        Reference: https://plotly.com/python/splom/#

'''

import plotly as py
import plotly.graph_objs as go
import pandas as pd

# ============================DATA==========================================
df = pd.read_csv("data/diabetes.csv")

# Hover label for every row: Outcome 0 -> 'non-diabetic', anything else -> 'diabetic'.
text_d = ['non-diabetic' if cl == 0 else 'diabetic' for cl in df['Outcome']]

# Feature columns shown in the matrix. Each dimension is labelled with the
# column it actually plots, so labels and values cannot drift apart.
feature_columns = [
    'Pregnancies',
    'Glucose',
    'BloodPressure',
    'SkinThickness',
    'Insulin',
    'BMI',
    'DiabetesPedigreeFunction',
    'Age',
]

fig = go.Figure(data=go.Splom(
    dimensions=[dict(label=column, values=df[column]) for column in feature_columns],
    text=text_d,  # shown on hover for each point
))



Esempio n. 19
0
               [0.666, '#636efa'],
               [1, '#636efa']]

# Hover text: the class of every row, in row order. Taking the column directly
# keeps the text aligned with the plotted values whatever index `df` carries;
# looking rows up by label with range(len(df)) only works for a default index.
text = df['class'].tolist()


# Scatter-plot matrix of the four measurements, colored per point through
# `color_vals` and `pl_colorscale`.
trace1 = go.Splom(dimensions=[dict(label='sepal length',
                                 values=df['sepal length']),
                            dict(label='sepal width',
                                 values=df['sepal width']),
                            dict(label='petal length',
                                 values=df['petal length']),
                            dict(label='petal width',
                                 values=df['petal width'])],
                text=text,
                #default axes name assignment :
                #xaxes= ['x1','x2',  'x3'],
                #yaxes=  ['y1', 'y2', 'y3'],
                marker=dict(color=color_vals,
                            size=7,
                            colorscale=pl_colorscale,
                            showscale=False,
                            line=dict(width=0.5,
                                      color='rgb(230,230,230)'))
                )

# Axis styling template: visible axis line, no zero line, white grid, short ticks.
axis = dict(showline=True,
          zeroline=False,
          gridcolor='#fff',
          ticklen=4)
Esempio n. 20
0
    [0.888, '#0dea0b'],
    [0.888, '#0dea0b'],
    [0.999, '#00ff68'],
    [1, '#00ff68']
]

# 6
# Hover text: the score of every row, in row order. Taking the column directly
# keeps the text aligned with the plotted values whatever index `df` carries;
# looking rows up by label with range(len(df)) only works for a default index.
text = df['score'].tolist()

# 7
# Scatter-plot matrix of the four columns, colored per point through
# `color_vals` and `pl_colorscale`.
trace1 = go.Splom(dimensions=[
    dict(label='new', values=df['new']),
    dict(label='resolved', values=df['resolved']),
    dict(label='unresolved', values=df['unresolved']),
    dict(label='insertions', values=df['insertions'])
],
                  text=text,
                  marker=dict(color=color_vals,
                              size=7,
                              colorscale=pl_colorscale,
                              showscale=False,
                              line=dict(width=0.5, color='rgb(230,230,230)')))

# 8
# Axis styling template: visible axis line, no zero line, white grid, short ticks.
axis = dict(showline=True, zeroline=False, gridcolor='#fff', ticklen=4)

layout = go.Layout(title='Iris Data set',
                   dragmode='select',
                   width=600,
                   height=600,
                   autosize=False,
                   hovermode='closest',