Python violin Beispiele

Programmiersprache: Python

Namespace / Paketname: plotly.express

Methode / Funktion: violin

Beispiele auf hotexamples.com: 30

Python violin - 30 Beispiele gefunden. Dies sind die am besten bewerteten Python-Beispiele für plotly.express.violin, die aus Open-Source-Projekten extrahiert wurden. Sie können Beispiele bewerten, um die Qualität der Beispiele zu verbessern.

Beispiel #1

Datei anzeigen

Datei: app.py Projekt: dsc-iem/AI-Hacktoberfest

            fig = ff.create_distplot(hist_data, group_labels)
            st.plotly_chart(fig)

        if plot_type2 == "Histogram of Area":
            fig = pd.DataFrame(df["area"]).iplot(kind="histogram",
                                                 bins=40,
                                                 theme="white",
                                                 title="Histogram of area",
                                                 xTitle='area',
                                                 yTitle='Count',
                                                 asFigure=True)

            st.plotly_chart(fig)

        if plot_type2 == "Violin Plot of Area":
            fig = px.violin(df, y="area", box=True, points='all')

            st.plotly_chart(fig)

if red == "About the Project":
    # st.text("hello world")
    components.html("""
    <div style="background-color:#ff0055;padding:10px">
    <h1 style="color:white;text-align:center;">About the Project</h1>
    </div>
    
    """)
    img_top = """<center><img src="https://i.imgur.com/yOS7IGv.png" width="700px"></center>"""
    st.markdown(img_top, unsafe_allow_html=True)
    topic = """

Beispiel #2

Datei anzeigen

# NOTE(review): `data`, `trace` and the first `layout` are defined before this
# excerpt starts — confirm against the full notebook.
data.append(trace)

# Build a figure from the accumulated traces and the current layout.
fig = go.Figure(data=data, layout=layout)

fig.show()
# -

# ### Violin Plot

# +
import plotly.express as px

# Violin of the "g_o" column for each "effective_ox_state" value, with an
# embedded box plot and every individual point drawn next to the violin.
fig = px.violin(
    df_features_targets,
    y="g_o",
    x="effective_ox_state",
    box=True,
    points="all",
)

# +
# Axis titles for the violin figure; this rebinds the name `layout`.
layout = go.Layout(
    xaxis=go.layout.XAxis(title=dict(text="Ir Effective Oxidation State", ), ),
    yaxis=go.layout.YAxis(title=dict(text="ΔG<sub>O</sub>", ), ),
)

# The results are bound to `tmp` and never read here — presumably only to keep
# the notebook cell from echoing the figure; confirm.
tmp = fig.update_layout(layout)

# NOTE(review): `layout_shared` is defined outside this excerpt.
tmp = fig.update_layout(layout_shared)
# -

Beispiel #3

Datei anzeigen

Datei: challenge_solution.py Projekt: shuvro-baset/Dash-by-Plotly

# Preview the first ten rows, restricted to the columns of interest.
preview_columns = [
    'Year', 'Period', 'State', 'Affected by', 'Percent of Colonies Impacted'
]
print(df[:10][preview_columns])

#--------------------------------------------------------------------------------
# Build the violin/box plot

# Only the two states being compared are kept; the query string is assembled
# from the state names so both stay in one place.
states_to_compare = ('TEXAS', 'IDAHO')
state_filter = "State == ['{}','{}']".format(*states_to_compare)
state_colors = {"TEXAS": "limegreen", "IDAHO": "red"}

violinfig = px.violin(
    data_frame=df.query(state_filter),
    # What is plotted where
    x="Affected by",
    y="Percent of Colonies Impacted",
    color='State',
    color_discrete_map=state_colors,
    # How each violin is drawn
    orientation="v",
    points='all',
    box=True,
    # Labelling and hover content
    hover_data=['Period'],
    labels={"State": "STATE"},
    title='What is killing our Bees',
    # Canvas
    width=1400,
    height=600,
    template='plotly_dark',
)

# Draw the mean line inside every violin, in blue.
violinfig.update_traces(meanline_visible=True, meanline_color='blue')

pio.show(violinfig)

Beispiel #4

Datei anzeigen

def _binarised_head_and_accuracy(model, x, y, threshold=0.5, head=10):
    """Threshold a fitted regressor's predictions on ``x`` into 0/1 labels.

    Returns ``(labels[:head], accuracy)`` where ``labels`` is 1 wherever the
    flattened prediction exceeds ``threshold`` and ``accuracy`` is
    ``accuracy_score(y, labels)`` computed over every row.
    """
    labels = np.where(model.predict(x).flatten() > threshold, 1, 0)
    return labels[:head], accuracy_score(y, labels)


def mainFunc(request):
    """Build every chart of the student dashboard and render ``full.html``.

    The view downloads ``student.csv`` and ``education.csv`` from GitHub,
    turns them into Plotly figures (bar, pie, scatter, violin, 3-D scatter,
    heat map, model-comparison table, feature-importance bar), converts each
    figure to an HTML ``<div>`` string with ``plot(..., output_type='div')``
    and passes those strings to the template.

    Args:
        request: the incoming request, forwarded unchanged to ``render``.

    Returns:
        Whatever ``render(request, 'full.html', context)`` returns.
    """
    student_csv = 'https://raw.githubusercontent.com/pyh3887/Django_final_project/master/student.csv'
    education_csv = 'https://raw.githubusercontent.com/pyh3887/Django_final_project/master/education.csv'

    # Styling shared by the charts that sit on the transparent page background.
    dark_page_style = dict(paper_bgcolor='rgba(255,255,255,0)',
                           plot_bgcolor='rgba(0,0,0,0)',
                           font=dict(family='Courier New, monospace',
                                     color='#fff',
                                     size=18))
    # groupby(...).size() returns the grades in sorted order, hence H, L, M.
    grade_axis = ['H', 'L', 'M']
    majors = [
        'Arabic', 'Biology', 'Chemistry', 'English', 'French', 'Geology',
        'History', 'IT', 'Math', 'Quran', 'Science', 'Spanish'
    ]
    countries = [
        'Egypt', 'Iran', 'Iraq', 'Jordan', 'KW', 'Lybia', 'Morocco',
        'Palestine', 'SaudiArabia', 'Syria', 'Tunis', 'USA', 'lebanon',
        'venzuela'
    ]

    # ------------------------------------------------------------------
    # Section by Yun-jin
    studentData = pd.read_csv(student_csv, encoding='cp949')
    print(studentData.groupby(['성적'])['부모의학교만족도'].size())

    # Grade counts split by the parents' satisfaction with the school.
    satisfaction_bars = []
    for level, bar_color in (('Good', '#9bb1d6'), ('Bad', '#a39bd6')):
        matching = studentData[studentData['부모의학교만족도'] == level]
        satisfaction_bars.append(
            go.Bar(name=level,
                   x=grade_axis,
                   y=matching.groupby(['성적', '부모의학교만족도']).size(),
                   marker_color=bar_color))
    fig = go.Figure(data=satisfaction_bars)
    fig.update_layout(barmode='group',
                      width=600,
                      height=600,
                      xaxis_title='성적',
                      yaxis_title='합계',
                      **dark_page_style)
    plot_div = plot(fig, output_type='div')

    # Share of students per major.
    fig2 = go.Figure(data=[
        go.Pie(labels=majors,
               values=studentData.groupby(['전공']).size(),
               textinfo='label+percent',
               insidetextorientation='radial')
    ])
    fig2.update_layout(width=550,
                       height=500,
                       showlegend=False,
                       **dark_page_style)
    pie_div = plot(fig2, output_type='div')

    # Students per major for each grade (original author's note: still needs
    # confirmation that this comparison is meaningful).
    fig3 = go.Figure()
    for grade, dot_color, legend in (('H', 'crimson', 'High'),
                                     ('M', 'gold', 'Middle'),
                                     ('L', 'black', 'Low')):
        fig3.add_trace(
            go.Scatter(
                x=studentData[studentData['성적'] == grade].groupby(
                    ['전공']).size(),
                y=majors,
                marker=dict(color=dot_color, size=12),
                mode="markers",
                name=legend,
            ))
    fig3.update_layout(xaxis_title="학생수",
                       yaxis_title="전공",
                       width=700,
                       height=500,
                       **dark_page_style)
    last_div = plot(fig3, output_type='div')

    # Grade counts split by absence bucket; the counts double as bar labels.
    under_7 = studentData[studentData['결석일수'] == 'Under-7'].groupby(
        ['성적']).size()
    above_7 = studentData[studentData['결석일수'] == 'Above-7'].groupby(
        ['성적']).size()
    fig4 = go.Figure(data=[
        go.Bar(name='7일이하',
               x=grade_axis,
               y=under_7,
               text=under_7,
               textposition='auto',
               marker_color='rgb(204,153,153)'),
        go.Bar(name='7일이상',
               x=grade_axis,
               y=above_7,
               text=above_7,
               textposition='auto',
               marker_color='rgb(255,204,204)')
    ])
    print(above_7)
    fig4.update_layout(xaxis_title="성적",
                       yaxis_title="학생수",
                       width=600,
                       height=600,
                       **dark_page_style)
    ab_plot = plot(fig4, output_type='div')

    fig5 = px.scatter_matrix(studentData,
                             dimensions=["발표수", "과정반복수", "새공지사항확인수", "토론참여수"],
                             color="성적",
                             width=1200,
                             height=1000)
    fig5.update_layout(width=800, height=800, **dark_page_style)
    plot5_div = plot(fig5, output_type='div')

    # ------------------------------------------------------------------
    # Section by Chan-gyu
    plt.clf()
    # Download student.csv once; every later step works on its own copy so the
    # in-place column re-mappings below cannot leak into each other.
    student_raw = pd.read_csv(student_csv, encoding='euc-kr')
    data = student_raw.copy()
    data['성적'] = data['성적'].map({'H': 2, 'M': 1, 'L': 0})

    # Percentage of high / middle / low grades per nationality.
    df = pd.DataFrame({"국적": data['국적'], "성적": data['성적']})
    df7 = pd.crosstab(df.성적, df.국적, margins=True)
    # Express every column as a percentage of its own 'All' total.
    df7 = df7 / df7.loc['All'] * 100
    df7 = df7.drop(['All'])
    # crosstab sorts its rows, so the row at position 0 is grade code 0 (L),
    # not H.  Rows are therefore picked by label so that every bar carries the
    # grade its name announces.
    fig8 = go.Figure(data=[
        go.Bar(name='H',
               x=countries,
               y=df7.loc[2].iloc[:17].values,
               marker_color='rgb(152,105,247)'),
        go.Bar(name='M',
               x=countries,
               y=df7.loc[1].iloc[:17].values,
               marker_color='rgb(247,152,105)'),
        go.Bar(name='L',
               x=countries,
               y=df7.loc[0].iloc[:17].values,
               marker_color='rgb(105,247,152)'),
    ])
    fig8.update_layout(barmode='stack', width=900, **dark_page_style)
    plot10_div = plot(fig8, output_type='div')

    # Gender split per nationality.
    df1 = pd.DataFrame({"국적": data['국적'], "성별": data['성별']})
    print(df1)
    df8 = pd.crosstab(df1.성별, df1.국적, margins=True)
    print(df8)
    df8 = df8.drop(['All'])
    # Same sorted-row caveat as above ('F' sorts before 'M'): select by label.
    fig50 = go.Figure(data=[
        go.Bar(name='M',
               x=countries,
               y=df8.loc['M'].iloc[:17].values,
               marker_color='rgb(247,105,200)'),
        go.Bar(name='F',
               x=countries,
               y=df8.loc['F'].iloc[:17].values,
               marker_color='rgb(105,200,247)'),
    ])
    fig50.update_layout(barmode='stack', width=900, **dark_page_style)
    cg_graph = plot(fig50, output_type='div')

    fig9 = px.violin(data,
                     y="과정반복수",
                     x="결석일수",
                     color="성별",
                     box=True,
                     points="all",
                     hover_data=df.columns)
    fig9.update_layout(width=1300, height=300)
    plot11_div = plot(fig9, output_type='div')

    # Absence bucket cross-tabulated against three activity counters.
    # NOTE(review): the x axis re-uses the country names and the '국적' title
    # although the crosstab columns are counter values, not nationalities —
    # this looks like a copy/paste leftover; kept as in the original pending
    # confirmation of the intended chart.
    attendance_divs = {}
    for counter in ('새공지사항확인수', '토론참여수', '발표수'):
        pair = pd.DataFrame({"결석일수": data['결석일수'], counter: data[counter]})
        print(pair)
        counts = pd.crosstab(pair['결석일수'], pair[counter], margins=True)
        print(counts)
        counts = counts.drop(['All'])
        # 'Above-7' sorts before 'Under-7', so rows are selected by label.
        chart = go.Figure(data=[
            go.Bar(name='Under-7',
                   x=countries,
                   y=counts.loc['Under-7'].iloc[:17].values),
            go.Bar(name='Above-7',
                   x=countries,
                   y=counts.loc['Above-7'].iloc[:17].values),
        ])
        chart.update_layout(xaxis_title='국적',
                            yaxis_title=counter,
                            width=700,
                            height=250)
        attendance_divs[counter] = plot(chart, output_type='div')
    cg_graph7 = attendance_divs['새공지사항확인수']
    cg_graph71 = attendance_divs['토론참여수']
    cg_graph72 = attendance_divs['발표수']

    # 3-D scatter of the three activity counters.
    data = student_raw.copy()
    data['결석일수'] = data['결석일수'].map({'Under-7': 0, 'Above-7': 1})
    data['성별'] = data['성별'].map({'M': 0, 'F': 1})
    data['성적'] = data['성적'].map({'L': 0, 'M': 1, 'H': 2})

    fig54 = px.scatter_3d(data[:480],
                          x='발표수',
                          y='토론참여수',
                          z='새공지사항확인수',
                          color='결석일수',
                          size_max=5,
                          symbol='성별')
    # Tight layout.
    fig54.update_layout(margin=dict(l=0, r=0, b=0, t=0),
                        width=800,
                        height=700,
                        plot_bgcolor='rgba(255,255,255,0.5)')
    fig54.update_traces(opacity=1,
                        marker=dict(showscale=True, reversescale=True, cmid=6))
    cg_graph74 = plot(fig54, output_type='div')

    # Model comparison: predict the absence bucket from the activity counters
    # with several regressors and threshold the output at 0.5.
    data = student_raw.copy()
    df2 = pd.DataFrame({
        "결석일수": data['결석일수'].map({'Under-7': 0, 'Above-7': 1}),
        "발표수": data['발표수'],
        "새공지사항확인수": data['새공지사항확인수'],
        "토론참여수": data['토론참여수']
    })
    x = df2[['발표수', '토론참여수', '새공지사항확인수']].values  # 2-D feature matrix
    y = df2['결석일수'].values  # 1-D target, as the estimators expect

    regressors = [
        # The default split criterion is squared error on every scikit-learn
        # release; the explicit 'mse' spelling is rejected by recent ones.
        ('RandomForestRegressor', RandomForestRegressor(n_estimators=1000)),
        ('LinearRegression', LinearRegression()),
        ('KNeighborsRegressor', KNeighborsRegressor(n_neighbors=3)),
        ('XGboost', XGBRegressor(n_estimators=100)),
    ]

    import plotly.figure_factory as ff

    # Columns: model name, first ten predictions, first ten actual values,
    # accuracy.  Each row uses the accuracy of its own model (the random
    # forest row previously showed the XGBoost figure).
    text = [['모델명', '예측값', '실제값', '정확도']]
    for model_name, regressor in regressors:
        head, accuracy = _binarised_head_and_accuracy(regressor.fit(x, y), x, y)
        text.append([model_name, head, y[:10], accuracy])

    colorscale = [[0, '#272D31'], [.5, '#ffffff'], [1, '#ffffff']]
    font = ['#FCFCFC', 'red', 'black', 'black', 'red']

    fig51 = ff.create_table(text, colorscale=colorscale, font_colors=font)
    fig51.layout.width = 1300
    cg_chart = plot(fig51, output_type='div')

    # ------------------------------------------------------------------
    # Section by Gyeong-seok: correlation heat map over label-encoded columns.
    le = LabelEncoder()
    df = data.apply(le.fit_transform)
    # Re-number the grade so that it is ordered: the encoder gives H=0, L=1,
    # M=2; moving H to 3 yields L=1 < M=2 < H=3.
    df.loc[df['성적'] == 0, '성적'] = 3

    # Usual reading of Pearson's r: |r| < 0.1 no correlation, 0.1-0.3 weak,
    # 0.3-0.7 clear, 0.7-1.0 strong (sign gives the direction).
    corr_labels = [
        '성별', '국적', '출생지', '교육단계', '학년', '학급', '전공', '학기',
        '담당부모', '발표수', '과정반복수', '새공지사항확인수', '토론참여수',
        '부모의학교만족도', '결석일수', '성적'
    ]
    fig20 = px.imshow(df.corr(),
                      x=corr_labels,
                      y=corr_labels,
                      width=1000,
                      height=900,
                      color_continuous_scale='RdBu_r')
    fig20.update_layout(width=800, height=800, **dark_page_style)
    plot20_div = plot(fig20, output_type='div')

    # ------------------------------------------------------------------
    # Section by Park Yun-ho: feature importance of a grade classifier.
    education_raw = pd.read_csv(education_csv, encoding='euc-kr')
    data = education_raw.copy()

    label = LabelEncoder()
    column_types = data.dtypes
    for col in column_types[column_types == 'object'].index:
        data[col] = label.fit_transform(data[col])

    x = data.drop('성적', axis=1)
    y = data['성적']
    # Only the training split is used below.
    X_train, _X_test, y_train, _y_test = train_test_split(x,
                                                          y,
                                                          test_size=0.2,
                                                          random_state=52)

    model = XGBClassifier(max_depth=10,
                          learning_rate=0.1,
                          n_estimators=100,
                          seed=10)
    model.fit(X_train, y_train)

    feature_important = model.get_booster().get_score(importance_type='weight')
    keys = list(feature_important.keys())
    values = list(feature_important.values())

    yh_fig1 = go.Figure(
        go.Bar(
            x=values,
            y=keys,
            marker=dict(color='#F88137'),
            name='성적과 관련된 중요도 그래프',
            orientation='h',
        ))
    yh_fig1.update_layout(legend=dict(x=0.029, y=1.038, font_size=10),
                          margin=dict(l=100, r=20, t=70, b=70),
                          paper_bgcolor='rgba(255,255,255,0.2)',
                          plot_bgcolor='rgba(255,255,255,0.2)',
                          font=dict(color='#000', size=30),
                          height=600,
                          yaxis={'categoryorder': 'total ascending'})
    yh_grap1 = plot(yh_fig1, output_type='div')

    # The scatter uses the un-encoded education data.
    yh_fig2 = px.scatter(education_raw,
                         x="발표수",
                         y="토론참여수",
                         color="성적",
                         size='과정반복수',
                         hover_data=['토론참여수'])
    yh_fig2.update_layout(
        paper_bgcolor='rgba(255,255,255,0.2)',
        plot_bgcolor='rgba(255,255,255,0.2)',
        font=dict(color='#000', size=30),
        height=550,
        width=1400,
    )
    yh_grap2 = plot(yh_fig2, output_type='div')

    data = student_raw.copy()
    data['성적'] = data['성적'].map({'H': 2, 'M': 1, 'L': 0})
    fig = px.scatter_3d(data,
                        x='발표수',
                        y='토론참여수',
                        z='과정반복수',
                        color='성적',
                        opacity=0.7)
    # Tight layout.
    fig.update_layout(margin=dict(l=0, r=0, b=0, t=0), width=600, height=700)
    yh_3D = plot(fig, output_type='div')

    return render(
        request, 'full.html', {
            'yh_grap1': yh_grap1,
            'yh_grap2': yh_grap2,
            'yh_3D': yh_3D,
            'cg_graph74': cg_graph74,
            'cg_graph72': cg_graph72,
            'cg_graph71': cg_graph71,
            'cg_graph7': cg_graph7,
            'yj_grap1': plot_div,
            'yj_pie': pie_div,
            'yj_grap2': last_div,
            'yj_grap3': ab_plot,
            'yj_scatter': plot5_div,
            'cg_graph1': plot10_div,
            'cg_graph2': plot11_div,
            'cg_graph3': cg_graph,
            'cg_chart': cg_chart,
            'heatmap': plot20_div
        })

Beispiel #5

Datei anzeigen

Datei: 06_StripBoxViolin.py Projekt: martindwyer/DataScienceResources

import plotly.express as px

# Sample dataset shipped with plotly express.
df = px.data.tips()

# Horizontal strip plot: total bill per time, coloured by smoker.
fig = px.strip(
    data_frame=df,
    x="total_bill",
    y="time",
    orientation="h",
    color="smoker",
)
fig.show()

# Notched box plot: total bill per day, coloured by smoker.
fig = px.box(
    data_frame=df,
    x="day",
    y="total_bill",
    color="smoker",
    notched=True,
)
fig.show()

# Violin plot: tip per smoker value, coloured by sex, with an inner box,
# every point drawn, and all columns available on hover.
fig = px.violin(
    data_frame=df,
    y="tip",
    x="smoker",
    color="sex",
    box=True,
    points="all",
    hover_data=df.columns,
)
fig.show()

Beispiel #6

Datei anzeigen

    def make_grouped_plot(
        self,
        ensembles: list,
        parameters: List[Any],
        plot_type: str = "distribution",
    ) -> go.Figure:
        """Create one small horizontal violin subplot per selected parameter.

        Args:
            ensembles: values of the ``ENSEMBLE`` column to keep.
            parameters: values of the ``PARAMETER`` column to keep; their
                number also drives facet wrapping, spacing and font sizes.
            plot_type: kind of plot to build. Only ``"distribution"`` is
                implemented.

        Returns:
            The figure converted with ``to_dict()``, extended with invisible
            hover traces and passed through ``self.theme.create_themed_layout``.
            Note that this is a plain dict even though the annotation says
            ``go.Figure``.

        Raises:
            ValueError: if ``plot_type`` is not ``"distribution"``. (The
                original code failed later with ``UnboundLocalError`` because
                no figure had been created.)
        """
        if plot_type != "distribution":
            raise ValueError(
                f"Unsupported plot_type {plot_type!r}; "
                "only 'distribution' is implemented")

        df = self.dataframe_melted.copy()
        df = df[df["ENSEMBLE"].isin(ensembles)]
        df = df[df["PARAMETER"].isin(parameters)]
        df = self._sort_parameters_col(df, parameters)

        n_params = len(parameters)
        # Fonts shrink as the number of subplots grows, but never below 10.
        label_font_size = max((18 - (0.4 * n_params)), 10)
        # Smallest x with x * (x + 1) >= n_params, capped at 20 columns.
        n_columns = min(
            min([x for x in range(100) if (x * (x + 1)) >= n_params]),
            20,
        )

        fig = px.violin(
            df,
            x="VALUE",
            facet_col="PARAMETER",
            facet_col_wrap=n_columns,
            facet_row_spacing=max((0.08 - (0.00071 * n_params)), 0.03),
            color="ENSEMBLE",
            color_discrete_sequence=self.colorway,
            custom_data=["PARAMETER"],
        )
        fig.update_xaxes(
            matches=None,
            fixedrange=True,
            title=None,
            showticklabels=n_params <= 100,
            tickangle=0,
            tickfont_size=label_font_size,
        )
        fig.update_yaxes(showticklabels=False)
        fig.for_each_trace(lambda t: t.update(
            y0=0,
            hoveron="violins",
            hoverinfo="none",
            meanline_visible=True,
            orientation="h",
            side="positive",
            width=2,
            points=False,
        ))
        # Facet titles read "PARAMETER=<name>"; keep only the name.
        fig.for_each_annotation(lambda a: a.update(
            text=(a.text.split("=")[-1]),
            visible=n_params <= 42,
            font_size=label_font_size,
        ))

        # Create invisible boxes used for hoverinfo on the violin plots
        # Necessary due to https://github.com/plotly/plotly.js/issues/2145
        ensembles = df["ENSEMBLE"].unique()
        hovertraces = []
        for trace in fig["data"]:
            parameter = trace["customdata"][0][0]
            # The magnitude of the first ensemble's average decides the
            # number format used for all values of this parameter.
            value = abs(
                self.get_stat_value(parameter, ensembles[0],
                                    stat_column="Avg"))
            form = ".1f" if value > 10 else ".2g"
            x_low, x_high = min(trace.x), max(trace.x)
            hover_text = f"<b>{parameter}</b><br>" + "<br>".join(
                f"<b>{ens}:</b><br>"
                "Avg: "
                f"{self.get_stat_value(parameter, ens, stat_column='Avg'):{form}}<br>"
                "Std: "
                f"{self.get_stat_value(parameter, ens, stat_column='Stddev'):{form}}"
                for ens in ensembles)
            hovertraces.append(
                go.Scatter(
                    # Closed rectangle spanning the trace's x range.
                    x=[x_low, x_low, x_high, x_high],
                    y=[0, 1, 1, 0],
                    xaxis=trace.xaxis,
                    yaxis=trace.yaxis,
                    mode="lines",
                    fill="toself",
                    opacity=0,
                    showlegend=False,
                    text=hover_text,
                    hoverinfo="text",
                    hoverlabel=dict(bgcolor="#E6FAEC",
                                    font=dict(color="#243746", size=15)),
                ))
        fig = fig.to_dict()
        fig["data"].extend(hovertraces)
        fig["layout"] = self.theme.create_themed_layout(fig["layout"])
        fig["layout"].update(paper_bgcolor="white", plot_bgcolor="white")
        return fig

Beispiel #7

Datei anzeigen

Datei: Листинг 29.py Projekt: ekaterinafilinav/metod_plotly_soiro2

import json

import pandas as pd
import plotly
import plotly.express as px

# Excel workbook that holds the data to plot.
file = 'задание 14.xls'

# Load the workbook into a DataFrame (pandas reads the first sheet by default).
xl = pd.read_excel(file)
# Violin plot grouped along x by the 'предмет' ("subject") column, showing the
# 'балл' ("score") column on y and coloured by the 'округ' ("district") column.
# An inner box plot is drawn and only outlier points are shown.
fig4 = px.violin(xl,
                 x='предмет',
                 y='балл',
                 color="округ",
                 template='presentation',
                 points='outliers',
                 box=True)

# Display the figure, save it as HTML, and print its JSON form to stdout.
fig4.show()
fig4.write_html('file.html')
print(plotly.io.to_json(fig4))

# Also persist the figure as JSON; PlotlyJSONEncoder serialises the figure
# object, and ensure_ascii=False keeps the Cyrillic labels readable in the file.
with open('file.json', 'w', encoding='utf-8') as f:
    json.dump(fig4,
              f,
              cls=plotly.utils.PlotlyJSONEncoder,
              ensure_ascii=False,
              indent=4)

Beispiel #8

Datei anzeigen

Datei: reports.py Projekt: embo-press/matchpub

 def generate_report(self):
     """Build a violin figure comparing citation distributions of three groups.

     The groups are rows of ``self.found`` labelled through a ``category``
     column: ``'Assemblage'`` (rows whose journal is one of the selected
     external journals), ``'Grand Cru'`` (rows whose decision is
     ``'accepted'``) and ``'Cuvee'`` (the concatenation of both).

     When ``self.selected_external_journal_names`` is empty it is filled, as a
     side effect, with the ``self.n_top`` journals that have the most rows in
     ``self.all_rejects``.

     Returns:
         The plotly figure produced by ``px.violin`` after styling.
     """
     # Row count per journal; "journal" becomes the index of the result.
     per_journal = self.all_rejects[['journal', 'count']].groupby("journal").count()
     if not self.selected_external_journal_names:
         ranked = per_journal.sort_values(by='count', ascending=False)
         self.selected_external_journal_names = list(ranked[:self.n_top].index)

     in_selection = self.found['journal'].isin(
         self.selected_external_journal_names)
     external_journals = self.found[in_selection].copy()
     was_accepted = self.found['decision'] == 'accepted'
     my_journal = self.found[was_accepted].copy()
     # Concatenate before labelling so that every frame gets its own category.
     virtual_journal = pd.concat([external_journals, my_journal])
     for frame, label in (
         (external_journals, 'Assemblage'),
         (virtual_journal, 'Cuvee'),
         (my_journal, 'Grand Cru'),
     ):
         frame['category'] = label

     category_colors = {
         "Grand Cru": "lime",
         "Cuvee": "red",
         'Assemblage': "orange",
     }
     # True/False toggles whether a column is shown in the hover box.
     hover_columns = {
         "citations": False,
         "decision": False,
         "category": False,
         "doi": True,
         "retrieved_title": True,
         "original_title": True,
         "manuscript_nm": True,
     }
     combined = pd.concat([external_journals, virtual_journal, my_journal])
     fig = px.violin(
         combined,
         x="category",
         y="citations",
         color="category",
         category_orders={"category": ['Grand Cru', 'Cuvee', 'Assemblage']},
         # color_discrete_sequence=px.colors.qualitative.G10,
         color_discrete_map=category_colors,
         points="all",
         title=f"Citation distributions",
         template="plotly_dark",
         hover_name="journal",
         hover_data=hover_columns,
     )

     # Symbol names: https://plotly.com/python/marker-style/#custom-marker-symbols
     point_style = {"opacity": 0.4, "size": 5, "symbol": 'circle-open'}
     fig.update_traces(marker=point_style, jitter=0.6, meanline_visible=True)
     fig.update_layout(height=800)
     fig.update_xaxes(tickfont_size=24)
     fig.update_yaxes(tickfont_size=24)
     # fig.add_annotation(
     #     x=1.1, y=0.95, xref="paper", yref="paper",
     #     text=f"Average number<br>of transfers:<br>{avg_transfer:.2f}",
     #     showarrow=False,
     #     font={"size": 12}
     # )
     return fig

Beispiel #9

Datei anzeigen

# Save the figure built by the preceding (not shown) section.
fig.write_html(os.path.join(dir_name, "histogram_histfunc.html"))

# Horizontal strip plot of the tips sample data, coloured by smoker.
tips = px.data.tips()
fig = px.strip(tips, x="total_bill", y="time", orientation="h", color="smoker")
fig.write_html(os.path.join(dir_name, "strip.html"))

# Notched box plot of total bill per day, coloured by smoker.
# NOTE(review): the tips data set is reloaded before every figure; a single
# load would presumably suffice -- confirm nothing mutates it in between.
tips = px.data.tips()
fig = px.box(tips, x="day", y="total_bill", color="smoker", notched=True)
fig.write_html(os.path.join(dir_name, "box.html"))

# Violin plot of tip by smoker, coloured by sex, with an inner box plot and
# all points drawn; every column of the frame is added to the hover box.
tips = px.data.tips()
fig = px.violin(
    tips,
    y="tip",
    x="smoker",
    color="sex",
    box=True,
    points="all",
    hover_data=tips.columns,
)
fig.write_html(os.path.join(dir_name, "violin.html"))

# #### Ternary Coordinates

election = px.data.election()
fig = px.scatter_ternary(
    election,
    a="Joly",
    b="Coderre",
    c="Bergeron",
    color="winner",

Beispiel #10

Datei anzeigen

Datei: app.py Projekt: aliciacisnerosm/airbnb-data-viz

# External CSS applied to the Dash app.
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

# All figures below are built once, at import time, from this CSV file.
df = pd.read_csv('airbnb_NYC_2019.csv')
# Map of the listings positioned by latitude/longitude, coloured by
# neighbourhood group.
fig = px.scatter_mapbox(df,
                        lat="latitude",
                        lon="longitude",
                        color="neighbourhood_group",
                        zoom=9)
fig.update_layout(mapbox_style="carto-positron")
# Sunburst: neighbourhood group -> room type, sized by price.
figSunburst = px.sunburst(df,
                          path=['neighbourhood_group', 'room_type'],
                          values='price')
# Histogram of room types, coloured by neighbourhood group.
room_histogram = px.histogram(df, x="room_type", color="neighbourhood_group")
# Price distribution per neighbourhood group, with an inner box plot.
violin_fig = px.violin(df, y="price", x='neighbourhood_group', box=True)
# Treemap rooted at the constant label 'nyc', then neighbourhood group and
# neighbourhood, sized by price.
treemap_fig = px.treemap(
    df,
    path=[px.Constant('nyc'), 'neighbourhood_group', 'neighbourhood'],
    values='price',
    hover_data=['neighbourhood'])

app.layout = html.Div([
    html.H1(children='Airbnb NYC',
            style={
                'font-family': 'Helvetica',
                'textAlign': 'center'
            }),
    html.Div([
        html.Label('Select a neighborhood', style={'font-family':
                                                   'Helvetica'}),

Beispiel #11

Datei anzeigen

def main():
    """Render the Streamlit app: a sidebar menu plus the page selected in it.

    Pages: "Emotion recognition" (upload a wav file, plot it and show model
    predictions), "Dataset description" (violin plot of ``df_audio.csv``),
    "Our team", and a feedback form (the fallback branch).
    """
    st.title("Speech Emotion Recognition")
    st.sidebar.markdown("### Use the menu to navigate on the site")
    img = Image.open("images/emotion3.jpg")
    with st.sidebar:
        st.image(img, width=300)

    menu = ["Emotion recognition", "Dataset description", "Our team", "Leave feedback"]
    choice = st.sidebar.selectbox("Menu", menu)

    if choice == "Emotion recognition":
        audio_file = st.file_uploader("Upload audio file", type=['wav'])
        # NOTE(review): this branch only sleeps; no recording call is made
        # here. The message says 5 seconds while the sleep lasts 3.
        # NOTE(review): confirm that `spinner` is available on `st.sidebar`.
        if st.button('Record'):
            with st.sidebar.spinner(f'Recording for 5 seconds ....'):
                st.sidebar.write("Recording...")
                time.sleep(3)
            st.sidebar.success("Recording completed")



        if audio_file is not None:
            st.markdown("## Analyzing...")
            st.sidebar.subheader("Audio file")
            file_details = {"Filename": audio_file.name, "FileSize": audio_file.size}
            st.sidebar.write(file_details)
            st.audio(audio_file, format='audio/wav', start_time=0)

            # Sidebar toggles for the optional prediction sections below.
            st.sidebar.markdown("### Settings:")
            show_more_labels = st.sidebar.checkbox("Show prediction for 7 emotions")
            show_mel = st.sidebar.checkbox("Show Mel-spec model prediction")
            show_gender = st.sidebar.checkbox("Show gender prediction")

            # NOTE(review): the path is built from the uploaded file's name;
            # presumably save_audio writes to the same location -- confirm,
            # and consider sanitising the user-supplied name.
            path = os.path.join("audio", audio_file.name)
            save_audio(audio_file)

            # extract features
            wav, sr = librosa.load(path, sr=44100)
            # Element [1] of get_melspec's result is what gets displayed; the
            # disabled mel-spec model section below feeds element [0] to the model.
            Xdb = get_melspec(path)[1]

            # NOTE(review): the figure is created as a 1x2 grid, but the
            # plt.subplot(211)/(212) calls below address a 2x1 layout --
            # confirm the intended arrangement.
            fig, ax = plt.subplots(1, 2, figsize=(12, 4), sharex=True)
            fig.set_facecolor('#d1d1e0')

            # Upper panel: waveform with both axes and all spines hidden.
            plt.subplot(211)
            plt.title("Wave-form")
            librosa.display.waveplot(wav, sr=sr)
            plt.gca().axes.get_yaxis().set_visible(False)
            plt.gca().axes.get_xaxis().set_visible(False)
            plt.gca().axes.spines["right"].set_visible(False)
            plt.gca().axes.spines["left"].set_visible(False)
            plt.gca().axes.spines["top"].set_visible(False)
            plt.gca().axes.spines["bottom"].set_visible(False)
            plt.gca().axes.set_facecolor('#d1d1e0')

            # Lower panel: spectrogram; the x axis and bottom spine stay visible.
            plt.subplot(212)
            plt.title("Mel-log-spectrogram")
            librosa.display.specshow(Xdb, sr=sr, x_axis='time', y_axis='hz')
            plt.gca().axes.get_yaxis().set_visible(False)
            plt.gca().axes.spines["right"].set_visible(False)
            plt.gca().axes.spines["left"].set_visible(False)
            plt.gca().axes.spines["top"].set_visible(False)
            st.write(fig)

            st.markdown("## Getting the result...")

            # mfccs model results
            # `model` is not defined in this function; presumably a
            # module-level model loaded elsewhere -- verify.
            with st.spinner('Wait for it...'):
                mfccs = get_mfccs(path, model.input_shape[-1])
                # Add a leading batch axis of size 1 before predicting.
                mfccs = mfccs.reshape(1, *mfccs.shape)
                pred = model.predict(mfccs)[0]
                txt = "MFCCs\n" + get_title(pred)
                fig = plt.figure(figsize=(10, 4))
                plot_emotions(data6=pred, fig=fig, title=txt)
                st.write(fig)

            if show_more_labels:
                with st.spinner('Wait for it...'):
                    # Loaded from disk each time this section is rendered.
                    model_ = load_model("model4.h5")
                    # Unlike the model above, this one is sized by
                    # input_shape[-2] and receives the transposed features.
                    mfccs_ = get_mfccs(path, model_.input_shape[-2])
                    mfccs_ = mfccs_.T.reshape(1, *mfccs_.T.shape)
                    pred = model_.predict(mfccs_)[0]
                    txt = "MFCCs\n" + get_title(pred, CAT7)
                    fig = plt.figure(figsize=(10, 4))
                    plot_polar(fig, predictions=pred, categories=CAT7, title=txt)
                    st.write(fig)

            if show_gender:
                with st.spinner('Wait for it...'):
                    gmodel = load_model("model_mw.h5")
                    gmfccs = get_mfccs(path, gmodel.input_shape[-1])
                    gmfccs = gmfccs.reshape(1, *gmfccs.shape)
                    gpred = gmodel.predict(gmfccs)[0]
                    # Label and image file, indexed by the argmax of the prediction.
                    gdict = [["female","woman.png"], ["male","man.png"]]
                    ind = gpred.argmax()
                    txt = "Predicted gender: " + gdict[ind][0]
                    st.subheader(txt)
                    img = Image.open("images/"+ gdict[ind][1])
                    st.image(img, width=300)

            if show_mel:
                #################################################################################
                st.subheader("This section was disabled")
                st.write("Since we are currently using a free tier instance of AWS, "
                         "we are not going to deploy this model.\n\n"
                         "If you want to try it we recommend to clone our GitHub repo")
                link = '[GitHub](https://github.com/CyberMaryVer/speech-emotion-webapp)'
                st.markdown(link, unsafe_allow_html=True)

                st.write("After that, just uncomment this section in the main file "
                         "to use the mel-spectrograms model:")
                # The snippet is only displayed to the user via st.code; it is not executed.
                code = '''
                # tmodel = load_model("tmodel_all.h5")
                #
                # # mel-spec model results
                # mel = get_melspec(path)[0]
                # mel = mel.reshape(1, *mel.shape)
                # tpred = tmodel.predict(mel)[0]
                # txt = "Mel-spectrograms" + get_title(tpred)
                # fig = plt.figure(figsize=(10, 4))
                # plot_emotions(data6=tpred, fig=fig, title=txt)
                # st.write(fig)'''
                st.code(code, language='python')
                #################################################################################

                ############## Uncomment this section below to enable the model #################
                # tmodel = load_model("tmodel_all.h5")
                #
                # # mel-spec model results
                # mel = get_melspec(path)[0]
                # mel = mel.reshape(1, *mel.shape)
                # tpred = tmodel.predict(mel)[0]
                # txt = "Mel-spectrograms\n" + get_title(tpred)
                # fig = plt.figure(figsize=(10, 4))
                # plot_emotions(data6=tpred, fig=fig, title=txt)
                # st.write(fig)
                #################################################################################

    elif choice == "Dataset description":
        st.subheader("Dataset analysis")
        link = '[GitHub](https://github.com/talbaram3192/Emotion_Recognition)'
        st.markdown(link, unsafe_allow_html=True)

        # Violin plot of the "source" column per "emotion4" value, coloured by
        # "actors"; every column of the CSV is added to the hover box.
        df = pd.read_csv("df_audio.csv")
        fig = px.violin(df, y="source", x="emotion4", color="actors", box=True, points="all", hover_data=df.columns)
        st.plotly_chart(fig, use_container_width=True)
        # st.write(df.source.value_counts())
        # st.write(df.actors.value_counts())
        # st.write(df.emotion4.value_counts())

    elif choice == "Our team":
        st.subheader("Our team")
        st.info("*****@*****.**")
        st.info("*****@*****.**")
        st.info("*****@*****.**")
        st.balloons()

    else:
        # Fallback branch: the remaining menu entry, "Leave feedback".
        st.subheader("Leave feedback")
        user_input = st.text_area("Your feedback is greatly appreciated")
        user_name = st.selectbox("Choose your personality", ["checker1","checker2","checker3","checker4"])
        if st.button("Submit"):
            # The selected name and the free text are logged as a single line.
            log_file(user_name + " " + user_input)
            st.success(f"Message\n\"\"\"{user_input}\"\"\"\nwas sent")
            thankimg = Image.open("images/sticky.png")
            st.image(thankimg)

Beispiel #12

Datei anzeigen

def _is_numeric(values):
    """Return True when the NumPy array *values* has a numeric dtype."""
    # ``dtype == np.number`` is not a valid "is numeric" test: depending on the
    # NumPy version it matches float64 only (with a deprecation warning) or
    # nothing at all. ``issubdtype`` is the documented hierarchy check.
    return np.issubdtype(values.dtype, np.number)


def _append_figures(*figures):
    """Append each figure to ./p_graph.html as an HTML fragment."""
    with open("./p_graph.html", "a") as f:
        for figure in figures:
            f.write(figure.to_html(full_html=False, include_plotlyjs="cdn"))


def _report_linear_regression(column, y, predictor, feature_name, n_bins=20):
    """Fit OLS of *y* on one predictor, print the summary and save two plots."""
    fitted = statsmodels.api.OLS(y, predictor).fit()
    print(f"Variable: {feature_name}")
    print(fitted.summary())
    # Index 1 selects the predictor term; index 0 is the constant that
    # add_constant puts in front of it.
    t_value = round(fitted.tvalues[1], 6)
    p_value = "{:.6e}".format(fitted.pvalues[1])

    fig = px.scatter(x=column, y=y, trendline="ols")
    fig2 = px.histogram(x=column, nbins=n_bins)

    # Mean squared difference between the per-bin means and the overall mean.
    # include_lowest=True keeps the minimum value inside the first bin, and the
    # pandas .sum() skips empty bins; the earlier version dropped the minimum,
    # divided by all(column) (a bool) instead of the element count, and
    # propagated NaN through the built-in sum().
    # NOTE(review): the previous expression squared the *sum* of differences;
    # summing the squared differences is presumably what was intended -- confirm.
    _, bins = np.histogram(column, bins=n_bins)
    binned = pd.cut(column, bins, include_lowest=True)
    bin_means = pd.Series(column).groupby(binned, observed=False).mean()
    pop_avg = column.mean()
    mean_sq_diff = ((bin_means - pop_avg) ** 2).sum() / n_bins

    fig.update_layout(
        title=f"Variable:{feature_name}:(t-value={t_value})(p-value={p_value})",
        xaxis_title=f"Variable: {feature_name}",
        yaxis_title="y",
    )
    fig2.update_layout(
        title=f"Variable:{feature_name}:Mean difference = {mean_sq_diff}",
        xaxis_title=f"Variable: {feature_name}",
        yaxis_title="count",
    )
    _append_figures(fig, fig2)


def _report_logistic_regression(column, y, predictor, feature_name, label):
    """Fit a logistic regression, print its fit score and save a violin plot."""
    fitted = LogisticRegression(random_state=1234).fit(predictor, y)
    score = fitted.score(predictor, y)
    print(f"Variable: {feature_name} Fit Score")
    print(score)
    fig = px.violin(x=y, y=column)
    fig.update_layout(
        title=f"{label}{feature_name},Logistic Regression Fit Score={score}",
        xaxis_title=f"Variable: {feature_name}",
        yaxis_title="y",
    )
    _append_figures(fig)


def _print_feature_ranking(importances):
    """Print the features ordered from most to least important."""
    print("*****Feature ranking:*****")
    ranked = np.argsort(importances)[::-1]
    for rank, feature_idx in enumerate(ranked, start=1):
        print("%d. feature %d (%f)" %
              (rank, feature_idx, importances[feature_idx]))


def main():
    """Analyse every feature of the diabetes data set against the target.

    For each feature a regression is fitted and plotted (linear when both the
    predictors and the response are numeric, logistic otherwise); the plots are
    appended to ``./p_graph.html``. Afterwards a random forest (regressor for
    the all-numeric case, classifier otherwise) is fitted on all features and
    the feature-importance ranking is printed, followed by the elapsed time.
    """
    start_t = datetime.now()
    # Input dataset
    dataset = datasets.load_diabetes()
    X = dataset.data
    y = dataset.target

    # The dtypes do not change per feature, so classify them once.
    x_is_numeric = _is_numeric(X)
    both_numeric = x_is_numeric and _is_numeric(y)

    for idx, column in enumerate(X.T):
        feature_name = dataset.feature_names[idx]
        predictor = statsmodels.api.add_constant(column)

        if both_numeric:
            # Continuous predictor, X, and continuous response, y
            _report_linear_regression(column, y, predictor, feature_name)
        else:
            # At least one of X / y is categorical. The three former branches
            # were identical except for the title prefix, which carried a
            # space only in the "continuous X, categorical y" case.
            label = "Variable: " if x_is_numeric else "Variable:"
            _report_logistic_regression(column, y, predictor, feature_name,
                                        label)

    if both_numeric:
        # Continuous predictor, X, and continuous response, y
        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.2,
                                                            random_state=0)
        sc = StandardScaler()
        X_train = sc.fit_transform(X_train)
        X_test = sc.transform(X_test)
        regressor = RandomForestRegressor(n_estimators=200, random_state=0)
        regressor.fit(X_train, y_train)
        y_pred = regressor.predict(X_test)
        print("******Random forest regression performance*******")
        print("Mean Absolute Error:",
              metrics.mean_absolute_error(y_test, y_pred))
        print("Mean Squared Error:",
              metrics.mean_squared_error(y_test, y_pred))
        print(
            "Root Mean Squared Error:",
            np.sqrt(metrics.mean_squared_error(y_test, y_pred)),
        )
        importances = regressor.feature_importances_
    else:
        # Any categorical combination: the three former branches were identical.
        rf = RandomForestClassifier(max_depth=2, random_state=0)
        rf.fit(X, y)
        importances = rf.feature_importances_

    _print_feature_ranking(importances)

    print(f" {(datetime.now() - start_t)} seconds")

Beispiel #13

Datei anzeigen

Datei: eda_utils.py Projekt: open-data-science/ods_channel_stats_eda

 def plot_age(self):
     """Return a violin plot of age for the first five countries to plot.

     The rows of ``self.df`` whose ``Country`` is among the first five entries
     of ``self.countries_to_plot["Country"]`` are copied, sorted by ascending
     ``Age`` and drawn with one violin per country.
     """
     leading_countries = self.countries_to_plot["Country"][:5]
     in_leading = self.df['Country'].isin(leading_countries)
     selected = self.df.loc[in_leading].copy()
     ordered_by_age = selected.sort_values(by="Age", ascending=True)
     figure = px.violin(
         ordered_by_age,
         x="Country",
         y="Age",
         title="Relationship between countries and age",
         hover_data=self.df.columns,
     )
     return figure

Beispiel #14

Datei anzeigen

Datei: Project.py Projekt: harshitpaliwal-07/Streaming-Services-Content-Comparison

# ### Plotting
# #### (1) To get the Service with the most content.

# In[9]:

# Bar chart of the number of titles available on each streaming service.
tv_shows_combined.groupby('StreamingOn').Title.count().plot(kind='bar')

# #### (2) Violin charts to gauge the content rating (IMDb) and freshness (Rotten Tomatoes) across all the streaming services.

# In[10]:

# One violin figure per rating column; list position equals subplot row.
figure = [
    px.violin(tv_shows_both_ratings,
              x='StreamingOn',
              y=rating_column,
              color='StreamingOn')
    for rating_column in ('IMDb', 'Rotten Tomatoes')
]
fig = make_subplots(rows=2, cols=4, shared_yaxes=True)

# Copy the first four traces of each figure into the cells of its row.
for i, rating_figure in enumerate(figure):
    for j in range(4):
        fig.add_trace(rating_figure['data'][j], row=i + 1, col=j + 1)

fig.update_layout(autosize=False, width=800, height=800)
fig.show()

Beispiel #15

Datei anzeigen

# Correlation matrix
st.subheader("Feature correlation")
st.write("Correlation matrix that shows the features that have highest \
    linear correlation with metallicity. Each quantity is calculated purely \
    based on the crystal composition.")
# Absolute correlations over all columns, used to pick the five columns that
# correlate most with "is_metal"; the matrix is then recomputed on just those.
corr = data.corr().abs()
top_5 = corr["is_metal"].sort_values(ascending=False).index[:5]
corr = data[top_5].corr().abs()
corr_fig = px.imshow(corr)
st.plotly_chart(corr_fig)

# Feature relationships
# Positions 1-3 of the ranking; position 0 is skipped (presumably "is_metal"
# itself, which correlates perfectly with itself).
top_3 = corr["is_metal"].sort_values(ascending=False).index[1:4]
for col in top_3:
    box_fig = px.violin(data,
                        x="Metal vs Nonmetal",
                        y=col,
                        box=True,
                        color="Metal vs Nonmetal")
    st.plotly_chart(box_fig)

# Pivot tables
st.subheader("Pivot tables")
st.write("Pivot tables help us understand the statistical relationships "
         "between a target variable and other descriptors of a sample.")
# Aggregations are named by string: passing NumPy callables (np.mean, np.median,
# np.var) is deprecated in recent pandas, and the string names keep the current
# behaviour and the same column labels.
table = pd.pivot_table(data,
                       index="Metal vs Nonmetal",
                       values=top_3,
                       aggfunc=["mean", "median", "var"])
st.write(table)

Beispiel #16

Datei anzeigen

Datei: m5_comp_submission.py Projekt: Sandy4321/Kaggle_M5_Competition

# Reshape the sales table to long form: one row per identifier combination and
# 'd' column, with the value stored in 'sold'; rows containing NaN are dropped.
df = pd.melt(
    sales,
    id_vars=['id', 'item_id', 'dept_id', 'cat_id', 'store_id', 'state_id'],
    var_name='d',
    value_name='sold',
).dropna()

# Attach the calendar and price information to every row.
df = pd.merge(df, calendar, on='d', how='left')
df = pd.merge(df, prices, on=['store_id', 'item_id', 'wm_yr_wk'], how='left')


# --- Exploratory data analysis ---
# Average selling price of every item, per state and store.
group_price_store = (
    df.groupby(['state_id', 'store_id', 'item_id'], as_index=False)['sell_price']
    .mean()
    .dropna()
)

# Price distribution per store, coloured by state; hovering a point shows
# which item it belongs to.
fig = px.violin(
    group_price_store,
    x='store_id',
    y='sell_price',
    color='state_id',
    box=True,
    hover_name='item_id',
)
fig.update_xaxes(title_text='Store')
fig.update_yaxes(title_text='Selling Price($)')
fig.update_layout(
    template='seaborn',
    title='Distribution of Items prices wrt Stores',
    legend_title_text='State',
)
fig.show()


# --- Feature engineering: label encoding ---
# Remember, for every identifier column, which category each code stands for.
d_id = dict(zip(df['id'].cat.codes, df['id']))
d_item_id = dict(zip(df['item_id'].cat.codes, df['item_id']))
d_dept_id = dict(zip(df['dept_id'].cat.codes, df['dept_id']))
d_cat_id = dict(zip(df['cat_id'].cat.codes, df['cat_id']))
d_store_id = dict(zip(df['store_id'].cat.codes, df['store_id']))
d_state_id = dict(zip(df['state_id'].cat.codes, df['state_id']))

Beispiel #17

Datei anzeigen

Datei: filter_view.py Projekt: gfnarvaezh/ds4a_project_team73

def test_update_graph(df_2019, var_to_filter, filter, var_to_see, score_to_see):
    """Return a violin plot of *score_to_see* per *var_to_see* for filtered data.

    The frame returned by ``filter_function(df_2019, var_to_filter, filter)``
    is sorted in place by *var_to_see* and then plotted with an inner box plot.
    """
    filtered = filter_function(df_2019, var_to_filter, filter)
    filtered.sort_values(var_to_see, inplace=True)
    figure = px.violin(filtered, x=var_to_see, y=score_to_see, box=True)
    return figure

Beispiel #18

Datei anzeigen

Datei: AnalysisHelpers.py Projekt: mberk06/SeniorThesis

    def violinPlot(self, df, yName, title=None):
        """Create and show a violin plot of one column.

        Args:
            df: data frame holding the data to plot.
            yName: name of the column drawn on the y axis.
            title: optional figure title; ``None`` (the default) leaves the
                figure untitled.
        """
        # Forward the title: the parameter used to be accepted but ignored.
        fig = px.violin(df, y=yName, title=title)
        fig.show()

Beispiel #19

Datei anzeigen

Datei: team_3_nlp_model.py Projekt: rahulgadekar070/NLP-Abusive-Non-Abusive-classifier-project

# Drop the columns that are not used in the analysis below.
email.drop(['Unnamed: 0', 'filename', 'Message-ID'], axis=1, inplace=True)

# Per-class summary statistics (only displayed when run in a notebook).
email.groupby('Class').describe().T

email['label'] = email['Class'].map({'Non Abusive': 0, 'Abusive': 1})     # add 'label' column

sns.set_style('whitegrid')
# Pass the column as ``x=``: newer seaborn releases treat the first positional
# argument as ``data``, so the bare positional call no longer counts classes.
sns.countplot(x=email['Class'])
plt.title('Distribution of Abusive and Non_Abusive')

# Make a new column to show the length of content messages
email['length'] = email['content'].apply(len)

import plotly.express as px
fig = px.violin(data_frame=email, y="length", points="all", color="Class",
                width=800, height=600)
fig.show()
# it shows median length of Abusive mail = 591.5 & Non_Abusive = 735

colors = ['#ff9999', '#66b3ff']
email["Class"].value_counts().plot(kind='pie', colors=colors, explode=(0.1, 0), autopct='%1.1f%%')

# The 30 most frequent whitespace-separated words of each class.
from collections import Counter
count1 = Counter(" ".join(email[email['Class'] == 'Non Abusive']["content"]).split()).most_common(30)
df1 = pd.DataFrame.from_dict(count1)
df1 = df1.rename(columns={0: "words in Non-Abusive", 1: "count"})

count2 = Counter(" ".join(email[email['Class'] == 'Abusive']["content"]).split()).most_common(30)
df2 = pd.DataFrame.from_dict(count2)
df2 = df2.rename(columns={0: "words in Abusive", 1: "count_"})

Beispiel #20

Datei anzeigen

def main():
    """Render the Streamlit CSV-exploration app.

    The sidebar radio selects one of three pages: summary statistics for an
    uploaded CSV, interactive charts for an uploaded CSV, or the references
    page. All output is written through the ``st`` API; nothing is returned.
    """
    st.sidebar.title('O que você gostaria de ver?')
    opcao = ['Análise Estatística', 'Gráficos', 'Referências']
    choice = st.sidebar.radio(' ', opcao)

    st.sidebar.title('Data')
    st.sidebar.info("""
        Caso não tenha um arquivo csv por perto, você pode baixar por aqui:\n
        [Titanic](http://kopasite.net/up/1/titanic.csv)\n
        [Iris](http://kopasite.net/up/0/iris.csv)\n
        [Tips](http://kopasite.net/up/0/tips.csv)\n
        """)
    st.sidebar.title('Créditos')
    st.sidebar.info("""
    Feito por Raul.\n
    [github](https://github.com/taikutsu91)
    [linkedin](https://www.linkedin.com/in/raul-avelino-959b16149)\n
    Duvidas ou informações:
    [email protected] ou 
    [email protected]\n
    Para mais informações do projeto ver Referências.
    """)
    if choice == 'Análise Estatística':
        st.header('Análise Estatística')
        st.subheader("""
        Para começar carregue um arquivo csv.
        """)
        data = st.file_uploader(' ', type=['csv'])
        if data is not None:
            df = pd.read_csv(data)
            st.write(df.head())
            # Columns whose dtype is not ``object`` are offered as numeric.
            aux = pd.DataFrame({'colunas': df.columns, 'tipos': df.dtypes})
            col_num = list(aux[aux['tipos'] != 'object']['colunas'])
            colunas = list(df.columns)

            if st.checkbox('Tamanho DataFrame'):
                st.write('Número de linhas', df.shape[0])
                st.write('Número de colunas', df.shape[1])

            if st.checkbox('Mostrar colunas'):
                st.write(colunas)

            if st.checkbox('Mostrar tipos das colunas'):
                st.write(df.dtypes)

            if st.checkbox('Mostrar Sumário'):
                st.write(df.describe())

            if st.checkbox('Mostrar % valores nulos'):
                st.write(df.isnull().mean() * 100)

            if st.checkbox('Escolha uma coluna númerica'):
                coluna_num = st.selectbox('Coluna', col_num)
                st.write('Média', df[coluna_num].mean())
                st.write('Moda', df[coluna_num].mode()[0])
                st.write('Desvio Padrão', df[coluna_num].std())
                st.write('Maior Valor', df[coluna_num].max())
                st.write('Menor Valor', df[coluna_num].min())
                st.write('% datos faltantes',
                         (df[coluna_num].isnull().mean() * 100))
                st.write('Valores únicos na coluna', df[coluna_num].nunique())

    elif choice == 'Gráficos':
        st.header('Visualização dos Dados.')
        st.subheader('Para começar carregue um arquivo csv.')
        st.info(""" Alguns gráficos terão uma opção para selecionar a cor.
                A opção cor vai determinar qual coluna no dataframe deve ser usada para codificação de cores, 
                adicionando o parâmetro cor, indica para o gráfico que você deseja colorir os dados de maneira diferente com base na coluna selecionada.
                """)
        data = st.file_uploader(' ', type=['csv'], key='graficos')
        if data is not None:
            df = pd.read_csv(data)
            st.write(df.head())
            aux = pd.DataFrame({'colunas': df.columns, 'tipos': df.dtypes})
            col_num = list(aux[aux['tipos'] != 'object']['colunas'])
            col_obj = list(aux[aux['tipos'] == 'object']['colunas'])
            colunas = list(df.columns)
            # Low-cardinality columns (fewer than 10 distinct values) are the
            # candidates for the colour selector.
            hue = df.columns[df.nunique() < 10].values.tolist()

            st.info("""
            Caso seu dataframe seja muito grande, você pode selecionar um sample do mesmo,
             escolhendo qual a porcentagem dos dados que serão usadas.
            """)
            pct = st.slider("Sample size % :", 0, 100, 100)
            frame = df.sample(frac=(pct / 100))

            if st.checkbox('Tamanho do DataFrame'):
                st.write('Número de linhas', frame.shape[0])
                st.write('Número de colunas', frame.shape[1])

            if st.checkbox('Scatter Plot'):
                # Each widget needs its own key: the three selectors used to
                # share key='unique', which Streamlit rejects as a duplicate
                # widget key.
                eixo_x = st.selectbox('Selecione Eixo X',
                                      col_num,
                                      key='scatter_eixo_x')
                eixo_y = st.selectbox('Selecione Eixo Y',
                                      col_num,
                                      key='scatter_eixo_y')
                cor = st.selectbox('Cor', hue, key='scatter_cor')
                # NOTE(review): ``cmap`` is not defined in this function;
                # presumably a module-level template name - confirm.
                fig = px.scatter(frame,
                                 x=eixo_x,
                                 y=eixo_y,
                                 color=cor,
                                 template=cmap,
                                 title=f'Scatter Plot {eixo_x} x {eixo_y}')
                st.plotly_chart(fig, use_container_width=True)

            if st.checkbox('Histogram'):
                st.info("""
                Caso o parâmetro cor tenha algum dado faltante o mesmo será preenchido,
                 automaticamente com a moda da coluna selecionada.
                """)
                coluna_num = st.selectbox('Selecione Eixo X',
                                          col_num,
                                          key='coluna_num')
                color = st.selectbox('Cor', hue, key='color')
                # ``mode()`` returns a Series; passing it to ``fillna`` aligns
                # on the index instead of filling every gap, so fill with the
                # first modal value (when the column has one).
                cor_hist = frame[color]
                modas = cor_hist.mode()
                if not modas.empty:
                    cor_hist = cor_hist.fillna(modas.iloc[0])
                fig1 = px.histogram(frame,
                                    x=coluna_num,
                                    color=cor_hist,
                                    histfunc='sum',
                                    title=f'Histogram {coluna_num}')
                st.plotly_chart(fig1, use_container_width=True)

            if st.checkbox('Counts'):
                st.info("""
                Este gráfico retorna, os valores únicos do dataframe em porcetagem,
                com base na coluna selecionada.
                
                """)
                value = st.selectbox('Selecione uma coluna',
                                     colunas,
                                     key='value')
                plot_value = frame[value].value_counts(normalize=True)
                # NOTE(review): the result is handed to st.plotly_chart, so
                # this presumably relies on the pandas plotting backend being
                # set to plotly elsewhere - confirm.
                fig = plot_value.plot(kind='bar',
                                      title=f'Count Coluna: {value}')
                st.plotly_chart(fig, use_container_width=True)

            if st.checkbox('Bar Plot'):
                cols = st.selectbox('Selecione uma coluna',
                                    colunas,
                                    key='cols')
                target = st.selectbox('Cor', hue, key='target')
                gb = pd.crosstab(frame[cols], frame[target])
                fig = gb.plot(kind='bar', title=f'Bar Plot {cols} x {target}')
                st.plotly_chart(fig, use_container_width=True)

            if st.checkbox('Violin Plot'):
                coluna_num1 = st.selectbox('Selecione o Eixo X',
                                           col_obj,
                                           key='coluna_num1')
                coluna_x1 = st.selectbox('Selecione o Eixo Y',
                                         col_num,
                                         key='coluna_x1')
                color1 = st.selectbox('Cor', hue, key='color1')
                fig = px.violin(
                    frame,
                    x=coluna_num1,
                    y=coluna_x1,
                    color=color1,
                    title=f'Violin Plot {coluna_num1} x {coluna_x1}')
                st.plotly_chart(fig, use_container_width=True)

            if st.checkbox('Box Plot'):
                coluna_num2 = st.selectbox('Selecione o Eixo X',
                                           col_obj,
                                           key='coluna_num2')
                coluna_x2 = st.selectbox('Selecione o Eixo Y',
                                         col_num,
                                         key='coluna_x2')
                color2 = st.selectbox('Cor', hue, key='color2')
                fig = px.box(frame,
                             x=coluna_num2,
                             y=coluna_x2,
                             color=color2,
                             title=f'Box Plot {coluna_num2} x {coluna_x2}')
                st.plotly_chart(fig, use_container_width=True)

            if st.checkbox('Line Plot'):
                st.info("""
                Line Plot é normalmanete usado para medir variações atráves do tempo,
                caso seu dataframe não tenha uma coluna que seja datetime , não é recomendado o uso do Line Plot,
                use Scatter Plot ou Bar Plot.
                """)
                eixo_x2 = st.selectbox('Selecione o Eixo X',
                                       colunas,
                                       key='eixo_x2')
                eixo_y2 = st.selectbox('Selecione o Eixo Y',
                                       colunas,
                                       key='eixo_y2')
                fig = frame.plot(kind='line',
                                 x=eixo_x2,
                                 y=eixo_y2,
                                 title=f'Line Plot {eixo_x2} x {eixo_y2}')
                st.plotly_chart(fig, use_container_width=True)

            if st.checkbox('Heatmap'):
                metodo = st.selectbox('Selecione o Método de correlação',
                                      ['pearson', 'kendall', 'spearman'],
                                      key='metodo')
                # Correlate only the non-object columns, as the column-wise
                # heatmap below does; text columns make ``corr`` raise on
                # current pandas.
                corr = frame[col_num].corr(method=metodo)
                ax = sns.heatmap(corr, annot=True, fmt=".2f", cmap="YlGnBu")
                # Hand the figure over explicitly instead of relying on the
                # global pyplot figure; clear it so the next heatmap starts
                # from an empty canvas.
                st.pyplot(ax.get_figure(), clear_figure=True)

            if st.checkbox('Heatmap Colunas'):
                st.info(
                    """Você pode selecionar quais colunas você quer usar para fazer seu heatmap.
                """)
                metodo1 = st.selectbox('Selecione o Método de correlação',
                                       ['pearson', 'kendall', 'spearman'],
                                       key='metodo1')

                # Distinct key: 'cols' is already used by the Bar Plot
                # selector, and both sections can be open at once.
                cols = st.multiselect('Selecione colunas que serão usadas',
                                      col_num,
                                      key='heatmap_cols')

                # Nothing selected yet means there is nothing to correlate.
                if cols:
                    corr = frame[cols].corr(method=metodo1)
                    ax = sns.heatmap(corr,
                                     annot=True,
                                     fmt=".2f",
                                     cmap="YlGnBu")
                    st.pyplot(ax.get_figure(), clear_figure=True)

    elif choice == 'Referências':
        st.image(codenation, width=800, format='PNG')
        st.header('Referências')
        st.write("""
        Esse projeto foi proposto pelo Professor [Túlio Souza](https://www.linkedin.com/in/tuliovieira/) da [Codenation](https://www.codenation.dev/), na semana 3 do Acelera Dev Data Science turma do meio do ano de 2020.
        Para saber mais sobre a codenation e seus "Aceleramentos" de carreira e ver o trabalho incrível que eles fazem, podem entra no [Site Codenation](https://www.codenation.dev/), [facebook](https://pt-br.facebook.com/dev.codenation) ou pelo
        [linkedin](https://br.linkedin.com/company/code-nation).
        """)
        st.subheader('Gráficos')
        st.write("""
        Alguns gráficos desse projeto foram feitos usando a biblioteca plotly e sua extensão para pandas.
        https://plotly.com/python/bar-charts/ \n
        https://plotly.com/python/pandas-backend/ \n
        
        Heatmap foi feito usando seaborn.\n
        https://seaborn.pydata.org/generated/seaborn.heatmap.html\n
        
        Os datasets no menu lateral foram arquivados no site kopasite.net, uma ótima forma de guardar arquivos curtos e com download simples.\n
        http://kopasite.net/upload
        
        Para informações adicionais do streamlit acesse: \n
        https://docs.streamlit.io/ \n
        https://www.streamlit.io/ \n
        
        Tips e iris data set foram retirados.\n
        https://github.com/mwaskom/seaborn-data\n
        Titanic data set.\n
        https://www.kaggle.com/c/titanic\n
        
        Deploy do projeto feito no heroku:\n
        https://www.youtube.com/watch?v=mQ7rGcE766k
        
        
        Obrigado por ver meu primeiro app e obrigado codenation pela oportunidade de aprender, qualquer dúvida manda um email.\n
        [email protected] ou [email protected]
        """)

Beispiel #21

Datei anzeigen

        visible_table = cache_get(session_id, CACHE_KEYS['visible_table'])
        filtered_table = filter_all(
            data,
            num_keys,
            num_values,
            cat_keys,
            cat_values,
            visible_table,
            visible_list
        )

        if c_violin == 'None':
            violin_fig = px.violin(filtered_table,
                                   x=x_key,
                                   y=y_key,
                                   box=True,
                                   violinmode='group',
                                   labels={x_key: x_label,
                                           y_key: y_label})
        else:
            violin_fig = px.violin(filtered_table,
                                   x=x_key,
                                   y=y_key,
                                   color=c_violin,
                                   box=True,
                                   violinmode='group',
                                   labels={x_key: x_label,
                                           y_key: y_label})
    else:
        violin_fig = {
            'data': [{'type': 'histogram',

Beispiel #22

Datei anzeigen

                             value=[],  #list(self.volumes["SOURCE"])[0]
                             placeholder="Select months",
                             style={'backgroundColor': '#1E1E1E'},
                             className='monthselector'),
                     ],
                     style={'color': '#1E1E1E'})
             ]),
         html.Div(className='eight columns div-for-charts',
                  children=[
                      dcc.Graph(id='up_down',
                                config={'displayModeBar': False},
                                animate=True,
                                figure=px.violin(
                                    template='plotly_dark').update_layout({
                                        'plot_bgcolor':
                                        'rgba(0, 0, 0, 0)',
                                        'paper_bgcolor':
                                        'rgba(0, 0, 0, 0)'
                                    }))
                  ])
     ]),
 html.Br(),
 html.Br(),
 html.Br(),
 html.Br(),
 html.Div(
     className='row',
     children=[
         html.Div(
             className='four columns div-user-controls',
             children=[

Beispiel #23

Datei anzeigen

def main():
    """Explore the iris data set and fit two classifiers on it.

    Reads the CSV, prints summary statistics, shows five plotly figures and
    prints the two fitted scikit-learn pipelines. Returns ``None``, so
    ``sys.exit(main())`` exits with status 0.
    """
    # reading in the data
    column_names = [
        "sepal_length",
        "sepal_width",
        "petal_length",
        "petal_width",
        "class",
    ]
    iris_df = pd.read_csv(
        "C:/Users/KRATI PATIDAR/Desktop/BDA696_MLE/iris.data", names=column_names
    )
    print(iris_df.head())

    # summary statistics
    iris_arr = iris_df.to_numpy()
    print(iris_arr)

    # The last column holds the text label; keep the measurements apart so
    # the numeric reductions below never touch strings.
    feature_df = iris_df[column_names[:-1]]
    features = feature_df.to_numpy()

    # Use the column-wise DataFrame reductions directly: np.mean/np.min/np.max
    # on a mixed-dtype frame fail on current pandas.
    print("Mean = ", feature_df.mean())
    print("Minimum = ", iris_df.min())
    print("Maximum = ", iris_df.max())

    print("First quantile = ", np.quantile(features, q=0.25, axis=0))
    print("Second quantile = ", np.quantile(features, q=0.50, axis=0))
    print("Third quantile = ", np.quantile(features, q=0.75, axis=0))
    # q=1 is the column maximum.
    print("Fourth quantile = ", np.quantile(features, q=1, axis=0))

    print(iris_df["class"].unique())

    # making plots

    plot_1 = px.scatter(
        iris_df,
        x="sepal_width",
        y="sepal_length",
        size="petal_length",
        hover_data=["petal_width"],
        color="class",
        title="Scatter Plot for all variables for different classes",
    )

    plot_1.show()

    plot_2 = px.line(
        iris_df,
        x="petal_width",
        y="petal_length",
        color="class",
        title="Line Plot for Petal Width and Petal Length for all classes",
    )
    plot_2.show()

    plot_3 = px.violin(
        iris_df,
        x="sepal_width",
        y="sepal_length",
        color="class",
        title="Violin Plot for sepal length and sepal width for all classes",
    )
    plot_3.show()

    plot_4 = px.scatter_3d(
        iris_df,
        x="sepal_length",
        y="sepal_width",
        z="petal_length",
        color="class",
        title="3-D Scatter Plot for sepal length, sepal width and petal length",
    )
    plot_4.show()

    plot_5 = px.line_3d(
        iris_df,
        x="petal_width",
        y="petal_length",
        z="sepal_width",
        hover_data=["sepal_length"],
        color="class",
        title="3-D Line Plot for all variables of all classes ",
    )

    plot_5.show()

    # normalization, random forest and decision tree classifiers

    x = features
    y = iris_df["class"].values

    # pipeline_1 for random forest classifier

    pipeline_1 = Pipeline(
        [
            ("normalize", Normalizer()),
            ("randomforest", RandomForestClassifier(random_state=1234)),
        ]
    )

    print(pipeline_1.fit(x, y))

    # pipeline_2 for decision tree classifier

    pipeline_2 = Pipeline(
        [
            ("normalize", Normalizer()),
            ("decisiontree", DecisionTreeClassifier()),
        ]
    )

    print(pipeline_2.fit(x, y))


# The guard must live at module level: nested inside main() it never started
# the script and would have recursed if main() were called directly.
if __name__ == "__main__":
    sys.exit(main())

Beispiel #24

Datei anzeigen

# Dash application configured with the stylesheets supplied above.
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

# Titanic passenger data in "long-form" layout; see
# https://plotly.com/python/px-arguments/ for the other accepted layouts.
df = pd.read_csv(
    'https://raw.githubusercontent.com/daniel-dc-cd/data_science/master/module_3_Python/data/titanic.csv'
)
# Swap the 0/1 survival codes for readable labels in a single pass.
df["Survived"] = df["Survived"].replace({
    0: "Didn't Survived",
    1: "Did Survived",
})

# Age against fare, coloured by passenger class.
fig1 = px.scatter(df, x="Age", y="Fare", color="Pclass", log_x=False)

# Age distribution per survival label, split by sex; every point is drawn and
# all columns are available on hover.
fig2 = px.violin(
    df,
    x="Survived",
    y="Age",
    color="Sex",
    box=True,
    points="all",
    hover_data=df.columns,
)

# Survival counts broken down by passenger class and by sex respectively.
fig3 = px.histogram(df, x="Survived", color="Pclass")
fig4 = px.histogram(df, x="Survived", color="Sex")

app.layout = html.Div(children=[
    # All elements from the top of the page
    html.Div([
        html.H1(children='Yasir Alhejaili Project'),
        html.Div(children='''
            A scatter plot to see the relation between Age , Fare and each class
        '''),

Beispiel #25

Datei anzeigen

Datei: health_data_comparer.py Projekt: GermanCM/Covid19_data_analyzer

    def return_tests_and_deaths_violin_figure(self, multiselection):
        """Build a violin figure of tests carried out per country over time.

        For every (country, date) row the date is repeated as many times as
        the truncated 'Cumulative total per thousand' value of that date, so
        the width of each country's violin along the date axis follows the
        number of tests.

        Args:
            multiselection: Iterable of country names; 'US' is translated to
                'United States' before the tests data is requested.

        Returns:
            The plotly figure, or ``None`` when any step raised (the
            exception is logged, not propagated).
        """
        try:
            import pandas as pd
            import plotly.express as px
            # NOTE(review): not used in this method; kept in case importing
            # the module has side effects - confirm and remove.
            from page_numbers_normalized_by_population import normalized_numbers_by_population_evolution as population_num  # noqa: F401

            value_col = 'Cumulative total per thousand'
            violin_cols = [
                'Date', 'Country', 'Tests_per_thousand', 'Violin_color'
            ]

            multiselection_tests_data = [
                'United States' if x == 'US' else x for x in multiselection
            ]
            selected_countries_tests_data = self.get_tests_evolution_data(
                multiselection_tests_data)

            # Keep only rows that carry a value, selecting the needed columns
            # in the same step so nothing is assigned into a filtered slice.
            has_value = selected_countries_tests_data[value_col].notna()
            tests_sub_data = selected_countries_tests_data.loc[
                has_value, ['Date', 'Country', value_col]]

            country_frames = []
            for country in tests_sub_data.Country.unique():
                country_sub_data = tests_sub_data[tests_sub_data.Country ==
                                                  country]
                # Every row of a date uses the value of that date's last row,
                # truncated to an integer; negative values yield no rows.
                repeats = (country_sub_data.groupby(
                    'Date', sort=False)[value_col].transform('last').astype(
                        int).clip(lower=0).to_numpy())
                country_tests_violin_df = pd.DataFrame({
                    'Date':
                    country_sub_data['Date'].to_numpy().repeat(repeats),
                    'Country':
                    country,
                    'Tests_per_thousand':
                    repeats.repeat(repeats),
                })
                # One colour per country: its largest per-date value.
                country_tests_violin_df[
                    'Violin_color'] = country_tests_violin_df[
                        'Tests_per_thousand'].max()
                country_frames.append(country_tests_violin_df)

            # DataFrame.append was removed in pandas 2.0; concatenate once.
            if country_frames:
                violin_tests_data = pd.concat(country_frames,
                                              ignore_index=True)
            else:
                violin_tests_data = pd.DataFrame(columns=violin_cols)

            fig = px.violin(
                violin_tests_data,
                y="Date",
                x="Country",
                box=True,
                hover_data=violin_tests_data.columns,
                title='Tests carried out per country',
                color='Violin_color')

            fig.update_layout(margin={
                "r": 10,
                "t": 60,
                "l": 10,
                "b": 10
            },
                              height=600,
                              width=710,
                              showlegend=False,
                              paper_bgcolor="#EBF2EC")
            return fig

        except Exception as exc:
            # Best effort by design: failures are logged and the caller gets
            # None instead of an exception.
            logger.exception('raised exception at {}: {}'.format(
                logger.name + '.' + 'return_tests_and_deaths_violin_figure',
                exc))
            return None

Beispiel #26

Datei anzeigen

Datei: plots.py Projekt: williamaurreav23/No-code-ML-platform-DashB.ai

    def build_graph(graph_type, x_axis, y_axis, color):
        """Create the plotly express figure selected by ``graph_type``.

        Args:
            graph_type: One of 'LINE', 'SCATTER', 'BAR', 'AREA', 'HEET',
                'BOX', 'PIE', 'HIST', 'SMX' or 'VIOLIN'.
            x_axis: Column for the x axis (the values column for 'PIE').
            y_axis: Column for the y axis (the names column for 'PIE').
            color: Column used for colouring; ignored by 'HEET' and 'PIE'.

        Returns:
            The figure. Every type except 'HEET' also gets a centred
            '<x_axis> vs <y_axis>' title and a y-axis label.

        Raises:
            ValueError: If ``graph_type`` is not one of the supported values.
        """
        # ``df`` comes from the enclosing scope.
        dff = df

        def _with_title(fig):
            """Apply the shared y-axis label and centred title to ``fig``."""
            fig.update_layout(yaxis={'title': y_axis},
                              title={
                                  'text': x_axis + ' vs ' + y_axis,
                                  'font': {
                                      'size': 28
                                  },
                                  'x': 0.5,
                                  'xanchor': 'center'
                              })
            return fig

        if graph_type == 'LINE':
            return _with_title(
                px.line(dff, x=x_axis, y=y_axis, color=color, height=600))

        if graph_type == 'SCATTER':
            return _with_title(
                px.scatter(dff, x=x_axis, y=y_axis, color=color, height=600))

        if graph_type == 'BAR':
            fig = px.bar(dff, x=x_axis, y=y_axis, color=color, height=600)
            fig.update_xaxes(type='category')
            return _with_title(fig)

        if graph_type == 'AREA':
            return _with_title(
                px.area(dff, x=x_axis, y=y_axis, color=color, height=600))

        if graph_type == 'HEET':
            # density_heatmap takes marginal_x / marginal_y; the former
            # ``marginal=`` keyword does not exist and raised TypeError.
            return px.density_heatmap(dff,
                                      x=x_axis,
                                      y=y_axis,
                                      nbinsx=20,
                                      nbinsy=20,
                                      marginal_x="histogram",
                                      marginal_y="histogram")

        if graph_type == 'BOX':
            return _with_title(
                px.box(dff, x=x_axis, y=y_axis, color=color, height=600))

        if graph_type == 'PIE':
            return _with_title(
                px.pie(dff, values=x_axis, names=y_axis, height=600))

        if graph_type == 'HIST':
            return _with_title(
                px.histogram(dff,
                             x=x_axis,
                             y=y_axis,
                             marginal="box",  # can be box or violin
                             color=color,
                             height=600))

        if graph_type == 'SMX':
            return _with_title(px.scatter_matrix(dff, color=color))

        if graph_type == 'VIOLIN':
            return _with_title(
                px.violin(dff,
                          x=x_axis,
                          y=y_axis,
                          box=True,
                          color=color,
                          height=600))

        # Fail with a clear message instead of the UnboundLocalError that
        # returning an unassigned ``fig`` produced.
        raise ValueError('Unsupported graph_type: {!r}'.format(graph_type))

Beispiel #27

Datei anzeigen

def main():
    """Render the Streamlit speech-emotion web application.

    Draws the sidebar menu and then renders exactly one page depending on the
    selected entry:

    * "Emotion Recognition" - upload an audio file (or use ``test.wav``),
      show its wave-form, MFCCs and mel-spectrogram, and plot the model
      predictions selected through the sidebar check-boxes.
    * "Project description" - links, data-set notes and a violin plot built
      from ``df_audio.csv``.
    * "Our team" - contact cards.
    * "Leave feedback" - free-text feedback that is passed to ``log_file``.
    * anything else ("Relax") - a random quote next to a random picture.

    The function relies on module-level names defined elsewhere in the file
    (``st``, ``Image``, ``librosa``, ``plt``, ``np``, ``components``,
    ``model``, ``load_model``, ``save_audio``, ``get_melspec``, ``get_mfccs``,
    ``get_title``, ``color_dict``, ``plot_colored_polar``, ``log_file``,
    ``CAT3``/``CAT6``/``CAT7`` and ``COLOR_DICT``).
    """
    side_img = Image.open("images/emotion3.jpg")
    with st.sidebar:
        st.image(side_img, width=300)
    st.sidebar.subheader("Menu")
    website_menu = st.sidebar.selectbox(
        "Menu", ("Emotion Recognition", "Project description", "Our team",
                 "Leave feedback", "Relax"))
    st.set_option('deprecation.showfileUploaderEncoding', False)

    if website_menu == "Emotion Recognition":
        st.sidebar.subheader("Model")
        model_type = st.sidebar.selectbox("How would you like to predict?",
                                          ("mfccs", "mel-specs"))
        em3 = em6 = em7 = gender = False
        st.sidebar.subheader("Settings")

        st.markdown("## Upload the file")
        with st.container():
            col1, col2 = st.columns(2)
            with col1:
                audio_file = st.file_uploader("Upload audio file",
                                              type=['wav', 'mp3', 'ogg'])
                if audio_file is not None:
                    os.makedirs("audio", exist_ok=True)
                    path = os.path.join("audio", audio_file.name)
                    # Judging by the messages below, save_audio returns 0 on
                    # success and 1 when the file is too large - confirm
                    # against its definition.
                    if_save_audio = save_audio(audio_file)
                    if if_save_audio == 1:
                        st.warning("File size is too large. Try another file.")
                        # No features were extracted: drop the upload so the
                        # plotting code below does not touch unbound
                        # wav / sr / Xdb / mfccs.
                        audio_file = None
                    elif if_save_audio == 0:
                        # display audio
                        st.audio(audio_file, format='audio/wav', start_time=0)
                        # extract features
                        try:
                            wav, sr = librosa.load(path, sr=44100)
                            Xdb = get_melspec(path)[1]
                            # Keyword form works on every librosa release;
                            # recent ones reject a positional signal.
                            mfccs = librosa.feature.mfcc(y=wav, sr=sr)
                        except Exception as e:
                            audio_file = None
                            st.error(
                                f"Error {e} - wrong format of the file. Try another .wav file."
                            )
                    else:
                        st.error("Unknown error")
                        # Same reasoning as for the oversize case above.
                        audio_file = None
                elif st.button("Try test file"):
                    wav, sr = librosa.load("test.wav", sr=44100)
                    Xdb = get_melspec("test.wav")[1]
                    mfccs = librosa.feature.mfcc(y=wav, sr=sr)
                    # display audio
                    st.audio("test.wav", format='audio/wav', start_time=0)
                    path = "test.wav"
                    # Sentinel string marking the bundled sample; checked
                    # further down before reading upload attributes.
                    audio_file = "test"
            with col2:
                if audio_file is not None:
                    fig = plt.figure(figsize=(10, 2))
                    fig.set_facecolor('#d1d1e0')
                    plt.title("Wave-form")
                    # NOTE(review): newer librosa releases appear to have
                    # replaced waveplot with waveshow - confirm against the
                    # pinned librosa version before upgrading.
                    librosa.display.waveplot(wav, sr=44100)
                    plt.gca().axes.get_yaxis().set_visible(False)
                    plt.gca().axes.get_xaxis().set_visible(False)
                    plt.gca().axes.spines["right"].set_visible(False)
                    plt.gca().axes.spines["left"].set_visible(False)
                    plt.gca().axes.spines["top"].set_visible(False)
                    plt.gca().axes.spines["bottom"].set_visible(False)
                    plt.gca().axes.set_facecolor('#d1d1e0')
                    st.write(fig)
            #     st.write("Record audio file")
            #     if st.button('Record'):
            #         with st.spinner(f'Recording for 5 seconds ....'):
            #             st.write("Recording...")
            #             time.sleep(3)
            #         st.success("Recording completed")
            #         st.write("Error while loading the file")

        if model_type == "mfccs":
            em3 = st.sidebar.checkbox("3 emotions", True)
            em6 = st.sidebar.checkbox("6 emotions", True)
            em7 = st.sidebar.checkbox("7 emotions")
            gender = st.sidebar.checkbox("gender")

        elif model_type == "mel-specs":
            st.sidebar.warning("This model is temporarily disabled")

        else:
            st.sidebar.warning("This model is temporarily disabled")

        # with st.sidebar.expander("Change colors"):
        #     st.sidebar.write("Use this options after you got the plots")
        #     col1, col2, col3, col4, col5, col6, col7 = st.columns(7)
        #
        #     with col1:
        #         a = st.color_picker("Angry", value="#FF0000")
        #     with col2:
        #         f = st.color_picker("Fear", value="#800080")
        #     with col3:
        #         d = st.color_picker("Disgust", value="#A52A2A")
        #     with col4:
        #         sd = st.color_picker("Sad", value="#ADD8E6")
        #     with col5:
        #         n = st.color_picker("Neutral", value="#808080")
        #     with col6:
        #         sp = st.color_picker("Surprise", value="#FFA500")
        #     with col7:
        #         h = st.color_picker("Happy", value="#008000")
        #     if st.button("Update colors"):
        #         global COLOR_DICT
        #         COLOR_DICT = {"neutral": n,
        #                       "positive": h,
        #                       "happy": h,
        #                       "surprise": sp,
        #                       "fear": f,
        #                       "negative": a,
        #                       "angry": a,
        #                       "sad": sd,
        #                       "disgust": d}
        #         st.success(COLOR_DICT)

        if audio_file is not None:
            st.markdown("## Analyzing...")
            # Only a real upload carries name/size attributes; the bundled
            # sample is represented by the plain string "test".
            if audio_file != "test":
                st.sidebar.subheader("Audio file")
                file_details = {
                    "Filename": audio_file.name,
                    "FileSize": audio_file.size
                }
                st.sidebar.write(file_details)

            with st.container():
                col1, col2 = st.columns(2)
                with col1:
                    fig = plt.figure(figsize=(10, 2))
                    fig.set_facecolor('#d1d1e0')
                    plt.title("MFCCs")
                    librosa.display.specshow(mfccs, sr=sr, x_axis='time')
                    plt.gca().axes.get_yaxis().set_visible(False)
                    plt.gca().axes.spines["right"].set_visible(False)
                    plt.gca().axes.spines["left"].set_visible(False)
                    plt.gca().axes.spines["top"].set_visible(False)
                    st.write(fig)
                with col2:
                    fig2 = plt.figure(figsize=(10, 2))
                    fig2.set_facecolor('#d1d1e0')
                    plt.title("Mel-log-spectrogram")
                    librosa.display.specshow(Xdb,
                                             sr=sr,
                                             x_axis='time',
                                             y_axis='hz')
                    plt.gca().axes.get_yaxis().set_visible(False)
                    plt.gca().axes.spines["right"].set_visible(False)
                    plt.gca().axes.spines["left"].set_visible(False)
                    plt.gca().axes.spines["top"].set_visible(False)
                    st.write(fig2)

            if model_type == "mfccs":
                st.markdown("## Predictions")
                with st.container():
                    col1, col2, col3, col4 = st.columns(4)
                    # Re-extract MFCCs sized to the model input and add a
                    # leading batch axis of length 1 before predicting.
                    mfccs = get_mfccs(path, model.input_shape[-1])
                    mfccs = mfccs.reshape(1, *mfccs.shape)
                    pred = model.predict(mfccs)[0]

                    with col1:
                        if em3:
                            # Fold the six class scores into three groups.
                            # NOTE(review): the index-to-emotion mapping is
                            # presumably the order of CAT6 - confirm.
                            pos = pred[3] + pred[5] * .5
                            neu = pred[2] + pred[5] * .5 + pred[4] * .5
                            neg = pred[0] + pred[1] + pred[4] * .5
                            data3 = np.array([pos, neu, neg])
                            txt = "MFCCs\n" + get_title(data3, CAT3)
                            fig = plt.figure(figsize=(5, 5))
                            COLORS = color_dict(COLOR_DICT)
                            plot_colored_polar(fig,
                                               predictions=data3,
                                               categories=CAT3,
                                               title=txt,
                                               colors=COLORS)
                            # plot_polar(fig, predictions=data3, categories=CAT3,
                            # title=txt, colors=COLORS)
                            st.write(fig)
                    with col2:
                        if em6:
                            txt = "MFCCs\n" + get_title(pred, CAT6)
                            fig2 = plt.figure(figsize=(5, 5))
                            COLORS = color_dict(COLOR_DICT)
                            plot_colored_polar(fig2,
                                               predictions=pred,
                                               categories=CAT6,
                                               title=txt,
                                               colors=COLORS)
                            # plot_polar(fig2, predictions=pred, categories=CAT6,
                            #            title=txt, colors=COLORS)
                            st.write(fig2)
                    with col3:
                        if em7:
                            # This model is fed the transposed feature
                            # matrix, sized from input_shape[-2].
                            model_ = load_model("model4.h5")
                            mfccs_ = get_mfccs(path, model_.input_shape[-2])
                            mfccs_ = mfccs_.T.reshape(1, *mfccs_.T.shape)
                            pred_ = model_.predict(mfccs_)[0]
                            txt = "MFCCs\n" + get_title(pred_, CAT7)
                            fig3 = plt.figure(figsize=(5, 5))
                            COLORS = color_dict(COLOR_DICT)
                            plot_colored_polar(fig3,
                                               predictions=pred_,
                                               categories=CAT7,
                                               title=txt,
                                               colors=COLORS)
                            # plot_polar(fig3, predictions=pred_, categories=CAT7,
                            #            title=txt, colors=COLORS)
                            st.write(fig3)
                    with col4:
                        if gender:
                            with st.spinner('Wait for it...'):
                                gmodel = load_model("model_mw.h5")
                                gmfccs = get_mfccs(path,
                                                   gmodel.input_shape[-1])
                                gmfccs = gmfccs.reshape(1, *gmfccs.shape)
                                gpred = gmodel.predict(gmfccs)[0]
                                # Row index = predicted class; each row is
                                # (label, icon file name).
                                gdict = [["female", "woman.png"],
                                         ["male", "man.png"]]
                                ind = gpred.argmax()
                                txt = "Predicted gender: " + gdict[ind][0]
                                img = Image.open("images/" + gdict[ind][1])

                                fig4 = plt.figure(figsize=(3, 3))
                                fig4.set_facecolor('#d1d1e0')
                                plt.title(txt)
                                plt.imshow(img)
                                plt.axis("off")
                                st.write(fig4)

            # if model_type == "mel-specs":
            # st.markdown("## Predictions")
            # st.warning("The model in test mode. It may not be working properly.")
            # if st.checkbox("I'm OK with it"):
            #     try:
            #         with st.spinner("Wait... It can take some time"):
            #             global tmodel
            #             tmodel = load_model_cache("tmodel_all.h5")
            #             fig, tpred = plot_melspec(path, tmodel)
            #         col1, col2, col3 = st.columns(3)
            #         with col1:
            #             st.markdown("### Emotional spectrum")
            #             dimg = Image.open("images/spectrum.png")
            #             st.image(dimg, use_column_width=True)
            #         with col2:
            #             fig_, tpred_ = plot_melspec(path=path,
            #                                         tmodel=tmodel,
            #                                         three=True)
            #             st.write(fig_, use_column_width=True)
            #         with col3:
            #             st.write(fig, use_column_width=True)
            #     except Exception as e:
            #         st.error(f"Error {e}, model is not loaded")

    elif website_menu == "Project description":
        import pandas as pd
        import plotly.express as px
        st.title("Project description")
        st.subheader("GitHub")
        link = '[GitHub repository of the web-application]' \
               '(https://github.com/CyberMaryVer/speech-emotion-webapp)'
        st.markdown(link, unsafe_allow_html=True)

        st.subheader("Theory")
        link = '[Theory behind - Medium article]' \
               '(https://talbaram3192.medium.com/classifying-emotions-using-audio-recordings-and-python-434e748a95eb)'
        st.markdown(link + ":clap::clap::clap: Tal!", unsafe_allow_html=True)
        with st.expander("See Wikipedia definition"):
            components.iframe(
                "https://en.wikipedia.org/wiki/Emotion_recognition",
                height=320,
                scrolling=True)

        st.subheader("Dataset")
        txt = """
            This web-application is a part of the final **Data Mining** project for **ITC Fellow Program 2020**. 

            Datasets used in this project
            * Crowd-sourced Emotional Mutimodal Actors Dataset (**Crema-D**)
            * Ryerson Audio-Visual Database of Emotional Speech and Song (**Ravdess**)
            * Surrey Audio-Visual Expressed Emotion (**Savee**)
            * Toronto emotional speech set (**Tess**)    
            """
        st.markdown(txt, unsafe_allow_html=True)

        df = pd.read_csv("df_audio.csv")
        fig = px.violin(df,
                        y="source",
                        x="emotion4",
                        color="actors",
                        box=True,
                        points="all",
                        hover_data=df.columns)
        st.plotly_chart(fig, use_container_width=True)

        st.subheader("FYI")
        st.write(
            "Since we are currently using a free tier instance of AWS, "
            "we disabled mel-spec and ensemble models.\n\n"
            "If you want to try them we recommend to clone our GitHub repo")
        st.code(
            "git clone https://github.com/CyberMaryVer/speech-emotion-webapp.git",
            language='bash')

        st.write(
            "After that, just uncomment the relevant sections in the app.py file "
            "to use these models:")

    elif website_menu == "Our team":
        st.subheader("Our team")
        st.balloons()
        col1, col2 = st.columns([3, 2])
        with col1:
            st.info("*****@*****.**")
            st.info("*****@*****.**")
            st.info("*****@*****.**")
        with col2:
            liimg = Image.open("images/LI-Logo.png")
            st.image(liimg)
            st.markdown(
                ":speech_balloon: [Maria Startseva](https://www.linkedin.com/in/maria-startseva)",
                unsafe_allow_html=True)
            st.markdown(
                ":speech_balloon: [Tal Baram](https://www.linkedin.com/in/tal-baram-b00b66180)",
                unsafe_allow_html=True)
            st.markdown(
                ":speech_balloon: [Asher Holder](https://www.linkedin.com/in/asher-holder-526a05173)",
                unsafe_allow_html=True)

    elif website_menu == "Leave feedback":
        st.subheader("Leave feedback")
        user_input = st.text_area("Your feedback is greatly appreciated")
        user_name = st.selectbox(
            "Choose your personality",
            ["checker1", "checker2", "checker3", "checker4"])

        if st.button("Submit"):
            st.success(f"Message\n\"\"\"{user_input}\"\"\"\nwas sent")

            # NOTE(review): two hard-coded "magic" messages sent as checker4
            # dump the log files to the page instead of storing feedback.
            # Anyone who knows the strings can read the logs.
            if user_input == "log123456" and user_name == "checker4":
                with open("log0.txt", "r", encoding="utf8") as f:
                    st.text(f.read())
            elif user_input == "feedback123456" and user_name == "checker4":
                with open("log.txt", "r", encoding="utf8") as f:
                    st.text(f.read())
            else:
                log_file(user_name + " " + user_input)
                thankimg = Image.open("images/sticky.png")
                st.image(thankimg)

    else:
        import requests
        import json

        url = 'http://api.quotable.io/random'
        if st.button("get random mood"):
            with st.container():
                col1, col2 = st.columns(2)
                n = np.random.randint(1, 1000, 1)[0]
                with col1:
                    # Built-in quotes: used for every n divisible by 5 and as
                    # the fallback when the web service cannot be used.
                    quotes = {
                        "Good job and almost done":
                        "checker1",
                        "Great start!!":
                        "checker2",
                        "Please make corrections base on the following observation":
                        "checker3",
                        "DO NOT train with test data":
                        "folk wisdom",
                        "good work, but no docstrings":
                        "checker4",
                        "Well done!":
                        "checker3",
                        "For the sake of reproducibility, I recommend setting the random seed":
                        "checker1"
                    }
                    if n % 5 == 0:
                        a = np.random.choice(list(quotes.keys()), 1)[0]
                        quote, author = a, quotes[a]
                    else:
                        try:
                            # Bounded wait: without a timeout a stalled
                            # service would hang the page indefinitely.
                            r = requests.get(url=url, timeout=5)
                            text = json.loads(r.text)
                            quote, author = text['content'], text['author']
                        except Exception:
                            # Deliberate best-effort: any network or parsing
                            # problem falls back to a built-in quote.
                            a = np.random.choice(list(quotes.keys()), 1)[0]
                            quote, author = a, quotes[a]
                    st.markdown(f"## *{quote}*")
                    st.markdown(f"### ***{author}***")
                with col2:
                    st.image(image=f"https://picsum.photos/800/600?random={n}")

Beispiel #28

Datei anzeigen

Datei: BDA696_Assignment1.py Projekt: vbihare/BDA696_MachineLearningEngineering

def main():
    """Explore the Iris data set, plot it and compare two classifiers.

    Reads ``Iris.data`` from the working directory, prints basic summary
    information (head, info, missing values, mean/max/min, quartiles), shows
    several plotly / matplotlib / seaborn figures, then trains a
    RandomForestClassifier and an LDA classifier (each behind a Normalizer)
    on an 80/20 split and prints their confusion matrices and accuracies.

    Uses the module-level imports ``pd``, ``np``, ``px``, ``plt``, ``sns``,
    ``train_test_split``, ``Pipeline``, ``Normalizer``,
    ``RandomForestClassifier``, ``LDA``, ``confusion_matrix`` and
    ``accuracy_score``.
    """
    columns = ["Sepal_Length", "Sepal_Width", "Petal_Length", "Petal_Width", "Species"]
    iris_data = pd.read_csv("Iris.data", names=columns)
    print(iris_data.head(10))
    # DataFrame.info() prints its report itself and returns None, so wrapping
    # it in print() only added a stray "None" line.
    iris_data.info()

    # getting the number of missing values
    missing = iris_data.isnull().sum()
    print(missing)

    # calculating the summary statistics
    # Call the DataFrame reductions directly: routing them through
    # np.mean/np.max/np.min on a frame with a string column gives
    # version-dependent results and can raise on recent pandas.
    print("Mean statistics:", iris_data.mean(numeric_only=True))
    print("Maximum value:", iris_data.max())
    print("Minimum value:", iris_data.min())

    # Calculating quantiles
    # Take only the four measurement columns as a float array; converting the
    # whole frame would yield an object array because of "Species".
    features = iris_data.iloc[:, :-1].to_numpy(dtype=float)
    print("25 percent- ", np.quantile(features, 0.25, axis=0))
    print("50 percent-", np.quantile(features, 0.50, axis=0))
    print("75 percent-", np.quantile(features, 0.75, axis=0))

    # Visualizing the data
    plot1 = px.violin(
        iris_data,
        y="Sepal_Length",
        x="Species",
        color="Species",
        hover_data=iris_data.columns,
        title="Violin plot to visualize Sepal Length ",
    )
    plot1.show()

    plot2 = px.scatter_3d(
        iris_data,
        x="Sepal_Width",
        y="Sepal_Length",
        z="Petal_Width",
        color="Species",
        hover_data=iris_data.columns,
        title="3d Scatter plot between Sepal Width, " "Sepal length and Petal width",
    )
    plot2.show()

    plot3 = px.histogram(
        iris_data,
        x="Sepal_Length",
        y="Sepal_Width",
        color="Species",
        title="Distribution of Sepal length and Sepal width with respect to species",
    )
    plot3.show()

    iris_data.plot(kind="scatter", x="Sepal_Length", y="Petal_Length")
    plt.title("Scatter plot between Sepal Length and petal Length")
    plt.show()

    sns.set_style("whitegrid")
    sns.FacetGrid(iris_data, hue="Species", height=6).map(
        plt.scatter, "Sepal_Length", "Petal_Length"
    ).add_legend()
    plt.title(
        "Scatter plot between Sepal Length and Petal Length, with different species"
    )
    plt.show()

    # Splitting the dataset into train and test
    X_train, X_test, y_train, y_test = train_test_split(
        iris_data.iloc[:, :-1].values,
        iris_data["Species"],
        test_size=0.2,
        random_state=1234,
    )
    # Making a Pipeline for Normalizing the data and fitting RandomForestClassifier
    pipeline = Pipeline(
        [("Normalize", Normalizer()), ("rf", RandomForestClassifier(random_state=1234))]
    )
    pipeline.fit(X_train, y_train)
    predict = pipeline.predict(X_test)

    # Generating the confusion matrix and accuracy score for the RandomForestClassifier
    cm = confusion_matrix(y_test, predict)
    print("Confusion Matrix- RandomForestClassifier", cm)
    accuracy = accuracy_score(y_test, predict)
    print("Accuracy of RandomForestClassifier", accuracy)

    # Making a pipeline for Normalizing the data and fitting LDAClassifier
    pipeline = Pipeline([("Normalize", Normalizer()), ("lda", LDA(n_components=1))])
    pipeline.fit(X_train, y_train)
    predict = pipeline.predict(X_test)
    # Generating the confusion matrix and accuracy score for the LDAClassifier
    cm = confusion_matrix(y_test, predict)
    print("Confusion Matrix- LDAClassifier", cm)
    accuracy = accuracy_score(y_test, predict)
    print("Accuracy of LDAClassifier", accuracy)

Beispiel #29

Datei anzeigen

Datei: Data Visualization.py Projekt: ricardoval4/Data-Analysis

# Notebook export: `dataset`, `sns` and `plt` are expected to be defined by
# earlier cells that are not part of this excerpt.

# The next line was a notebook shell magic ("!pip install ..."), which is a
# syntax error in a plain .py file. Install sweetviz from a shell, or restore
# the leading "!" when running inside Jupyter/Colab.
# !pip install sweetviz

import sweetviz as sv

# Profile a 20% random sample of the four numeric columns.
my_report = sv.analyze(dataset[['profit', 'sales', 'Cost', 'quantity']].sample(frac=0.2))

my_report.show_notebook()

# Row counts for the two selected countries only.
country = dataset.loc[dataset.country.isin(['Germany', 'United Kingdom'])]
sns.countplot(y='country', data=country, order=['Germany', 'United Kingdom'], palette=['Red', 'yellow'])
plt.show()
# country['country'].value_counts().index

# --- Plotly Express ---

import plotly.express as px

fig = px.violin(dataset, y='sales', x='ship_mode')
fig.show()

# Select the column before aggregating: summing every column first is wasted
# work and can fail on columns that cannot be summed.
country = dataset.groupby('country')['quantity'].sum().reset_index()
px.bar(country, x='country', y='quantity').show()

# --- Folium ---

import folium

# Bare expressions only render in a notebook; they have no visible effect
# when the file is run as a script.
dataset.head()

m = folium.Map(location=[49.006890, 8.403653])
m

Beispiel #30

Datei anzeigen

Datei: views.py Projekt: johnenderton/capstone

def plot_graphs(request):
    print("plot_graphs function")
    global ppd
    fig = None
    fig_error = False

    blank_choice = (None, '---------')
    features_name = [(i, i) for i in ppd.getFeatureName()]
    category_features_name = [(i, i) for i in ppd.get_category_list()]
    numeric_features_name = [(i, i) for i in ppd.get_numeric_features_name()]
    features_name.append(blank_choice)
    category_features_name.append(blank_choice)
    numeric_features_name.append(blank_choice)

    if request.method == 'POST':
        if 'scatter_btn' in request.POST:
            print("scatter form")
            print(request.POST)
            scatter = Scatter_form(request.POST)
            scatter.fields['x'].choices = features_name
            scatter.fields['y'].choices = features_name
            scatter.fields['facet_row'].choices = category_features_name
            scatter.fields['facet_col'].choices = category_features_name
            scatter.fields['color'].choices = category_features_name
            scatter.fields['size'].choices = numeric_features_name
            print(scatter.errors)

            if scatter.is_valid():
                print("form valid")
                x = scatter.cleaned_data['x']
                y = scatter.cleaned_data['y']
                facet_row = scatter.cleaned_data['facet_row']
                facet_col = scatter.cleaned_data['facet_col']
                facet_col_wrap = scatter.cleaned_data['facet_col_wrap']
                color = scatter.cleaned_data['color']
                size = scatter.cleaned_data['size']

                data_feature_list = list()
                data_feature_list.append(x)
                data_feature_list.append(y)
                if len(size) > 0:
                    data_feature_list.append(size)
                if len(color) > 0:
                    data_feature_list.append(color)
                if len(facet_row) > 0:
                    data_feature_list.append(facet_row)
                if len(facet_col) > 0:
                    data_feature_list.append(facet_col)

                data = pd.DataFrame(ppd.get_features_data(data_feature_list))
                try:
                    fig = px.scatter(
                        data_frame=data,
                        x=x,
                        y=y,
                        facet_row=None if len(facet_row) == 0 else facet_row,
                        facet_col=None if len(facet_col) == 0 else facet_col,
                        facet_col_wrap=facet_col_wrap,
                        title=scatter.cleaned_data['title'],
                        color=None if len(color) == 0 else color,
                        size=None if len(size) == 0 else size,
                        log_x=scatter.cleaned_data['log_x'],
                        log_y=scatter.cleaned_data['log_y'],
                        render_mode=scatter.cleaned_data['render_mode'],
                        height=800)
                    print("fig create success")
                except:
                    print("fig create error")
                    fig_error = True
                    fig = None

        if 'scatter_3d_btn' in request.POST:
            scatter_3d = Scatter_3d_form(request.POST)
            scatter_3d.fields['x'].choices = features_name
            scatter_3d.fields['y'].choices = features_name
            scatter_3d.fields['z'].choices = features_name
            scatter_3d.fields['color'].choices = category_features_name
            scatter_3d.fields['size'].choices = numeric_features_name

            if scatter_3d.is_valid():
                x = scatter_3d.cleaned_data['x']
                y = scatter_3d.cleaned_data['y']
                z = scatter_3d.cleaned_data['z']
                color = scatter_3d.cleaned_data['color']
                size = scatter_3d.cleaned_data['size']

                data_feature_list = list()
                data_feature_list.append(x)
                data_feature_list.append(y)
                data_feature_list.append(z)
                if len(size) > 0:
                    data_feature_list.append(size)
                if len(color) > 0:
                    data_feature_list.append(color)

                data = pd.DataFrame(ppd.get_features_data(data_feature_list))

                try:
                    fig = px.scatter_3d(
                        data_frame=data,
                        x=x,
                        y=y,
                        z=z,
                        title=scatter_3d.cleaned_data['title'],
                        color=None if len(color) == 0 else color,
                        size=None if len(size) == 0 else size,
                        log_x=scatter_3d.cleaned_data['log_x'],
                        log_y=scatter_3d.cleaned_data['log_y'],
                        log_z=scatter_3d.cleaned_data['log_z'],
                        height=800)
                    print("fig create success")
                except:
                    print("fig create error")
                    fig_error = True
                    fig = None

        if 'line_btn' in request.POST:
            print("Line Plot")
            line = Line_form(request.POST)
            line.fields['x'].choices = features_name
            line.fields['y'].choices = features_name
            line.fields['facet_row'].choices = category_features_name
            line.fields['facet_col'].choices = category_features_name
            line.fields['color'].choices = category_features_name

            if line.is_valid():
                print("line is valid")
                x = line.cleaned_data['x']
                y = line.cleaned_data['y']
                facet_row = line.cleaned_data['facet_row']
                facet_col = line.cleaned_data['facet_col']
                facet_col_wrap = line.cleaned_data['facet_col_wrap']
                color = line.cleaned_data['color']

                data_feature_list = list()
                data_feature_list.append(x)
                data_feature_list.append(y)

                if len(color) > 0:
                    data_feature_list.append(color)
                if len(facet_row) > 0:
                    data_feature_list.append(facet_row)
                if len(facet_col) > 0:
                    data_feature_list.append(facet_col)

                data = pd.DataFrame(ppd.get_features_data(data_feature_list))

                try:
                    fig = px.line(
                        data_frame=data,
                        x=x,
                        y=y,
                        facet_row=None if len(facet_row) == 0 else facet_row,
                        facet_col=None if len(facet_col) == 0 else facet_col,
                        facet_col_wrap=facet_col_wrap,
                        title=line.cleaned_data['title'],
                        color=None if len(color) == 0 else color,
                        height=800)
                    print("fig create success")
                except:
                    print("fig create error")
                    fig_error = True
                    fig = None

        if 'bar_btn' in request.POST:
            # Bar chart request: bind the posted form, validate it, and build
            # a plotly express bar figure from the selected columns.
            print("Bar Plot")
            bar = Bar_form(request.POST)
            # Choice lists are assigned per request, before is_valid() runs.
            for axis_field in ('x', 'y'):
                bar.fields[axis_field].choices = features_name
            for group_field in ('facet_row', 'facet_col', 'color'):
                bar.fields[group_field].choices = category_features_name

            if bar.is_valid():
                print("Bar is valid")
                chosen = bar.cleaned_data
                x_col = chosen['x']
                y_col = chosen['y']
                row_col = chosen['facet_row']
                col_col = chosen['facet_col']
                wrap = chosen['facet_col_wrap']
                color_col = chosen['color']
                heading = chosen['title']
                direction = chosen['orientation']
                mode = chosen['bar_mode']

                # x and y are always requested; the grouping columns only
                # when a non-empty name was submitted.
                columns = [x_col, y_col]
                for optional_col in (color_col, row_col, col_col):
                    if len(optional_col) > 0:
                        columns.append(optional_col)

                frame = pd.DataFrame(ppd.get_features_data(columns))

                try:
                    # Empty selections are passed to plotly as None.
                    fig = px.bar(
                        data_frame=frame,
                        x=x_col,
                        y=y_col,
                        facet_row=row_col if len(row_col) > 0 else None,
                        facet_col=col_col if len(col_col) > 0 else None,
                        facet_col_wrap=wrap,
                        title=heading,
                        color=color_col if len(color_col) > 0 else None,
                        orientation=direction,
                        barmode=mode,
                        height=800)
                    print("fig create success")
                except:
                    # Any failure is reported through fig_error rather than
                    # propagated.
                    print("fig create error")
                    fig_error = True
                    fig = None

        if 'pie_btn' in request.POST:
            # Pie chart request: bind the posted form, validate it, and build
            # a plotly express pie figure from the selected columns.
            print("Pie Plot")
            pie = Pie_form(request.POST)
            pie.fields['values'].choices = features_name
            for group_field in ('names', 'color'):
                pie.fields[group_field].choices = category_features_name

            if pie.is_valid():
                print("Pie is valid")
                chosen = pie.cleaned_data
                value_col = chosen['values']
                label_col = chosen['names']
                color_col = chosen['color']
                heading = chosen['title']

                # The colour column is only requested when one was picked.
                columns = [value_col, label_col]
                if len(color_col) > 0:
                    columns.append(color_col)

                frame = pd.DataFrame(ppd.get_features_data(columns))

                try:
                    fig = px.pie(
                        data_frame=frame,
                        values=value_col,
                        names=label_col,
                        color=color_col if len(color_col) > 0 else None,
                        title=heading,
                        height=800)
                    print("fig create success")
                except:
                    # Any failure is reported through fig_error rather than
                    # propagated.
                    print("fig create error")
                    fig_error = True
                    fig = None

        if 'histogram_btn' in request.POST:
            # Histogram request: bind the posted form, validate it, and build
            # a plotly express histogram from the selected columns/options.
            print("Plot Histogram")
            histogram = Histogram_form(request.POST)
            for axis_field in ('x', 'y'):
                histogram.fields[axis_field].choices = features_name
            for group_field in ('facet_row', 'facet_col', 'color'):
                histogram.fields[group_field].choices = category_features_name
            # Form errors are dumped to stdout before the validity check.
            print(histogram.errors)

            if histogram.is_valid():
                print("Histogram is valid")
                chosen = histogram.cleaned_data
                x_col = chosen['x']
                y_col = chosen['y']
                row_col = chosen['facet_row']
                col_col = chosen['facet_col']
                wrap = chosen['facet_col_wrap']
                color_col = chosen['color']
                heading = chosen['title']
                direction = chosen['orientation']
                mode = chosen['bar_mode']
                marginal_kind = chosen['marginal']
                bar_normalisation = chosen['bar_norm']
                hist_normalisation = chosen['hist_norm']
                aggregate = chosen['hist_func']
                use_log_x = chosen['log_x']
                use_log_y = chosen['log_y']
                accumulate = chosen['cumulative']

                # x and y are always requested; the grouping columns only
                # when a non-empty name was submitted.
                columns = [x_col, y_col]
                for optional_col in (color_col, row_col, col_col):
                    if len(optional_col) > 0:
                        columns.append(optional_col)

                frame = pd.DataFrame(ppd.get_features_data(columns))

                try:
                    # Empty selections are passed to plotly as None.
                    fig = px.histogram(
                        data_frame=frame,
                        x=x_col,
                        y=y_col,
                        facet_row=row_col if len(row_col) > 0 else None,
                        facet_col=col_col if len(col_col) > 0 else None,
                        facet_col_wrap=wrap,
                        title=heading,
                        color=color_col if len(color_col) > 0 else None,
                        orientation=direction,
                        barmode=mode,
                        marginal=marginal_kind,
                        barnorm=bar_normalisation,
                        histnorm=hist_normalisation,
                        histfunc=aggregate,
                        log_x=use_log_x,
                        log_y=use_log_y,
                        cumulative=accumulate,
                        height=800)
                    print("Fig create success")
                except:
                    # Any failure is reported through fig_error rather than
                    # propagated.
                    print("fig create error")
                    fig_error = True
                    fig = None

        if 'scatter_matrix_btn' in request.POST:
            # Scatter-matrix request: bind the posted form, validate it, and
            # build a plotly express scatter matrix over four features.
            print("Scatter Matrix Plot")
            scatter_matrix = Scatter_matrix_form(request.POST)
            for dim_field in ('feature_1', 'feature_2', 'feature_3',
                              'feature_4', 'size'):
                scatter_matrix.fields[dim_field].choices = numeric_features_name
            for group_field in ('color', 'symbol'):
                scatter_matrix.fields[
                    group_field].choices = category_features_name

            if scatter_matrix.is_valid():
                print("Scatter Matrix is valid")
                cleaned = scatter_matrix.cleaned_data
                feature_1 = cleaned['feature_1']
                feature_2 = cleaned['feature_2']
                feature_3 = cleaned['feature_3']
                feature_4 = cleaned['feature_4']
                color = cleaned['color']
                symbol = cleaned['symbol']
                size = cleaned['size']
                title = cleaned['title']

                # The four dimensions are always requested. Every optional
                # column that is handed to plotly below must be requested as
                # well -- including `symbol`, otherwise the frame would lack
                # that column and figure creation would fail.
                # NOTE(review): assumes get_features_data returns only the
                # columns named in the list -- confirm against its definition.
                data_feature_list = [
                    feature_1, feature_2, feature_3, feature_4
                ]
                data_feature_list.extend(
                    name for name in (size, color, symbol) if name)

                data = pd.DataFrame(ppd.get_features_data(data_feature_list))

                try:
                    # Empty selections are passed to plotly as None.
                    fig = px.scatter_matrix(
                        data_frame=data,
                        dimensions=[
                            feature_1, feature_2, feature_3, feature_4
                        ],
                        color=color or None,
                        symbol=symbol or None,
                        size=size or None,
                        title=title)
                    print("fig create success")
                except Exception:
                    # Report plotting failures through fig_error; do not
                    # swallow KeyboardInterrupt/SystemExit.
                    print("fig create error")
                    fig_error = True
                    fig = None

        if 'box_btn' in request.POST:
            # Box plot request: bind the posted form, validate it, and build
            # a plotly express box figure from the selected columns/options.
            box = Box_form(request.POST)
            for axis_field in ('x', 'y'):
                box.fields[axis_field].choices = features_name
            for group_field in ('facet_row', 'facet_col', 'color'):
                box.fields[group_field].choices = category_features_name
            # Form errors are dumped to stdout before the validity check.
            print(box.errors)

            if box.is_valid():
                cleaned = box.cleaned_data
                x = cleaned['x']
                y = cleaned['y']
                facet_row = cleaned['facet_row']
                facet_col = cleaned['facet_col']
                color = cleaned['color']
                facet_col_wrap = cleaned['facet_col_wrap']
                title = cleaned['title']
                orientation = cleaned['orientation']
                log_x = cleaned['log_x']
                log_y = cleaned['log_y']
                box_mode = cleaned['box_mode']
                points = cleaned['points']
                notched = cleaned['notched']

                # Every column selector is optional here; request only the
                # columns that were actually chosen.
                data_feature_list = [
                    name for name in (x, y, color, facet_row, facet_col)
                    if name
                ]

                data = pd.DataFrame(ppd.get_features_data(data_feature_list))

                try:
                    # Empty selections are passed to plotly as None.
                    fig = px.box(
                        data_frame=data,
                        x=x or None,
                        y=y or None,
                        facet_row=facet_row or None,
                        facet_col=facet_col or None,
                        facet_col_wrap=facet_col_wrap,
                        title=title,
                        # Pass the chosen colour column so the form's
                        # 'color' selection actually takes effect.
                        color=color or None,
                        orientation=orientation,
                        log_x=log_x,
                        log_y=log_y,
                        boxmode=box_mode,
                        points=points,
                        notched=notched)
                    print("fig create success")
                except Exception:
                    # Report plotting failures through fig_error; do not
                    # swallow KeyboardInterrupt/SystemExit.
                    print("fig create error")
                    fig_error = True
                    fig = None

        if 'violin_btn' in request.POST:
            # Violin plot request: bind the posted form, validate it, and
            # build a plotly express violin figure from the selected
            # columns/options.
            violin = Violin_form(request.POST)
            for axis_field in ('x', 'y'):
                violin.fields[axis_field].choices = features_name
            for group_field in ('facet_row', 'facet_col', 'color'):
                violin.fields[group_field].choices = category_features_name
            # Form errors are dumped to stdout before the validity check.
            print(violin.errors)

            if violin.is_valid():
                cleaned = violin.cleaned_data
                x = cleaned['x']
                y = cleaned['y']
                facet_row = cleaned['facet_row']
                facet_col = cleaned['facet_col']
                color = cleaned['color']
                facet_col_wrap = cleaned['facet_col_wrap']
                title = cleaned['title']
                orientation = cleaned['orientation']
                log_x = cleaned['log_x']
                log_y = cleaned['log_y']
                violin_mode = cleaned['violin_mode']
                points = cleaned['points']
                # Named show_box so it does not shadow the `box` form
                # variable used elsewhere in this view.
                show_box = cleaned['box']

                # Every column selector is optional here; request only the
                # columns that were actually chosen.
                data_feature_list = [
                    name for name in (x, y, color, facet_row, facet_col)
                    if name
                ]

                data = pd.DataFrame(ppd.get_features_data(data_feature_list))

                try:
                    # Empty selections are passed to plotly as None.
                    fig = px.violin(
                        data_frame=data,
                        x=x or None,
                        y=y or None,
                        facet_row=facet_row or None,
                        facet_col=facet_col or None,
                        facet_col_wrap=facet_col_wrap,
                        title=title,
                        # Pass the chosen colour column so the form's
                        # 'color' selection actually takes effect.
                        color=color or None,
                        orientation=orientation,
                        log_x=log_x,
                        log_y=log_y,
                        violinmode=violin_mode,
                        points=points,
                        box=show_box)
                    print("fig create success")
                except Exception:
                    # Report plotting failures through fig_error; do not
                    # swallow KeyboardInterrupt/SystemExit.
                    print("fig create error")
                    fig_error = True
                    fig = None

        if 'heat_map_btn' in request.POST:
            # Heat map request: render the matrix returned by
            # ppd.get_corr_matrix() as an image-style figure.
            print("Heat Map")
            corr_frame = pd.DataFrame(ppd.get_corr_matrix())
            print(corr_frame.columns)
            try:
                # zmin/zmax pin the colour scale to the range [-1, 1].
                fig = px.imshow(
                    corr_frame.astype(float),
                    x=corr_frame.columns,
                    y=corr_frame.index,
                    zmax=1,
                    zmin=-1,
                    height=800)
            except:
                # Any failure is reported through fig_error rather than
                # propagated.
                print("fig create error")
                fig_error = True
                fig = None

    scatter = Scatter_form()
    scatter.fields['x'].choices = features_name
    scatter.fields['y'].choices = features_name
    scatter.fields['facet_row'].choices = category_features_name
    scatter.fields['facet_col'].choices = category_features_name
    scatter.fields['color'].choices = category_features_name
    scatter.fields['size'].choices = numeric_features_name

    scatter_3d = Scatter_3d_form()
    scatter_3d.fields['x'].choices = features_name
    scatter_3d.fields['y'].choices = features_name
    scatter_3d.fields['z'].choices = features_name
    scatter_3d.fields['color'].choices = category_features_name
    scatter_3d.fields['size'].choices = numeric_features_name

    line = Line_form()
    line.fields['x'].choices = features_name
    line.fields['y'].choices = features_name
    line.fields['facet_row'].choices = category_features_name
    line.fields['facet_col'].choices = category_features_name
    line.fields['color'].choices = category_features_name

    bar = Bar_form()
    bar.fields['x'].choices = features_name
    bar.fields['y'].choices = features_name
    bar.fields['facet_row'].choices = category_features_name
    bar.fields['facet_col'].choices = category_features_name
    bar.fields['color'].choices = category_features_name

    pie = Pie_form()
    pie.fields['values'].choices = features_name
    pie.fields['names'].choices = category_features_name
    pie.fields['color'].choices = category_features_name

    histogram = Histogram_form()
    histogram.fields['x'].choices = features_name
    histogram.fields['y'].choices = features_name
    histogram.fields['facet_row'].choices = category_features_name
    histogram.fields['facet_col'].choices = category_features_name
    histogram.fields['color'].choices = category_features_name

    scatter_matrix = Scatter_matrix_form()
    scatter_matrix.fields['feature_1'].choices = numeric_features_name
    scatter_matrix.fields['feature_2'].choices = numeric_features_name
    scatter_matrix.fields['feature_3'].choices = numeric_features_name
    scatter_matrix.fields['feature_4'].choices = numeric_features_name
    scatter_matrix.fields['color'].choices = category_features_name
    scatter_matrix.fields['size'].choices = numeric_features_name
    scatter_matrix.fields['symbol'].choices = category_features_name

    box = Box_form()
    box.fields['x'].choices = features_name
    box.fields['y'].choices = features_name
    box.fields['facet_row'].choices = category_features_name
    box.fields['facet_col'].choices = category_features_name
    box.fields['color'].choices = category_features_name

    violin = Violin_form()
    violin.fields['x'].choices = features_name
    violin.fields['y'].choices = features_name
    violin.fields['facet_row'].choices = category_features_name
    violin.fields['facet_col'].choices = category_features_name
    violin.fields['color'].choices = category_features_name

    context = {
        'fig': None,
        'scatter': scatter,
        'line': line,
        'scatter_3d': scatter_3d,
        'bar': bar,
        'pie': pie,
        'histogram': histogram,
        'scatter_matrix': scatter_matrix,
        'box': box,
        'violin': violin
    }
    if fig is not None:
        context['fig'] = pio.to_html(fig=fig,
                                     full_html=False,
                                     include_plotlyjs=False)
    elif fig_error is True:
        context[
            'fig'] = "Plot Graph Error When Setting Parameters. Please Try Again!"
    else:
        context['fig'] = None
    return render(request,
                  'data_cleaning_app/plot_graphs.html',
                  context=context)