Ejemplo n.º 1
0
def clustering():
    """Render the "Unsupervised Learning | Clustering" Streamlit page.

    The user picks a dataset from ``tools.get_files()``; the selected
    ``data``/``doc`` pair is fed to ``Clustering_NLP`` and the sections
    enabled through the checkboxes are drawn: question info, prediction
    data, reduced-dimensionality charts, and model performance (accuracy,
    confusion matrix, the ``tryit`` widget and a row-by-row explorer).

    Takes no arguments and returns ``None``; all output goes through ``st``.
    """
    st.markdown("## **Master's Project**")
    st.markdown('---')
    st.markdown("# **Unsupervised Learning | Clustering**")

    st.markdown("### Choose a Dataset")
    files = tools.get_files()
    option = st.selectbox('Select a Teacher Answer', files['name'])

    # Position of the chosen name; the matching 'doc' and 'data' entries are
    # read from that same position.
    index = files['name'].index(option)
    st.write('You selected:', index)

    doc = files['doc'][index]
    data = files['data'][index]

    st.markdown("## Modeling Example")
    st.markdown('**Select the below based on what you want to see:**')
    doc_flag = st.checkbox('Display Question Info')
    data_flag = st.checkbox('Display Prediction Data')
    chart_flag = st.checkbox('Display Reduced-Dimensionality Chart')
    model_flag = st.checkbox('Display Model Info and Performance')

    # The model is built before any section is drawn.
    # NOTE(review): presumably this is what provides the 'cluster' column
    # read further down - confirm against Clustering_NLP.
    nlp = Clustering_NLP(data, doc)
    nlp.correct_cluster_labels()

    if doc_flag:
        st.markdown('## Question Info')
        st.write(doc)
    if data_flag:
        st.markdown('## Prediction Data')
        st.write(data)
        st.write(pd.Series(data.columns, name='Features'))
    if chart_flag:
        # Streamlit renamed ``beta_columns`` to ``columns``; prefer the
        # current name and fall back to the old one on older installations.
        make_columns = getattr(st, 'columns', None) or st.beta_columns
        col1, col2 = make_columns(2)
        centers = nlp.model.cluster_centers_
        fig1, _ = charts.plot_pca_chart(data, doc['label'], centers)
        fig2, _ = charts.plot_tsne_chart(data, doc['label'], centers)
        with col1:
            st.pyplot(fig=fig1)

        with col2:
            st.pyplot(fig=fig2)

    if model_flag:
        st.markdown('## Model Data')
        st.markdown(f'### **Accuracy of Model: {round(nlp.accuracy(),3)}**  ')
        st.pyplot(fig=charts.plot_confusion_matrix(nlp.doc['label'],
                                                   nlp.doc.cluster))

        results = doc[['student_answer', 'label', 'cluster']]
        try_it = st.checkbox('Try it Yourself!')
        explore_flag = st.checkbox('Explore Data')
        if try_it:
            tryit(nlp)

        if explore_flag:
            st.markdown(f"""**Dataset Length: {len(results)}** """)
            last_row = len(nlp.doc) - 1
            # The default window has to lie inside the slider range, so it is
            # clamped for datasets with fewer than six rows.
            start, end = st.slider(label='Data View Select',
                                   min_value=0,
                                   max_value=last_row,
                                   value=(0, min(5, last_row)))
            st.markdown(
                f"""**Teacher Answer: {nlp.doc['teacher_answer'].values[0]}**"""
            )
            # Rows are addressed by index label via ``.loc``.
            # NOTE(review): this assumes nlp.doc carries a 0..n-1 index -
            # confirm.
            for i in range(int(start), int(end) + 1):
                label = nlp.doc.loc[i, 'label']
                cluster = nlp.doc.loc[i, 'cluster']
                answer = nlp.doc.loc[i, 'student_answer']
                st.markdown(
                    f"""{i}. {'Label:':>10} {label!s}  Pred: {cluster!s}    {answer!s}"""
                )
Ejemplo n.º 2
0
def clustering():
    """Render the "Unsupervised Learning | Clustering" Streamlit page.

    The user picks a dataset from ``files``; the selected ``data``/``doc``
    pair is fed to ``Clustering_NLP`` and the sections enabled through the
    checkboxes are drawn: reduced-dimensionality charts and model metrics
    (score table, confusion matrix, the ``tryit`` widget and a row-by-row
    explorer).

    Takes no arguments and returns ``None``; all output goes through ``st``.
    """
    st.markdown("## **Master's Project**")
    st.markdown('---')
    st.markdown("# **Unsupervised Learning | Clustering**")

    st.markdown("### Choose a Dataset")
    # NOTE(review): ``files`` is not assigned in this function; presumably it
    # is a module-level mapping with 'name', 'doc' and 'data' entries -
    # confirm.
    option = st.selectbox('Select a Teacher Answer', files['name'])

    # Position of the chosen name; the matching 'doc' and 'data' entries are
    # read from that same position.
    index = files['name'].index(option)
    st.write('You selected:', index)

    doc = files['doc'][index]
    data = files['data'][index]

    st.markdown("## Modeling Example")
    st.markdown('**Select the below based on what you want to see:**')
    chart_flag = st.checkbox('Display Reduced-Dimensionality Chart')
    model_flag = st.checkbox('Display Model Info and Performance')

    # The model is built before any section is drawn.
    # NOTE(review): presumably this is what provides the 'cluster' column
    # read further down - confirm against Clustering_NLP.
    nlp = Clustering_NLP(data, doc)
    nlp.correct_cluster_labels()

    if chart_flag:
        # Streamlit renamed ``beta_columns`` to ``columns``; prefer the
        # current name and fall back to the old one on older installations.
        make_columns = getattr(st, 'columns', None) or st.beta_columns
        col1, col2 = make_columns(2)
        centers = nlp.model.cluster_centers_
        fig1, _ = charts.plot_pca_chart(data, doc['label'], centers)
        fig2, _ = charts.plot_tsne_chart(data, doc['label'], centers)
        with col1:
            st.pyplot(fig=fig1)

        with col2:
            st.pyplot(fig=fig2)

    if model_flag:
        st.markdown('## Model Data Metrics')
        # One-row table: metric names as columns, a single "Scores" row.
        scores = pd.DataFrame(
            {
                'Accuracy': round(nlp.accuracy(), 3),
                'Balanced Accuracy': round(nlp.balanced_accuracy(), 3),
                'F1 Score': round(nlp.f1_scorer(), 3),
                'Cohens Kappa': round(nlp.kappa(), 3),
            },
            index=['Scores'],
        )
        st.table(scores)

        st.pyplot(fig=charts.plot_confusion_matrix(nlp.doc['label'],
                                                   nlp.doc.cluster))

        results = doc[['student_answer', 'label', 'cluster']]
        try_it = st.checkbox('Try it Yourself!')
        explore_flag = st.checkbox('Explore Data')
        if try_it:
            tryit(nlp, 'cluster')

        if explore_flag:
            st.markdown(f"""**Dataset Length: {len(results)}** """)
            last_row = len(nlp.doc) - 1
            # The default window has to lie inside the slider range, so it is
            # clamped for datasets with fewer than six rows.
            start, end = st.slider(
                label='Data View Select',
                min_value=0,
                max_value=last_row,
                value=(0, min(5, last_row)),
            )
            st.markdown(f"""**Teacher Answer: {nlp.doc['teacher_answer'].values[0]}**""")
            # Rows are addressed by index label via ``.loc``.
            # NOTE(review): this assumes nlp.doc carries a 0..n-1 index -
            # confirm.
            for i in range(int(start), int(end) + 1):
                label = nlp.doc.loc[i, 'label']
                cluster = nlp.doc.loc[i, 'cluster']
                answer = nlp.doc.loc[i, 'student_answer']
                st.markdown(f"""{i}. {'Label:':>10} {label!s}  Pred: {cluster!s}    {answer!s}""")
Ejemplo n.º 3
0
def classification():
    """Render the "Supervised Learning | Classification" Streamlit page.

    The user picks a dataset from ``tools.get_files()`` and a test-set
    fraction; the selected ``data``/``doc`` pair is fed to
    ``Classification_NLP`` and the sections enabled through the checkboxes
    are drawn: question info, prediction data, and model performance
    (test accuracy, confusion matrix, the ``tryit`` widget and a row-by-row
    explorer).

    Takes no arguments and returns ``None``; all output goes through ``st``.
    """
    st.markdown("## **Master's Project**")
    st.markdown('---')
    st.markdown("# **Supervised Learning | Classification**")

    st.markdown("### Choose a Dataset")
    files = tools.get_files()
    option = st.selectbox('Select a Teacher Answer', files['name'])

    # Position of the chosen name; the matching 'doc' and 'data' entries are
    # read from that same position.
    index = files['name'].index(option)
    st.write('You selected:', index)

    doc = files['doc'][index]
    data = files['data'][index]

    st.markdown("## Modeling Example")
    st.markdown('**Select the below based on what you want to see:**')
    doc_flag = st.checkbox('Display Question Info')
    data_flag = st.checkbox('Display Prediction Data')
    model_flag = st.checkbox('Display Model Info and Performance')

    test_size = st.number_input('Test Size',
                                min_value=0.01,
                                max_value=0.91,
                                value=.75,
                                step=0.05)
    # The model is built before any section is drawn.
    # NOTE(review): presumably this is what provides the 'prediction' column
    # read further down - confirm against Classification_NLP.
    nlp = Classification_NLP(data, doc, test_size)
    st.markdown(f'Training Set Size: {len(nlp.X_train)}')
    st.markdown(f'Test Set Size: {len(nlp.X_test)}')
    if doc_flag:
        st.markdown('## Question Info')
        st.write(doc)
    if data_flag:
        st.markdown('## Prediction Data')
        st.write(data)
        st.write(pd.Series(data.columns, name='Features'))
    if model_flag:
        st.markdown('## Model Data')

        # ``accuracy()`` returns a pair here; only the second element is shown.
        _, accuracy = nlp.accuracy()
        st.markdown(
            f'### **Test Set Accuracy of Model: {round(accuracy, 3)}**  ')
        st.pyplot(fig=charts.plot_confusion_matrix(nlp.y_test, nlp.pred))

        results = doc[['student_answer', 'label', 'prediction']]
        try_it = st.checkbox('Try it Yourself!')
        explore_flag = st.checkbox('Explore Data')

        if try_it:
            tryit(nlp)
        if explore_flag:
            st.markdown(f"""**Dataset Length: {len(results)}** """)
            last_row = len(nlp.doc) - 1
            # The default window has to lie inside the slider range, so it is
            # clamped for datasets with fewer than six rows.
            start, end = st.slider(label='Data View Select',
                                   min_value=0,
                                   max_value=last_row,
                                   value=(0, min(5, last_row)))
            st.markdown(
                f"""**Teacher Answer: {nlp.doc['teacher_answer'].values[0]}**"""
            )
            # Rows are addressed by index label via ``.loc``.
            # NOTE(review): this assumes nlp.doc carries a 0..n-1 index -
            # confirm.
            # Correct and incorrect predictions are rendered the same way, so
            # a single call covers both.
            for i in range(int(start), int(end) + 1):
                label = nlp.doc.loc[i, 'label']
                prediction = nlp.doc.loc[i, 'prediction']
                answer = nlp.doc.loc[i, 'student_answer']
                st.markdown(
                    f"""{i}. {'Label:':>10} {label!s}  Pred: {prediction!s}    {answer!s}"""
                )
Ejemplo n.º 4
0
def classification():
    """Render the "Supervised Learning | Classification" Streamlit page.

    The user picks a dataset from ``files`` and a test-set fraction; the
    selected ``data``/``doc`` pair is fed to ``Classification_NLP`` and the
    sections enabled through the checkboxes are drawn: the logistic-function
    chart and model metrics (score table, confusion matrix, the ``tryit``
    widget and a row-by-row explorer).

    Takes no arguments and returns ``None``; all output goes through ``st``.
    """
    st.markdown("## **Master's Project**")
    st.markdown('---')
    st.markdown("# **Supervised Learning | Classification**")

    st.markdown("### Choose a Dataset")
    # NOTE(review): ``files`` is not assigned in this function; presumably it
    # is a module-level mapping with 'name', 'doc' and 'data' entries (e.g.
    # the result of tools.get_files()) - confirm.
    option = st.selectbox('Select a Teacher Answer', files['name'])

    # Position of the chosen name; the matching 'doc' and 'data' entries are
    # read from that same position.
    index = files['name'].index(option)
    st.write('You selected:', index)

    doc = files['doc'][index]
    data = files['data'][index]

    st.markdown("## Modeling Example")
    st.markdown('**Select the below based on what you want to see:**')
    chart_flag = st.checkbox('Display Logistic Function Chart')
    model_flag = st.checkbox('Display Model Info and Performance')

    test_size = st.number_input(
        'Test Size',
        min_value=0.01,
        max_value=0.91,
        value=.75,
        step=0.05,
    )
    # The model is built before any section is drawn.
    # NOTE(review): presumably this is what provides the 'prediction' column
    # read further down - confirm against Classification_NLP.
    nlp = Classification_NLP(data, doc, test_size)
    st.markdown(f'Training Set Size: {len(nlp.X_train)}')
    st.markdown(f'Test Set Size: {len(nlp.X_test)}')

    if chart_flag:
        st.pyplot(fig=charts.plot_logistic_function(nlp, test_size))
    if model_flag:
        st.markdown('## Model Data Metrics')
        # ``accuracy()`` returns a pair here; only the second element is shown.
        _, accuracy = nlp.accuracy()
        # One-row table: metric names as columns, a single "Scores" row.
        scores = pd.DataFrame(
            {
                'Accuracy': round(accuracy, 3),
                'Balanced Accuracy': round(nlp.balanced_accuracy(), 3),
                'F1 Score': round(nlp.f1_scorer(), 3),
                'Cohens Kappa': round(nlp.kappa(), 3),
            },
            index=['Scores'],
        )
        st.table(scores)

        st.pyplot(fig=charts.plot_confusion_matrix(nlp.doc.label,
                                                   nlp.doc.prediction))

        results = doc[['student_answer', 'label', 'prediction']]
        try_it = st.checkbox('Try it Yourself!')
        explore_flag = st.checkbox('Explore Data')

        if try_it:
            tryit(nlp, 'prediction')
        if explore_flag:
            st.markdown(f"""**Dataset Length: {len(results)}** """)
            last_row = len(nlp.doc) - 1
            # The default window has to lie inside the slider range, so it is
            # clamped for datasets with fewer than six rows.
            start, end = st.slider(
                label='Data View Select',
                min_value=0,
                max_value=last_row,
                value=(0, min(5, last_row)),
            )
            st.markdown(f"""**Teacher Answer: {nlp.doc['teacher_answer'].values[0]}**""")
            # Rows are addressed by index label via ``.loc``.
            # NOTE(review): this assumes nlp.doc carries a 0..n-1 index -
            # confirm.
            # Correct and incorrect predictions are rendered the same way, so
            # a single call covers both.
            for i in range(int(start), int(end) + 1):
                label = nlp.doc.loc[i, 'label']
                prediction = nlp.doc.loc[i, 'prediction']
                answer = nlp.doc.loc[i, 'student_answer']
                st.markdown(f"""{i}. {'Label:':>10} {label!s}  Pred: {prediction!s}    {answer!s}""")