# Example #1
# 0
import pandas as pd

import scripts
from cleaning import apply_cleaning
from preparing_neural import apply_lemmatize, apply_preparing_merge
from train_neural_liam_test import (
    fit_model,
    get_preprocessor,
    save_model,
    split_train_test_data,
)

# Training pipeline for the neural model:
# load -> clean -> merge -> lemmatize -> split -> preprocess -> fit -> save -> evaluate.
data = pd.read_csv('../raw_data/working.csv')

# Text/feature cleaning step (returns a new frame; see how the *_clean
# columns are consumed elsewhere in this file).
data_clean = apply_cleaning(data)

# Merge/prepare the cleaned columns into the shape the neural pipeline expects.
data_merge = apply_preparing_merge(data_clean)

# Lemmatize the merged text before vectorisation.
data_lemmatize = apply_lemmatize(data_merge)

x_train, x_test, y_train, y_test = split_train_test_data(data_lemmatize)

# Fit the preprocessor on the training split and transform both splits.
x_train_preprocessed, x_test_preprocessed = get_preprocessor(x_train, x_test)

neural_model = fit_model(x_train_preprocessed, y_train)

# Persist the trained model before reporting the held-out score.
save_model(neural_model)

print(neural_model.evaluate(x_test_preprocessed, y_test))
# Example #2
# 0
# Input widgets: either paste the article text directly, or give a URL to scrape.
if input_method == 'Text':
    title = st.text_input('Article title')
    text = st.text_area('Article body')
elif input_method == 'Link':
    url = st.text_input('Article URL')


analyze_status_logistic = st.button('Analyze_with_Logistic')
analyze_status_neural = st.button('Analyze_with_Neural')

# Feature columns the logistic model was trained on — defined once so both
# input paths stay in sync (was duplicated inline in each branch).
LOGISTIC_FEATURES = ['title_clean', 'text_clean', 'title_length_char',
                     'title_Upper_Ratio', 'text_stop_words_ratio']


###### Logistic
# st.button returns a bool; test truthiness instead of `== True` (PEP 8).
if input_method == 'Text' and analyze_status_logistic:
    # Build a frame from the pasted title/body, then run the cleaning pipeline.
    input_df = convert(title, text)
    input_df = apply_cleaning(input_df)

    input_df = input_df[LOGISTIC_FEATURES]
    prediction = logistic_model.predict(input_df)
    if prediction == 1:
        st.write('I think its true')
    else:
        st.write('I think its fake')

if input_method == 'Link' and analyze_status_logistic:
    # Scrape title/body from the URL, then apply the same cleaning pipeline.
    input_df = get_title_text_web(url)
    input_df = apply_cleaning(input_df)


    input_df = input_df[LOGISTIC_FEATURES]
    # NOTE(review): this branch selects the features but never predicts or
    # writes a result — it looks truncated/incomplete; confirm intended flow.
# Example #3
# 0
import pandas as pd
from preparing_df import apply_preparing
from cleaning import apply_cleaning
from typo import apply_typo_ratio
from train import split_train_test_data, fit_model, save_model

# Training pipeline for the baseline model:
# load -> combine -> clean -> typo features -> split -> fit -> save -> score.
true = pd.read_csv('True.csv')
fake = pd.read_csv('Fake.csv')

# Combine the labelled true/fake frames into one training frame.
data = apply_preparing(true, fake)

# BUG FIX: the return values were previously discarded, so training ran on
# the raw, uncleaned frame — example #1 in this file shows apply_cleaning
# returning the processed frame. Capture the results so they are used.
# (TODO confirm these helpers return the frame rather than mutating in place.)
data = apply_cleaning(data)

data = apply_typo_ratio(data)

x_train, x_test, y_train, y_test = split_train_test_data(data)

pipe = fit_model(x_train, y_train)

# Persist the fitted pipeline before reporting the held-out score.
save_model(pipe, 'model_test')

print(pipe.score(x_test, y_test))
# Streamlit flow: upload an Excel workbook, pick a sheet, clean the
# 'Requirement Statement' column, and show original vs cleaned text
# side by side before the traceability step.
index0 = st.file_uploader("Choose a file")
if index0 is not None:
    st.sidebar.header('Dataset Parameter')
    # Parse the uploaded workbook so its sheet names can be listed.
    x1 = pd.ExcelFile(index0)
    index1 = st.sidebar.selectbox('What Dataset you choose?', x1.sheet_names)

    # Load the selected sheet (from either the functional or the
    # non-functional dataset).
    st.header('Dataset parameters')
    statement = fulldataset(index0, index1)

    # Get the text to clean (from the desired rows).
    text_to_clean = list(statement['Requirement Statement'])

    # Clean text (console log only; the result is rendered below).
    print("Loading Original & Cleaned Text...")
    cleaned_text = apply_cleaning(text_to_clean)

    # Show original vs cleaned text, transposed to one row per requirement ID.
    text_df = pd.DataFrame([text_to_clean, cleaned_text],
                           index=['ORIGINAL', 'CLEANED'],
                           columns=statement['ID']).T
    st.write(text_df)

    st.header('Traceability parameters')
    # NOTE(review): re-reads the same sheet instead of reusing `statement`
    # — statement['ID'] would avoid the second load.
    id_requirement = fulldataset(index0, index1)['ID']

    # Choose which traceability technique to run.
    genre = st.sidebar.radio(
        "What do you choose?",
        ('Information_Retrieval', 'Ontology', 'IR+LSA', 'IR+LDA'))
    # (the branch bodies continue past the visible portion of this file)
    if genre == 'Information_Retrieval':
        st.subheader("bag of words")