Python Data_Preprocessing Exemples, Data_Preprocessing Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : Blood_Pressure_Predictor.py Projet : HuangChiEn/DeepRNN-with-keras

def load_data():
    file_path = filedialog.askopenfilename(initialdir="./",
                                           title="Select file",
                                           filetypes=(("matlab files",
                                                       "*.mat"), ("all files",
                                                                  "*.*")))
    traSetLst, tstSetLst = DPre.preprocess(
        DPre.load_data(file_path, datVar.get()), seq_len,
        sample_num)  ## dbNam, datNam
    global traDatStmp, traLabStmp
    global tstDatStmp, tstLabStmp
    traDatStmp, traLabStmp = DPre.pack_time_stamp(
        traSetLst, fea_num, len(traSetLst))  ## package train data..
    tstDatStmp, tstLabStmp = DPre.pack_time_stamp(
        tstSetLst, fea_num, len(tstSetLst))  ## package test data..
    predBut.config(state="normal")
    global model
    input = Input(shape=(
        seq_len, fea_num
    ))  # sample num : 2000, time_stamp : 32(seq_len), feature num : 2
    model = Bp.build_model(input)
    try:
        model.load_weights('./model_save/model_weight/' + datVar.get() + '.h5')
        msg.set('load success!')
        label.config(text=msg.get())
    except:
        msg.set('fail to load')
        label.config(text=msg.get())

Exemple #2

0

Afficher le fichier

def select_random_records(reevaluate=False):
    if not reevaluate:
        list_of_random_number = random.sample(range(total_number_of_records), number_of_records_to_sample)
    else:
        list_of_random_number = Utility.read_words_file_into_list(path_to_saved_manually_tagged_file , 0 )
        list_of_random_number = [int(x) for x in list_of_random_number]

    list_of_random_number2 = copy.deepcopy(list_of_random_number)
    list_of_random_number3 = copy.deepcopy(list_of_random_number)
    list_of_random_number4 = copy.deepcopy(list_of_random_number)


    sentences = Data_Preprocessing.Raw_Record_Sentences_Parser(Data_Preprocessing.path_to_Rawdata)
    with open(path_to_sampled_raw_records, "w") as sampled_raw_records:
        for c, s in enumerate(sentences):
            if c in list_of_random_number4:
                logger.info("sampled record number #%.i", c)
                list_of_random_number4.remove(c)
                string_to_print = str(c) + '\t' + " ".join(s)
                print(string_to_print, file=sampled_raw_records)

    sentences = Utility.Utility_Sentence_Parser(Data_Preprocessing.path_to_Save_file)
    with open(path_to_sampled_preprocessed_records, "w") as sample_preprocessed_records:
        for c,s in enumerate(sentences):
            if c in list_of_random_number:
                logger.info("sampled record number #%.i", c )
                list_of_random_number.rnemove(c)
                string_to_print = str(c)nn + '\t' + " ".join(s)
                print(string_to_print , file=sample_preprocessed_records )

    sentences = Utility.Utility_Sentence_Parser(Text_Normalization.save_folder_name + "/Normalized_Text_Stage_2.txt")
    with open(path_to_sampled_normalized_records, "w") as sample_normalized_records:
        for c,s in enumerate(sentences):
            if c in list_of_random_number2:
                logger.info("sampled record number #%.i", c )
                list_of_random_number2.remove(c)
                string_to_print = str(c) + '\t' + " ".join(s)
                print(string_to_print , file=sample_normalized_records )

    sentences = Utility.Utility_Sentence_Parser(Maintenence_Action_Detection.processed_file_name)
    with open(path_to_sampled_final_records, "w") as sampled_final_records:
        for c,s in enumerate(sentences,0):
            if c in list_of_random_number3:
                logger.info("sampled record number #%.i", c)
                list_of_random_number3.remove(c)
                string_to_print = str(c) + '\t' + " ".join(s)
                print(string_to_print , file=sampled_final_records )

    if not reevaluate:
        copyfile(path_to_sampled_normalized_records, path_to_file_to_be_tagged_manually)

Exemple #3

0

Afficher le fichier

Fichier : Classification.py Projet : OSalmona/Predict-Mobile-Game-Success-Rating

def Trainig():

    if (os.path.exists("PreProcessing Data/TrainingDataSet.csv")):
        data = pd.read_csv("PreProcessing Data/TrainingDataSet.csv")
        print("Training DataSet Already Existing")
        Y = data['Rate']  # Label
        X = data.drop(columns=["Rate"], inplace=False)
    else:
        print("Starting Training Data PreProcessing")
        pre_processing = Data_Preprocessing.Pre_Processing()
        X, Y = pre_processing.PreProcessing_Trainig()

    highX = X[Y == 2]
    highY = Y[Y == 2]

    intermediateX = X[Y == 1]
    intermediateY = Y[Y == 1]

    lowX = X[Y == 0]
    lowY = Y[Y == 0]

    HX_train, HX_test, HY_train, HY_test = train_test_split(highX,
                                                            highY,
                                                            test_size=0.2,
                                                            shuffle=True)
    IX_train, IX_test, IY_train, IY_test = train_test_split(intermediateX,
                                                            intermediateY,
                                                            test_size=0.2,
                                                            shuffle=True)
    LX_train, LX_test, LY_train, LY_test = train_test_split(lowX,
                                                            lowY,
                                                            test_size=0.2,
                                                            shuffle=True)

    X_train = np.concatenate((HX_train, IX_train, LX_train))
    y_train = np.concatenate((HY_train, IY_train, LY_train))

    X_test = np.concatenate((HX_test, IX_test, LX_test))
    y_test = np.concatenate((HY_test, IY_test, LY_test))

    print("Start Classification Techinques")
    # X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, shuffle=True)
    np.save('modelsPCA/traindata.npy', X_train)
    np.save('modelsPCA/trainlabel.npy', y_train)

    caller(X_train, y_train, X_test, y_test, 0)
    PCA_algorithm(X_train, y_train, X_test, y_test)

Exemple #4

0

Afficher le fichier

Fichier : Classifiers.py Projet : vishruth63/2_CO355_CO393_ML_MTEProject

from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.svm import LinearSVC
from sklearn import metrics
import Data_Preprocessing
import warnings
import time

warnings.filterwarnings("ignore")

data_obj = Data_Preprocessing.process_data()
data_obj.clean_data()

'''
Naive Bayes classifier class
'''
class Naive_Bayes_Classifier:

    def __init__(self):
        self.vectorizer_time = {'Count' : 0 , 'TFIDF' : 0 }
        self.classifier_time = {'Count' : 0 , 'TFIDF' : 0 }

    def Count_vectorizer_classifier(self):

        start = time.time()

        print("\n\nRunning Naive Bayes with Count Vectorizer...")
        x_train, x_test = data_obj.generate_count_vectorizer()
        y_train = data_obj.y_train
        y_test = data_obj.y_test

Exemple #5

0

Afficher le fichier

def Lemmatize_words(string_to_normalize):
    i = 0
    for i in range(len(string_to_normalize)):
        word = string_to_normalize[i]
        if word in lemmatized_words_dictionry:
            string_to_normalize[i] = lemmatized_words_dictionry[word]

    return string_to_normalize


if __name__ == "__main__":
    Unknown_words_building()
    lemmatized_words_building()
    Bigram_building()
    #sentences = Data_Preprocessing.Sentences_Parser_2('./Input_Output_Folder/Preprocessed_Record/Cleaned_Data_1.txt')
    sentences = Data_Preprocessing.Sentences_Parser_2(Data_Preprocessing.path_to_Save_file)
    with open(path_to_normalized_stage_1_records, "w") as Normalized_Text_Stage_1:
        i = 1
        for sentence in sentences:
            string_to_print = ' '.join(Normalize_Text_stage_1(sentence))
            print(str(i) + '\t' + string_to_print , file=Normalized_Text_Stage_1)
            i+=1


    sentences = Data_Preprocessing.Sentences_Parser_2(path_to_normalized_stage_1_records)
    with open(path_to_normalized_stage_1_lemmatized_records, "w") as Normalized_Text_Stage_1_lemmatized:
        i = 1
        for sentence in sentences:
            string_to_print = ' '.join(Lemmatize_words(sentence))
            print(str(i) + '\t' + string_to_print , file=Normalized_Text_Stage_1_lemmatized)
            i+=1

Exemple #6

0

Afficher le fichier

Fichier : Pipeline.py Projet : L-Ramos/DCI_Prediction

import pylab

#def Create_statistics(X1,cols):
"""
-----------------------------------------------------------------------------------------------------------------
-----------------------------------------------------------------------------------------------------------------
"""
if __name__ == '__main__':

    pathtrain = 'E:\Prospective_dataset\ClinicalDataWithImage.csv'
    path_image_data = 'E:\\DCI Prediction\\Data\\Image_data'

    frame = pd.read_csv(pathtrain, sep=';')
    image_feats = True
    [X1, Y, cols,
     names] = dp.Fix_Dataset(frame, image_feats)  #false for no images features
    Features = mt.Connect_Image_Features(names, path_image_data)
    cols = pd.Index.tolist(cols)

    #important columns according to feature selection with RFC
    ind1 = cols.index('SAH_vol_ml')
    ind2 = cols.index('DIAGNOSIS_FISHER4_E1_C1_IPH')
    ind3 = cols.index('TIME_ICTUS_CTSCANDT')
    ind4 = cols.index('Age')
    ind5 = cols.index('ADMISSION_GCS_TOTAL_AMC_E1_C1')
    ind6 = cols.index('ANEURYSM_LENGTH_E1_C1_1')
    ind7 = cols.index('DIAGNOSIS_FISHER4_E1_C1_SDH')
    ind8 = cols.index('ANEURYSM_WIDTH_E1_C1_1')
    ind9 = cols.index('TREATMENT_E1_C1')
    ind10 = cols.index('ANEURYSM_LOCATION_E1_C1_1')

Exemple #7

0

Afficher le fichier

import Data_Preprocessing as dp
import Feature_Selection as fs

"""
-----------------------------------------------------------------------------------------------------------------
-----------------------------------------------------------------------------------------------------------------
"""
if __name__ == '__main__':
    #pathtrain ='E:\Prospective_dataset\DataWithFeats.csv'
    path_clin_data =r'E:\Prospective_dataset\ClinicalDataWithImage.csv'
    path_image_data = r'E:\Prospective\Data\Image_data'
    #pathtrain ='E:\Prospective_dataset\test150.csv'
    
    #dummy_ranks = pd.get_dummies(df['prestige'], prefix='prestige')
    frame = pd.read_csv(path_clin_data,sep=';')
    [X1,Y,cols,names]=dp.Fix_Dataset(frame,False) #false for no images features
    
    Features=mt.Connect_Image_Features(names,path_image_data)
    cols=pd.Index.tolist(cols)
    X=X1
    
    #first clinical features
    ind1=cols.index('SAH_vol_ml')
    ind2=cols.index('DIAGNOSIS_FISHER4_E1_C1_IPH')
    ind3=cols.index('TIME_ICTUS_CTSCANDT')
    ind4=cols.index('Age')
    ind5=cols.index('ADMISSION_GCS_TOTAL_AMC_E1_C1')
    ind6=cols.index('ANEURYSM_LENGTH_E1_C1_1')
    ind7=cols.index('DIAGNOSIS_FISHER4_E1_C1_SDH')
    ind8=cols.index('ANEURYSM_WIDTH_E1_C1_1')
    ind9=cols.index('TREATMENT_E1_C1')

Exemple #8

0

Afficher le fichier

                    new_words[key2] = words2[key2]

        else:
            break

    Linked_Words(new_words)


def Legal_Words_Linked_Words():
    with open("../Data_Cleaning/lemmatized_word_4.txt",
              "r") as lemmatized_word_file:
        for line in lemmatized_word_file:
            print(line.split())


if __name__ == "__main__":
    #global_starting_word = 'reapair'
    #words_by_alphebat = dict(model.most_similar(positive=[global_starting_word ], topn=4))
    #Linked_Words(words_by_alphebat)

    #Legal_Words_Linked_Words()
    #Print_Similar_Word()
    sentences = Data_Preprocessing.Sentences_Parser_2(
        '../Data_Cleaning/Cleaned_Data_10.txt')
    d = Data_Preprocessing.Build_Frequency_Dic(sentences)

    for w in sorted(d, key=d.get, reverse=True):
        word = w

    print(model.most_similar(positive=[word], topn=10))

Exemple #9

0

Afficher le fichier

Fichier : Pipeline.py Projet : yangkuner/MrClean_Machine_Learning

    #T=0.01
    #T=0.00

    #method='backward'
    #T=0

    path_variables = path_variables + feats_use + ".csv"
    #path_variables=path_variables+feats_use+".csv"
    path_results = 'E:\\Mrclean\\Results_Sens\\test' + '-' + feats_use + '-' + label_use + '-' + method + '\\'
    path_models = path_results + "\\Models"
    if not os.path.exists(path_results):
        os.makedirs(path_results)
        os.makedirs(path_models)

    [X, Y, cols, center,
     vals_mask] = pp.Fix_Dataset_csv(path_data, label_use, feats_use,
                                     path_variables)
    #data=pd.io.stata.read_stata(("E:\\Mrclean\\Data\\RegistryOpenclinicacheck_core.dta"))
    center, range_centers = pp.Combine_Center5_10(center)

    [X, cols] = pp.Encode_Variables(X, cols, vals_mask)

    cols = np.array(cols)
    np.save(path_results + 'cols.npy', cols)

    #X=pp.Normalize_Min_Max(X)

    num_feats = X.shape[1]
    splits = 100
    cv = 5

    mean_tprr = 0.0

Exemple #10

0

Afficher le fichier

Fichier : Logistic Regression.py Projet : leonardtia/Neural_Networks

def sigmoid(z):
    """
    Compute the sigmoid of z

    Arguments:
    z -- A scalar or numpy array of any size.

    Return:
    s -- sigmoid(z)
    """
    return 1 / (1 + np.exp(-z))


y = sigmoid(pd_y)

X_train, X_test, y_train, y_test = dp.get_TrainingSet_and_Test_set(
    pd_X.T, y.T, 0.3)

X_train = X_train.T
X_test = X_test.T
y_train = y_train.T
y_test = y_test.T


def get_y(Row):
    if Row[0] > 0.5:
        return 1
    else:
        return 0


y = pd.DataFrame(y).apply(get_y, axis=0).reshape(1, m)

Exemple #11

0

Afficher le fichier

Fichier : Text_Normalization_Preprocessing.py Projet : gyy150/Thesis

def stage_1():
    words = Data_Preprocessing.Words_Parser('./Processed_Data/',
                                            'vacab_alphebat.txt')
    s = aspell.Speller('lang', 'en')
    with open("./Processed_Data/Stage1/auto_corrected_words_v_3.txt",
              "w") as auto_corrected_words_2:
        with open("./Processed_Data/Stage1/auto_corrected_words_v_3.txt",
                  "w") as auto_corrected_words:
            with open("./Processed_Data/Stage1/misspelled_words.txt_2_v_3",
                      "w") as misspelled_words:
                with open(
                        "./Processed_Data/Stage1/spell_checked_words_2_v_3.txt",
                        "w") as spell_checked_words:
                    i = 1
                    misspelled_words_count = 0
                    auto_corrected_words_count = 0
                    for word in words:
                        is_word = s.check(word)
                        if not is_word:
                            misspelled_words_count += 1
                            correction_candidates_list = [
                                candidate for candidate in s.suggest(word)
                                if string_in_corpus(candidate) > 0
                            ]
                            correction_candidates = ' '.join(
                                correction_candidates_list)
                            most_similar_word = model.most_similar(
                                positive=[word], topn=1)
                            if len(correction_candidates_list
                                   ) > 0 and most_similar_word[0][
                                       0] == correction_candidates_list[0]:
                                string_to_print = '{0}\t{1:15}\t{2:10}\t{3:50}'.format(
                                    misspelled_words_count, word,
                                    correction_candidates_list[0],
                                    '**********************')
                                auto_corrected_words_count += 1
                                print(string_to_print,
                                      file=auto_corrected_words)
                            elif len(correction_candidates_list) == 1:
                                string_to_print = '{0}\t{1:15}\t{2:10}\t{3:50}'.format(
                                    misspelled_words_count, word,
                                    str(most_similar_word[0][0]),
                                    correction_candidates)
                                auto_corrected_words_count += 1
                                print(string_to_print,
                                      file=auto_corrected_words_2)
                            else:
                                string_to_print = '{0}\t{1:15}\t{2:10}\t{3:40}'.format(
                                    misspelled_words_count, word,
                                    str(most_similar_word[0][0]),
                                    correction_candidates)
                                print(string_to_print, file=misspelled_words)

                        string_to_print = '{0}\t{1:15}\t{2:6}'.format(
                            i, word, is_word)
                        i += 1

                        print(string_to_print, file=spell_checked_words)

                    print('Total incorrect words are' +
                          str(misspelled_words_count),
                          file=spell_checked_words)
                    print('Total incorrect words are' +
                          str(auto_corrected_words_count),
                          file=spell_checked_words)

Exemple #12

0

Afficher le fichier

Fichier : Text_Normalization_Preprocessing.py Projet : gyy150/Thesis

import gensim
import aspell
import Data_Preprocessing

model = gensim.models.Word2Vec.load('./Data/mymodel_19_1000')
word_list = list(model.wv.vocab)
word_list.sort(key=len, reverse=False)
words = sorted(word_list)

Data_Preprocessing.write_vacab_to_txt('./Processed_Data/vacab_alphebat.txt',
                                      words)
Data_Preprocessing.write_vacab_to_txt('./Processed_Data/vacab_length.txt',
                                      word_list)

with open("./Processed_Data/Cleaned_Data_15.txt", "r") as f:
    searchlines = f.read()


def string_in_corpus(string_to_search):
    return searchlines.find(string_to_search)


def stage_1():
    words = Data_Preprocessing.Words_Parser('./Processed_Data/',
                                            'vacab_alphebat.txt')
    s = aspell.Speller('lang', 'en')
    with open("./Processed_Data/Stage1/auto_corrected_words_v_3.txt",
              "w") as auto_corrected_words_2:
        with open("./Processed_Data/Stage1/auto_corrected_words_v_3.txt",
                  "w") as auto_corrected_words:
            with open("./Processed_Data/Stage1/misspelled_words.txt_2_v_3",

Exemple #13

0

Afficher le fichier

Fichier : ml_modeling.py Projet : Avighan/Analytics_Heroku

def run(st,data,mongocls,session_id):
    st.markdown("### Steps -")

    view_sample_data = st.checkbox("View Sample Data - ")
    if view_sample_data:

        if mongocls.get_session_info({'session_id': session_id + '_ml_df'}) is not None:
            st.dataframe(pd.DataFrame(mongocls.get_session_info({'session_id':session_id+'_ml_df'})["data_dict"]).head())
        else:
            st.dataframe(data.head())

    c_load,c_session,c_restrict_columns = st.beta_columns(3)
    if c_load.button("Reload Data!!!"):
        data = get_data(data,mongocls,session_id)


    if c_session.button("Reset Session!!!"):
        mongocls.delete_session({'session_id': session_id + '_ml_df'})

    if c_restrict_columns.checkbox("Restrict Columns: "):
        restrict_columns = st.multiselect("Restrict Columns ", data.columns.tolist())
        data = data[restrict_columns]

    expander = st.beta_expander("Data Preparation:",expanded=False)


    data = get_data(data,mongocls,session_id)
    columns = data.columns.tolist()
    with expander:
            c1,c2,c3 = st.beta_columns(3)
            fs = c1.checkbox("Feature Selection")
            if fs:
                st.text("Currently under development!!!")
                #fs_option = st.selectbox("Selection Option",['SelectKBest','RFE','PCA','LDA'])

            dimpute = c2.checkbox("Data Imputers")
            if dimpute:
                impute_options = st.selectbox("Impute Option",['SimpleImputer'])
                if impute_options:

                    imputer = dp.Imputers(dataframe=data,select_imputer=impute_options)
                    imputer.select_imputers(imputerSelect = impute_options)
                    data = imputer.fit_transform()
                    data = pd.DataFrame(data, columns=columns)
                    mongocls.delete_session({'session_id': session_id + '_ml_df'})
                    mongocls.write_session_info(
                        {'session_id': session_id + '_ml_df', 'data_dict': data.to_dict("records")})
                    st.dataframe(data.head())
    expander_enc = st.beta_expander("Data Encoding", expanded=False)
    with expander_enc:
        encode = st.checkbox("Apply Encoding")
        if encode:
            c1_encode,c2_encode,c3_encode = st.beta_columns(3)
            encoding_option = c1_encode.selectbox("Select Encoder",['LabelEncoder','OneHotEncoder','OrdinalEncoder','Binarizer','LabelBinarizer','MultiLabelBinarizer'])
            Y_col_encode = c2_encode.selectbox("Select Y (target) (Encoding)", data.columns.tolist())
            cat_columns =  c3_encode.multiselect("Select Categorical Columns to Encode",data.columns.tolist())
            encode_btn = st.button("Encode Data!!!")
            if encode_btn:
                if len(cat_columns)==0:
                    encode_cls = dp.Encoders(df=data,y=Y_col_encode)
                else:
                    encode_cls = dp.Encoders(df=data,y=Y_col_encode, cat_columns = cat_columns)
                encode_cls.select_encoder(encode_type=encoding_option)
                data = encode_cls.compile_encoding()
                st.dataframe(data.head())
                mongocls.delete_session({'session_id': session_id + '_ml_df'})
                mongocls.write_session_info(
                    {'session_id': session_id + '_ml_df', 'data_dict': data.to_dict("records")})

    expander_sample = st.beta_expander("Data Sampling", expanded=False)
    with expander_sample:
        c1, c2, c3, c4 = st.beta_columns(4)
        sample_options = c1.selectbox("Sampling Options", ["Over", "Under","RandomOverSampler"])
        sampling_ratio = c2.slider('Sampling Ratio', min_value=0.1, max_value=1.0, step=0.05)
        Y_col = c3.selectbox("Select Y (target) (Sampling)", data.columns.tolist())
        if Y_col != '':
            X_cols = c4.multiselect("Select X Columns (default is all)",
                                    [col for col in data.columns.tolist() if col != Y_col])
            if len(X_cols) <= 0:
                X_cols = [col for col in data.columns.tolist() if col != Y_col]
                X_val = data[X_cols]

            X_val = data[X_cols]
            Y_val = data[Y_col]
        else:
            st.warning("Please select Target column!!!")
        sampler_btn = st.button("Run Sampler")
        if sampler_btn:
            sample_cls = dp.Sampling(df=data[X_cols + [Y_col]], target=Y_col, sampling_option=sample_options)
            X_val, Y_val = sample_cls.run_sampler()
            data = (pd.concat([X_val, Y_val], axis=1))
            st.dataframe(data.head())
            mongocls.delete_session({'session_id': session_id + '_ml_df'})
            mongocls.write_session_info(
                {'session_id': session_id + '_ml_df', 'data_dict': data.to_dict("records")})

    expander_scale = st.beta_expander("Data Scaling", expanded=False)
    with expander_scale:
        scale = st.checkbox("Apply Scaling")
        if scale:
            c1, c2, c3 = st.beta_columns(3)
            scaling_options = c1.selectbox("Sampling Options", ["StandardScaler", "MaxAbsScaler", "MinMaxScaler","RobustScaler",
                                                                "Normalizer","PowerTransformer","QuantileTransformer"])
            Y_col = c2.selectbox("Select Y (target) (Scaling)", data.columns.tolist())
            cat_columns = c3.multiselect("Categorical Columns",[col for col in data.columns.tolist() if col != Y_col])
            scale_btn = expander_scale.button("Scale Data!!!")
            if scale_btn:
                scaling = dp.scaling(df=data,cat_columns=cat_columns,scalar_type=scaling_options,y=Y_col)
                data = scaling.compile_scalar()
                st.dataframe(data.head())
                mongocls.delete_session({'session_id': session_id + '_ml_df'})
                mongocls.write_session_info(
                    {'session_id': session_id + '_ml_df', 'data_dict': data.to_dict("records")})
                pass

    expander_model = st.beta_expander("Model Training", expanded=False)
    with expander_model:
        c1,c2,c3 =  st.beta_columns(3)
        model_type = c1.selectbox("Model Type",['Regression','Classification'])
        y_col = c2.selectbox("Select Target Variable",data.columns.tolist())
        model_exe = ml_models.MLmodels(df=data,y_column=y_col,problem_type=model_type)
        select_models = c3.multiselect("Select ML models",model_exe.get_model_list())
        run_models = st.button("Run Models - ")
        model_storage = {}
        if run_models:
            for model in select_models:
                model_exe.select_model_to_run(model_select=model)
                model_storage[model]= model_exe.compile_modeling()
                train_x, train_y, test_x, test_y = model_exe.get_train_test()
            mongocls.delete_session({'session_id': session_id + '_models_ran'})
            mongocls.write_session_info(
                {'session_id': session_id + '_models_ran','models_trained': pickle.dumps(model_storage),'train_test_data':pickle.dumps(
                    {
                        'train_x':train_x,
                        'train_y':train_y,
                        'test_x':test_x,
                        'test_y':test_y
                    }
                )})
        if len(model_storage.keys())>0:
            st.write("Model Run completed for the below models - ")
            st.code(model_storage)

    expander_metrics = st.beta_expander("Evaluation Metrics", expanded=False)
    with expander_metrics:
        #try:
            models_trained = pickle.loads(mongocls.get_session_info({'session_id': session_id + '_models_ran'})["models_trained"])
            loaded_info = pickle.loads(mongocls.get_session_info({'session_id': session_id + '_models_ran'})["train_test_data"])
            train_x = loaded_info['train_x']
            train_y = loaded_info['train_y']
            test_x = loaded_info['test_x']
            test_y = loaded_info['test_y']
            metric_selected = {}
            for model,trained_model in models_trained.items():
                st.text(model)
                metric_cls = ml_metrics.Metrics(y_test=test_y)
                metric_selected[model] = st.multiselect('Select Metrics to see for the Model ('+model+')',metric_cls.get_metric_list())
            metrics_btn = st.button("Click to see the metrics")
            if metrics_btn:
                for model,metrics in metric_selected.items():
                    for metric in metrics:
                        metric_cls.select_metrics(metric)
                        st.write(metric)
                        st.write(metric_cls.metrics_solve(estimator=models_trained[model], test_x=test_x))

        #except:
        #    st.warning("No Models trained yet!!!")

Exemple #14

0

Afficher le fichier

def main():

    Data_Preprocessing.main_process()
    Naive_Bayes_Classifier.data_load()

Exemple #15

0

Afficher le fichier

Fichier : Main.py Projet : OSalmona/Predict-Mobile-Game-Success-Rating

import Classification
import Classification_Testing
import Data_Preprocessing
import numpy as np
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler

print("Appstore games Classification")

input = int(input("Press 1 To Train Models Or 2 To Test Model :: "))
if input == 1:
    Classification.Trainig()
else:
    pre_processing = Data_Preprocessing.Pre_Processing()
    X, Y = pre_processing.PreProcessing_Testing()

    print("Testing Before PCA Algorithm")
    Testing = Classification_Testing.Classification_Testing(X, Y)
    Testing.OneVsOnelinear(0)
    Testing.OneVsOne_LinearSVC(0)
    Testing.OneVsOne_ploy(0)
    Testing.OneVsOne_rbf(0)
    Testing.adaBoost(0)
    Testing.decisionTree(0)
    Testing.KNN(0)

    xtrain = np.load('modelsPCA/traindata.npy', allow_pickle=True)
    ytrain = np.load('modelsPCA/trainlabel.npy', allow_pickle=True)
    Testing.PCA_algorithm(xtrain, ytrain)