from _0_DataCreation.Read_Data import load_dataframe
from Scoring.scoring_func import f1_scores_plot
from sklearn.linear_model import LogisticRegression
import pandas as pd
import numpy as np
from General.Paths import Gitlab_Path

# Load the three cross-validation folds and the test set
# (feature files that include the NA-indicator columns).
fold1_df = load_dataframe(filename='fold1_NA_features.dat')
fold2_df = load_dataframe(filename='fold2_NA_features.dat')
fold3_df = load_dataframe(filename='fold3_NA_features.dat')
testset_df = load_dataframe(filename='testSet_NA_features.dat')

# Model inputs: the first 10 PCA components plus a few raw features and
# their NA indicators — all "last observation" snapshots.
subset_cols = ['pca_1_last', 'pca_2_last', 'pca_3_last', 'pca_4_last', 'pca_5_last',
               'pca_6_last', 'pca_7_last', 'pca_8_last', 'pca_9_last', 'pca_10_last',
               'R_VALUE_last', 'XR_MAX_last', 'NA_satellite_last', 'NA_SHARPmask_last',
               'NA_Rmask_last', 'NA_XR_MAX_last']
fold1_subset = fold1_df[subset_cols]
fold2_subset = fold2_df[subset_cols]
fold3_subset = fold3_df[subset_cols]
testset_subset = testset_df[subset_cols]

## Run the logistic regression from R (train on fold 1).
LR = LogisticRegression()
LR.fit(fold1_subset, fold1_df['label'])

# Score on fold 2: keep the probability of the positive class only.
my_preds = LR.predict_proba(fold2_subset)[:, 1]
true_vals = fold2_df['label']
temp = f1_scores_plot(my_preds, true_vals)  # Almost the same (as the R fit). Slightly below.

# Fit on fold3 with resize and extract best score
# NOTE(review): fold-3 predictions are computed here; presumably the scoring
# continues below this chunk — confirm against the rest of the file.
my_preds = LR.predict_proba(fold3_subset)[:, 1]
### Helpers to load and save arbitrary Python objects with pickle.
import pickle


def save_obj(obj, name):
    """Pickle `obj` to '<name>.pkl' using the highest available protocol."""
    with open(name + '.pkl', 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)


def load_obj(name):
    """Load and return the object pickled at '<name>.pkl'.

    NOTE(review): pickle.load can execute arbitrary code on malicious input —
    only call this on files this project wrote itself.
    """
    with open(name + '.pkl', 'rb') as f:
        return pickle.load(f)


### Load and merge data
fold1_df = load_dataframe(filename='fold1_NA_all_last.dat')
fold2_df = load_dataframe(filename='fold2_NA_all_last.dat')
fold3_df = load_dataframe(filename='fold3_NA_all_last.dat')
test_df = load_dataframe(filename='testSet_NA_all_last.dat')

# Drop the fold labels before concatenating — presumably the test set has no
# 'label' column, so this keeps all four frames structurally identical (verify).
del fold1_df['label'], fold2_df['label'], fold3_df['label']
all_sets = pd.concat([fold1_df, fold2_df, fold3_df, test_df])
del fold1_df, fold2_df, fold3_df, test_df  # free memory before the next step

## Keep only the "last observation" feature columns.
# (idiom: str.endswith instead of slicing x[-4:] — exactly equivalent)
last_cols = [x for x in all_sets.columns.values if x.endswith('last')]
fold_last = all_sets[last_cols]
del all_sets
fold_last.columns = [x[:-5] for x in fold_last.columns]  # strip the '_last' suffix
# Train from the streaming generators. steps_per_epoch / validation_steps tell
# Keras how many batches make up one pass over each (endless) generator.
# NOTE(review): fit_generator/predict_generator are legacy Keras APIs — newer
# Keras versions fold these into fit()/predict(); confirm the installed version.
my_model.fit_generator(generator=train_gen,
                       validation_data=valid_gen,
                       # When should it stop the epoch and start the next one?
                       steps_per_epoch = np.ceil( (n_lines['fold1'] + n_lines['fold2'])/ batch_size),
                       # When should it stop the epoch and start the next one?
                       validation_steps= np.ceil( n_lines['fold3']/ batch_size),
                       epochs=3)

# Create a new generator for prediction — the one above was presumably consumed
# during validation, so it is no longer positioned at the start of the file.
valid_gen = batch_generator(filename=Data_Path + '/fold3_NA.dat',
                            batch_size=batch_size,
                            num_features=num_features)
preds = my_model.predict_generator(valid_gen,
                                   steps = np.ceil( n_lines['fold3']/ batch_size)
                                   ).flatten()
true_vals = load_dataframe(filename = 'fold3_NA_features.dat')['label']
# We predict np.ceil(n/batch_size) batches, so the generator restarts and emits
# a few extra rows — trim the predictions back to the number of true labels.
preds = preds[:len(true_vals)]
f1_scores_plot(preds, true_vals)
f1_scores_plot(preds, true_vals, resize = True)
from _0_DataCreation.Read_Data import load_dataframe
from sklearn.ensemble import RandomForestRegressor
import numpy as np
import pandas as pd
from time import time

# Load the two training folds and drop the id column (not a feature).
fold1_df = load_dataframe(filename='fold1_NA_features.dat')
fold2_df = load_dataframe(filename='fold2_NA_features.dat')
del fold1_df['id']
del fold2_df['id']

## Split fold 1 by class label.
one_rows = fold1_df.loc[fold1_df['label'] == 1, :]
zero_rows = fold1_df.loc[fold1_df['label'] == 0, :]

## Partition the majority (zero) class into ceil(#zeros / #ones) chunks, one
## per forest, so each forest can train on a balanced subsample. The first
## n_rfs - 1 chunks have n_samples rows each; the final chunk takes whatever
## remains.
n_rfs = int(np.ceil(len(zero_rows) / len(one_rows)))
n_samples = int(round(len(zero_rows) / n_rfs, 0))
trains = [zero_rows.iloc[start:start + n_samples, :]
          for start in range(0, n_samples * (n_rfs - 1), n_samples)]
trains.append(zero_rows.iloc[n_samples * (n_rfs - 1):, :])

## Each forest considers a quarter of the columns as candidate features.
n_features = len(fold1_df.columns) // 4