processing = Processing_DB_Files()
project = Project()

# Hidden-layer tuple for the MLP: 500 hidden layers of 500 neurons each
t_aux = []
for i in range(0, 500):
    t_aux.append(500)
t = tuple(t_aux)
####
classifiers = {"MLP": MLPClassifier(random_state=1, solver="adam", activation="relu", max_iter=100000, alpha=1e-5, hidden_layer_sizes=t),
               "Extratrees": ExtraTreesClassifier(n_estimators=1000, random_state=1),
               "Knn": KNeighborsClassifier(n_neighbors=5),
               "Naive Bayes": GaussianNB(),
               "RandomForest": RandomForestClassifier(n_estimators=1000, random_state=1),
               "Decision Tree": tree.DecisionTreeClassifier(random_state=1),
               "SVM": svm.SVC(probability=True, random_state=1)}
persons = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
get_accuracy = Get_Accuracy()
balance_data = BalanceData()
threshold_balance_data = 40

# Select the best classifier
accuracy_mean = pd.DataFrame(columns=["Classifier", "Accuracy"])
project.log("=====================ARCMA_SELECT_BEST_ALGORITHM=====================", file="arcma_best_algorithm.log")
for c in classifiers:
    print(c)
    person_accuracies = []
    person_f_score = []
    person_precision = []
    person_recall = []
    times_to_predict = []
    for p in persons:
        s = save()
        try:
            relevant_features = s.load_var("arcma_relevant_features_best_window{}relevant_features_{}.pkl".format(slash, p))
            y = s.load_var("arcma_relevant_features_best_window{}y_{}.pkl".format(slash, p))
            y = pd.DataFrame(y, columns=[arcma.label_tag])
        except Exception:
            print("file from person {} not found!".format(p))
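
# Illustrative sketch (not part of the original script): one way the per-person metrics
# aggregated later (accuracy, precision, recall, f-score, prediction time) could be computed
# for a single classifier, assuming an 80/20 hold-out split; the helper name and its
# parameters are hypothetical.
import time

import numpy as np
from sklearn.base import clone
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from sklearn.model_selection import train_test_split


def evaluate_classifier_for_person(classifier, x, y, test_size=0.2, random_state=42):
    """Hold-out accuracy, precision, recall, f-score and seconds-per-prediction for one person."""
    labels = np.asarray(y).ravel()
    x_train, x_test, y_train, y_test = train_test_split(x, labels, test_size=test_size, random_state=random_state)
    model = clone(classifier)  # fresh copy so repeated evaluations do not share fitted state
    model.fit(x_train, y_train)
    start = time.time()
    pred = model.predict(x_test)
    spent_time = (time.time() - start) / len(y_test)
    precision, recall, f_score, _ = precision_recall_fscore_support(y_test, pred, average="weighted")
    return accuracy_score(y_test, pred), precision, recall, f_score, spent_time

# Hypothetical usage inside the person loop above:
# accuracy, precision, recall, f_score, spent_time = evaluate_classifier_for_person(classifiers[c], relevant_features, y)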
extra_trees = ExtraTreesClassifier(n_estimators=1000, random_state=0)
get_accuracy = Get_Accuracy()
balance_data = BalanceData()
threshold_balance_data = 40

#===LOAD FEATURES===#
# Iterate over the threshold to find the best value
persons = ["f1", "m1", "m2", "f2", "m3", "f3", "m4", "m5", "m6", "m7", "f4", "m8", "m9", "f5", "m10", "m11"]
accuracy_by_person = pd.DataFrame()
threshold = 0.65
project.log("=========== HMP StratifiedKFold Accuracy, Threshold = {}===========".format(threshold),
            file="hmp_log_final_accuracy.log")
for p in persons:
    s = save()
    relevant_features = s.load_var("hmp_relevant_features_best_window{}relevant_features_{}.pkl".format(slash, p))
    y = s.load_var("hmp_relevant_features_best_window{}y_{}.pkl".format(slash, p))
    y = pd.DataFrame(y, columns=[hmp.label_tag])
    skf = StratifiedKFold(n_splits=10, random_state=None, shuffle=False)
    accuracy = {}
    accuracies = []
    balanced_data = balance_data.balance_data(relevant_features, y, threshold_balance_data)
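
# Illustrative sketch (not part of the original script): one way the 10-fold setup above
# could consume `balanced_data`, assuming balance_data.balance_data returns an (X, y) tuple
# of DataFrames, as in the other scripts; the helper name is hypothetical.
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold


def stratified_kfold_mean_accuracy(x, y, model, n_splits=10):
    """Mean accuracy of `model` across stratified folds of (x, y)."""
    skf = StratifiedKFold(n_splits=n_splits, shuffle=False)
    fold_accuracies = []
    for train_idx, test_idx in skf.split(x, y.values.ravel()):
        model.fit(x.iloc[train_idx], y.iloc[train_idx].values.ravel())
        pred = model.predict(x.iloc[test_idx])
        fold_accuracies.append(accuracy_score(y.iloc[test_idx].values.ravel(), pred))
    return sum(fold_accuracies) / len(fold_accuracies)

# Hypothetical usage with the variables defined above:
# if isinstance(balanced_data, tuple):
#     accuracies.append(stratified_kfold_mean_accuracy(balanced_data[0], balanced_data[1], extra_trees))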
hmp = HMP_Model()
processing = Processing_DB_Files()
project = Project()
extra_trees = ExtraTreesClassifier(n_estimators=10000, random_state=0)
base_classification = Base_Classification(hmp, extra_trees)

#===LOAD FEATURES===#
# Iterate over the threshold to find the best value
s = save()
person_list = ["f1", "m1", "m2"]
accuracy_threshould_list = []
data = {}
threshold = 0.35
project.log("=========== HMP Outlier Accuracy, Threshold = {}===========".format(threshold))
for p in person_list:
    project.log("===========Person {}===========".format(p))
    data = s.load_var("hmp_relevant_features{}relevant_features_{}.pkl".format(slash, p))
    y = s.load_var("hmp_relevant_features{}y_{}.pkl".format(slash, p))
    y = pd.DataFrame(y, columns=[hmp.label_tag])
    print("------------------------------------")
    print("Person: {}".format(p))
    print("------------------------------------")
    return_accuracy = base_classification.get_accuracy.stratified_kfold_accuracy_outlier(data, y, extra_trees, threshold, p)
    project.log(str(return_accuracy), file="hmp_log.log")
#===INITIALIZATION===#
Debug.DEBUG = 0
arcma = ARCMA_Model()
processing = Processing_DB_Files()
project = Project()
persons = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
get_accuracy = Get_Accuracy()
balance_data = BalanceData()
threshold_balance_data = 40

# Select the best window
t = time.time()
best_model = ExtraTreesClassifier(n_estimators=1000, random_state=0)
w_accuracies = pd.DataFrame(columns=["window", "accurary"])
p = 15  # person with the most records
project.log("=====================ARCMA_SELECT_BEST_WINDOWS=====================", file="arcma_log_best_window.log")
for w in range(20, 110, 10):
    print("Load data with window len = {}".format(w))
    data = arcma.load_training_data_by_people(p)
    print("Slicing Window....")
    data_tsfresh, y = arcma.slice_by_window_tsfresh(data, w)
    y.index += 1
    del data_tsfresh["activity"]
    classes_counts = y.value_counts()
    if len(classes_counts) > 1:
        relevant_features = extract_relevant_features(data_tsfresh, y, column_id='id', column_sort='time')
        y = pd.DataFrame(y, columns=[arcma.label_tag])
        balanced_data = balance_data.balance_data(relevant_features, y, threshold_balance_data)
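
# Illustrative sketch (not part of the original script): once the loop above has filled
# `w_accuracies`, the best window length can be read off as below; the helper name is
# hypothetical and the column names match the ones defined above.
def best_window(w_accuracies):
    """Return the window length with the highest recorded accuracy."""
    idx = w_accuracies["accurary"].astype(float).idxmax()
    return int(w_accuracies.loc[idx, "window"])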
column_id='id', column_sort='time')

# 1 - Accuracy with all relevant ts_features
X_train, X_test, y_train, y_test = train_test_split(relevant_features, y2, test_size=0.2, random_state=42)
extra_trees = ExtraTreesClassifier(n_estimators=10000, max_depth=1000, random_state=0)
extra_trees.fit(X_train, y_train)
start_time = time.time()
pred = extra_trees.predict(X_test)
end_time = time.time()
accuracy = accuracy_score(y_test, pred)
project.log("Accuracy with all ts_features ({}): {} - Time: {} seconds per sample.".format(
    len(relevant_features.columns), accuracy, (end_time - start_time) / len(y_test)))

# 2 - Accuracy with the 10% most relevant ts_features
# Rank the features by importance before discarding the fitted model and the splits.
ts_extratree_features_importance = pd.DataFrame(extra_trees.feature_importances_,
                                                index=X_train.columns,
                                                columns=['importance']).sort_values('importance', ascending=False)
time.sleep(10)
del X_train, X_test, y_train, y_test, extra_trees, start_time, pred, accuracy
time.sleep(10)

len_features = len(ts_extratree_features_importance)
best_features = ts_extratree_features_importance.index[0:int((len_features / 10) - 1)]
ts_final_features = relevant_features[best_features]
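
# Illustrative sketch (not part of the original script): the natural next step after building
# `ts_final_features` is to repeat the hold-out evaluation on the reduced feature set; a minimal
# version, assuming `y2` and `project` are the objects already defined in this script and the
# helper name is hypothetical.
import time

from sklearn.ensemble import ExtraTreesClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split


def holdout_accuracy(features, labels, n_estimators=10000, max_depth=1000):
    """Hold-out accuracy and seconds-per-prediction for an ExtraTrees model."""
    x_train, x_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)
    model = ExtraTreesClassifier(n_estimators=n_estimators, max_depth=max_depth, random_state=0)
    model.fit(x_train, y_train)
    start = time.time()
    pred = model.predict(x_test)
    seconds_per_sample = (time.time() - start) / len(y_test)
    return accuracy_score(y_test, pred), seconds_per_sample

# Hypothetical usage with the objects defined above:
# accuracy_10, time_10 = holdout_accuracy(ts_final_features, y2)
# project.log("Accuracy with the top 10% ts_features ({}): {} - Time: {} seconds per sample.".format(
#     len(ts_final_features.columns), accuracy_10, time_10))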
Debug.DEBUG = 0
arcma = ARCMA_Model()
processing = Processing_DB_Files()
project = Project()
extra_trees = ExtraTreesClassifier(n_estimators=1000, random_state=0)
base_classification = Base_Classification(arcma, extra_trees)
balance_data = BalanceData()
threshold_balance_data = 40

# Iterate over the threshold to find the best value
s = save()
person_list = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
accuracy_threshould_temp_aux = pd.DataFrame(columns=["accuracy", "discarted", "len_activity", "threshold"])
accuracy_mean = pd.DataFrame(columns=["accuracy", "discarted", "len_activity", "threshold"])
project.log("====================ARCMA BEST THRESHOLD============================", file="arcma_log_best_threshold.log")
for t in np.arange(0.05, 1, 0.05):
    accuracy_threshould_temp_aux = pd.DataFrame(columns=["accuracy", "discarted", "len_activity"])
    for p in person_list:
        relevant_features = s.load_var("arcma_relevant_features_best_window{}relevant_features_{}.pkl".format(slash, p))
        y = s.load_var("arcma_relevant_features_best_window{}y_{}.pkl".format(slash, p))
        y = pd.DataFrame(y, columns=[arcma.label_tag])
        balanced_data = balance_data.balance_data(relevant_features, y, threshold_balance_data)
        if isinstance(balanced_data, tuple):
            x_train, x_test, y_train, y_test = train_test_split(balanced_data[0], balanced_data[1], test_size=0.2, random_state=42)
            data = {}
            data[p] = {}
            data[p]["training"] = {}
            data[p]["training"]["training_features"] = x_train
            data[p]["training"]["training_labels"] = y_train
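
# Illustrative sketch (not part of the original script): the threshold swept above is typically
# applied by discarding test windows whose highest class probability falls below it; a minimal
# version of that idea for a fitted classifier exposing predict_proba. The function name and
# return values are hypothetical.
import numpy as np


def predict_with_rejection(model, x_test, y_test, threshold):
    """Accuracy on confident predictions only, plus how many windows were discarded."""
    probabilities = model.predict_proba(x_test)
    confident = probabilities.max(axis=1) >= threshold
    n_discarded = int(len(x_test) - confident.sum())
    if not confident.any():
        return np.nan, n_discarded, len(x_test)
    pred = model.classes_[probabilities[confident].argmax(axis=1)]
    true_labels = np.asarray(y_test).ravel()[confident]
    accuracy = float((pred == true_labels).mean())
    return accuracy, n_discarded, len(x_test)

# Hypothetical usage with the split created above:
# extra_trees.fit(x_train, y_train.values.ravel())
# accuracy_t, discarded_t, total_t = predict_with_rejection(extra_trees, x_test, y_test, t)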
#===INITIALIZATION===#
Debug.DEBUG = 0
umafall = UMAFALL_Model()
processing = Processing_DB_Files()
project = Project()
extra_trees = ExtraTreesClassifier(n_estimators=10000, random_state=0)
base_classification = Base_Classification(umafall, extra_trees)

#===LOAD FEATURES===#
# Iterate over the threshold to find the best value
s = save()
person_list = [14, 15, 16, 17]
accuracy_threshould_list = []
data = {}
threshold = 0.65
project.log("=========== UMAFALL Outlier Accuracy, Threshold = {}===========".format(threshold), file="umafall_log.log")
for p in person_list:
    project.log("===========Person {}===========".format(p), file="umafall_log.log")
    data = s.load_var("umafall_relevant_features{}relevant_features_{}.pkl".format(slash, p))
    y = s.load_var("umafall_relevant_features{}y_{}.pkl".format(slash, p))
    y = pd.DataFrame(y, columns=[umafall.label_tag])
    print("------------------------------------")
    print("Person: {}".format(p))
    print("------------------------------------")
    return_accuracy = base_classification.get_accuracy.stratified_kfold_accuracy_outlier(data, y, extra_trees, threshold, p,
                                                                                          column_test=list(data.columns.values)[0])
    project.log(str(return_accuracy), file="umafall_log.log")
project = Project()
extra_trees = ExtraTreesClassifier(n_estimators=10000, random_state=0)
base_classification = Base_Classification(arcma, extra_trees)
balance_data = BalanceData()
threshold_balance_data = 40

#===LOAD FEATURES===#
# Iterate over the threshold to find the best value
s = save()
person_list = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
accuracy_threshould_list = []
data = {}
threshold = 0.60
project.log("=========== ARCMA Outlier Accuracy, Threshold = {}===========".format(threshold), file="arcma_log_outlier_accuracy.log")
for p in person_list:
    project.log("===========Person {}===========".format(p), file="arcma_log_outlier_accuracy.log")
    data = s.load_var("arcma_relevant_features_best_window{}relevant_features_{}.pkl".format(slash, p))
    y = s.load_var("arcma_relevant_features_best_window{}y_{}.pkl".format(slash, p))
    y = pd.DataFrame(y, columns=[arcma.label_tag])
    print("------------------------------------")
    print("Person: {}".format(p))
    print("------------------------------------")
#===INITIALIZATION===#
Debug.DEBUG = 0
umafall = UMAFALL_Model()
processing = Processing_DB_Files()
project = Project()
persons = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
get_accuracy = Get_Accuracy()
balance_data = BalanceData()
threshold_balance_data = 40

# Select the best window
t = time.time()
best_model = ExtraTreesClassifier(n_estimators=1000, random_state=0)
w_accuracies = pd.DataFrame(columns=["window", "accurary"])
p = 1  # person with the most records
project.log("=====================UMAFALL_SELECT_BEST_WINDOWS=====================", file="umafall_log_best_window.log")
for w in range(10, 110, 10):
    print("Load data with window len = {}".format(w))
    data = umafall.load_training_data_by_people(p, additional_where="and sensor=2 and body=2")
    print("Slicing Window....")
    data_tsfresh, y = umafall.slice_by_window_tsfresh(data, w)
    y.index += 1
    del data_tsfresh["activity"]
    classes_counts = y.value_counts()
    if len(classes_counts) > 1:
        relevant_features = extract_relevant_features(data_tsfresh, y,
                                                      column_id='id', column_sort='time')
            person_accuracies.append(accuracy)
            times_to_predict.append(spent_time)
            person_precision.append(precision)
            person_recall.append(recall)
            person_f_score.append(f_score)
            person_discarteds.append(discarteds)
            person_len_activities.append(len_activity)
        except Exception as e:
            print(e)
    out_aux = pd.DataFrame({"Classifier": [type(classifiers[c]).__name__],
                            "Threshold": t,
                            "Accuracy": [st.mean(person_accuracies)],
                            "Precision": [st.mean(person_precision)],
                            "Recall": [st.mean(person_recall)],
                            "F-Score": [st.mean(person_f_score)],
                            "Time": [st.mean(times_to_predict)],
                            "Discarteds": [st.mean(person_discarteds)],
                            "Len Activities": [st.mean(person_len_activities)]})
    accuracy_mean = pd.concat([accuracy_mean, out_aux])
accuracy_mean.to_csv(s.path + "new_results{}{}_best_threshold_balanced_data_window{}.csv".format(slash, model['model_name'], model['window']),
                     sep='\t', encoding='utf-8')

'''
# Iterate over the threshold to find the best value
s = save()
person_list = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
accuracy_threshould_temp_aux = pd.DataFrame(columns=["accuracy", "discarted", "len_activity", "threshold"])
accuracy_mean = pd.DataFrame(columns=["accuracy", "discarted", "len_activity", "threshold"])
project.log("====================ARCMA BEST THRESHOLD============================", file="arcma_log_best_threshold.log")
for t in np.arange(0.05, 1, 0.05):
    accuracy_threshould_temp_aux = pd.DataFrame(columns=["accuracy", "discarted", "len_activity"])
    for p in person_list:
        relevant_features = s.load_var("arcma_relevant_features_best_window{}relevant_features_{}.pkl".format(slash, p))
        y = s.load_var("arcma_relevant_features_best_window{}y_{}.pkl".format(slash, p))
        y = pd.DataFrame(y, columns=[arcma.label_tag])
        balanced_data = balance_data.balance_data(relevant_features, y, threshold_balance_data)