def extract_rtp_known_state(self, path_list):
    """Build a class-balanced RTP dataset from files of patients with a known state.

    Every file in ``path_list`` is read, transformed and cut into fixed-length
    sequences; the sequences are collected separately for healthy and diseased
    patients, reshaped into two tensors, balanced and finally concatenated.

    :param path_list: iterable of sample-file paths.
    :return: a ``(tensor, states, length)`` tuple: the concatenated tensor
        (healthy sequences first, diseased ones after), the matching array of
        state labels and ``len(tensor)``.
    """
    # Per-class accumulators in the order: x, y, button status, new stroke.
    # They are filled in place by __create_sample_sequence.
    healthy_acc = [[], [], [], []]
    disease_acc = [[], [], [], []]

    for sample_path in path_list:
        # Resolve the patient id and from it the patient's health state.
        patient_id = HandManager.get_id_from_path(sample_path)
        patient_state = HandManager.get_state_from_id(patient_id)
        # Read the points sampled in the file.
        xs, ys, bs = RTPExtraction.__read_samples_from_file(sample_path)
        # Convert the raw points into segments.
        xs, ys, bs = self.__transform_point_in_rtp(xs, ys, bs)
        strokes = RTPExtraction.__get_new_stroke(bs)
        # Original author's note: this step doubles the number of samples in
        # order to enlarge the dataset.
        xs, ys, bs, strokes = self.__extract_subs_from_samples(
            xs, ys, bs, strokes)
        # Route the samples to the accumulator of the patient's class, then
        # generate one-dimensional sequences of the length the model requires.
        target = healthy_acc if patient_state == HEALTHY_STATE else disease_acc
        self.__create_sample_sequence(*target, xs, ys, bs, strokes)

    # Once extraction is complete, build one three-dimensional tensor per class.
    healthy_x, healthy_y, healthy_bs, healthy_strokes = healthy_acc
    disease_x, disease_y, disease_bs, disease_strokes = disease_acc
    healthy_tensor = np.reshape(
        np.array(healthy_x + healthy_y + healthy_bs + healthy_strokes),
        (len(healthy_x), self.__num_samples, FEATURES))
    disease_tensor = np.reshape(
        np.array(disease_x + disease_y + disease_bs + disease_strokes),
        (len(disease_x), self.__num_samples, FEATURES))

    # Balance the two classes so the dataset is balanced in every situation.
    healthy_tensor, disease_tensor = HandManager.balance_dataset(
        healthy_tensor, disease_tensor)

    # The final tensor lists all healthy sequences first and all diseased ones
    # afterwards; the original author reports that an alternating layout was
    # tried and gave poor results.
    final_tensor = np.concatenate((healthy_tensor, disease_tensor))

    # Label vector, laid out in the same order as the final tensor.
    states = ([HEALTHY_STATE] * len(healthy_tensor)
              + [DISEASE_STATE] * len(disease_tensor))
    return np.array(final_tensor), np.array(states), len(final_tensor)
def get_task_from_paths(paths, tasks):
    """Group the paths that belong to the requested tasks by patient id.

    :param paths: iterable of file paths to inspect.
    :param tasks: container of task tags in the ``"_<task>."`` form.
    :return: dict mapping each patient id to the list of its matching paths,
        in the order they appear in ``paths``.
    """
    grouped = {}
    for file_path in paths:
        patient_id = HandManager.get_id_from_path(file_path)
        # Tasks are compared in their "_<task>." form.
        task_tag = "_" + HandManager.get_task_from_path(file_path) + "."
        if task_tag not in tasks:
            continue
        grouped.setdefault(patient_id, []).append(file_path)
    return grouped
def extract_rhs_file(self, path):
    """Extract the sequence tensor and state labels of a single sample file.

    :param path: path of the sample file; the patient id (and from it the
        patient state) is derived from this path.
    :return: ``(tensor, states)`` where ``tensor`` is reshaped to
        ``(number of sequences, self.__num_samples, FEATURES)`` and ``states``
        repeats the patient's state once per sequence.
    """
    patient_id = HandManager.get_id_from_path(path)
    patient_state = HandManager.get_state_from_id(patient_id)

    # Read the raw samples, transform them and extract the sub-sequences.
    xs, ys, bs = RHSDistanceExtract.read_samples_from_file(path)
    xs, ys, bs = self.__transform_point_in_rhs(xs, ys, bs)
    xs, ys, bs = self.__extract_subs_from_samples(xs, ys, bs)

    # The three accumulators are filled in place.
    x_samples, y_samples, bs_samples = [], [], []
    self.__create_sample_sequence(x_samples, y_samples, bs_samples, xs, ys, bs)

    sequence_count = len(x_samples)
    tensor = np.reshape(
        x_samples + y_samples + bs_samples,
        (sequence_count, self.__num_samples, FEATURES))
    states = [patient_state] * sequence_count
    return np.array(tensor), np.array(states)
def execute_emothaw_experiment(self):
    """Run the trained model on the selected Emothaw task files and print the outcome.

    For each patient and each of the patient's files belonging to the test
    tasks, the file is converted to a tensor, predictions are requested from
    the model and the percentage of predictions equal to ``0`` (reported as
    "Healthy") and of all other predictions (reported as "Disease") is printed.
    """
    file_manager = HandManager("ConvertedEmothaw")
    rhs_extraction = RHSDistanceExtract(self.__file_samples, NUM_FILE_SAMPLES)
    ids_task = TaskManager.get_task_from_paths(
        file_manager.get_files_path(), self.__test_task)
    for patient_id, paths in ids_task.items():
        for task_path in paths:
            # extract_rhs_file returns a (tensor, states) pair; only the tensor
            # is a model input, so the labels are discarded here.
            tensor, _ = rhs_extraction.extract_rhs_file(task_path)
            result = self.__ml_model.predict_result(tensor)
            counter_result = Counter(result)
            print("Id: ", patient_id, "file: ", task_path)
            total = len(result)
            if total == 0:
                # Nothing was predicted: avoid a division by zero.
                print("No predictions available.")
                continue
            # Counter indexing yields 0 for a missing key, whereas .get(0)
            # yields None and would make the division raise a TypeError.
            healthy = counter_result[0] / total * 100
            print("Healthy: ", healthy, "%")
            print("Disease: ", 100 - healthy, "%")
def start_experiment(self):
    """Run the leave-one-out experiment and write the aggregated metrics to disk.

    For every patient id the patient's files are held out as the test set, the
    remaining files are split 80/20 into training and validation sets, a model
    is built and tested, and the predictions are accumulated. The metrics
    computed over all accumulated predictions are written to
    ``experiment_5.txt`` inside ``EXPERIMENT_RESULT``.

    NOTE(review): the original indentation was lost; the result file is written
    once after all patients have been processed. Confirm that this matches the
    intended placement.
    """
    print("Leave one out experiment start.")
    feature_extraction = RHSDistanceExtract(MINIMUM_SAMPLES, SAMPLES)
    global_results = np.zeros(0)
    global_states = np.zeros(0)

    for test_id in self.__patients:
        print("Test id: ", test_id)
        print("Deleting test file...")
        deleted_paths = HandManager.delete_files(test_id, self.__patients_paths)
        # The last 20% (rounded up) of the remaining files is used for validation.
        validation_number = int(np.ceil(len(deleted_paths) * 0.20))
        split_index = len(deleted_paths) - validation_number
        training_file = deleted_paths[:split_index]
        validation_file = deleted_paths[split_index:]
        test_file = HandManager.get_all_file_of_id(test_id, self.__patients_paths)

        print("Creating training tensor...")
        training_tensor, training_states, _ = feature_extraction.extract_rhs_known_state(training_file)
        print("Creating validation tensor...")
        validation_tensor, validation_states, _ = feature_extraction.extract_rhs_known_state(validation_file)

        test_tensor = np.zeros((0, SAMPLES * 2, FEATURES))
        test_states = np.zeros(0)
        print("Creating test tensor...")
        for test_path in test_file:
            partial_tensor, partial_states = feature_extraction.extract_rhs_file(test_path)
            test_tensor = np.concatenate((test_tensor, partial_tensor))
            # Append the labels of this file; concatenating test_states with
            # itself left the label vector permanently empty.
            test_states = np.concatenate((test_states, partial_states))

        print("Creating model...")
        ml_model = MLModel(training_tensor, training_states, validation_tensor, validation_states)
        print("Testing model...")
        partial_results, _, _ = ml_model.test_model(test_tensor, test_states)
        accuracy, _ = MLModel.get_accuracy_precision(partial_results, test_states)
        print("Accuracy: ", accuracy)
        print("Update results...")
        global_results = np.concatenate((global_results, partial_results))
        global_states = np.concatenate((global_states, test_states))

    # Compute the metrics before opening the file so that a failure here does
    # not leave a truncated result file behind.
    accuracy, precision, recall, f_score = MLModel.evaluate_results(global_results, global_states)
    # The context manager closes the file; no explicit close() is needed.
    with open(os.path.join(EXPERIMENT_RESULT, "experiment_5.txt"), 'w') as results_file:
        results_file.write("LEAVE ONE OUT EXPERIMENT:\n")
        results_file.write("ACCURACY: " + str(accuracy * 100) + "\n")
        results_file.write("PRECISION: " + str(precision * 100) + "\n")
        results_file.write("RECALL: " + str(recall * 100) + "\n")
        results_file.write("F_SCORE: " + str(f_score * 100) + "\n")
def __init__(self, dataset):
    """Collect the file paths and the sorted patient ids of ``dataset``.

    :param dataset: dataset identifier handed to ``HandManager``.
    """
    self.__dataset = dataset
    manager = HandManager(self.__dataset)

    # Start from every file of the dataset, then keep only the files accepted
    # by HandManager.filter_file for MINIMUM_SAMPLES.
    self.__patients_paths = manager.get_files_path()
    self.__patients_paths = HandManager.filter_file(
        self.__patients_paths, MINIMUM_SAMPLES)

    # Patient ids, sorted in place.
    self.__patients = HandManager.get_ids_from_dir(manager.get_patient_paths())
    self.__patients.sort()
def rtp(self, path):
    """Extract the RTP sequence tensor and state labels of a single sample file.

    :param path: path of the sample file; the patient id (and from it the
        patient state) is derived from this path.
    :return: ``(tensor, states)`` where ``tensor`` is reshaped to
        ``(number of sequences, self.__num_samples, FEATURES)`` and ``states``
        repeats the patient's state once per sequence.
    """
    patient_id = HandManager.get_id_from_path(path)
    patient_state = HandManager.get_state_from_id(patient_id)

    # Read the raw samples, transform them and derive the new-stroke markers.
    xs, ys, bs = RTPExtraction.__read_samples_from_file(path)
    xs, ys, bs = self.__transform_point_in_rtp(xs, ys, bs)
    strokes = RTPExtraction.__get_new_stroke(bs)
    xs, ys, bs, strokes = self.__extract_subs_from_samples(xs, ys, bs, strokes)

    # The four accumulators are filled in place.
    x_samples, y_samples, bs_samples, new_stroke_samples = [], [], [], []
    self.__create_sample_sequence(
        x_samples, y_samples, bs_samples, new_stroke_samples,
        xs, ys, bs, strokes)

    sequence_count = len(x_samples)
    tensor = np.reshape(
        x_samples + y_samples + bs_samples + new_stroke_samples,
        (sequence_count, self.__num_samples, FEATURES))
    states = [patient_state] * sequence_count
    return np.array(tensor), np.array(states)
def get_task_files(tasks, paths):
    """Return the paths whose ``"_<task>."`` tag contains one of the given tasks.

    :param tasks: a single task or a list of tasks; a non-list value is
        wrapped in a one-element list.
    :param paths: iterable of file paths to inspect.
    :return: list of matching paths, grouped by task in the order of ``tasks``.
        If a ``TypeError`` occurs it is reported on standard output and the
        matches collected so far are returned.
    """
    wanted = tasks if isinstance(tasks, list) else [tasks]
    matches = []
    try:
        for task in wanted:
            for file_path in paths:
                # Substring test against the "_<task>." tag of the path.
                tag = '_' + HandManager.get_task_from_path(file_path) + '.'
                if task in tag:
                    matches.append(file_path)
    except TypeError as error:
        print("Error: ", error)
        print("Task: ", wanted)
    return matches
def split(paths, healthy_task, diseased_task, test_task, training_number, validation_number):
    """Distribute the task files over training, validation and test lists.

    :param paths: iterable of file paths to distribute.
    :param healthy_task: task tags (``"_<task>."`` form) selected for healthy patients.
    :param diseased_task: task tags selected for diseased patients.
    :param test_task: task tags selected for testing.
    :param training_number: number of patient ids per class used for training.
    :param validation_number: number of patient ids per class used for validation.
    :return: tuple ``(training_diseased, training_healthy, test_healthy,
        test_diseased, validation_diseased, validation_healthy)`` of path lists.
    """
    # Patient ids selected for each modelling phase, separated between healthy
    # (h) and diseased (d) patients.
    (h_train_ids, h_val_ids, h_test_ids,
     d_train_ids, d_val_ids, d_test_ids) = TaskManager.__split(
        training_number, validation_number)

    train_diseased, train_healthy = [], []
    val_diseased, val_healthy = [], []
    test_healthy, test_diseased = [], []

    # Every path whose task was selected for modelling is routed to the list
    # that matches the id of the patient it belongs to.
    for file_path in paths:
        patient_id = HandManager.get_id_from_path(file_path)
        task_tag = "_" + HandManager.get_task_from_path(file_path) + "."

        # Tasks selected for the diseased patients.
        if task_tag in diseased_task:
            train_diseased, val_diseased = TaskManager.__id_in_list(
                patient_id, file_path, d_train_ids, d_val_ids,
                train_diseased, val_diseased)

        # Tasks selected for the patients considered healthy.
        if task_tag in healthy_task:
            train_healthy, val_healthy = TaskManager.__id_in_list(
                patient_id, file_path, h_train_ids, h_val_ids,
                train_healthy, val_healthy)

        # Tasks selected for testing.
        if task_tag in test_task:
            if patient_id in h_test_ids:
                test_healthy.append(file_path)
            elif patient_id in d_test_ids:
                test_diseased.append(file_path)

    return (train_diseased, train_healthy, test_healthy, test_diseased,
            val_diseased, val_healthy)
def __split(training_numbers, validation_numbers):
    """Split healthy and diseased patient ids into training, validation and test ids.

    :param training_numbers: number of leading ids of each class used for training.
    :param validation_numbers: number of ids following them used for validation;
        all remaining ids are used for testing.
    :return: tuple ``(h_training, h_validation, h_test, d_training,
        d_validation, d_test)`` where ``h`` stands for healthy and ``d`` for
        diseased.
    """
    # Lists of the ids of healthy and diseased patients.
    healthy_id, diseased_id = HandManager.get_healthy_disease_list()
    validation_end = training_numbers + validation_numbers

    def partition(ids):
        # Consecutive slices: training, validation, then whatever is left.
        return (ids[:training_numbers],
                ids[training_numbers:validation_end],
                ids[validation_end:])

    h_training, h_validation, h_test = partition(healthy_id)
    d_training, d_validation, d_test = partition(diseased_id)
    return h_training, h_validation, h_test, d_training, d_validation, d_test
def read_samples_from_file(path):
    """Read the sampled points of a file and return them without duplicates.

    The file is parsed as CSV with a single space as column delimiter. The
    timestamps are read and handed to the duplicate removal but are not
    returned.

    :param path: path of the sample file.
    :return: three float numpy arrays: x coordinates, y coordinates and the
        ``BOTTOM_STATUS`` column (presumably the pen/button status; verify
        against the dataset format).
    """
    xs, ys, statuses, timestamps = [], [], [], []
    with open(path, newline='') as csv_file:
        # Space-delimited rather than comma-delimited columns.
        for row in csv.reader(csv_file, delimiter=' '):
            xs.append(float(row[X_COORDINATE]))
            ys.append(float(row[Y_COORDINATE]))
            statuses.append(float(row[BOTTOM_STATUS]))
            timestamps.append(float(row[TIMESTAMP]))

    # Remove the duplicates from the lists.
    xs, ys, timestamps, statuses = HandManager.delete_duplicates(
        xs, ys, timestamps, statuses)
    return (np.array(xs).astype(float),
            np.array(ys).astype(float),
            np.array(statuses).astype(float))