def train(self, path, database):
    """
    Train the classifier associated with a database from a CSV file.

    The file is first checked with ``Utilities.checkFile``; if that check
    fails nothing is trained. Otherwise the 'Diagnosis' column is used as
    the labels (one-hot encoded before training) and a database-specific
    set of columns is used as the features.

    Parameters
    ----------
    path: string
        The location of the file with the features and labels for
        training.

    database: string
        The name of the database. 'Caldas' selects the Caldas classifier;
        any other value selects the ADNI classifier.

    Return
    ----------
    successful: boolean
        False when ``Utilities.checkFile`` rejects the file, True once the
        selected classifier has been trained.
    """
    # Guard clause: do not touch any classifier if the file is rejected.
    if not Utilities.checkFile(path, database):
        return False

    data = Utilities.readCSV(path)
    diagnoses = data['Diagnosis'].values
    classes, class_ids = Utilities.getClasses(diagnoses)
    one_hot_labels = np_utils.to_categorical(class_ids, len(classes))

    # Each database is described by its own set of feature columns.
    use_caldas = database == 'Caldas'
    if use_caldas:
        feature_columns = ['CDR', 'MMSE', 'MoCA', 'GDS']
    else:
        feature_columns = ['CDR', 'MMSE', 'Age', 'Education']
    samples = data[feature_columns].values

    classifier = self.classifier_caldas if use_caldas else self.classifier_adni
    classifier.train(samples, one_hot_labels)
    # Only flag the classifier as trained after train() returned normally.
    classifier.trained = True
    classifier.classes = classes
    return True
def train(self, features, labels):
    """
    Train every member of the ensemble on its own bootstrap sample.

    For each of the ``self.number_classifiers`` members, ``number_samples``
    row indices are drawn with replacement and the member is trained on
    the selected rows.

    Parameters
    ----------
    features: array-like of shape = [number_samples, number_features]
        The training input samples.

    labels: array-like whose first dimension has length number_samples
        The target values (class labels in classification).

    Return
    ----------
    None

    Raises
    ----------
    ValueError
        If ``features`` is not two-dimensional, if the data set is empty,
        or if ``labels`` does not provide one entry per sample.
    """
    # Coerce up front so that plain lists (and other array-likes) honour
    # the documented contract: the bootstrap below relies on positional
    # fancy indexing, which only ndarrays provide.
    features = numpy.asarray(features)
    labels = numpy.asarray(labels)

    if features.ndim != 2:
        raise ValueError(
            "features must be 2-dimensional "
            "[number_samples, number_features], got %d dimension(s)"
            % features.ndim)
    number_samples = features.shape[0]
    if number_samples == 0:
        raise ValueError("cannot train on an empty data set")
    # Without this check a short label array only fails when a bootstrap
    # draw happens to hit a missing index, i.e. non-deterministically.
    if labels.ndim == 0 or labels.shape[0] != number_samples:
        raise ValueError(
            "labels must provide one entry per sample (%d expected)"
            % number_samples)

    # One independent seed per member, drawn from a master random state.
    master_state = check_random_state(None)
    seeds = master_state.randint(numpy.iinfo(numpy.int32).max,
                                 size=self.number_classifiers)

    classes, ids = Utilities.getClasses(labels)
    encoded_labels = np_utils.to_categorical(ids, len(classes))

    for i, seed in enumerate(seeds):
        member = self.model[i]
        # Bootstrap: sample as many rows as the data set has, with
        # replacement.
        indices = check_random_state(seed).randint(
            0, number_samples, number_samples)
        if i % 2 != 0:
            # Odd-indexed members are trained on the raw labels and report
            # the classes exposed by their underlying model.
            # NOTE(review): presumably scikit-learn style estimators -
            # confirm against the ensemble's constructor.
            member.train(features[indices], labels[indices])
            member.classes = member.model.classes_
        else:
            # Even-indexed members are trained on one-hot encoded labels.
            member.train(features[indices], encoded_labels[indices])
            member.classes = classes

    self.classes = classes
def validate(self, path, number_folds, database):
    """
    Compute a model's performance metrics with k-fold cross-validation.

    Parameters
    ----------
    path: string
        The location of the file with the features and labels for
        validation.

    number_folds: int
        The amount of folds for the k-fold cross-validation.

    database: string
        The name of the database. 'Caldas' selects the Caldas classifier;
        any other value selects the ADNI classifier.

    Return
    ----------
    checked: boolean
        Whether ``Utilities.checkFile`` accepted the file for the database.

    validated: boolean
        False only when the selected classifier has not been trained yet.
        Note that it is True when ``checked`` is False.

    accuracy: float
        The accuracy of the model based on its confusion matrix.

    precision: float
        The precision of the model based on its confusion matrix.

    sensitivity: float
        The sensitivity of the model based on its confusion matrix.

    specificity: float
        The specificity of the model based on its confusion matrix.

    kappa: float
        The Cohen's Kappa of the model based on its confusion matrix.

    The five metrics are None whenever ``checked`` or ``validated`` is
    False.
    """
    # Placeholder for the five metrics on every early exit.
    no_metrics = (None, None, None, None, None)

    if not Utilities.checkFile(path, database):
        return (False, True) + no_metrics

    data = Utilities.readCSV(path)
    diagnoses = data['Diagnosis'].values
    classes, class_ids = Utilities.getClasses(diagnoses)
    one_hot_labels = np_utils.to_categorical(class_ids, len(classes))

    # Pick the classifier and the feature columns that go with the database.
    if database == 'Caldas':
        classifier = self.classifier_caldas
        feature_columns = ['CDR', 'MMSE', 'MoCA', 'GDS']
    else:
        classifier = self.classifier_adni
        feature_columns = ['CDR', 'MMSE', 'Age', 'Education']

    # An untrained classifier cannot be validated.
    if not classifier.trained:
        return (True, False) + no_metrics

    samples = data[feature_columns].values
    accuracy, precision, sensitivity, specificity, kappa = classifier.validate(
        samples, diagnoses, number_folds, one_hot_labels)
    return True, True, accuracy, precision, sensitivity, specificity, kappa