Ejemplo n.º 1
0
 def train(self, path, database):
     """
     Fit the classifier tied to a database with the data stored in a file.

     Parameters
     ----------
     path: string
         The location of the file with the features and labels for training.

     database: string
         The name of the database. 'Caldas' selects the Caldas classifier
         and its feature columns; any other value selects the ADNI ones.

     Return
     ----------
     successful: boolean
         False when Utilities.checkFile rejects the file for this database,
         True once the selected classifier has been trained.
     """
     # Guard clause: nothing is read or trained if the file check fails.
     if not Utilities.checkFile(path, database):
         return False

     data = Utilities.readCSV(path)
     targets = data['Diagnosis'].values

     # Each database uses its own set of feature columns.
     use_caldas = database == 'Caldas'
     if use_caldas:
         columns = ['CDR', 'MMSE', 'MoCA', 'GDS']
     else:
         columns = ['CDR', 'MMSE', 'Age', 'Education']
     inputs = data[columns].values

     classifier = self.classifier_caldas if use_caldas else self.classifier_adni
     classifier.train(inputs, targets)
     # Flag read by validate() to know that the classifier can be evaluated.
     classifier.trained = True
     return True
Ejemplo n.º 2
0
 def validate(self, path, number_folds, database):
     """
     Compute the performance metrics of the classifier tied to a database,
     delegating the actual evaluation to that classifier's validate method.

     Parameters
     ----------
     path: string
         The location of the file with the features and labels for validation.

     number_folds: int
         The amount of folds for the k-fold cross-validation.

     database: string
         The name of the database. 'Caldas' selects the Caldas classifier
         and its feature columns; any other value selects the ADNI ones.

     Return
     ----------
     checked: boolean
         Whether Utilities.checkFile accepted the file for this database.

     validated: boolean
         Whether the validation could be carried out. It is False when the
         selected classifier has not been trained. Note that it is True
         when the file check fails (checked is False in that case).

     accuracy: float
         The accuracy of the model, or None if no validation was performed.

     precision: float
         The precision of the model, or None if no validation was performed.

     sensitivity: float
         The sensitivity of the model, or None if no validation was performed.

     specificity: float
         The specificity of the model, or None if no validation was performed.

     kappa: float
         The Cohen's Kappa of the model, or None if no validation was performed.
     """
     if not Utilities.checkFile(path, database):
         return False, True, None, None, None, None, None

     data = Utilities.readCSV(path)
     targets = data['Diagnosis'].values

     use_caldas = database == 'Caldas'
     classifier = self.classifier_caldas if use_caldas else self.classifier_adni
     # An untrained classifier cannot be evaluated.
     if not classifier.trained:
         return True, False, None, None, None, None, None

     # Each database uses its own set of feature columns.
     if use_caldas:
         columns = ['CDR', 'MMSE', 'MoCA', 'GDS']
     else:
         columns = ['CDR', 'MMSE', 'Age', 'Education']
     metrics = classifier.validate(data[columns].values, targets, number_folds)
     accuracy, precision, sensitivity, specificity, kappa = metrics
     return True, True, accuracy, precision, sensitivity, specificity, kappa
Ejemplo n.º 3
0
 def train(self, features, labels):
     """
     Train every member of the ensemble on its own bootstrap sample.
     Each sample has as many rows as the data set and is drawn with
     replacement using a per-member seed.

     Parameters
     ----------
     features: array-like of shape = [number_samples, number_features]
         The training input samples.

     labels: array-like of shape = [number_samples]
         The target values (class labels in classification).

     Return
     ----------
     None
     """
     number_samples, _ = features.shape

     # One independent seed per ensemble member, drawn from an unseeded
     # generator, so two calls do not produce the same bootstrap samples.
     seed_source = check_random_state(None)
     seeds = seed_source.randint(numpy.iinfo(numpy.int32).max,
                                 size=self.number_classifiers)

     classes, ids = Utilities.getClasses(labels)
     one_hot_labels = np_utils.to_categorical(ids, len(classes))

     for i, seed in enumerate(seeds):
         sampler = check_random_state(seed)
         picked = sampler.randint(0, number_samples, number_samples)
         member = self.model[i]
         if i % 2 == 0:
             # Even positions are trained on the one-hot encoded labels.
             member.train(features[picked], one_hot_labels[picked])
             member.classes = classes
         else:
             # Odd positions are trained on the raw labels and report the
             # classes learnt by their underlying model.
             member.train(features[picked], labels[picked])
             member.classes = member.model.classes_
     self.classes = classes
Ejemplo n.º 4
0
 def validate(self, features, labels, number_folds, encoded_labels):
     """
     Compute the model's performance metrics, optionally with k-fold
     cross-validation, from the confusion matrix of its predictions.

     Parameters
     ----------
     features: array-like of shape = [number_samples, number_features]
         The validation input samples.

     labels: array-like of shape = [number_samples]
         The target values (class labels in classification). Only used
         to size the prediction buffer and to build the folds.

     number_folds: int
         The amount of folds for the k-fold cross-validation.
         If 0 compute metrics without folds (the model is not refitted).
         Otherwise the model is fitted on each fold's training split and
         predicts that fold's test split.

     encoded_labels: array-like of shape = [number_samples, number_outputs]
         The target values (class labels in classification) in
         one-hot-encoding. Used for fitting and as the ground truth.

     Return
     ----------
     accuracy: float
         The accuracy of the model based on its confusion matrix.

     precision: float
         The precision of the model, averaged over the classes and
         weighted by the number of true samples of each class.

     sensitivity: float
         The sensitivity of the model, weighted in the same way.

     specificity: float
         The specificity of the model, weighted in the same way.

     kappa: float
         The Cohen's Kappa of the model based on its confusion matrix.
     """
     if number_folds == 0:
         predictions = self.model.predict_classes(features)
     else:
         predictions = numpy.empty(len(labels), dtype=float)
         # NOTE(review): fit() is called on the same model object for every
         # fold; whether its weights are reset in between is not visible here.
         for train, test in Utilities.getFolds(labels, number_folds):
             self.model.fit(features[train], encoded_labels[train],
                            nb_epoch=250, batch_size=10, verbose=1)
             fold_prediction = self.model.predict_classes(features[test])
             # Store each prediction at the position of its original sample.
             for position, sample in enumerate(test):
                 predictions[sample] = fold_prediction[position]

     true_classes = np_utils.categorical_probas_to_classes(encoded_labels)
     matrix = confusion_matrix(true_classes, predictions)

     column_totals = numpy.sum(matrix, 0)  # samples predicted as each class
     row_totals = numpy.sum(matrix, 1)     # true samples of each class
     correct = numpy.trace(matrix)
     total = numpy.sum(row_totals)
     accuracy = correct / total

     # Per-class metrics, computed element-wise over the classes.
     # NOTE(review): a class that is never predicted (or never present)
     # makes the matching total zero and the division is not guarded.
     hits = numpy.diagonal(matrix)
     class_precision = hits / column_totals
     class_sensitivity = hits / row_totals
     true_negatives = total - row_totals - column_totals + hits
     class_specificity = true_negatives / (
         true_negatives + column_totals - hits)

     # Average over the classes, weighted by the true samples of each one.
     precision = sum(class_precision * row_totals) / total
     sensitivity = sum(class_sensitivity * row_totals) / total
     specificity = sum(class_specificity * row_totals) / total

     chance = sum(row_totals * column_totals)
     kappa = ((total * correct) - chance) / ((total * total) - chance)
     return accuracy, precision, sensitivity, specificity, kappa
Ejemplo n.º 5
0
 def validate(self, features, labels, number_folds):
     """
     Compute the bagging model's performance metrics, optionally with
     k-fold cross-validation, from the confusion matrix of its predictions.

     Parameters
     ----------
     features: array-like of shape = [number_samples, number_features]
         The validation input samples.

     labels: array-like of shape = [number_samples] or [number_samples, number_outputs]
         The target values (class labels in classification).

     number_folds: int
         The amount of folds for the k-fold cross-validation.
         If 0 compute metrics without folds, using the model as it is.
         Otherwise the model is retrained (through self.train) on each
         fold's training split, so after the call it is left fitted on
         the training split of the last fold.

     Return
     ----------
     accuracy: float
         The accuracy of the bagging model based on its confusion matrix.

     precision: float
         The precision of the bagging model, averaged over the classes
         and weighted by the number of true samples of each class.

     sensitivity: float
         The sensitivity of the bagging model, weighted in the same way.

     specificity: float
         The specificity of the bagging model, weighted in the same way.

     kappa: float
         The Cohen's Kappa of the bagging model based on its confusion matrix.
     """
     number_samples, _ = features.shape
     if number_folds == 0:
         predictions = []
         for i in range(number_samples):
             prediction, _ = self.predict(features[i].reshape(1, -1))
             # predict() is given a single row; keep only its label so the
             # predictions stay flat, exactly as the fold branch below does.
             # (Previously the whole returned object was appended here.)
             predictions.append(prediction[0])
     else:
         predictions = numpy.empty(len(labels), dtype=object)
         for train, test in Utilities.getFolds(labels, number_folds):
             self.train(features[train], labels[train])
             # Store each prediction at the position of its original sample.
             for sample in test:
                 fold_prediction, _ = self.predict(
                     features[sample].reshape(1, -1))
                 predictions[sample] = fold_prediction[0]

     matrix = confusion_matrix(labels, predictions)

     column_totals = numpy.sum(matrix, 0)  # samples predicted as each class
     row_totals = numpy.sum(matrix, 1)     # true samples of each class
     correct = numpy.trace(matrix)
     total = numpy.sum(row_totals)
     accuracy = correct / total

     # Per-class metrics, computed element-wise over the classes.
     # NOTE(review): a class that is never predicted (or never present)
     # makes the matching total zero and the division is not guarded.
     hits = numpy.diagonal(matrix)
     class_precision = hits / column_totals
     class_sensitivity = hits / row_totals
     true_negatives = total - row_totals - column_totals + hits
     class_specificity = true_negatives / (
         true_negatives + column_totals - hits)

     # Average over the classes, weighted by the true samples of each one.
     precision = sum(class_precision * row_totals) / total
     sensitivity = sum(class_sensitivity * row_totals) / total
     specificity = sum(class_specificity * row_totals) / total

     chance = sum(row_totals * column_totals)
     kappa = ((total * correct) - chance) / ((total * total) - chance)
     return accuracy, precision, sensitivity, specificity, kappa