    def support_vector_machine(self, sensors_set):
        features = list(self.dataset.get_sensors_set_features(sensors_set))
        print("SUPPORT VECTOR MACHINE.....")
        print("CLASSIFICATION BASED ON THESE SENSORS: ",
              self.dataset.get_remained_sensors(sensors_set))
        print("NUMBER OF FEATURES: ", len(features))
        train_features, train_classes, test_features, test_classes = self.__get_sets_for_classification(
            self.dataset.get_train, self.dataset.get_test, features)
        train_features_scaled, test_features_scaled = util.scale_features(
            train_features, test_features)

        classifier_svm = SVC(C=const.PAR_SVM_C[sensors_set],
                             gamma=const.PAR_SVM_GAMMA[sensors_set],
                             verbose=False)
        classifier_svm.fit(train_features_scaled, train_classes)
        test_prediction = classifier_svm.predict(test_features_scaled)
        acc = accuracy_score(test_classes, test_prediction)
        print("ACCURACY : " + str(acc))
        print("END SUPPORT VECTOR MACHINE.....")

        os.makedirs(const.DIR_RESULTS, exist_ok=True)
        file_content = "acc\n" + str(acc)
        with open(
                os.path.join(const.DIR_RESULTS,
                             str(sensors_set) + const.FILE_SUPPORT_VECTOR_MACHINE_RESULTS),
                'w') as f:
            f.write(file_content)

    def neural_network(self, sensors_set):
        features = list(self.dataset.get_sensors_set_features(sensors_set))
        print("NEURAL NETWORK.....")
        print("CLASSIFICATION BASED ON THESE SENSORS: ",
              self.dataset.get_remained_sensors(sensors_set))
        print("NUMBER OF FEATURES: ", len(features))
        train_features, train_classes, test_features, test_classes = self.__get_sets_for_classification(
            self.dataset.get_train, self.dataset.get_test, features)
        train_features_scaled, test_features_scaled = util.scale_features(
            train_features, test_features)

        classifier_nn = MLPClassifier(
            hidden_layer_sizes=(const.PAR_NN_NEURONS[sensors_set], ),
            alpha=const.PAR_NN_ALPHA[sensors_set],
            max_iter=const.PAR_NN_MAX_ITER,
            tol=const.PAR_NN_TOL)
        classifier_nn.fit(train_features_scaled, train_classes)
        test_prediction = classifier_nn.predict(test_features_scaled)
        acc = accuracy_score(test_classes, test_prediction)
        print("ACCURACY : " + str(acc))
        print("END NEURAL NETWORK")

        os.makedirs(const.DIR_RESULTS, exist_ok=True)
        file_content = "acc\n" + str(acc)
        with open(
                os.path.join(const.DIR_RESULTS,
                             str(sensors_set) + const.FILE_NEURAL_NETWORK_RESULTS),
                'w') as f:
            f.write(file_content)
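The helper util.scale_features is not shown in this listing. A minimal sketch of what the two-argument form used above could look like, assuming a standard fit-on-train/transform-test scheme built on scikit-learn (the StandardScaler choice and the return convention are assumptions):

from sklearn.preprocessing import StandardScaler

def scale_features(train_features, test_features):
    # Hypothetical sketch: fit the scaler on the training data only,
    # then apply the same transformation to the test data.
    scaler = StandardScaler()
    train_scaled = scaler.fit_transform(train_features)
    test_scaled = scaler.transform(test_features)
    return train_scaled, test_scaled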
Example 3
def main(result_dir: str, data_atlas_dir: str, data_train_dir: str, data_test_dir: str):
    """Brain tissue segmentation using logistic regression.

    The main routine executes the medical image analysis pipeline:

        - Image loading
        - Registration
        - Pre-processing
        - Feature extraction
        - Logistic regression classifier model building
        - Segmentation using logistic regression on unseen images
        - Post-processing of the segmentation
        - Evaluation of the segmentation
    """

    # load atlas images
    putil.load_atlas_images(data_atlas_dir)

    print('-' * 5, 'Training...')

    # load feature matrix and label vector
    # precomputed by preprocessAndStore.py
    with open('data_train.pckl', 'rb') as file_id:
        data_train = pickle.load(file_id)

    with open('labels_train.pckl', 'rb') as file_id:
        labels_train = pickle.load(file_id)


    ##########################################

    # perform a grid search over the parameter grid and choose the optimal parameter C
    param_grid = {'C': [0.5, 1, 2.5, 50, 1000]}  # candidate values for the regularization strength C
    log_reg_classifier = model_selection.GridSearchCV(
        sk.LogisticRegression(class_weight='balanced'), param_grid, refit=True)

    print('section 1')

    data_train_scaled, scaler = util.scale_features(data_train)

    start_time = timeit.default_timer()

    log_reg_classifier.fit(data_train_scaled, labels_train)

    util.print_feature_importance(log_reg_classifier.best_estimator_.coef_)

    util.print_class_count(labels_train)

    print('section 2')

    #print("importance of features: ", log_reg_classifier.best_estimator_.coef_)
    print("best estimator: ", log_reg_classifier.best_estimator_)
    print("best parameter: ", log_reg_classifier.best_params_)


    # store the trained logistic regression model and the scaler
    with open('log_regr.pckl', 'wb') as file_id:
        pickle.dump(log_reg_classifier, file_id)
    with open('scaler.pckl', 'wb') as file_id:
        pickle.dump(scaler, file_id)

    print(' Time elapsed:', timeit.default_timer() - start_time, 's')
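The pickled classifier and scaler stored above can later be reloaded for inference. A minimal sketch, assuming the scaler is a scikit-learn scaler object and data_test is a hypothetical feature matrix shaped like data_train:

import pickle

# Reload the trained model and the scaler from disk.
with open('log_regr.pckl', 'rb') as file_id:
    log_reg_classifier = pickle.load(file_id)
with open('scaler.pckl', 'rb') as file_id:
    scaler = pickle.load(file_id)

# data_test is hypothetical; scale it with the training-time scaler, then predict.
data_test_scaled = scaler.transform(data_test)
predictions = log_reg_classifier.predict(data_test_scaled)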
Example 4
def main(result_dir: str, data_atlas_dir: str, data_train_dir: str,
         data_test_dir: str):
    """Brain tissue segmentation using decision forests.

    The main routine executes the medical image analysis pipeline:

        - Segmentation using the pre-trained SVM classifier on unseen images
        - Post-processing of the segmentation
        - Evaluation of the segmentation
    """

    # load atlas images
    putil.load_atlas_images(data_atlas_dir)

    # crawl the training image directories
    crawler = load.FileSystemDataCrawler(data_train_dir, IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())
    pre_process_params = {
        'zscore_pre': True,
        'registration_pre': False,
        'coordinates_feature': True,
        'intensity_feature': True,
        'gradient_intensity_feature': True,
        'second_oder_coordinate_feature': False,
        'label_percentages': [0.0003, 0.004, 0.003, 0.04, 0.04, 0.02]
        # alternative percentages: [0.0005, 0.005, 0.005, 0.05, 0.09, 0.022]
    }

    print('-' * 5, 'Testing...')

    # load the pre-trained classifier and the corresponding scaler
    with open('svm_rbf_fullset_C15_G5_lotofpointspersample.pckl', 'rb') as file_id:
        svm_rbf_classifier = pickle.load(file_id)

    with open('scaler_rbf_fullset_C15_G5_lotofpointspersample.pckl', 'rb') as file_id:
        scaler = pickle.load(file_id)

    # initialize evaluator
    evaluator = putil.init_evaluator(result_dir)

    # crawl the test image directories
    crawler = load.FileSystemDataCrawler(data_test_dir, IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())

    # load images for testing and pre-process
    pre_process_params['training'] = False
    images_test = putil.pre_process_batch(crawler.data,
                                          pre_process_params,
                                          multi_process=False)

    images_prediction = []
    images_probabilities = []

    for img in images_test:
        print('-' * 10, 'Testing', img.id_)
        scaled_features, _ = util.scale_features(img.feature_matrix[0], scaler)
        start_time = timeit.default_timer()
        predictions = svm_rbf_classifier.predict(scaled_features)
        #probabilities = svm_classifier.predict_proba(img.feature_matrix[0])
        #predictions = forest.predict(img.feature_matrix[0])
        #probabilities = forest.predict_proba(img.feature_matrix[0])
        print(' Time elapsed:', timeit.default_timer() - start_time, 's')

        # convert prediction and probabilities back to SimpleITK images
        image_prediction = conversion.NumpySimpleITKImageBridge.convert(
            predictions.astype(np.uint8), img.image_properties)
        #image_probabilities = conversion.NumpySimpleITKImageBridge.convert(probabilities, img.image_properties)

        # evaluate segmentation without post-processing
        evaluator.evaluate(image_prediction,
                           img.images[structure.BrainImageTypes.GroundTruth],
                           img.id_)

        images_prediction.append(image_prediction)
        #images_probabilities.append(image_probabilities)

    # post-process segmentation and evaluate with post-processing
    #post_process_params = {'crf_post': False}
    #images_post_processed = putil.post_process_batch(images_test, images_prediction, images_probabilities,
    #                                                post_process_params, multi_process=True)

    for i, img in enumerate(images_test):
        # evaluator.evaluate(images_post_processed[i], img.images[structure.BrainImageTypes.GroundTruth],
        #                    img.id_ + '-PP')

        # save results
        sitk.WriteImage(
            images_prediction[i],
            os.path.join(
                result_dir, images_test[i].id_ +
                '_SEG_SVM_fullset_C15_G5_lotofpointspersample.mha'), True)
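A side note on the commented-out predict_proba calls above: scikit-learn's SVC only exposes predict_proba when the model was constructed with probability=True, which enables a slower Platt-scaling calibration during fit. A minimal sketch of the constructor change that would be needed:

from sklearn import svm

# Without probability=True, calling predict_proba on SVC raises an AttributeError.
svm_rbf_classifier = svm.SVC(kernel='rbf', C=15, gamma=5,
                             class_weight='balanced', probability=True)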
Example 5
def main(result_dir: str, data_atlas_dir: str, data_train_dir: str, data_test_dir: str, ml_method: str, verbose: bool):
    """Brain tissue segmentation using decision forests.

    The main routine executes the medical image analysis pipeline:

        - Image loading
        - Registration
        - Pre-processing
        - Feature extraction
        - Classifier model building (random forest, linear or RBF SVM, or logistic regression)
        - Segmentation using the trained classifier on unseen images
        - Post-processing of the segmentation
        - Evaluation of the segmentation
    """

    # load atlas images
    putil.load_atlas_images(data_atlas_dir)

    print('-' * 5, 'Training ' + ml_method + '...')

    # crawl the training image directories
    crawler = load.FileSystemDataCrawler(data_train_dir,
                                         IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())
    pre_process_params = {'zscore_pre': True,
                          'registration_pre': False,
                          'coordinates_feature': True,
                          'intensity_feature': True,
                          'gradient_intensity_feature': True,
                          'second_oder_coordinate_feature': False,
                          'label_percentages': [0.0005, 0.005, 0.005, 0.05, 0.09, 0.022]}

    # load images for training and pre-process
    images = putil.pre_process_batch(crawler.data, pre_process_params, multi_process=False)

    # generate feature matrix and label vector
    data_train = np.concatenate([img.feature_matrix[0] for img in images])
    labels_train = np.concatenate([img.feature_matrix[1] for img in images]).squeeze()

    if verbose:
        util.print_class_count(labels_train)

    start_time = timeit.default_timer()
    if ml_method == 'random_forest':
        classifier = sk_ensemble.RandomForestClassifier(max_features=images[0].feature_matrix[0].shape[1],
                                                        n_estimators=20,
                                                        max_depth=25)
        data_train_scaled = data_train  # random forests do not require feature scaling
    elif ml_method == 'svm_linear':
        # probability=True is required for the predict_proba call during testing
        classifier = svm.SVC(kernel='linear', C=1, class_weight='balanced', probability=True)
        data_train_scaled, scaler = util.scale_features(data_train)
    elif ml_method == 'svm_rbf':
        classifier = svm.SVC(kernel='rbf', C=15, gamma=5, class_weight='balanced',
                             decision_function_shape='ovo', probability=True)
        data_train_scaled, scaler = util.scale_features(data_train)
    elif ml_method == 'logistic_regression':
        classifier = linear_model.LogisticRegression(class_weight='balanced')
        data_train_scaled, scaler = util.scale_features(data_train)
    else:
        raise ValueError('No valid segmentation algorithm selected in argument ml_method')

    classifier.fit(data_train_scaled, labels_train)
    print(' Time elapsed:', timeit.default_timer() - start_time, 's')

    # create a result directory with timestamp
    t = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    result_dir = os.path.join(result_dir, t)
    os.makedirs(result_dir, exist_ok=True)

    # print and plot feature importance for each structure
    if verbose:
        if ml_method == 'svm_linear':
            util.print_feature_importance(classifier.coef_)
            util.plot_feature_importance(classifier.coef_, result_dir)
        if ml_method == 'random_forest':
            util.print_feature_importance(classifier.feature_importances_)
            util.plot_feature_importance(classifier.feature_importances_, result_dir)

    print('-' * 5, 'Testing...')

    # initialize evaluator
    evaluator = putil.init_evaluator(result_dir)

    # crawl the test image directories
    crawler = load.FileSystemDataCrawler(data_test_dir,
                                         IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())

    # load images for testing and pre-process
    pre_process_params['training'] = False
    images_test = putil.pre_process_batch(crawler.data, pre_process_params, multi_process=True)

    images_prediction = []
    images_probabilities = []

    for img in images_test:
        print('-' * 10, 'Testing', img.id_)

        start_time = timeit.default_timer()
        if ml_method == 'random_forest':
            scaled_features = img.feature_matrix[0]
        else:
            scaled_features, _ = util.scale_features(img.feature_matrix[0], scaler)

        predictions = classifier.predict(scaled_features)

        print(' Time elapsed:', timeit.default_timer() - start_time, 's')

        # convert prediction and probabilities back to SimpleITK images
        image_prediction = conversion.NumpySimpleITKImageBridge.convert(predictions.astype(np.uint8),
                                                                        img.image_properties)

        probabilities = classifier.predict_proba(scaled_features)
        image_probabilities = conversion.NumpySimpleITKImageBridge.convert(probabilities, img.image_properties)
        images_probabilities.append(image_probabilities)

        # evaluate segmentation without post-processing
        evaluator.evaluate(image_prediction, img.images[structure.BrainImageTypes.GroundTruth], img.id_)

        images_prediction.append(image_prediction)


    # post-process segmentation and evaluate with post-processing
    post_process_params = {'crf_post': False}
    images_post_processed = putil.post_process_batch(images_test, images_prediction, images_probabilities,
                                                     post_process_params, multi_process=True)

    for i, img in enumerate(images_test):
        evaluator.evaluate(images_post_processed[i], img.images[structure.BrainImageTypes.GroundTruth],
                           img.id_ + '-PP')

        # save results
        sitk.WriteImage(images_prediction[i], os.path.join(result_dir, images_test[i].id_ + '_SEG.mha'), True)
        sitk.WriteImage(images_post_processed[i], os.path.join(result_dir, images_test[i].id_ + '_SEG-PP.mha'), True)
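main takes the data directories plus ml_method and verbose as arguments. A minimal sketch of how it could be wired up as a command-line entry point (the default paths are hypothetical):

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Brain tissue segmentation pipeline')
    parser.add_argument('--result_dir', default='./results')
    parser.add_argument('--data_atlas_dir', default='./data/atlas')
    parser.add_argument('--data_train_dir', default='./data/train')
    parser.add_argument('--data_test_dir', default='./data/test')
    parser.add_argument('--ml_method', default='random_forest',
                        choices=['random_forest', 'svm_linear', 'svm_rbf',
                                 'logistic_regression'])
    parser.add_argument('--verbose', action='store_true')
    args = parser.parse_args()
    main(args.result_dir, args.data_atlas_dir, args.data_train_dir,
         args.data_test_dir, args.ml_method, args.verbose)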
Example 6
def main(result_dir: str, data_atlas_dir: str, data_train_dir: str,
         data_test_dir: str):
    """Brain tissue segmentation using decision forests.

    The main routine executes the medical image analysis pipeline:

        - Image loading
        - Registration
        - Pre-processing
        - Feature extraction
        - SVM (RBF kernel) classifier model building
        - Segmentation using the SVM classifier on unseen images
        - Post-processing of the segmentation
        - Evaluation of the segmentation
    """

    # load atlas images
    putil.load_atlas_images(data_atlas_dir)

    print('-' * 5, 'Training...')

    # load feature matrix and label vector
    # precomputed by preprocessAndStore.py
    with open('data_train.pckl', 'rb') as file_id:
        data_train = pickle.load(file_id)

    with open('labels_train.pckl', 'rb') as file_id:
        labels_train = pickle.load(file_id)

    ##########################################
    # use if GridSearchCV is used
    # perform a grid search over the parameter grid and choose the optimal parameters
    # Cs = [10, 12, 15, 20]  # best = 15
    # gammas = [1, 2, 3, 5, 10]  # best = 10
    # param_grid = {'C': Cs, 'gamma': gammas}
    # svm_rbf_classifier = model_selection.GridSearchCV(svm.SVC(kernel='rbf'), param_grid, verbose=1)

    data_train_scaled, scaler = util.scale_features(data_train)

    # print how many labels of each class were selected by the sampling mask
    util.print_class_count(labels_train)

    # use if GridSearchCV is not used
    svm_rbf_classifier = svm.SVC(kernel='rbf',
                                 C=15,
                                 gamma=10,
                                 class_weight='balanced',
                                 decision_function_shape='ovo')

    start_time = timeit.default_timer()

    print("start training")
    # for position features only: svm_rbf_classifier.fit(data_train_scaled[:, 0:3], labels_train)
    svm_rbf_classifier.fit(data_train_scaled, labels_train)

    # coef_ is not available with the RBF kernel, so feature importance cannot be printed:
    # util.print_feature_importance(svm_rbf_classifier.best_estimator_.coef_)

    # use if GridSearchCV is used
    # print("importance of features: ", svm_rbf_classifier.best_estimator_.coef_)  # coef_ not available with RBF kernel
    # print("best estimator: ", svm_rbf_classifier.best_estimator_)
    # print("best parameter: ", svm_rbf_classifier.best_params_)

    # use if GridSearchCV is not used
    print("best estimator: ", svm_rbf_classifier)
    print("estimator dual_coef_: ", svm_rbf_classifier.dual_coef_)

    with open('svm_rbf_fullset_C15_G5.pckl', 'wb') as file_id:
        pickle.dump(svm_rbf_classifier, file_id)
    with open('scaler_rbf_fullset_C15_G5.pckl', 'wb') as file_id:
        pickle.dump(scaler, file_id)

    print(' Time elapsed:', timeit.default_timer() - start_time, 's')
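For reference, a runnable version of the grid-search variant sketched in the comments above, with the candidate values taken from those comments (C = 15 and gamma = 10 are reported there as the best combination):

from sklearn import model_selection, svm

param_grid = {'C': [10, 12, 15, 20], 'gamma': [1, 2, 3, 5, 10]}
grid_search = model_selection.GridSearchCV(
    svm.SVC(kernel='rbf', class_weight='balanced'), param_grid, verbose=1)
grid_search.fit(data_train_scaled, labels_train)
print('best parameter:', grid_search.best_params_)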
Example 7
def main(result_dir: str, data_atlas_dir: str, data_train_dir: str, data_test_dir: str):
    """Brain tissue segmentation using decision forests.

    The main routine executes the medical image analysis pipeline:

        - Image loading
        - Registration
        - Pre-processing
        - Feature extraction
        - Linear SVM classifier model building
        - Segmentation using the linear SVM classifier on unseen images
        - Post-processing of the segmentation
        - Evaluation of the segmentation
    """

    # load atlas images
    putil.load_atlas_images(data_atlas_dir)

    print('-' * 5, 'Training...')

    # load feature matrix and label vector
    # precomputed by preprocessAndStore.py
    with open('data_train_reduced2.pckl', 'rb') as file_id:
        data_train = pickle.load(file_id)

    with open('labels_train_reduced.pckl', 'rb') as file_id:
        labels_train = pickle.load(file_id)


    ##########################################


    # perform a grid search over the parameter grid to choose the optimal parameter C
    param_grid = {'C': [2, 3, 4, 5, 10, 20, 100]}  # candidate values for the regularization strength C
    # svm_classifier = model_selection.GridSearchCV(svm.LinearSVC(C=1, class_weight='balanced', dual=False), param_grid, verbose=1)

    data_train_scaled, scaler = util.scale_features(data_train)

    util.print_class_count(labels_train)

    # use balanced class weights to include classes with small sample size
    # solve the primal problem since n_features < n_samples

    svm_classifier = svm.LinearSVC(C=1, class_weight='balanced', dual=False)  # alternatives tried: svm.SVC with kernel='rbf' or kernel='linear'
    start_time = timeit.default_timer()

    svm_classifier.fit(data_train_scaled, labels_train)

    #util.print_feature_importance(svm_classifier.coef_)
    util.plot_feature_importance(svm_classifier.coef_)

    #print(svm_classifier.best_params_)
    #print(svm_classifier.best_estimator_)

    # store trained SVM
    file_id = open('svm_linear.pckl', 'wb')
    pickle.dump(svm_classifier, file_id)
    file_id.close()
    file_id = open('scaler.pckl', 'wb')
    pickle.dump(scaler, file_id)
    file_id.close()

    print(' Time elapsed:', timeit.default_timer() - start_time, 's')
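util.plot_feature_importance is not shown in this listing. A hypothetical sketch of what it might do for LinearSVC, whose coef_ is an (n_classes, n_features) matrix in the multi-class case (the aggregation scheme and the output file name are assumptions):

import os

import matplotlib.pyplot as plt
import numpy as np

def plot_feature_importance(coef, result_dir=None):
    # Hypothetical: average the absolute weights over all one-vs-rest
    # classifiers to get a single importance value per feature.
    importance = np.mean(np.abs(np.atleast_2d(coef)), axis=0)
    plt.bar(np.arange(importance.size), importance)
    plt.xlabel('feature index')
    plt.ylabel('mean |coefficient|')
    if result_dir is not None:
        plt.savefig(os.path.join(result_dir, 'feature_importance.png'))
    plt.show()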