def main(_): """Trains a decision forest classifier on a two-dimensional point cloud.""" # generate model directory (use datetime to ensure that the directory is empty) t = datetime.datetime.now().strftime('%Y-%m-%d%H%M%S') model_dir = os.path.join(FLAGS.model_dir, t) os.makedirs(model_dir, exist_ok=True) # generate result directory os.makedirs(FLAGS.result_dir, exist_ok=True) # read file with training data data = Reader.load(FLAGS.input_file) # generate testing data test_data = Generator.get_test_data(1000) # generate decision forest parameters params = df.DecisionForestParameters() params.num_classes = data.label_count() params.num_features = data.dimension params.num_trees = 10 params.max_nodes = 100 # or params.set_max_nodes(...) params.use_training_loss = False params.report_feature_importances = True params.model_dir = model_dir print(params) # train the forest forest = df.DecisionForest(params) print('Decision forest training...') forest.train(data.data, data.labels) # or use load_estimator to load a model (create a DecisionForestParameters object and set the model_dir) # forest.load_estimator() # apply the forest to test data print('Decision forest testing...') probabilities, predictions = forest.predict(test_data) # or directly evaluate when labels are known # this can be used to see the feature importance # eval_data, eval_labels = Generator.get_test_data_with_label(50) # results = forest.evaluate(eval_data, eval_labels) # for key in sorted(results): # print('%s: %s' % (key, results[key])) # plot the result print('Plotting...') plotter = Plotter() plotter.plot_pixels_proba(test_data, np.array(probabilities)) plotter.plot_points(data.data, data.labels) plotter.save(os.path.join(FLAGS.result_dir, 'result_{}.png'.format(t)))
def main(FLAGS, trees, nodes):
    """Brain tissue segmentation using decision forests.

    The main routine executes the medical image analysis pipeline:

        - Image loading
        - Registration
        - Pre-processing
        - Feature extraction
        - Decision forest classifier model building
        - Segmentation using the decision forest classifier model on unseen images
        - Post-processing of the segmentation
        - Evaluation of the segmentation
    """

    # load atlas images
    putil.load_atlas_images(FLAGS.data_atlas_dir)

    print('-' * 5, 'Training...')

    # name the model directory after the forest configuration
    # note: TensorFlow continues training an existing model if the directory is not empty,
    # so rerunning with the same configuration resumes from the stored model
    t = 'DF_trees_' + str(trees) + '_nodes_' + str(nodes)
    model_dir = os.path.join(FLAGS.model_dir, t)
    os.makedirs(model_dir, exist_ok=True)

    # crawl the training image directories
    crawler = load.FileSystemDataCrawler(FLAGS.data_train_dir,
                                         IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())
    data_items = list(crawler.data.items())
    train_data_size = len(data_items)  # data_items is reused for the test set below

    pre_process_params = {'zscore_pre': True,
                          'coordinates_feature': True,
                          'intensity_feature': True,
                          'gradient_intensity_feature': True}

    # initialize decision forest parameters
    df_params = df.DecisionForestParameters()
    df_params.num_classes = 4
    df_params.num_trees = trees
    df_params.max_nodes = nodes
    df_params.model_dir = model_dir

    forest = None
    start_time_total_train = timeit.default_timer()

    for batch_index in range(0, len(data_items), TRAIN_BATCH_SIZE):
        # slicing past the end of the list is safe, so the last (partial) batch needs no special handling
        batch_data = dict(data_items[batch_index:batch_index + TRAIN_BATCH_SIZE])

        # load images for training and pre-process
        images = putil.pre_process_batch(batch_data, pre_process_params, multi_process=True)
        print('pre-processing done')

        # generate feature matrix and label vector
        data_train = np.concatenate([img.feature_matrix[0] for img in images])
        labels_train = np.concatenate([img.feature_matrix[1] for img in images])

        if forest is None:
            df_params.num_features = data_train.shape[1]
            print(df_params)
            forest = df.DecisionForest(df_params)

        start_time = timeit.default_timer()
        forest.train(data_train, labels_train)
        print(' Time elapsed:', timeit.default_timer() - start_time, 's')

    time_total_train = timeit.default_timer() - start_time_total_train

    print('-' * 5, 'Testing...')
    result_dir = os.path.join(FLAGS.result_dir, t)
    os.makedirs(result_dir, exist_ok=True)

    # initialize evaluator
    evaluator = putil.init_evaluator(result_dir)

    # crawl the test image directories
    crawler = load.FileSystemDataCrawler(FLAGS.data_test_dir,
                                         IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())
    data_items = list(crawler.data.items())

    for batch_index in range(0, len(data_items), TEST_BATCH_SIZE):
        # slicing past the end of the list is safe, so the last (partial) batch needs no special handling
        batch_data = dict(data_items[batch_index:batch_index + TEST_BATCH_SIZE])

        # load images for testing and pre-process
        pre_process_params['training'] = False
        images_test = putil.pre_process_batch(batch_data, pre_process_params, multi_process=True)

        images_prediction = []
        images_probabilities = []

        for img in images_test:
            print('-' * 10, 'Testing', img.id_)

            start_time = timeit.default_timer()
            probabilities, predictions = forest.predict(img.feature_matrix[0])
            print(' Time elapsed:', timeit.default_timer() - start_time, 's')

            # convert prediction and probabilities back to SimpleITK images
            image_prediction = conversion.NumpySimpleITKImageBridge.convert(predictions.astype(np.uint8),
                                                                            img.image_properties)
            image_probabilities = conversion.NumpySimpleITKImageBridge.convert(probabilities,
                                                                               img.image_properties)

            # evaluate segmentation without post-processing
            evaluator.evaluate(image_prediction, img.images[structure.BrainImageTypes.GroundTruth], img.id_)

            images_prediction.append(image_prediction)
            images_probabilities.append(image_probabilities)

        # post-process segmentation and evaluate with post-processing
        post_process_params = {'crf_post': True}
        images_post_processed = putil.post_process_batch(images_test, images_prediction, images_probabilities,
                                                         post_process_params, multi_process=True)

        for i, img in enumerate(images_test):
            evaluator.evaluate(images_post_processed[i], img.images[structure.BrainImageTypes.GroundTruth],
                               img.id_ + '-PP')

            # save results
            sitk.WriteImage(images_prediction[i], os.path.join(result_dir, images_test[i].id_ + '_SEG.mha'), True)
            sitk.WriteImage(images_post_processed[i], os.path.join(result_dir, images_test[i].id_ + '_SEG-PP.mha'), True)

    # write a summary of the parameters to the result directory
    # (data_items now holds the test items, hence the training set size captured above)
    with open(os.path.join(result_dir, 'summary.txt'), 'w') as summary_file:
        print('Training data size: {}'.format(train_data_size), file=summary_file)
        print('Total training time: {:.1f}s'.format(time_total_train), file=summary_file)
        print('Decision forest', file=summary_file)
        print(df_params, file=summary_file)
        stats = statistics.gather_statistics(os.path.join(result_dir, 'results.csv'))
        print('Result statistics:', file=summary_file)
        print(stats, file=summary_file)
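# ----------------------------------------------------------------------
# Sketch of a possible driver (an assumption, not part of the original
# excerpt): because this variant of main() takes `trees` and `nodes` as
# arguments and names the model directory after them, it lends itself
# to a small grid search. parse_flags() is a hypothetical helper that
# returns the FLAGS namespace; the grid values are illustrative only.
# ----------------------------------------------------------------------
if __name__ == '__main__':
    flags = parse_flags()  # hypothetical: returns an object with model_dir, result_dir, data_*_dir
    for trees in (10, 40, 160):
        for nodes in (500, 1000, 3000):
            print('=' * 10, 'decision forest with {} trees and {} max nodes'.format(trees, nodes))
            main(flags, trees, nodes)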
def main(_): """Brain tissue segmentation using decision forests. The main routine executes the medical image analysis pipeline: - Image loading - Registration - Pre-processing - Feature extraction - Decision forest classifier model building - Segmentation using the decision forest classifier model on unseen images - Post-processing of the segmentation - Evaluation of the segmentation """ # load atlas images putil.load_atlas_images(FLAGS.data_atlas_dir) print('-' * 5, 'Training...') # generate a model directory (use datetime to ensure that the directory is empty) # we need an empty directory because TensorFlow will continue training an existing model if it is not empty t = datetime.datetime.now().strftime('%Y-%m-%d%H%M%S') model_dir = os.path.join(FLAGS.model_dir, t) os.makedirs(model_dir, exist_ok=True) # crawl the training image directories crawler = load.FileSystemDataCrawler(FLAGS.data_train_dir, IMAGE_KEYS, futil.BrainImageFilePathGenerator(), futil.DataDirectoryFilter()) data_items = list(crawler.data.items()) train_data_size = len(data_items) pre_process_params = { 'zscore_pre': True, 'coordinates_feature': True, 'intensity_feature': True, 'gradient_intensity_feature': True } # initialize decision forest parameters df_params = df.DecisionForestParameters() df_params.num_classes = 4 df_params.num_trees = 160 df_params.max_nodes = 3000 df_params.model_dir = model_dir forest = None start_time_total_train = timeit.default_timer() for batch_index in range(0, len(data_items), TRAIN_BATCH_SIZE): cache_file_prefix = os.path.normpath( os.path.join( script_dir, './mia-cache/batch-' + str(batch_index) + '-' + str(TRAIN_BATCH_SIZE))) cache_file_train = cache_file_prefix + '-data_train.npy' cache_file_labels = cache_file_prefix + '-data_labels.npy' if (USE_PREPROCESS_CACHE & os.path.exists(cache_file_train)): print('Using cache from ', cache_file_train) data_train = np.load(cache_file_train) labels_train = np.load(cache_file_labels) else: # slicing manages out of range; no need to worry batch_data = dict(data_items[batch_index:batch_index + TRAIN_BATCH_SIZE]) # load images for training and pre-process images = putil.pre_process_batch(batch_data, pre_process_params, multi_process=True) print('pre-processing done') # generate feature matrix and label vector data_train = np.concatenate( [img.feature_matrix[0] for img in images]) labels_train = np.concatenate( [img.feature_matrix[1] for img in images]) if NORMALIZE_FEATURES: # normalize data (mean 0, std 1) # data_train = scipy_stats.zscore(data_train) non_coord = scipy_stats.zscore(data_train[:, 3:8]) coord = data_train[:, 0:3] / 255 * 2 - 1 data_train = np.concatenate((coord, non_coord), axis=1) if (USE_PREPROCESS_CACHE): print('Writing cache') if (not os.path.exists(os.path.dirname(cache_file_prefix))): os.mkdir(os.path.dirname(cache_file_prefix)) data_train.dump(cache_file_train) labels_train.dump(cache_file_labels) if forest is None: df_params.num_features = data_train.shape[1] print(df_params) forest = df.DecisionForest(df_params) start_time = timeit.default_timer() forest.train(data_train, labels_train) print(' Time elapsed:', timeit.default_timer() - start_time, 's') time_total_train = timeit.default_timer() - start_time_total_train start_time_total_test = timeit.default_timer() print('-' * 5, 'Testing...') result_dir = os.path.join(FLAGS.result_dir, t) os.makedirs(result_dir, exist_ok=True) # initialize evaluator evaluator = putil.init_evaluator(result_dir) # crawl the training image directories crawler = 
load.FileSystemDataCrawler(FLAGS.data_test_dir, IMAGE_KEYS, futil.BrainImageFilePathGenerator(), futil.DataDirectoryFilter()) data_items = list(crawler.data.items()) all_probabilities = None for batch_index in range(0, len(data_items), TEST_BATCH_SIZE): # slicing manages out of range; no need to worry batch_data = dict(data_items[batch_index:batch_index + TEST_BATCH_SIZE]) # load images for testing and pre-process pre_process_params['training'] = False images_test = putil.pre_process_batch(batch_data, pre_process_params, multi_process=True) images_prediction = [] images_probabilities = [] for img in images_test: print('-' * 10, 'Testing', img.id_) start_time = timeit.default_timer() features = img.feature_matrix[0] if NORMALIZE_FEATURES: # features = scipy_stats.zscore(features) non_coord = scipy_stats.zscore(features[:, 3:8]) coord = features[:, 0:3] / 255 * 2 - 1 features = np.concatenate((coord, non_coord), axis=1) probabilities, predictions = forest.predict(features) if all_probabilities is None: all_probabilities = np.array([probabilities]) else: all_probabilities = np.concatenate( (all_probabilities, [probabilities]), axis=0) print(' Time elapsed:', timeit.default_timer() - start_time, 's') # convert prediction and probabilities back to SimpleITK images image_prediction = conversion.NumpySimpleITKImageBridge.convert( predictions.astype(np.uint8), img.image_properties) image_probabilities = conversion.NumpySimpleITKImageBridge.convert( probabilities, img.image_properties) # evaluate segmentation without post-processing evaluator.evaluate( image_prediction, img.images[structure.BrainImageTypes.GroundTruth], img.id_) images_prediction.append(image_prediction) images_probabilities.append(image_probabilities) # post-process segmentation and evaluate with post-processing post_process_params = {'crf_post': True} images_post_processed = putil.post_process_batch(images_test, images_prediction, images_probabilities, post_process_params, multi_process=True) for i, img in enumerate(images_test): evaluator.evaluate( images_post_processed[i], img.images[structure.BrainImageTypes.GroundTruth], img.id_ + '-PP') # save results sitk.WriteImage( images_prediction[i], os.path.join(result_dir, images_test[i].id_ + '_SEG.mha'), True) sitk.WriteImage( images_post_processed[i], os.path.join(result_dir, images_test[i].id_ + '_SEG-PP.mha'), True) time_total_test = timeit.default_timer() - start_time_total_test # write summary of parameters to results dir with open(os.path.join(result_dir, 'summary.txt'), 'w') as summary_file: print('Result dir: {}'.format(result_dir)) print('Result dir: {}'.format(result_dir), file=summary_file) print('Training data size: {}'.format(train_data_size), file=summary_file) print('Total training time: {:.1f}s'.format(time_total_train), file=summary_file) print('Total testing time: {:.1f}s'.format(time_total_test), file=summary_file) print('Voxel Filter Mask: {}'.format( putil.FeatureExtractor.VOXEL_MASK_FLT), file=summary_file) print('Normalize Features: {}'.format(NORMALIZE_FEATURES), file=summary_file) print('Decision forest', file=summary_file) print(df_params, file=summary_file) stats = statistics.gather_statistics( os.path.join(result_dir, 'results.csv')) print('Result statistics:', file=summary_file) print(stats, file=summary_file)
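# ----------------------------------------------------------------------
# Sketch (an assumption, not part of the original file): the feature
# scaling above is duplicated between training and testing and could be
# factored into a helper. It hard-codes the same layout assumption as
# the code it replaces: columns 0-2 are voxel coordinates in [0, 255],
# columns 3-7 are the intensity/gradient features.
# ----------------------------------------------------------------------
def normalize_features(features: np.ndarray) -> np.ndarray:
    """Scales the coordinates to [-1, 1] and z-scores the remaining features."""
    coord = features[:, 0:3] / 255 * 2 - 1             # coordinates to [-1, 1]
    non_coord = scipy_stats.zscore(features[:, 3:8])   # zero mean, unit variance per column
    return np.concatenate((coord, non_coord), axis=1)

# usage (both call sites above would then be one-liners):
#     data_train = normalize_features(data_train)
#     features = normalize_features(features)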