Example #1
    def construct_axis_aligned_forest(self):
        #                   (0) X[0] > 2.2
        #                   /           \
        #           (1) X[1] > -5     (2) [0.7 0.1 0.2]
        #           /            \
        # (3) [0.3 0.3 0.4]   (4) [0.3 0.6 0.1]
        #
        #                   (0) X[0] > 5.0
        #                   /           \
        #           (1) X[0] > 2.5     (2) [0.8 0.1 0.1]
        #           /            \
        # (3) [0.2 0.2 0.6]   (4) [0.2 0.7 0.1]
        path_1 = buffers.as_matrix_buffer(np.array([[1,2],[3,4],[-1,-1],[-1,-1],[-1,-1]], dtype=np.int32))
        int_params_1 = buffers.as_matrix_buffer(np.array([[1,0],[1,1],[1,0],[1,0],[1,0]], dtype=np.int32))
        float_params_1 = buffers.as_matrix_buffer(np.array([[2.2],[-5],[0],[0],[0]], dtype=np.float32))
        ys_1 = buffers.as_matrix_buffer(np.array([[0,0,0],[0,0,0],[0.7,0.1,0.2],[0.3,0.3,0.4],[0.3,0.6,0.1]], dtype=np.float32))
        depth_1 = buffers.as_vector_buffer(np.array([0, 1, 1, 2, 2], dtype=np.int32))
        counts_1 = buffers.as_vector_buffer(np.array([5, 5, 5, 5, 5], dtype=np.float32))
        tree_1 = forest_data.Tree(path_1, int_params_1, float_params_1, depth_1, counts_1, ys_1)

        path_2 = buffers.as_matrix_buffer(np.array([[1,2],[3,4],[-1,-1],[-1,-1],[-1,-1]], dtype=np.int32))
        int_params_2 = buffers.as_matrix_buffer(np.array([[1,0],[1,0],[1,0],[1,0],[1,0]], dtype=np.int32))
        float_params_2 = buffers.as_matrix_buffer(np.array([[5.0],[2.5],[0],[0],[0]], dtype=np.float32))
        ys_2 = buffers.as_matrix_buffer(np.array([[0,0,0],[0,0,0],[0.8,0.1,0.1],[0.2,0.2,0.6],[0.2,0.7,0.1]], dtype=np.float32))
        depth_2 = buffers.as_vector_buffer(np.array([0, 1, 1, 2, 2], dtype=np.int32))
        counts_2 = buffers.as_vector_buffer(np.array([5, 5, 5, 5, 5], dtype=np.float32))
        tree_2 = forest_data.Tree(path_2, int_params_2, float_params_2, depth_2, counts_2, ys_2)

        forest = forest_data.Forest([tree_1, tree_2])
        return forest
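Each tree above is flattened into parallel arrays: path holds the left/right child index per node (-1 marks a leaf), int_params appears to carry the split's feature index in its second column, float_params carries the threshold, and ys stores the per-node class distribution. A minimal pure-NumPy evaluation sketch under those assumptions, operating on the raw arrays before they are wrapped by as_matrix_buffer (the branch taken when X[feature] > threshold is also an assumption, not confirmed by the source):

import numpy as np

def evaluate_tree(path, int_params, float_params, ys, x):
    # Walk from the root until a leaf is reached (leaves have child index -1).
    node = 0
    while path[node, 0] != -1:
        feature = int_params[node, 1]      # assumed: column 1 = feature index
        threshold = float_params[node, 0]  # assumed: column 0 = threshold
        # Assumed convention: take the left child when x[feature] > threshold.
        node = path[node, 0] if x[feature] > threshold else path[node, 1]
    return ys[node]                        # class distribution at that leaf

# Averaging evaluate_tree over the two trees would give the forest estimate
# for a single datapoint x.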
Example #2
    def construct_axis_aligned_forest(self):
        #                   (0) X[0] > 2.2
        #                   /           \
        #           (1) X[1] > -5     (2) [0.7 0.1 0.2]
        #           /            \
        # (3) [0.3 0.3 0.4]   (4) [0.3 0.6 0.1]
        #
        #                   (0) X[0] > 5.0
        #                   /           \
        #           (1) X[0] > 2.5     (2) [0.8 0.1 0.1]
        #           /            \
        # (3) [0.2 0.2 0.6]   (4) [0.2 0.7 0.1]
        path_1 = buffers.as_matrix_buffer(
            np.array([[1, 2], [3, 4], [-1, -1], [-1, -1], [-1, -1]],
                     dtype=np.int32))
        int_params_1 = buffers.as_matrix_buffer(
            np.array([[1, 0], [1, 1], [1, 0], [1, 0], [1, 0]], dtype=np.int32))
        float_params_1 = buffers.as_matrix_buffer(
            np.array([[2.2], [-5], [0], [0], [0]], dtype=np.float32))
        ys_1 = buffers.as_matrix_buffer(
            np.array([[0, 0, 0], [0, 0, 0], [0.7, 0.1, 0.2], [0.3, 0.3, 0.4],
                      [0.3, 0.6, 0.1]],
                     dtype=np.float32))
        depth_1 = buffers.as_vector_buffer(
            np.array([0, 1, 1, 2, 2], dtype=np.int32))
        counts_1 = buffers.as_vector_buffer(
            np.array([5, 5, 5, 5, 5], dtype=np.float32))
        tree_1 = forest_data.Tree(path_1, int_params_1, float_params_1,
                                  depth_1, counts_1, ys_1)

        path_2 = buffers.as_matrix_buffer(
            np.array([[1, 2], [3, 4], [-1, -1], [-1, -1], [-1, -1]],
                     dtype=np.int32))
        int_params_2 = buffers.as_matrix_buffer(
            np.array([[1, 0], [1, 0], [1, 0], [1, 0], [1, 0]], dtype=np.int32))
        float_params_2 = buffers.as_matrix_buffer(
            np.array([[5.0], [2.5], [0], [0], [0]], dtype=np.float32))
        ys_2 = buffers.as_matrix_buffer(
            np.array([[0, 0, 0], [0, 0, 0], [0.8, 0.1, 0.1], [0.2, 0.2, 0.6],
                      [0.2, 0.7, 0.1]],
                     dtype=np.float32))
        depth_2 = buffers.as_vector_buffer(
            np.array([0, 1, 1, 2, 2], dtype=np.int32))
        counts_2 = buffers.as_vector_buffer(
            np.array([5, 5, 5, 5, 5], dtype=np.float32))
        tree_2 = forest_data.Tree(path_2, int_params_2, float_params_2,
                                  depth_2, counts_2, ys_2)
        tree_2.GetExtraInfo().AddBuffer(
            "first", np.array([3, 21, 1, 22, 1, 5], dtype=np.float32))

        forest = forest_data.Forest([tree_1, tree_2])

        return forest
Example #3
def create_scaled_depth_delta_learner_32f(**kwargs):
    ux = float( kwargs.get('ux') )
    uy = float( kwargs.get('uy') )
    vx = float( kwargs.get('vx') )
    vy = float( kwargs.get('vy') )

    number_of_trees = int( kwargs.get('number_of_trees', 10) )
    number_of_features = int( kwargs.get('number_of_features', 1) )
    feature_ordering = int( kwargs.get('feature_ordering', pipeline.FEATURES_BY_DATAPOINTS) )
    number_of_jobs = int( kwargs.get('number_of_jobs', 1) )
    number_of_classes = int( kwargs['classes'].GetMax() + 1 )

    try_split_criteria = create_try_split_criteria(**kwargs)

    if 'bootstrap' in kwargs and kwargs.get('bootstrap'):
        sample_data_step = pipeline.BootstrapSamplesStep_i32f32i32(buffers.PIXEL_INDICES)
    else:
        sample_data_step = pipeline.AllSamplesStep_i32f32i32(buffers.PIXEL_INDICES)

    number_of_features_buffer = buffers.as_vector_buffer(np.array([number_of_features], dtype=np.int32))
    set_number_features_step = pipeline.SetInt32VectorBufferStep(number_of_features_buffer, pipeline.WHEN_NEW)
    tree_steps_pipeline = pipeline.Pipeline([sample_data_step, set_number_features_step])

    feature_params_step = image_features.PixelPairGaussianOffsetsStep_f32i32(set_number_features_step.OutputBufferId, ux, uy, vx, vy )
    depth_delta_feature = image_features.ScaledDepthDeltaFeature_f32i32(feature_params_step.FloatParamsBufferId,
                                                                      feature_params_step.IntParamsBufferId,
                                                                      sample_data_step.IndicesBufferId,
                                                                      buffers.PIXEL_INDICES,
                                                                      buffers.DEPTH_IMAGES,
                                                                      buffers.OFFSET_SCALES)
    depth_delta_feature_extractor_step = image_features.ScaledDepthDeltaFeatureExtractorStep_f32i32(depth_delta_feature, feature_ordering)
    slice_classes_step = pipeline.SliceInt32VectorBufferStep_i32(buffers.CLASS_LABELS, sample_data_step.IndicesBufferId)
    slice_weights_step = pipeline.SliceFloat32VectorBufferStep_i32(sample_data_step.WeightsBufferId, sample_data_step.IndicesBufferId)
    class_infogain_walker = classification.ClassInfoGainWalker_f32i32(slice_weights_step.SlicedBufferId,
                                                                      slice_classes_step.SlicedBufferId,
                                                                      number_of_classes)
    best_splitpoint_step = classification.ClassInfoGainBestSplitpointsWalkingSortedStep_f32i32(class_infogain_walker,
                                                                        depth_delta_feature_extractor_step.FeatureValuesBufferId,
                                                                        feature_ordering)
    node_steps_pipeline = pipeline.Pipeline([feature_params_step, depth_delta_feature_extractor_step,
                                            slice_classes_step, slice_weights_step, best_splitpoint_step])

    split_buffers = splitpoints.SplitSelectorBuffers(best_splitpoint_step.ImpurityBufferId,
                                                          best_splitpoint_step.SplitpointBufferId,
                                                          best_splitpoint_step.SplitpointCountsBufferId,
                                                          best_splitpoint_step.ChildCountsBufferId,
                                                          best_splitpoint_step.LeftYsBufferId,
                                                          best_splitpoint_step.RightYsBufferId,
                                                          feature_params_step.FloatParamsBufferId,
                                                          feature_params_step.IntParamsBufferId,
                                                          depth_delta_feature_extractor_step.FeatureValuesBufferId,
                                                          feature_ordering,
                                                          sample_data_step.IndicesBufferId)
    should_split_criteria = create_should_split_criteria(**kwargs)
    finalizer = classification.ClassEstimatorFinalizer_f32()
    split_selector = splitpoints.SplitSelector_f32i32([split_buffers], should_split_criteria, finalizer )

    tree_learner = learn.DepthFirstTreeLearner_f32i32(try_split_criteria, tree_steps_pipeline, node_steps_pipeline, split_selector)
    forest_learner = learn.ParallelForestLearner(tree_learner, number_of_trees, 5, 5, number_of_classes, number_of_jobs)
    return forest_learner
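A hypothetical invocation sketch for the builder above; every concrete value is illustrative, and create_try_split_criteria / create_should_split_criteria may read further kwargs with their own defaults. Note that classes must be a vector buffer, since the builder queries it with GetMax():

# Hypothetical usage sketch (all values illustrative, not from the source):
class_labels = buffers.as_vector_buffer(np.array([0, 1, 2, 1, 0], dtype=np.int32))
learner = create_scaled_depth_delta_learner_32f(
    ux=15.0, uy=15.0, vx=15.0, vy=15.0,  # offset scales, required by the builder
    number_of_trees=5,
    classes=class_labels,                # queried with GetMax() above
    bootstrap=True)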
Example #4
    def test_pickle(self):
        # test tensor
        for dtype in [np.float32, np.float64, np.int32, np.int64]:
            array = np.array(
                [[[3, 21, 1], [22, 1, 5]], [[2, 2, 2], [7, 7, 7]]],
                dtype=dtype)
            b1 = buffers.as_tensor_buffer(array)
            with open('tmp.pkl', 'wb') as f:
                pickle.dump(b1, f)
            with open('tmp.pkl', 'rb') as f:
                b2 = pickle.load(f)
            array2 = buffers.as_numpy_array(b2)
            self.assertTrue((array == array2).all())

        # test matrix
        for dtype in [np.float32, np.float64, np.int32, np.int64]:
            array = np.array([[3, 21, 1], [22, 1, 5], [2, 2, 2], [7, 7, 7]],
                             dtype=dtype)
            b1 = buffers.as_matrix_buffer(array)
            with open('tmp.pkl', 'wb') as f:
                pickle.dump(b1, f)
            with open('tmp.pkl', 'rb') as f:
                b2 = pickle.load(f)
            array2 = buffers.as_numpy_array(b2)
            self.assertTrue((array == array2).all())

        # test vector
        for dtype in [np.float32, np.float64, np.int32, np.int64]:
            array = np.array([3, 21, 1], dtype=dtype)
            b1 = buffers.as_vector_buffer(array)
            with open('tmp.pkl', 'wb') as f:
                pickle.dump(b1, f)
            with open('tmp.pkl', 'rb') as f:
                b2 = pickle.load(f)
            array2 = buffers.as_numpy_array(b2)
            self.assertTrue((array == array2).all())
Example #5
    def test_pickle(self):
        # test tensor
        for dtype in [np.float32, np.float64, np.int32, np.int64]:
            array = np.array([[[3, 21, 1], [22, 1, 5]], [[2, 2, 2], [7, 7, 7]]], dtype=dtype)
            b1 = buffers.as_tensor_buffer(array)
            with open("tmp.pkl", "wb") as f:
                pickle.dump(b1, f)
            with open("tmp.pkl", "rb") as f:
                b2 = pickle.load(f)
            array2 = buffers.as_numpy_array(b2)
            self.assertTrue((array == array2).all())

        # test matrix
        for dtype in [np.float32, np.float64, np.int32, np.int64]:
            array = np.array([[3, 21, 1], [22, 1, 5], [2, 2, 2], [7, 7, 7]], dtype=dtype)
            b1 = buffers.as_matrix_buffer(array)
            with open("tmp.pkl", "wb") as f:
                pickle.dump(b1, f)
            with open("tmp.pkl", "rb") as f:
                b2 = pickle.load(f)
            array2 = buffers.as_numpy_array(b2)
            self.assertTrue((array == array2).all())

        # test vector
        for dtype in [np.float32, np.float64, np.int32, np.int64]:
            array = np.array([3, 21, 1], dtype=dtype)
            b1 = buffers.as_vector_buffer(array)
            with open("tmp.pkl", "wb") as f:
                pickle.dump(b1, f)
            with open("tmp.pkl", "rb") as f:
                b2 = pickle.load(f)
            array2 = buffers.as_numpy_array(b2)
            self.assertTrue((array == array2).all())
Example #6
def create_biau2008_regression_axis_aligned_matrix_learner_32f(**kwargs):
    number_of_trees = int( kwargs.get('number_of_trees', 10) )
    number_of_leaves = int( kwargs.get('number_of_leaves', kwargs['y'].shape[0] / 5 + 1) )
    number_of_split_retries = int( kwargs.get('number_of_split_retries', 10) )
    number_of_features = 1
    feature_ordering = int( kwargs.get('feature_ordering', pipeline.FEATURES_BY_DATAPOINTS) )
    number_of_jobs = int( kwargs.get('number_of_jobs', 1) )
    dimension_of_y = int( kwargs['y'].shape[1] )

    try_split_criteria = try_split.MinNodeSizeCriteria(2)

    sample_data_step = pipeline.AllSamplesStep_f32f32i32(buffers.X_FLOAT_DATA)

    number_of_features_buffer = buffers.as_vector_buffer(np.array([number_of_features], dtype=np.int32))
    set_number_features_step = pipeline.SetInt32VectorBufferStep(number_of_features_buffer, pipeline.WHEN_NEW)
    tree_steps_pipeline = pipeline.Pipeline([sample_data_step, set_number_features_step])

    feature_params_step = matrix_features.AxisAlignedParamsStep_f32i32(set_number_features_step.OutputBufferId, buffers.X_FLOAT_DATA)
    matrix_feature = matrix_features.LinearFloat32MatrixFeature_f32i32(feature_params_step.FloatParamsBufferId,
                                                                      feature_params_step.IntParamsBufferId,
                                                                      sample_data_step.IndicesBufferId,
                                                                      buffers.X_FLOAT_DATA)
    matrix_feature_extractor_step = matrix_features.LinearFloat32MatrixFeatureExtractorStep_f32i32(matrix_feature, feature_ordering)
    slice_ys_step = pipeline.SliceFloat32MatrixBufferStep_i32(buffers.YS, sample_data_step.IndicesBufferId)
    slice_weights_step = pipeline.SliceFloat32VectorBufferStep_i32(sample_data_step.WeightsBufferId, sample_data_step.IndicesBufferId)

    impurity_walker = regression.SumOfVarianceWalker_f32i32(slice_weights_step.SlicedBufferId,
                                                            slice_ys_step.SlicedBufferId,
                                                            dimension_of_y)

    best_splitpoint_step = regression.SumOfVarianceRandomGapSplitpointsStep_f32i32(impurity_walker,
                                                                matrix_feature_extractor_step.FeatureValuesBufferId,
                                                                feature_ordering)

    node_steps_pipeline = pipeline.Pipeline([feature_params_step, matrix_feature_extractor_step,
                                            slice_ys_step, slice_weights_step, best_splitpoint_step])

    split_buffers = splitpoints.SplitSelectorBuffers(best_splitpoint_step.ImpurityBufferId,
                                                          best_splitpoint_step.SplitpointBufferId,
                                                          best_splitpoint_step.SplitpointCountsBufferId,
                                                          best_splitpoint_step.ChildCountsBufferId,
                                                          best_splitpoint_step.LeftYsBufferId,
                                                          best_splitpoint_step.RightYsBufferId,
                                                          feature_params_step.FloatParamsBufferId,
                                                          feature_params_step.IntParamsBufferId,
                                                          matrix_feature_extractor_step.FeatureValuesBufferId,
                                                          feature_ordering,
                                                          matrix_feature_extractor_step)
    should_split_criteria = should_split.MinChildSizeCriteria(1)
    finalizer = regression.MeanVarianceEstimatorFinalizer_f32()
    split_indices = splitpoints.SplitIndices_f32i32(sample_data_step.IndicesBufferId)
    split_selector = splitpoints.SplitSelector_f32i32([split_buffers], should_split_criteria, finalizer, split_indices )

    tree_learner = learn.Biau2008TreeLearner_f32i32(try_split_criteria, tree_steps_pipeline, node_steps_pipeline, split_selector, number_of_leaves, number_of_split_retries)
    forest_learner = learn.ParallelForestLearner(tree_learner, number_of_trees, dimension_of_y, number_of_jobs)
    return forest_learner
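A name-based reading of SumOfVarianceRandomGapSplitpointsStep, stated as an assumption rather than fact: instead of scanning every candidate splitpoint, it draws a splitpoint from a random gap between sorted feature values, with number_of_split_retries bounding how often a failed draw is retried. A rough NumPy sketch of that idea (hypothetical helper, not the library's implementation):

rng = np.random.default_rng(0)

def random_gap_splitpoint(feature_values):
    v = np.unique(feature_values)    # sorted unique values
    if len(v) < 2:
        return None                  # constant feature: nothing to split on
    g = rng.integers(0, len(v) - 1)  # pick one gap uniformly at random
    return 0.5 * (v[g] + v[g + 1])   # split in the middle of that gap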
Example #7
def load_and_sample(pose_path, list_of_poses, number_of_pixels_per_image):
    depths, labels = load_data(pose_path, list_of_poses)
    depths_buffer = buffers.as_tensor_buffer(depths)
    del depths
    pixel_indices, pixel_labels = sample_pixels_from_images(labels, number_of_pixels_per_image)
    del labels
    pixel_indices_buffer = buffers.as_matrix_buffer(pixel_indices)
    pixel_labels_buffer = buffers.as_vector_buffer(pixel_labels)
    del pixel_indices
    del pixel_labels
    return depths_buffer, pixel_indices_buffer, pixel_labels_buffer
Example #8
def load_training_data(numpy_filename):
    with open(numpy_filename, 'rb') as f:
        depths = np.load(f)
        labels = np.load(f)
        pixel_indices = np.load(f)
        pixel_labels = np.load(f)
    depths_buffer = buffers.as_tensor_buffer(depths)
    del depths
    del labels
    pixel_indices_buffer = buffers.as_matrix_buffer(pixel_indices)
    del pixel_indices
    pixel_labels_buffer = buffers.as_vector_buffer(pixel_labels)
    del pixel_labels
    return depths_buffer, pixel_indices_buffer, pixel_labels_buffer
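load_training_data reads four arrays back-to-back from a single handle, so the file must have been written by four np.save calls in the same order. A hypothetical counterpart sketch (save_training_data is an illustrative name, not from the source):

def save_training_data(numpy_filename, depths, labels, pixel_indices, pixel_labels):
    # Write the arrays in the exact order load_training_data reads them back.
    with open(numpy_filename, 'wb') as f:
        np.save(f, depths)
        np.save(f, labels)
        np.save(f, pixel_indices)
        np.save(f, pixel_labels)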
Example #9
def create_axis_aligned_matrix_two_stream_learner_32f(**kwargs):
    number_of_trees = int(kwargs.get('number_of_trees', 10))
    number_of_features = int(
        kwargs.get('number_of_features', np.sqrt(kwargs['x'].shape[1])))
    feature_ordering = int(
        kwargs.get('feature_ordering', pipeline.FEATURES_BY_DATAPOINTS))
    number_of_splitpoints = int(kwargs.get('number_of_splitpoints', 1))
    number_of_jobs = int(kwargs.get('number_of_jobs', 1))
    number_of_classes = int(np.max(kwargs['classes']) + 1)
    probability_of_impurity_stream = float(
        kwargs.get('probability_of_impurity_stream', 0.5))

    try_split_criteria = create_try_split_criteria(**kwargs)

    if 'bootstrap' in kwargs and kwargs.get('bootstrap'):
        sample_data_step = pipeline.BootstrapSamplesStep_f32f32i32(
            buffers.X_FLOAT_DATA)
    else:
        sample_data_step = pipeline.AllSamplesStep_f32f32i32(
            buffers.X_FLOAT_DATA)

    number_of_features_buffer = buffers.as_vector_buffer(
        np.array([number_of_features], dtype=np.int32))
    set_number_features_step = pipeline.SetInt32VectorBufferStep(
        number_of_features_buffer, pipeline.WHEN_NEW)
    assign_stream_step = splitpoints.AssignStreamStep_f32i32(
        sample_data_step.WeightsBufferId, probability_of_impurity_stream)
    tree_steps_pipeline = pipeline.Pipeline(
        [sample_data_step, set_number_features_step, assign_stream_step])

    feature_params_step = matrix_features.AxisAlignedParamsStep_f32i32(
        set_number_features_step.OutputBufferId, buffers.X_FLOAT_DATA)
    matrix_feature = matrix_features.LinearFloat32MatrixFeature_f32i32(
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        sample_data_step.IndicesBufferId, buffers.X_FLOAT_DATA)
    matrix_feature_extractor_step = matrix_features.LinearFloat32MatrixFeatureExtractorStep_f32i32(
        matrix_feature, feature_ordering)
    slice_classes_step = pipeline.SliceInt32VectorBufferStep_i32(
        buffers.CLASS_LABELS, sample_data_step.IndicesBufferId)
    slice_weights_step = pipeline.SliceFloat32VectorBufferStep_i32(
        sample_data_step.WeightsBufferId, sample_data_step.IndicesBufferId)
    slice_assign_stream_step = pipeline.SliceInt32VectorBufferStep_i32(
        assign_stream_step.StreamTypeBufferId,
        sample_data_step.IndicesBufferId)

    random_splitpoint_selection_step = splitpoints.RandomSplitpointsStep_f32i32(
        matrix_feature_extractor_step.FeatureValuesBufferId,
        number_of_splitpoints, feature_ordering,
        slice_assign_stream_step.SlicedBufferId)

    class_stats_updater = classification.ClassStatsUpdater_f32i32(
        slice_weights_step.SlicedBufferId, slice_classes_step.SlicedBufferId,
        number_of_classes)
    two_stream_split_stats_step = classification.ClassStatsUpdaterTwoStreamStep_f32i32(
        random_splitpoint_selection_step.SplitpointsBufferId,
        random_splitpoint_selection_step.SplitpointsCountsBufferId,
        slice_assign_stream_step.SlicedBufferId,
        matrix_feature_extractor_step.FeatureValuesBufferId, feature_ordering,
        class_stats_updater)

    impurity_step = classification.ClassInfoGainSplitpointsImpurity_f32i32(
        random_splitpoint_selection_step.SplitpointsCountsBufferId,
        two_stream_split_stats_step.ChildCountsImpurityBufferId,
        two_stream_split_stats_step.LeftImpurityStatsBufferId,
        two_stream_split_stats_step.RightImpurityStatsBufferId)

    node_steps_pipeline = pipeline.Pipeline([
        feature_params_step, matrix_feature_extractor_step, slice_classes_step,
        slice_weights_step, slice_assign_stream_step,
        random_splitpoint_selection_step, two_stream_split_stats_step,
        impurity_step
    ])

    split_buffers = splitpoints.SplitSelectorBuffers(
        impurity_step.ImpurityBufferId,
        random_splitpoint_selection_step.SplitpointsBufferId,
        random_splitpoint_selection_step.SplitpointsCountsBufferId,
        two_stream_split_stats_step.ChildCountsEstimatorBufferId,
        two_stream_split_stats_step.LeftEstimatorStatsBufferId,
        two_stream_split_stats_step.RightEstimatorStatsBufferId,
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        matrix_feature_extractor_step.FeatureValuesBufferId, feature_ordering,
        matrix_feature_extractor_step)
    should_split_criteria = create_should_split_criteria(**kwargs)
    finalizer = classification.ClassEstimatorFinalizer_f32()
    split_indices = splitpoints.SplitIndices_f32i32(
        sample_data_step.IndicesBufferId)
    split_selector = splitpoints.SplitSelector_f32i32([split_buffers],
                                                      should_split_criteria,
                                                      finalizer, split_indices)

    if 'tree_order' in kwargs and kwargs.get('tree_order') == 'breadth_first':
        tree_learner = learn.BreadthFirstTreeLearner_f32i32(
            try_split_criteria, tree_steps_pipeline, node_steps_pipeline,
            split_selector)
    else:
        tree_learner = learn.DepthFirstTreeLearner_f32i32(
            try_split_criteria, tree_steps_pipeline, node_steps_pipeline,
            split_selector)
    forest_learner = learn.ParallelForestLearner(tree_learner, number_of_trees,
                                                 number_of_classes,
                                                 number_of_jobs)
    return forest_learner
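The buffer names here (LeftImpurityStatsBufferId vs LeftEstimatorStatsBufferId) suggest the two-stream idea: each datapoint is assigned either to an impurity stream, used to propose and score splitpoints, or to an estimator stream, used to fill in the child/leaf statistics, with probability_of_impurity_stream steering the assignment. That reading is an interpretation, not confirmed by the source; a toy NumPy sketch of the assignment step:

# Toy sketch of per-datapoint stream assignment (hypothetical, pure NumPy):
rng = np.random.default_rng(0)
number_of_datapoints = 8
is_impurity_stream = rng.random(number_of_datapoints) < 0.5  # probability_of_impurity_stream
# True  -> proposes splitpoints and contributes to impurity statistics
# False -> contributes to the estimator (leaf) statistics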
Example #10
def create_class_pair_difference_matrix_walking_learner_32f(**kwargs):
    number_of_trees = int(kwargs.get('number_of_trees', 10))
    number_of_features = int(
        kwargs.get('number_of_features', np.sqrt(kwargs['x'].shape[1])))
    feature_ordering = int(
        kwargs.get('feature_ordering', pipeline.FEATURES_BY_DATAPOINTS))
    number_of_jobs = int(kwargs.get('number_of_jobs', 1))
    number_of_classes = int(np.max(kwargs['classes']) + 1)

    try_split_criteria = create_try_split_criteria(**kwargs)

    if 'bootstrap' in kwargs and kwargs.get('bootstrap'):
        sample_data_step = pipeline.BootstrapSamplesStep_f32f32i32(
            buffers.X_FLOAT_DATA)
    else:
        sample_data_step = pipeline.AllSamplesStep_f32f32i32(
            buffers.X_FLOAT_DATA)

    number_of_features_buffer = buffers.as_vector_buffer(
        np.array([number_of_features], dtype=np.int32))
    set_number_features_step = pipeline.SetInt32VectorBufferStep(
        number_of_features_buffer, pipeline.WHEN_NEW)
    tree_steps_pipeline = pipeline.Pipeline(
        [sample_data_step, set_number_features_step])

    feature_params_step = matrix_features.ClassPairDifferenceParamsStep_f32i32(
        set_number_features_step.OutputBufferId, buffers.X_FLOAT_DATA,
        buffers.CLASS_LABELS, sample_data_step.IndicesBufferId)

    matrix_feature = matrix_features.LinearFloat32MatrixFeature_f32i32(
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        sample_data_step.IndicesBufferId, buffers.X_FLOAT_DATA)

    matrix_feature_extractor_step = matrix_features.LinearFloat32MatrixFeatureExtractorStep_f32i32(
        matrix_feature, feature_ordering)
    slice_classes_step = pipeline.SliceInt32VectorBufferStep_i32(
        buffers.CLASS_LABELS, sample_data_step.IndicesBufferId)
    slice_weights_step = pipeline.SliceFloat32VectorBufferStep_i32(
        sample_data_step.WeightsBufferId, sample_data_step.IndicesBufferId)
    class_infogain_walker = classification.ClassInfoGainWalker_f32i32(
        slice_weights_step.SlicedBufferId, slice_classes_step.SlicedBufferId,
        number_of_classes)
    best_splitpoint_step = classification.ClassInfoGainBestSplitpointsWalkingSortedStep_f32i32(
        class_infogain_walker,
        matrix_feature_extractor_step.FeatureValuesBufferId, feature_ordering)
    node_steps_pipeline = pipeline.Pipeline([
        feature_params_step, matrix_feature_extractor_step, slice_classes_step,
        slice_weights_step, best_splitpoint_step
    ])

    split_buffers = splitpoints.SplitSelectorBuffers(
        best_splitpoint_step.ImpurityBufferId,
        best_splitpoint_step.SplitpointBufferId,
        best_splitpoint_step.SplitpointCountsBufferId,
        best_splitpoint_step.ChildCountsBufferId,
        best_splitpoint_step.LeftYsBufferId,
        best_splitpoint_step.RightYsBufferId,
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        matrix_feature_extractor_step.FeatureValuesBufferId, feature_ordering,
        matrix_feature_extractor_step)
    should_split_criteria = create_should_split_criteria(**kwargs)
    finalizer = classification.ClassEstimatorFinalizer_f32()
    split_indices = splitpoints.SplitIndices_f32i32(
        sample_data_step.IndicesBufferId)
    split_selector = splitpoints.SplitSelector_f32i32([split_buffers],
                                                      should_split_criteria,
                                                      finalizer, split_indices)

    if 'tree_order' in kwargs and kwargs.get('tree_order') == 'breadth_first':
        tree_learner = learn.BreadthFirstTreeLearner_f32i32(
            try_split_criteria, tree_steps_pipeline, node_steps_pipeline,
            split_selector)
    else:
        tree_learner = learn.DepthFirstTreeLearner_f32i32(
            try_split_criteria, tree_steps_pipeline, node_steps_pipeline,
            split_selector)

    forest_learner = learn.ParallelForestLearner(tree_learner, number_of_trees,
                                                 number_of_classes,
                                                 number_of_jobs)
    return forest_learner
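A name-based guess at what ClassPairDifferenceParamsStep produces, labeled as such: a linear feature whose projection direction is the difference between two datapoints drawn from different classes. A hypothetical NumPy sketch of that construction:

# Hypothetical sketch (names and data illustrative, not the library's code):
rng = np.random.default_rng(0)
x = rng.normal(size=(6, 4)).astype(np.float32)
classes = np.array([0, 0, 1, 1, 2, 2], dtype=np.int32)
i = rng.choice(np.flatnonzero(classes == 0))   # one datapoint of class 0
j = rng.choice(np.flatnonzero(classes == 1))   # one datapoint of class 1
direction = x[i] - x[j]                        # candidate hyperplane direction
feature_values = x @ direction                 # one projected value per datapoint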
Example #11
def create_biau2012_regression_axis_aligned_matrix_learner_32f(**kwargs):
    number_of_trees = int(kwargs.get('number_of_trees', 10))
    number_of_leaves = int(
        kwargs.get('number_of_leaves', kwargs['y'].shape[0] / 5 + 1))
    number_of_features = int(
        kwargs.get('number_of_features', (kwargs['x'].shape[1]) / 3 + 0.5))
    # number_of_features = int( kwargs.get('number_of_features', np.sqrt(kwargs['x'].shape[1])))
    feature_ordering = int(
        kwargs.get('feature_ordering', pipeline.FEATURES_BY_DATAPOINTS))
    number_of_jobs = int(kwargs.get('number_of_jobs', 1))
    dimension_of_y = int(kwargs['y'].shape[1])
    probability_of_impurity_stream = float(
        kwargs.get('probability_of_impurity_stream', 0.5))

    try_split_criteria = create_try_split_criteria(**kwargs)

    sample_data_step = pipeline.AllSamplesStep_f32f32i32(buffers.X_FLOAT_DATA)

    number_of_features_buffer = buffers.as_vector_buffer(
        np.array([number_of_features], dtype=np.int32))
    set_number_features_step = pipeline.SetInt32VectorBufferStep(
        number_of_features_buffer, pipeline.WHEN_NEW)
    assert (
        np.max(np.abs(kwargs['x'])) <= 1.00
    )  # double check that the data has been scaled into a -1,1 hypercube
    feature_range_buffer = buffers.as_vector_buffer(
        np.array([-1, 1], dtype=np.float32))
    set_feature_range_buffer_step = pipeline.SetFloat32VectorBufferStep(
        feature_range_buffer, pipeline.WHEN_NEW)

    assign_stream_step = splitpoints.AssignStreamStep_f32i32(
        sample_data_step.WeightsBufferId, probability_of_impurity_stream,
        False)
    forest_steps_pipeline = pipeline.Pipeline([
        sample_data_step, set_number_features_step,
        set_feature_range_buffer_step, assign_stream_step
    ])
    tree_steps_pipeline = pipeline.Pipeline([])

    feature_params_step = matrix_features.AxisAlignedParamsStep_f32i32(
        set_number_features_step.OutputBufferId, buffers.X_FLOAT_DATA)
    matrix_feature = matrix_features.LinearFloat32MatrixFeature_f32i32(
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        sample_data_step.IndicesBufferId, buffers.X_FLOAT_DATA)
    matrix_feature_extractor_step = matrix_features.LinearFloat32MatrixFeatureExtractorStep_f32i32(
        matrix_feature, feature_ordering)
    slice_ys_step = pipeline.SliceFloat32MatrixBufferStep_i32(
        buffers.YS, sample_data_step.IndicesBufferId)
    slice_weights_step = pipeline.SliceFloat32VectorBufferStep_i32(
        sample_data_step.WeightsBufferId, sample_data_step.IndicesBufferId)
    slice_assign_stream_step = pipeline.SliceInt32VectorBufferStep_i32(
        assign_stream_step.StreamTypeBufferId,
        sample_data_step.IndicesBufferId)

    quantized_feature_equal = pipeline.FeatureEqualQuantized_f32i32(1.0)

    midpoint_step = splitpoints.RangeMidpointStep_f32i32(
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        set_feature_range_buffer_step.OutputBufferId, quantized_feature_equal)

    mean_variance_stats_updater = regression.MeanVarianceStatsUpdater_f32i32(
        slice_weights_step.SlicedBufferId, slice_ys_step.SlicedBufferId,
        dimension_of_y)

    two_stream_split_stats_step = regression.SumOfVarianceTwoStreamStep_f32i32(
        midpoint_step.SplitpointsBufferId,
        midpoint_step.SplitpointsCountsBufferId,
        slice_assign_stream_step.SlicedBufferId,
        matrix_feature_extractor_step.FeatureValuesBufferId, feature_ordering,
        mean_variance_stats_updater)

    impurity_step = regression.SumOfVarianceSplitpointsImpurity_f32i32(
        midpoint_step.SplitpointsCountsBufferId,
        two_stream_split_stats_step.ChildCountsImpurityBufferId,
        two_stream_split_stats_step.LeftImpurityStatsBufferId,
        two_stream_split_stats_step.RightImpurityStatsBufferId)

    node_steps_pipeline = pipeline.Pipeline([
        feature_params_step, matrix_feature_extractor_step, slice_ys_step,
        slice_weights_step, slice_assign_stream_step, midpoint_step,
        two_stream_split_stats_step, impurity_step
    ])

    split_buffers = splitpoints.SplitSelectorBuffers(
        impurity_step.ImpurityBufferId, midpoint_step.SplitpointsBufferId,
        midpoint_step.SplitpointsCountsBufferId,
        two_stream_split_stats_step.ChildCountsEstimatorBufferId,
        two_stream_split_stats_step.LeftEstimatorStatsBufferId,
        two_stream_split_stats_step.RightEstimatorStatsBufferId,
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        matrix_feature_extractor_step.FeatureValuesBufferId, feature_ordering,
        matrix_feature_extractor_step)
    should_split_criteria = no_split_criteria(**kwargs)
    finalizer = regression.MeanVarianceEstimatorFinalizer_f32()
    split_indices = splitpoints.SplitIndices_f32i32(
        sample_data_step.IndicesBufferId)
    split_midpoint_ranges = splitpoints.SplitBuffersFeatureRange_f32i32(
        midpoint_step.PastFloatParamsBufferId,
        midpoint_step.PastIntParamsBufferId, midpoint_step.PastRangesBufferId,
        set_feature_range_buffer_step.OutputBufferId, quantized_feature_equal)
    split_steps = splitpoints.SplitBuffersList(
        [split_indices, split_midpoint_ranges])

    split_selector = splitpoints.SplitSelector_f32i32([split_buffers],
                                                      should_split_criteria,
                                                      finalizer, split_steps)

    tree_learner = learn.BreadthFirstTreeLearner_f32i32(
        try_split_criteria, tree_steps_pipeline, node_steps_pipeline,
        split_selector, number_of_leaves)
    forest_learner = learn.ParallelForestLearner(tree_learner,
                                                 forest_steps_pipeline,
                                                 number_of_trees,
                                                 dimension_of_y,
                                                 number_of_jobs)
    return forest_learner
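The [-1, 1] feature_range_buffer, RangeMidpointStep, and the PastRangesBufferId bookkeeping are consistent with the Biau (2012) midpoint rule: each node keeps the surviving value range per feature, always splits at its midpoint, and hands the halved range to its children. Under that assumption, a one-split sketch:

# One split under the assumed midpoint rule (hypothetical sketch):
lo, hi = -1.0, 1.0                  # initial range, as in feature_range_buffer
splitpoint = 0.5 * (lo + hi)        # always the midpoint of the current range
left_range, right_range = (lo, splitpoint), (splitpoint, hi)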
Example #12
def create_axis_aligned_matrix_two_stream_learner_32f(**kwargs):
    number_of_trees = int(kwargs.get("number_of_trees", 10))
    number_of_features = int(kwargs.get("number_of_features", np.sqrt(kwargs["x"].shape[1])))
    feature_ordering = int(kwargs.get("feature_ordering", pipeline.FEATURES_BY_DATAPOINTS))
    number_of_splitpoints = int(kwargs.get("number_of_splitpoints", 1))
    number_of_jobs = int(kwargs.get("number_of_jobs", 1))
    number_of_classes = int(np.max(kwargs["classes"]) + 1)
    probability_of_impurity_stream = float(kwargs.get("probability_of_impurity_stream", 0.5))

    try_split_criteria = create_try_split_criteria(**kwargs)

    if "bootstrap" in kwargs and kwargs.get("bootstrap"):
        sample_data_step = pipeline.BootstrapSamplesStep_f32f32i32(buffers.X_FLOAT_DATA)
    else:
        sample_data_step = pipeline.AllSamplesStep_f32f32i32(buffers.X_FLOAT_DATA)

    number_of_features_buffer = buffers.as_vector_buffer(np.array([number_of_features], dtype=np.int32))
    set_number_features_step = pipeline.SetInt32VectorBufferStep(number_of_features_buffer, pipeline.WHEN_NEW)
    assign_stream_step = splitpoints.AssignStreamStep_f32i32(
        sample_data_step.WeightsBufferId, probability_of_impurity_stream
    )
    tree_steps_pipeline = pipeline.Pipeline([sample_data_step, set_number_features_step, assign_stream_step])

    feature_params_step = matrix_features.AxisAlignedParamsStep_f32i32(
        set_number_features_step.OutputBufferId, buffers.X_FLOAT_DATA
    )
    matrix_feature = matrix_features.LinearFloat32MatrixFeature_f32i32(
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        sample_data_step.IndicesBufferId,
        buffers.X_FLOAT_DATA,
    )
    matrix_feature_extractor_step = matrix_features.LinearFloat32MatrixFeatureExtractorStep_f32i32(
        matrix_feature, feature_ordering
    )
    slice_classes_step = pipeline.SliceInt32VectorBufferStep_i32(buffers.CLASS_LABELS, sample_data_step.IndicesBufferId)
    slice_weights_step = pipeline.SliceFloat32VectorBufferStep_i32(
        sample_data_step.WeightsBufferId, sample_data_step.IndicesBufferId
    )
    slice_assign_stream_step = pipeline.SliceInt32VectorBufferStep_i32(
        assign_stream_step.StreamTypeBufferId, sample_data_step.IndicesBufferId
    )

    random_splitpoint_selection_step = splitpoints.RandomSplitpointsStep_f32i32(
        matrix_feature_extractor_step.FeatureValuesBufferId,
        number_of_splitpoints,
        feature_ordering,
        slice_assign_stream_step.SlicedBufferId,
    )

    class_stats_updater = classification.ClassStatsUpdater_f32i32(
        slice_weights_step.SlicedBufferId, slice_classes_step.SlicedBufferId, number_of_classes
    )
    two_stream_split_stats_step = classification.ClassStatsUpdaterTwoStreamStep_f32i32(
        random_splitpoint_selection_step.SplitpointsBufferId,
        random_splitpoint_selection_step.SplitpointsCountsBufferId,
        slice_assign_stream_step.SlicedBufferId,
        matrix_feature_extractor_step.FeatureValuesBufferId,
        feature_ordering,
        class_stats_updater,
    )

    impurity_step = classification.ClassInfoGainSplitpointsImpurity_f32i32(
        random_splitpoint_selection_step.SplitpointsCountsBufferId,
        two_stream_split_stats_step.ChildCountsImpurityBufferId,
        two_stream_split_stats_step.LeftImpurityStatsBufferId,
        two_stream_split_stats_step.RightImpurityStatsBufferId,
    )

    node_steps_pipeline = pipeline.Pipeline(
        [
            feature_params_step,
            matrix_feature_extractor_step,
            slice_classes_step,
            slice_weights_step,
            slice_assign_stream_step,
            random_splitpoint_selection_step,
            two_stream_split_stats_step,
            impurity_step,
        ]
    )

    split_buffers = splitpoints.SplitSelectorBuffers(
        impurity_step.ImpurityBufferId,
        random_splitpoint_selection_step.SplitpointsBufferId,
        random_splitpoint_selection_step.SplitpointsCountsBufferId,
        two_stream_split_stats_step.ChildCountsEstimatorBufferId,
        two_stream_split_stats_step.LeftEstimatorStatsBufferId,
        two_stream_split_stats_step.RightEstimatorStatsBufferId,
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        matrix_feature_extractor_step.FeatureValuesBufferId,
        feature_ordering,
        matrix_feature_extractor_step,
    )
    should_split_criteria = create_should_split_criteria(**kwargs)
    finalizer = classification.ClassEstimatorFinalizer_f32()
    split_indices = splitpoints.SplitIndices_f32i32(sample_data_step.IndicesBufferId)
    split_selector = splitpoints.SplitSelector_f32i32([split_buffers], should_split_criteria, finalizer, split_indices)

    if "tree_order" in kwargs and kwargs.get("tree_order") == "breadth_first":
        tree_learner = learn.BreadthFirstTreeLearner_f32i32(
            try_split_criteria, tree_steps_pipeline, node_steps_pipeline, split_selector
        )
    else:
        tree_learner = learn.DepthFirstTreeLearner_f32i32(
            try_split_criteria, tree_steps_pipeline, node_steps_pipeline, split_selector
        )
    forest_learner = learn.ParallelForestLearner(tree_learner, number_of_trees, number_of_classes, number_of_jobs)
    return forest_learner
Example #13
def create_biau2008_regression_scaled_depth_delta_learner_32f(**kwargs):
    ux = float(kwargs.get('ux'))
    uy = float(kwargs.get('uy'))
    vx = float(kwargs.get('vx'))
    vy = float(kwargs.get('vy'))

    number_of_trees = int(kwargs.get('number_of_trees', 10))
    number_of_leaves = int(
        kwargs.get('number_of_leaves', kwargs['y'].GetM() / 5 + 1))
    number_of_split_retries = int(kwargs.get('number_of_split_retries', 10))
    number_of_features = 1
    feature_ordering = int(
        kwargs.get('feature_ordering', pipeline.FEATURES_BY_DATAPOINTS))
    number_of_jobs = int(kwargs.get('number_of_jobs', 1))
    dimension_of_y = int(kwargs['y'].GetN())

    try_split_criteria = try_split.MinNodeSizeCriteria(2)

    sample_data_step = pipeline.AllSamplesStep_i32f32i32(buffers.PIXEL_INDICES)

    number_of_features_buffer = buffers.as_vector_buffer(
        np.array([number_of_features], dtype=np.int32))
    set_number_features_step = pipeline.SetInt32VectorBufferStep(
        number_of_features_buffer, pipeline.WHEN_NEW)
    tree_steps_pipeline = pipeline.Pipeline(
        [sample_data_step, set_number_features_step])

    feature_params_step = image_features.PixelPairGaussianOffsetsStep_f32i32(
        set_number_features_step.OutputBufferId, ux, uy, vx, vy)
    depth_delta_feature = image_features.ScaledDepthDeltaFeature_f32i32(
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        sample_data_step.IndicesBufferId, buffers.PIXEL_INDICES,
        buffers.DEPTH_IMAGES, buffers.OFFSET_SCALES)
    depth_delta_feature_extractor_step = image_features.ScaledDepthDeltaFeatureExtractorStep_f32i32(
        depth_delta_feature, feature_ordering)
    slice_ys_step = pipeline.SliceFloat32MatrixBufferStep_i32(
        buffers.YS, sample_data_step.IndicesBufferId)
    slice_weights_step = pipeline.SliceFloat32VectorBufferStep_i32(
        sample_data_step.WeightsBufferId, sample_data_step.IndicesBufferId)

    impurity_walker = regression.SumOfVarianceWalker_f32i32(
        slice_weights_step.SlicedBufferId, slice_ys_step.SlicedBufferId,
        dimension_of_y)

    best_splitpoint_step = regression.SumOfVarianceRandomGapSplitpointsStep_f32i32(
        impurity_walker,
        depth_delta_feature_extractor_step.FeatureValuesBufferId,
        feature_ordering)

    node_steps_pipeline = pipeline.Pipeline([
        feature_params_step, depth_delta_feature_extractor_step, slice_ys_step,
        slice_weights_step, best_splitpoint_step
    ])

    split_buffers = splitpoints.SplitSelectorBuffers(
        best_splitpoint_step.ImpurityBufferId,
        best_splitpoint_step.SplitpointBufferId,
        best_splitpoint_step.SplitpointCountsBufferId,
        best_splitpoint_step.ChildCountsBufferId,
        best_splitpoint_step.LeftYsBufferId,
        best_splitpoint_step.RightYsBufferId,
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        depth_delta_feature_extractor_step.FeatureValuesBufferId,
        feature_ordering, depth_delta_feature_extractor_step)
    should_split_criteria = should_split.MinChildSizeCriteria(1)
    finalizer = regression.MeanVarianceEstimatorFinalizer_f32()
    split_indices = splitpoints.SplitIndices_f32i32(
        sample_data_step.IndicesBufferId)
    split_selector = splitpoints.SplitSelector_f32i32([split_buffers],
                                                      should_split_criteria,
                                                      finalizer, split_indices)

    tree_learner = learn.Biau2008TreeLearner_f32i32(
        try_split_criteria, tree_steps_pipeline, node_steps_pipeline,
        split_selector, number_of_leaves, number_of_split_retries)
    forest_learner = learn.ParallelForestLearner(tree_learner, number_of_trees,
                                                 dimension_of_y,
                                                 number_of_jobs)
    return forest_learner
Example #14
def create_regression_axis_aligned_matrix_learner_32f(**kwargs):
    number_of_trees = int(kwargs.get('number_of_trees', 10))
    number_of_leaves = int(
        kwargs.get('number_of_leaves', kwargs['y'].shape[0] / 5 + 1))
    number_of_features = int(
        kwargs.get('number_of_features', (kwargs['x'].shape[1]) / 3 + 0.5))
    # number_of_features = int( kwargs.get('number_of_features', np.sqrt(kwargs['x'].shape[1])))
    feature_ordering = int(
        kwargs.get('feature_ordering', pipeline.FEATURES_BY_DATAPOINTS))
    number_of_jobs = int(kwargs.get('number_of_jobs', 1))
    dimension_of_y = int(kwargs['y'].shape[1])

    try_split_criteria = create_try_split_criteria(**kwargs)

    if 'bootstrap' in kwargs and kwargs.get('bootstrap'):
        sample_data_step = pipeline.BootstrapSamplesStep_f32f32i32(
            buffers.X_FLOAT_DATA)
    else:
        sample_data_step = pipeline.AllSamplesStep_f32f32i32(
            buffers.X_FLOAT_DATA)

    number_of_features_buffer = buffers.as_vector_buffer(
        np.array([number_of_features], dtype=np.int32))
    set_number_features_step = pipeline.SetInt32VectorBufferStep(
        number_of_features_buffer, pipeline.WHEN_NEW)
    tree_steps_pipeline = pipeline.Pipeline(
        [sample_data_step, set_number_features_step])

    feature_params_step = matrix_features.AxisAlignedParamsStep_f32i32(
        set_number_features_step.OutputBufferId, buffers.X_FLOAT_DATA)
    matrix_feature = matrix_features.LinearFloat32MatrixFeature_f32i32(
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        sample_data_step.IndicesBufferId, buffers.X_FLOAT_DATA)
    matrix_feature_extractor_step = matrix_features.LinearFloat32MatrixFeatureExtractorStep_f32i32(
        matrix_feature, feature_ordering)
    slice_ys_step = pipeline.SliceFloat32MatrixBufferStep_i32(
        buffers.YS, sample_data_step.IndicesBufferId)
    slice_weights_step = pipeline.SliceFloat32VectorBufferStep_i32(
        sample_data_step.WeightsBufferId, sample_data_step.IndicesBufferId)
    impurity_walker = regression.SumOfVarianceWalker_f32i32(
        slice_weights_step.SlicedBufferId, slice_ys_step.SlicedBufferId,
        dimension_of_y)
    best_splitpoint_step = regression.SumOfVarianceBestSplitpointsWalkingSortedStep_f32i32(
        impurity_walker, matrix_feature_extractor_step.FeatureValuesBufferId,
        feature_ordering)
    node_steps_pipeline = pipeline.Pipeline([
        feature_params_step, matrix_feature_extractor_step, slice_ys_step,
        slice_weights_step, best_splitpoint_step
    ])

    split_buffers = splitpoints.SplitSelectorBuffers(
        best_splitpoint_step.ImpurityBufferId,
        best_splitpoint_step.SplitpointBufferId,
        best_splitpoint_step.SplitpointCountsBufferId,
        best_splitpoint_step.ChildCountsBufferId,
        best_splitpoint_step.LeftYsBufferId,
        best_splitpoint_step.RightYsBufferId,
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        matrix_feature_extractor_step.FeatureValuesBufferId, feature_ordering,
        matrix_feature_extractor_step)
    should_split_criteria = create_should_split_criteria(**kwargs)
    finalizer = regression.MeanVarianceEstimatorFinalizer_f32()
    split_indices = splitpoints.SplitIndices_f32i32(
        sample_data_step.IndicesBufferId)
    split_selector = splitpoints.SplitSelector_f32i32([split_buffers],
                                                      should_split_criteria,
                                                      finalizer, split_indices)

    tree_learner = learn.BreadthFirstTreeLearner_f32i32(
        try_split_criteria, tree_steps_pipeline, node_steps_pipeline,
        split_selector, number_of_leaves)
    forest_learner = learn.ParallelForestLearner(tree_learner, number_of_trees,
                                                 dimension_of_y,
                                                 number_of_jobs)
    return forest_learner
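Reading the class name SumOfVarianceWalker as its objective (an assumption): the weighted variance of y, summed over the dimensions of y; the walking-sorted step then keeps the splitpoint that most reduces this quantity across the two children. A small NumPy sketch of the quantity itself:

def sum_of_variance(ys, weights):
    # Weighted variance of each column of ys, summed over the y dimensions.
    w = weights / weights.sum()
    mean = (w[:, None] * ys).sum(axis=0)
    return float((w[:, None] * (ys - mean) ** 2).sum())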
Example #15
def create_biau2012_regression_axis_aligned_matrix_learner_32f(**kwargs):
    number_of_trees = int( kwargs.get('number_of_trees', 10) )
    number_of_leaves = int( kwargs.get('number_of_leaves', kwargs['y'].shape[0] / 5 + 1) )
    number_of_features = int( kwargs.get('number_of_features', (kwargs['x'].shape[1])/3 + 0.5))
    # number_of_features = int( kwargs.get('number_of_features', np.sqrt(kwargs['x'].shape[1])))
    feature_ordering = int( kwargs.get('feature_ordering', pipeline.FEATURES_BY_DATAPOINTS) )
    number_of_jobs = int( kwargs.get('number_of_jobs', 1) )
    dimension_of_y = int(  kwargs['y'].shape[1] )
    probability_of_impurity_stream = float(kwargs.get('probability_of_impurity_stream', 0.5) )

    try_split_criteria = create_try_split_criteria(**kwargs)

    sample_data_step = pipeline.AllSamplesStep_f32f32i32(buffers.X_FLOAT_DATA)

    number_of_features_buffer = buffers.as_vector_buffer(np.array([number_of_features], dtype=np.int32))
    set_number_features_step = pipeline.SetInt32VectorBufferStep(number_of_features_buffer, pipeline.WHEN_NEW)
    assert(np.max(np.abs(kwargs['x'])) <= 1.00) # double check that the data has been scaled into a -1,1 hypercube
    feature_range_buffer = buffers.as_vector_buffer(np.array([-1, 1], dtype=np.float32))
    set_feature_range_buffer_step = pipeline.SetFloat32VectorBufferStep(feature_range_buffer, pipeline.WHEN_NEW)

    assign_stream_step = splitpoints.AssignStreamStep_f32i32(sample_data_step.WeightsBufferId, probability_of_impurity_stream, False)
    forest_steps_pipeline = pipeline.Pipeline([sample_data_step, set_number_features_step, set_feature_range_buffer_step, assign_stream_step])
    tree_steps_pipeline = pipeline.Pipeline([])

    feature_params_step = matrix_features.AxisAlignedParamsStep_f32i32(set_number_features_step.OutputBufferId, buffers.X_FLOAT_DATA)
    matrix_feature = matrix_features.LinearFloat32MatrixFeature_f32i32(feature_params_step.FloatParamsBufferId,
                                                                      feature_params_step.IntParamsBufferId,
                                                                      sample_data_step.IndicesBufferId,
                                                                      buffers.X_FLOAT_DATA)
    matrix_feature_extractor_step = matrix_features.LinearFloat32MatrixFeatureExtractorStep_f32i32(matrix_feature, feature_ordering)
    slice_ys_step = pipeline.SliceFloat32MatrixBufferStep_i32(buffers.YS, sample_data_step.IndicesBufferId)
    slice_weights_step = pipeline.SliceFloat32VectorBufferStep_i32(sample_data_step.WeightsBufferId, sample_data_step.IndicesBufferId)
    slice_assign_stream_step = pipeline.SliceInt32VectorBufferStep_i32(assign_stream_step.StreamTypeBufferId, sample_data_step.IndicesBufferId)


    quantized_feature_equal = pipeline.FeatureEqualQuantized_f32i32(1.0)

    midpoint_step = splitpoints.RangeMidpointStep_f32i32(feature_params_step.FloatParamsBufferId,
                                                        feature_params_step.IntParamsBufferId,
                                                        set_feature_range_buffer_step.OutputBufferId,
                                                        quantized_feature_equal)

    mean_variance_stats_updater = regression.MeanVarianceStatsUpdater_f32i32(slice_weights_step.SlicedBufferId,
                                                                              slice_ys_step.SlicedBufferId,
                                                                              dimension_of_y)

    two_stream_split_stats_step = regression.SumOfVarianceTwoStreamStep_f32i32(midpoint_step.SplitpointsBufferId,
                                                                          midpoint_step.SplitpointsCountsBufferId,
                                                                          slice_assign_stream_step.SlicedBufferId,
                                                                          matrix_feature_extractor_step.FeatureValuesBufferId,
                                                                          feature_ordering,
                                                                          mean_variance_stats_updater)


    impurity_step = regression.SumOfVarianceSplitpointsImpurity_f32i32(midpoint_step.SplitpointsCountsBufferId,
                                                                          two_stream_split_stats_step.ChildCountsImpurityBufferId,
                                                                          two_stream_split_stats_step.LeftImpurityStatsBufferId,
                                                                          two_stream_split_stats_step.RightImpurityStatsBufferId)

    node_steps_pipeline = pipeline.Pipeline([feature_params_step, matrix_feature_extractor_step,
                                            slice_ys_step, slice_weights_step, slice_assign_stream_step,
                                            midpoint_step, two_stream_split_stats_step, impurity_step])

    split_buffers = splitpoints.SplitSelectorBuffers(impurity_step.ImpurityBufferId,
                                                          midpoint_step.SplitpointsBufferId,
                                                          midpoint_step.SplitpointsCountsBufferId,
                                                          two_stream_split_stats_step.ChildCountsEstimatorBufferId,
                                                          two_stream_split_stats_step.LeftEstimatorStatsBufferId,
                                                          two_stream_split_stats_step.RightEstimatorStatsBufferId,
                                                          feature_params_step.FloatParamsBufferId,
                                                          feature_params_step.IntParamsBufferId,
                                                          matrix_feature_extractor_step.FeatureValuesBufferId,
                                                          feature_ordering,
                                                          matrix_feature_extractor_step)
    should_split_criteria = no_split_criteria(**kwargs)
    finalizer = regression.MeanVarianceEstimatorFinalizer_f32()
    split_indices = splitpoints.SplitIndices_f32i32(sample_data_step.IndicesBufferId)
    split_midpoint_ranges = splitpoints.SplitBuffersFeatureRange_f32i32(midpoint_step.PastFloatParamsBufferId,
                                                                        midpoint_step.PastIntParamsBufferId,
                                                                        midpoint_step.PastRangesBufferId,
                                                                        set_feature_range_buffer_step.OutputBufferId,
                                                                        quantized_feature_equal)
    split_steps = splitpoints.SplitBuffersList([split_indices, split_midpoint_ranges])

    split_selector = splitpoints.SplitSelector_f32i32([split_buffers],
                                                        should_split_criteria,
                                                        finalizer,
                                                        split_steps)

    tree_learner = learn.BreadthFirstTreeLearner_f32i32(try_split_criteria, tree_steps_pipeline, node_steps_pipeline, split_selector, number_of_leaves)
    forest_learner = learn.ParallelForestLearner(tree_learner, forest_steps_pipeline, number_of_trees, dimension_of_y, number_of_jobs)
    return forest_learner
Example #16
            # Load single pose depth and class labels
            depths = kinect_utils.load_depth_from_exr("%s%s.exr" % (args.pose_files_input_path, pose_filename))
            labels = kinect_utils.load_labels_from_png("%s%s.png" % (args.pose_files_input_path, pose_filename))
            pixel_indices, pixel_labels = kinect_utils.sample_pixels_from_image(labels[0,:,:], config.number_of_pixels_per_image)

            # Randomly sample pixels and offset scales
            (number_of_datapoints, _) = pixel_indices.shape
            offset_scales = np.array(np.random.uniform(0.8, 1.2, (number_of_datapoints, 2)), dtype=np.float32)
            datapoint_indices = np.array(np.arange(number_of_datapoints), dtype=np.int32)

            # Package buffers for learner
            bufferCollection = buffers.BufferCollection()
            bufferCollection.AddFloat32Tensor3Buffer(buffers.DEPTH_IMAGES, buffers.as_tensor_buffer(depths))
            bufferCollection.AddFloat32MatrixBuffer(buffers.OFFSET_SCALES, buffers.as_matrix_buffer(offset_scales))
            bufferCollection.AddInt32MatrixBuffer(buffers.PIXEL_INDICES, buffers.as_matrix_buffer(pixel_indices))
            bufferCollection.AddInt32VectorBuffer(buffers.CLASS_LABELS, buffers.as_vector_buffer(pixel_labels))

            # Update learner
            online_learner.Train(bufferCollection, buffers.Int32Vector(datapoint_indices))

            # Pickle forest and data used for training
            if (i+1) % 1 == 0:
            # if True:
                forest_pickle_filename = "%s/forest-%d-%d.pkl" % (online_run_folder, pass_id, i+1)
                pickle.dump(online_learner.GetForest(), gzip.open(forest_pickle_filename, 'wb'))

                # Print forest stats
                forestStats = online_learner.GetForest().GetForestStats()
                forestStats.Print()
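The forest is pickled through gzip.open above, so reading it back goes through gzip as well; a matching load sketch, reusing forest_pickle_filename from the loop and the same stats call:

import gzip
import pickle

with gzip.open(forest_pickle_filename, 'rb') as f:
    forest = pickle.load(f)
forest.GetForestStats().Print()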
Example #17
            offset_scales = np.array(np.random.uniform(
                0.8, 1.2, (number_of_datapoints, 2)),
                                     dtype=np.float32)
            datapoint_indices = np.array(np.arange(number_of_datapoints),
                                         dtype=np.int32)

            # Package buffers for learner
            bufferCollection = buffers.BufferCollection()
            bufferCollection.AddFloat32Tensor3Buffer(
                buffers.DEPTH_IMAGES, buffers.as_tensor_buffer(depths))
            bufferCollection.AddFloat32MatrixBuffer(
                buffers.OFFSET_SCALES, buffers.as_matrix_buffer(offset_scales))
            bufferCollection.AddInt32MatrixBuffer(
                buffers.PIXEL_INDICES, buffers.as_matrix_buffer(pixel_indices))
            bufferCollection.AddInt32VectorBuffer(
                buffers.CLASS_LABELS, buffers.as_vector_buffer(pixel_labels))

            # Update learner
            online_learner.Train(bufferCollection,
                                 buffers.Int32Vector(datapoint_indices))

            # Pickle forest and data used for training after every image
            if (i + 1) % 1 == 0:
                forest_pickle_filename = "%s/forest-%d-%d.pkl" % (
                    online_run_folder, pass_id, i + 1)
                pickle.dump(online_learner.GetForest(),
                            gzip.open(forest_pickle_filename, 'wb'))

                # Print forest stats
                forestStats = online_learner.GetForest().GetForestStats()
                forestStats.Print()
def create_biau2012_regression_scaled_depth_delta_learner_32f(**kwargs):
    ux = float(kwargs.get('ux'))
    uy = float(kwargs.get('uy'))
    vx = float(kwargs.get('vx'))
    vy = float(kwargs.get('vy'))

    number_of_trees = int(kwargs.get('number_of_trees', 10))
    number_of_leaves = int(
        kwargs.get('number_of_leaves', kwargs['y'].GetM() / 5 + 1))
    number_of_features = int(kwargs.get('number_of_features', 1))
    feature_ordering = int(
        kwargs.get('feature_ordering', pipeline.FEATURES_BY_DATAPOINTS))
    number_of_jobs = int(kwargs.get('number_of_jobs', 1))
    dimension_of_y = int(kwargs['y'].GetN())
    probability_of_impurity_stream = float(
        kwargs.get('probability_of_impurity_stream', 0.5))

    try_split_criteria = create_try_split_criteria(**kwargs)

    sample_data_step = pipeline.AllSamplesStep_i32f32i32(buffers.PIXEL_INDICES)

    number_of_features_buffer = buffers.as_vector_buffer(
        np.array([number_of_features], dtype=np.int32))
    set_number_features_step = pipeline.SetInt32VectorBufferStep(
        number_of_features_buffer, pipeline.WHEN_NEW)
    feature_range_buffer = buffers.as_vector_buffer(
        np.array([-6, 6], dtype=np.float32))
    set_feature_range_buffer_step = pipeline.SetFloat32VectorBufferStep(
        feature_range_buffer, pipeline.WHEN_NEW)

    assign_stream_step = splitpoints.AssignStreamStep_f32i32(
        sample_data_step.WeightsBufferId, probability_of_impurity_stream,
        False)
    forest_steps_pipeline = pipeline.Pipeline([
        sample_data_step, set_number_features_step,
        set_feature_range_buffer_step, assign_stream_step
    ])
    tree_steps_pipeline = pipeline.Pipeline([])

    feature_params_step = image_features.PixelPairGaussianOffsetsStep_f32i32(
        set_number_features_step.OutputBufferId, ux, uy, vx, vy)
    depth_delta_feature = image_features.ScaledDepthDeltaFeature_f32i32(
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        sample_data_step.IndicesBufferId, buffers.PIXEL_INDICES,
        buffers.DEPTH_IMAGES, buffers.OFFSET_SCALES)
    depth_delta_feature_extractor_step = image_features.ScaledDepthDeltaFeatureExtractorStep_f32i32(
        depth_delta_feature, feature_ordering)
    slice_ys_step = pipeline.SliceFloat32MatrixBufferStep_i32(
        buffers.YS, sample_data_step.IndicesBufferId)
    slice_weights_step = pipeline.SliceFloat32VectorBufferStep_i32(
        sample_data_step.WeightsBufferId, sample_data_step.IndicesBufferId)
    slice_assign_stream_step = pipeline.SliceInt32VectorBufferStep_i32(
        assign_stream_step.StreamTypeBufferId,
        sample_data_step.IndicesBufferId)

    quantized_feature_equal = pipeline.FeatureEqualQuantized_f32i32(1.0)

    midpoint_step = splitpoints.RangeMidpointStep_f32i32(
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        set_feature_range_buffer_step.OutputBufferId, quantized_feature_equal)

    mean_variance_stats_updater = regression.MeanVarianceStatsUpdater_f32i32(
        slice_weights_step.SlicedBufferId, slice_ys_step.SlicedBufferId,
        dimension_of_y)

    two_stream_split_stats_step = regression.SumOfVarianceTwoStreamStep_f32i32(
        midpoint_step.SplitpointsBufferId,
        midpoint_step.SplitpointsCountsBufferId,
        slice_assign_stream_step.SlicedBufferId,
        depth_delta_feature_extractor_step.FeatureValuesBufferId,
        feature_ordering, mean_variance_stats_updater)

    impurity_step = regression.SumOfVarianceSplitpointsImpurity_f32i32(
        midpoint_step.SplitpointsCountsBufferId,
        two_stream_split_stats_step.ChildCountsImpurityBufferId,
        two_stream_split_stats_step.LeftImpurityStatsBufferId,
        two_stream_split_stats_step.RightImpurityStatsBufferId)

    node_steps_pipeline = pipeline.Pipeline([
        feature_params_step, depth_delta_feature_extractor_step, slice_ys_step,
        slice_weights_step, slice_assign_stream_step, midpoint_step,
        two_stream_split_stats_step, impurity_step
    ])

    split_buffers = splitpoints.SplitSelectorBuffers(
        impurity_step.ImpurityBufferId, midpoint_step.SplitpointsBufferId,
        midpoint_step.SplitpointsCountsBufferId,
        two_stream_split_stats_step.ChildCountsEstimatorBufferId,
        two_stream_split_stats_step.LeftEstimatorStatsBufferId,
        two_stream_split_stats_step.RightEstimatorStatsBufferId,
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        depth_delta_feature_extractor_step.FeatureValuesBufferId,
        feature_ordering, depth_delta_feature_extractor_step)
    should_split_criteria = no_split_criteria(**kwargs)
    finalizer = regression.MeanVarianceEstimatorFinalizer_f32()
    split_indices = splitpoints.SplitIndices_f32i32(
        sample_data_step.IndicesBufferId)
    split_midpoint_ranges = splitpoints.SplitBuffersFeatureRange_f32i32(
        midpoint_step.PastFloatParamsBufferId,
        midpoint_step.PastIntParamsBufferId, midpoint_step.PastRangesBufferId,
        set_feature_range_buffer_step.OutputBufferId, quantized_feature_equal)
    split_steps = splitpoints.SplitBuffersList(
        [split_indices, split_midpoint_ranges])

    split_selector = splitpoints.SplitSelector_f32i32([split_buffers],
                                                      should_split_criteria,
                                                      finalizer, split_steps)

    tree_learner = learn.BreadthFirstTreeLearner_f32i32(
        try_split_criteria, tree_steps_pipeline, node_steps_pipeline,
        split_selector, number_of_leaves)
    forest_learner = learn.ParallelForestLearner(tree_learner,
                                                 forest_steps_pipeline,
                                                 number_of_trees,
                                                 dimension_of_y,
                                                 number_of_jobs)
    return forest_learner
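AssignStreamStep_f32i32 routes each datapoint to either the impurity-evaluation stream or the leaf-estimation stream with probability probability_of_impurity_stream. A plain-numpy picture of the idea (not the rftk implementation; the 0/1 labelling is an assumption):

stream_type = (np.random.uniform(size=number_of_datapoints)
               < probability_of_impurity_stream).astype(np.int32)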
예제 #19
0
def create_online_axis_aligned_matrix_one_stream_learner_32f(**kwargs):
    number_of_trees = int(kwargs.get('number_of_trees', 10))
    number_of_features = int(
        kwargs.get('number_of_features', np.sqrt(kwargs['x'].shape[1])))
    feature_ordering = int(
        kwargs.get('feature_ordering', pipeline.FEATURES_BY_DATAPOINTS))
    number_of_splitpoints = int(kwargs.get('number_of_splitpoints', 1))
    number_of_classes = int(np.max(kwargs['classes']) + 1)
    max_frontier_size = int(kwargs.get('max_frontier_size', 10000000))
    impurity_update_period = int(kwargs.get('impurity_update_period', 1))

    try_split_criteria = create_try_split_criteria(**kwargs)

    if 'bootstrap' in kwargs and kwargs.get('bootstrap'):
        sample_data_step = pipeline.BootstrapSamplesStep_f32f32i32(
            buffers.X_FLOAT_DATA)
    elif 'poisson_sample' in kwargs:
        poisson_sample_mean = float(kwargs.get('poisson_sample'))
        sample_data_step = pipeline.PoissonSamplesStep_f32i32(
            buffers.X_FLOAT_DATA, poisson_sample_mean)
    else:
        sample_data_step = pipeline.AllSamplesStep_f32f32i32(
            buffers.X_FLOAT_DATA)

    number_of_features_buffer = buffers.as_vector_buffer(
        np.array([number_of_features], dtype=np.int32))
    set_number_features_step = pipeline.SetInt32VectorBufferStep(
        number_of_features_buffer, pipeline.WHEN_NEW)
    tree_steps_pipeline = pipeline.Pipeline(
        [sample_data_step, set_number_features_step])

    feature_params_step = matrix_features.AxisAlignedParamsStep_f32i32(
        set_number_features_step.OutputBufferId, buffers.X_FLOAT_DATA)

    matrix_feature = matrix_features.LinearFloat32MatrixFeature_f32i32(
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        sample_data_step.IndicesBufferId, buffers.X_FLOAT_DATA)
    matrix_feature_extractor_step = matrix_features.LinearFloat32MatrixFeatureExtractorStep_f32i32(
        matrix_feature, feature_ordering)
    slice_classes_step = pipeline.SliceInt32VectorBufferStep_i32(
        buffers.CLASS_LABELS, sample_data_step.IndicesBufferId)
    slice_weights_step = pipeline.SliceFloat32VectorBufferStep_i32(
        sample_data_step.WeightsBufferId, sample_data_step.IndicesBufferId)

    random_splitpoint_selection_step = splitpoints.RandomSplitpointsStep_f32i32(
        matrix_feature_extractor_step.FeatureValuesBufferId,
        number_of_splitpoints, feature_ordering)

    class_stats_updater = classification.ClassStatsUpdater_f32i32(
        slice_weights_step.SlicedBufferId, slice_classes_step.SlicedBufferId,
        number_of_classes)
    one_stream_split_stats_step = classification.ClassStatsUpdaterOneStreamStep_f32i32(
        random_splitpoint_selection_step.SplitpointsBufferId,
        random_splitpoint_selection_step.SplitpointsCountsBufferId,
        matrix_feature_extractor_step.FeatureValuesBufferId, feature_ordering,
        class_stats_updater)

    impurity_step = classification.ClassInfoGainSplitpointsImpurity_f32i32(
        random_splitpoint_selection_step.SplitpointsCountsBufferId,
        one_stream_split_stats_step.ChildCountsBufferId,
        one_stream_split_stats_step.LeftStatsBufferId,
        one_stream_split_stats_step.RightStatsBufferId)

    init_node_steps_pipeline = pipeline.Pipeline([feature_params_step])
    update_stats_node_steps_pipeline = pipeline.Pipeline([
        matrix_feature_extractor_step, slice_classes_step, slice_weights_step,
        random_splitpoint_selection_step, one_stream_split_stats_step
    ])
    update_impurity_node_steps_pipeline = pipeline.Pipeline([impurity_step])

    split_buffers = splitpoints.SplitSelectorBuffers(
        impurity_step.ImpurityBufferId,
        random_splitpoint_selection_step.SplitpointsBufferId,
        random_splitpoint_selection_step.SplitpointsCountsBufferId,
        one_stream_split_stats_step.ChildCountsBufferId,
        one_stream_split_stats_step.LeftStatsBufferId,
        one_stream_split_stats_step.RightStatsBufferId,
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        matrix_feature_extractor_step.FeatureValuesBufferId, feature_ordering,
        matrix_feature_extractor_step)
    should_split_criteria = create_should_split_criteria(**kwargs)
    finalizer = classification.ClassEstimatorFinalizer_f32()
    split_indices = splitpoints.SplitIndices_f32i32(
        sample_data_step.IndicesBufferId)
    split_selector = splitpoints.WaitForBestSplitSelector_f32i32(
        [split_buffers], should_split_criteria, finalizer, split_indices)

    matrix_feature_prediction = matrix_features.LinearFloat32MatrixFeature_f32i32(
        sample_data_step.IndicesBufferId, buffers.X_FLOAT_DATA)
    estimator_params_updater = classification.ClassEstimatorUpdater_f32i32(
        sample_data_step.WeightsBufferId, buffers.CLASS_LABELS,
        number_of_classes)
    forest_learner = learn.OnlineForestMatrixClassLearner_f32i32(
        try_split_criteria, tree_steps_pipeline, init_node_steps_pipeline,
        update_stats_node_steps_pipeline, update_impurity_node_steps_pipeline,
        impurity_update_period, split_selector, max_frontier_size,
        number_of_trees, 5, 5, number_of_classes,
        sample_data_step.IndicesBufferId, sample_data_step.WeightsBufferId,
        matrix_feature_prediction, estimator_params_updater)
    return forest_learner
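PoissonSamplesStep presumably implements Poisson-weighted resampling in the style of online bagging (Oza and Russell), which approximates bootstrap resampling when datapoints arrive one at a time. Conceptually, in plain numpy:

weights = np.random.poisson(lam=poisson_sample_mean,
                            size=number_of_datapoints).astype(np.float32)
# datapoints drawing weight 0 are effectively left out of this tree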
def create_biau2012_regression_scaled_depth_delta_learner_32f(**kwargs):
    ux = float( kwargs.get('ux') )
    uy = float( kwargs.get('uy') )
    vx = float( kwargs.get('vx') )
    vy = float( kwargs.get('vy') )

    number_of_trees = int( kwargs.get('number_of_trees', 10) )
    number_of_leaves = int( kwargs.get('number_of_leaves', kwargs['y'].GetM() / 5 + 1) )
    number_of_features = int( kwargs.get('number_of_features', 1) )
    feature_ordering = int( kwargs.get('feature_ordering', pipeline.FEATURES_BY_DATAPOINTS) )
    number_of_jobs = int( kwargs.get('number_of_jobs', 1) )
    dimension_of_y = int( kwargs['y'].GetN() )
    probability_of_impurity_stream = float(kwargs.get('probability_of_impurity_stream', 0.5) )

    try_split_criteria = create_try_split_criteria(**kwargs)

    sample_data_step = pipeline.AllSamplesStep_i32f32i32(buffers.PIXEL_INDICES)

    number_of_features_buffer = buffers.as_vector_buffer(np.array([number_of_features], dtype=np.int32))
    set_number_features_step = pipeline.SetInt32VectorBufferStep(number_of_features_buffer, pipeline.WHEN_NEW)
    feature_range_buffer = buffers.as_vector_buffer(np.array([-6, 6], dtype=np.float32))
    set_feature_range_buffer_step = pipeline.SetFloat32VectorBufferStep(feature_range_buffer, pipeline.WHEN_NEW)

    assign_stream_step = splitpoints.AssignStreamStep_f32i32(sample_data_step.WeightsBufferId, probability_of_impurity_stream, False)
    forest_steps_pipeline = pipeline.Pipeline([sample_data_step, set_number_features_step, set_feature_range_buffer_step, assign_stream_step])
    tree_steps_pipeline = pipeline.Pipeline([])

    feature_params_step = image_features.PixelPairGaussianOffsetsStep_f32i32(set_number_features_step.OutputBufferId, ux, uy, vx, vy )
    depth_delta_feature = image_features.ScaledDepthDeltaFeature_f32i32(feature_params_step.FloatParamsBufferId,
                                                                      feature_params_step.IntParamsBufferId,
                                                                      sample_data_step.IndicesBufferId,
                                                                      buffers.PIXEL_INDICES,
                                                                      buffers.DEPTH_IMAGES,
                                                                      buffers.OFFSET_SCALES)
    depth_delta_feature_extractor_step = image_features.ScaledDepthDeltaFeatureExtractorStep_f32i32(depth_delta_feature, feature_ordering)
    slice_ys_step = pipeline.SliceFloat32MatrixBufferStep_i32(buffers.YS, sample_data_step.IndicesBufferId)
    slice_weights_step = pipeline.SliceFloat32VectorBufferStep_i32(sample_data_step.WeightsBufferId, sample_data_step.IndicesBufferId)
    slice_assign_stream_step = pipeline.SliceInt32VectorBufferStep_i32(assign_stream_step.StreamTypeBufferId, sample_data_step.IndicesBufferId)


    quantized_feature_equal = pipeline.FeatureEqualQuantized_f32i32(1.0)

    midpoint_step = splitpoints.RangeMidpointStep_f32i32(feature_params_step.FloatParamsBufferId,
                                                        feature_params_step.IntParamsBufferId,
                                                        set_feature_range_buffer_step.OutputBufferId,
                                                        quantized_feature_equal)

    mean_variance_stats_updater = regression.MeanVarianceStatsUpdater_f32i32(slice_weights_step.SlicedBufferId,
                                                                              slice_ys_step.SlicedBufferId,
                                                                              dimension_of_y)

    two_stream_split_stats_step = regression.SumOfVarianceTwoStreamStep_f32i32(midpoint_step.SplitpointsBufferId,
                                                                          midpoint_step.SplitpointsCountsBufferId,
                                                                          slice_assign_stream_step.SlicedBufferId,
                                                                          depth_delta_feature_extractor_step.FeatureValuesBufferId,
                                                                          feature_ordering,
                                                                          mean_variance_stats_updater)


    impurity_step = regression.SumOfVarianceSplitpointsImpurity_f32i32(midpoint_step.SplitpointsCountsBufferId,
                                                                          two_stream_split_stats_step.ChildCountsImpurityBufferId,
                                                                          two_stream_split_stats_step.LeftImpurityStatsBufferId,
                                                                          two_stream_split_stats_step.RightImpurityStatsBufferId)

    node_steps_pipeline = pipeline.Pipeline([feature_params_step, depth_delta_feature_extractor_step,
                                            slice_ys_step, slice_weights_step, slice_assign_stream_step,
                                            midpoint_step, two_stream_split_stats_step, impurity_step])

    split_buffers = splitpoints.SplitSelectorBuffers(impurity_step.ImpurityBufferId,
                                                          midpoint_step.SplitpointsBufferId,
                                                          midpoint_step.SplitpointsCountsBufferId,
                                                          two_stream_split_stats_step.ChildCountsEstimatorBufferId,
                                                          two_stream_split_stats_step.LeftEstimatorStatsBufferId,
                                                          two_stream_split_stats_step.RightEstimatorStatsBufferId,
                                                          feature_params_step.FloatParamsBufferId,
                                                          feature_params_step.IntParamsBufferId,
                                                          depth_delta_feature_extractor_step.FeatureValuesBufferId,
                                                          feature_ordering,
                                                          depth_delta_feature_extractor_step)
    should_split_criteria = no_split_criteria(**kwargs)
    finalizer = regression.MeanVarianceEstimatorFinalizer_f32()
    split_indices = splitpoints.SplitIndices_f32i32(sample_data_step.IndicesBufferId)
    split_midpoint_ranges = splitpoints.SplitBuffersFeatureRange_f32i32(midpoint_step.PastFloatParamsBufferId,
                                                                        midpoint_step.PastIntParamsBufferId,
                                                                        midpoint_step.PastRangesBufferId,
                                                                        set_feature_range_buffer_step.OutputBufferId,
                                                                        quantized_feature_equal)
    split_steps = splitpoints.SplitBuffersList([split_indices, split_midpoint_ranges])

    split_selector = splitpoints.SplitSelector_f32i32([split_buffers],
                                                        should_split_criteria,
                                                        finalizer,
                                                        split_steps)

    tree_learner = learn.BreadthFirstTreeLearner_f32i32(try_split_criteria, tree_steps_pipeline, node_steps_pipeline, split_selector, number_of_leaves)
    forest_learner = learn.ParallelForestLearner(tree_learner, forest_steps_pipeline, number_of_trees, dimension_of_y, number_of_jobs)
    return forest_learner
def create_online_scaled_depth_delta_one_stream_learner_32f(**kwargs):
    number_of_trees = int(kwargs.get('number_of_trees', 10))
    number_of_features = int(kwargs.get('number_of_features', 1))
    feature_ordering = int(
        kwargs.get('feature_ordering', pipeline.FEATURES_BY_DATAPOINTS))
    number_of_splitpoints = int(kwargs.get('number_of_splitpoints', 1))
    number_of_classes = int(kwargs['classes'].GetMax() + 1)
    max_frontier_size = int(kwargs.get('max_frontier_size', 10000000))
    impurity_update_period = int(kwargs.get('impurity_update_period', 1))

    ux = float(kwargs.get('ux'))
    uy = float(kwargs.get('uy'))
    vx = float(kwargs.get('vx'))
    vy = float(kwargs.get('vy'))

    try_split_criteria = create_try_split_criteria(**kwargs)

    if 'bootstrap' in kwargs and kwargs.get('bootstrap'):
        sample_data_step = pipeline.BootstrapSamplesStep_i32f32i32(
            buffers.PIXEL_INDICES)
    elif 'poisson_sample' in kwargs:
        poisson_sample_mean = float(kwargs.get('poisson_sample'))
        sample_data_step = pipeline.PoissonSamplesStep_i32i32(
            buffers.PIXEL_INDICES, poisson_sample_mean)
    else:
        sample_data_step = pipeline.AllSamplesStep_i32f32i32(
            buffers.PIXEL_INDICES)

    number_of_features_buffer = buffers.as_vector_buffer(
        np.array([number_of_features], dtype=np.int32))
    set_number_features_step = pipeline.SetInt32VectorBufferStep(
        number_of_features_buffer, pipeline.WHEN_NEW)
    tree_steps_pipeline = pipeline.Pipeline(
        [sample_data_step, set_number_features_step])

    feature_params_step = image_features.PixelPairGaussianOffsetsStep_f32i32(
        set_number_features_step.OutputBufferId, ux, uy, vx, vy)
    init_node_steps_pipeline = pipeline.Pipeline([feature_params_step])

    depth_delta_feature = image_features.ScaledDepthDeltaFeature_f32i32(
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        sample_data_step.IndicesBufferId, buffers.PIXEL_INDICES,
        buffers.DEPTH_IMAGES, buffers.OFFSET_SCALES)
    depth_delta_feature_extractor_step = image_features.ScaledDepthDeltaFeatureExtractorStep_f32i32(
        depth_delta_feature, feature_ordering)
    slice_classes_step = pipeline.SliceInt32VectorBufferStep_i32(
        buffers.CLASS_LABELS, sample_data_step.IndicesBufferId)
    slice_weights_step = pipeline.SliceFloat32VectorBufferStep_i32(
        sample_data_step.WeightsBufferId, sample_data_step.IndicesBufferId)
    random_splitpoint_selection_step = splitpoints.RandomSplitpointsStep_f32i32(
        depth_delta_feature_extractor_step.FeatureValuesBufferId,
        number_of_splitpoints, feature_ordering)
    class_stats_updater = classification.ClassStatsUpdater_f32i32(
        slice_weights_step.SlicedBufferId, slice_classes_step.SlicedBufferId,
        number_of_classes)
    one_stream_split_stats_step = classification.ClassStatsUpdaterOneStreamStep_f32i32(
        random_splitpoint_selection_step.SplitpointsBufferId,
        random_splitpoint_selection_step.SplitpointsCountsBufferId,
        depth_delta_feature_extractor_step.FeatureValuesBufferId,
        feature_ordering, class_stats_updater)
    update_stats_node_steps_pipeline = pipeline.Pipeline([
        depth_delta_feature_extractor_step, slice_classes_step,
        slice_weights_step, random_splitpoint_selection_step,
        one_stream_split_stats_step
    ])

    impurity_step = classification.ClassInfoGainSplitpointsImpurity_f32i32(
        random_splitpoint_selection_step.SplitpointsCountsBufferId,
        one_stream_split_stats_step.ChildCountsBufferId,
        one_stream_split_stats_step.LeftStatsBufferId,
        one_stream_split_stats_step.RightStatsBufferId)
    update_impurity_node_steps_pipeline = pipeline.Pipeline([impurity_step])

    split_buffers = splitpoints.SplitSelectorBuffers(
        impurity_step.ImpurityBufferId,
        random_splitpoint_selection_step.SplitpointsBufferId,
        random_splitpoint_selection_step.SplitpointsCountsBufferId,
        one_stream_split_stats_step.ChildCountsBufferId,
        one_stream_split_stats_step.LeftStatsBufferId,
        one_stream_split_stats_step.RightStatsBufferId,
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        depth_delta_feature_extractor_step.FeatureValuesBufferId,
        feature_ordering, depth_delta_feature_extractor_step)
    should_split_criteria = create_should_split_criteria(**kwargs)
    finalizer = classification.ClassEstimatorFinalizer_f32()
    split_selector = splitpoints.WaitForBestSplitSelector_f32i32(
        [split_buffers], should_split_criteria, finalizer)

    feature_prediction = image_features.ScaledDepthDeltaFeature_f32i32(
        sample_data_step.IndicesBufferId, buffers.PIXEL_INDICES,
        buffers.DEPTH_IMAGES)
    estimator_params_updater = classification.ClassEstimatorUpdater_f32i32(
        sample_data_step.WeightsBufferId, buffers.CLASS_LABELS,
        number_of_classes)
    forest_learner = learn.OnlineForestScaledDepthDeltaClassLearner_f32i32(
        try_split_criteria, tree_steps_pipeline, init_node_steps_pipeline,
        update_stats_node_steps_pipeline, update_impurity_node_steps_pipeline,
        impurity_update_period, split_selector, max_frontier_size,
        number_of_trees, 5, 5, number_of_classes,
        sample_data_step.IndicesBufferId, sample_data_step.WeightsBufferId,
        feature_prediction, estimator_params_updater)
    return forest_learner
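ClassStatsUpdater_f32i32 maintains per-node class statistics, in effect a weighted class histogram. An equivalent plain-numpy computation (sample_weights and class_labels here are illustrative arrays, not rftk buffers):

histogram = np.bincount(class_labels, weights=sample_weights,
                        minlength=number_of_classes).astype(np.float32)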
예제 #22
0
def create_online_scaled_depth_delta_one_stream_learner_32f(**kwargs):
    number_of_trees = int( kwargs.get('number_of_trees', 10) )
    number_of_features = int( kwargs.get('number_of_features', 1))
    feature_ordering = int( kwargs.get('feature_ordering', pipeline.FEATURES_BY_DATAPOINTS) )
    number_of_splitpoints = int( kwargs.get('number_of_splitpoints', 1 ))
    number_of_classes = int( kwargs['classes'].GetMax() + 1 )
    max_frontier_size = int( kwargs.get('max_frontier_size', 10000000) )
    impurity_update_period = int( kwargs.get('impurity_update_period', 1) )

    ux = float( kwargs.get('ux') )
    uy = float( kwargs.get('uy') )
    vx = float( kwargs.get('vx') )
    vy = float( kwargs.get('vy') )

    try_split_criteria = create_try_split_criteria(**kwargs)

    if 'bootstrap' in kwargs and kwargs.get('bootstrap'):
        sample_data_step = pipeline.BootstrapSamplesStep_i32f32i32(buffers.PIXEL_INDICES)
    elif 'poisson_sample' in kwargs:
        poisson_sample_mean = float(kwargs.get('poisson_sample'))
        sample_data_step = pipeline.PoissonSamplesStep_i32i32(buffers.PIXEL_INDICES, poisson_sample_mean)
    else:
        sample_data_step = pipeline.AllSamplesStep_i32f32i32(buffers.PIXEL_INDICES)

    number_of_features_buffer = buffers.as_vector_buffer(np.array([number_of_features], dtype=np.int32))
    set_number_features_step = pipeline.SetInt32VectorBufferStep(number_of_features_buffer, pipeline.WHEN_NEW)
    tree_steps_pipeline = pipeline.Pipeline([sample_data_step, set_number_features_step])

    feature_params_step = image_features.PixelPairGaussianOffsetsStep_f32i32(set_number_features_step.OutputBufferId, ux, uy, vx, vy )
    init_node_steps_pipeline = pipeline.Pipeline([feature_params_step])

    depth_delta_feature = image_features.ScaledDepthDeltaFeature_f32i32(feature_params_step.FloatParamsBufferId,
                                                                      feature_params_step.IntParamsBufferId,
                                                                      sample_data_step.IndicesBufferId,
                                                                      buffers.PIXEL_INDICES,
                                                                      buffers.DEPTH_IMAGES,
                                                                      buffers.OFFSET_SCALES)
    depth_delta_feature_extractor_step = image_features.ScaledDepthDeltaFeatureExtractorStep_f32i32(depth_delta_feature, feature_ordering)
    slice_classes_step = pipeline.SliceInt32VectorBufferStep_i32(buffers.CLASS_LABELS, sample_data_step.IndicesBufferId)
    slice_weights_step = pipeline.SliceFloat32VectorBufferStep_i32(sample_data_step.WeightsBufferId, sample_data_step.IndicesBufferId)
    random_splitpoint_selection_step = splitpoints.RandomSplitpointsStep_f32i32(depth_delta_feature_extractor_step.FeatureValuesBufferId,
                                                                                number_of_splitpoints,
                                                                                feature_ordering)
    class_stats_updater = classification.ClassStatsUpdater_f32i32(slice_weights_step.SlicedBufferId,
                                                                      slice_classes_step.SlicedBufferId,
                                                                      number_of_classes)
    one_stream_split_stats_step = classification.ClassStatsUpdaterOneStreamStep_f32i32(random_splitpoint_selection_step.SplitpointsBufferId,
                                                                          random_splitpoint_selection_step.SplitpointsCountsBufferId,
                                                                          depth_delta_feature_extractor_step.FeatureValuesBufferId,
                                                                          feature_ordering,
                                                                          class_stats_updater)
    update_stats_node_steps_pipeline = pipeline.Pipeline([depth_delta_feature_extractor_step,
                                                          slice_classes_step, 
                                                          slice_weights_step,
                                                          random_splitpoint_selection_step,
                                                          one_stream_split_stats_step])

    impurity_step = classification.ClassInfoGainSplitpointsImpurity_f32i32(random_splitpoint_selection_step.SplitpointsCountsBufferId,
                                                                          one_stream_split_stats_step.ChildCountsBufferId,
                                                                          one_stream_split_stats_step.LeftStatsBufferId,
                                                                          one_stream_split_stats_step.RightStatsBufferId)
    update_impurity_node_steps_pipeline = pipeline.Pipeline([impurity_step])

    split_buffers = splitpoints.SplitSelectorBuffers(impurity_step.ImpurityBufferId,
                                                          random_splitpoint_selection_step.SplitpointsBufferId,
                                                          random_splitpoint_selection_step.SplitpointsCountsBufferId,
                                                          one_stream_split_stats_step.ChildCountsBufferId,
                                                          one_stream_split_stats_step.LeftStatsBufferId,
                                                          one_stream_split_stats_step.RightStatsBufferId,
                                                          feature_params_step.FloatParamsBufferId,
                                                          feature_params_step.IntParamsBufferId,
                                                          depth_delta_feature_extractor_step.FeatureValuesBufferId,
                                                          feature_ordering,
                                                          depth_delta_feature_extractor_step)
    should_split_criteria = create_should_split_criteria(**kwargs)
    finalizer = classification.ClassEstimatorFinalizer_f32()
    split_selector = splitpoints.WaitForBestSplitSelector_f32i32([split_buffers], should_split_criteria, finalizer )

    feature_prediction = image_features.ScaledDepthDeltaFeature_f32i32(sample_data_step.IndicesBufferId,
                                                                              buffers.PIXEL_INDICES,
                                                                              buffers.DEPTH_IMAGES)
    estimator_params_updater = classification.ClassEstimatorUpdater_f32i32(sample_data_step.WeightsBufferId, buffers.CLASS_LABELS, number_of_classes)
    forest_learner = learn.OnlineForestScaledDepthDeltaClassLearner_f32i32(
                                                              try_split_criteria, 
                                                              tree_steps_pipeline, 
                                                              init_node_steps_pipeline, 
                                                              update_stats_node_steps_pipeline,
                                                              update_impurity_node_steps_pipeline,
                                                              impurity_update_period, split_selector,
                                                              max_frontier_size, number_of_trees, 5, 5, number_of_classes,
                                                              sample_data_step.IndicesBufferId, sample_data_step.WeightsBufferId,
                                                              feature_prediction, estimator_params_updater)
    return forest_learner
예제 #23
0
def create_consistent_two_stream_regression_axis_aligned_matrix_learner_32f(**kwargs):
    number_of_trees = int( kwargs.get('number_of_trees', 10) )
    number_of_leaves = int( kwargs.get('number_of_leaves', kwargs['y'].shape[0] / 5 + 1) )
    number_of_features = int( kwargs.get('number_of_features', (kwargs['x'].shape[1])/3 + 0.5))
    # number_of_features = int( kwargs.get('number_of_features', np.sqrt(kwargs['x'].shape[1])))
    feature_ordering = int( kwargs.get('feature_ordering', pipeline.FEATURES_BY_DATAPOINTS) )
    number_of_jobs = int( kwargs.get('number_of_jobs', 1) )
    dimension_of_y = int(  kwargs['y'].shape[1] )

    probability_of_impurity_stream = float(kwargs.get('probability_of_impurity_stream', 0.5) )
    in_bounds_number_of_points = int(kwargs.get('in_bounds_number_of_points', kwargs['y'].shape[0]/2) )

    try_split_criteria = create_try_split_criteria(**kwargs)

    if 'bootstrap' in kwargs and kwargs.get('bootstrap'):
        sample_data_step = pipeline.BootstrapSamplesStep_f32f32i32(buffers.X_FLOAT_DATA)
    else:
        sample_data_step = pipeline.AllSamplesStep_f32f32i32(buffers.X_FLOAT_DATA)

    assign_stream_step = splitpoints.AssignStreamStep_f32i32(sample_data_step.WeightsBufferId, probability_of_impurity_stream)
    tree_steps_pipeline = pipeline.Pipeline([sample_data_step, assign_stream_step])

    if 'poisson_number_of_features' in kwargs and kwargs.get('poisson_number_of_features'):
        set_number_features_step = pipeline.PoissonStep_f32i32(number_of_features, 1)
    else:
        number_of_features_buffer = buffers.as_vector_buffer(np.array([number_of_features], dtype=np.int32))
        set_number_features_step = pipeline.SetInt32VectorBufferStep(number_of_features_buffer, pipeline.WHEN_NEW)

    feature_params_step = matrix_features.AxisAlignedParamsStep_f32i32(set_number_features_step.OutputBufferId, buffers.X_FLOAT_DATA)
    matrix_feature = matrix_features.LinearFloat32MatrixFeature_f32i32(feature_params_step.FloatParamsBufferId,
                                                                      feature_params_step.IntParamsBufferId,
                                                                      sample_data_step.IndicesBufferId,
                                                                      buffers.X_FLOAT_DATA)
    matrix_feature_extractor_step = matrix_features.LinearFloat32MatrixFeatureExtractorStep_f32i32(matrix_feature, feature_ordering)

    slice_ys_step = pipeline.SliceFloat32MatrixBufferStep_i32(buffers.YS, sample_data_step.IndicesBufferId)
    slice_weights_step = pipeline.SliceFloat32VectorBufferStep_i32(sample_data_step.WeightsBufferId, sample_data_step.IndicesBufferId)
    slice_stream_step = pipeline.SliceInt32VectorBufferStep_i32(assign_stream_step.StreamTypeBufferId, sample_data_step.IndicesBufferId)

    impurity_walker = regression.SumOfVarianceTwoStreamWalker_f32i32(slice_weights_step.SlicedBufferId,
                                                            slice_stream_step.SlicedBufferId,
                                                            slice_ys_step.SlicedBufferId,
                                                            dimension_of_y)

    best_splitpoint_step = regression.SumOfVarianceTwoStreamBestSplitpointsWalkingSortedStep_f32i32(impurity_walker,
                                                                        slice_stream_step.SlicedBufferId,
                                                                        matrix_feature_extractor_step.FeatureValuesBufferId,
                                                                        feature_ordering,
                                                                        splitpoints.AT_MIDPOINT,
                                                                        in_bounds_number_of_points)

    node_steps_pipeline = pipeline.Pipeline([set_number_features_step, feature_params_step, matrix_feature_extractor_step,
                                            slice_ys_step, slice_weights_step, slice_stream_step, best_splitpoint_step])

    split_buffers = splitpoints.SplitSelectorBuffers(best_splitpoint_step.ImpurityBufferId,
                                                          best_splitpoint_step.SplitpointBufferId,
                                                          best_splitpoint_step.SplitpointCountsBufferId,
                                                          best_splitpoint_step.ChildCountsEstimationBufferId,
                                                          best_splitpoint_step.LeftEstimationYsBufferId,
                                                          best_splitpoint_step.RightEstimationYsBufferId,
                                                          feature_params_step.FloatParamsBufferId,
                                                          feature_params_step.IntParamsBufferId,
                                                          matrix_feature_extractor_step.FeatureValuesBufferId,
                                                          feature_ordering,
                                                          matrix_feature_extractor_step)
    should_split_criteria = create_should_split_criteria(**kwargs)
    finalizer = regression.MeanVarianceEstimatorFinalizer_f32()
    split_indices = splitpoints.SplitIndices_f32i32(sample_data_step.IndicesBufferId)
    split_selector = splitpoints.SplitSelector_f32i32([split_buffers], should_split_criteria, finalizer, split_indices )

    tree_learner = learn.BreadthFirstTreeLearner_f32i32(try_split_criteria, tree_steps_pipeline, node_steps_pipeline, split_selector, number_of_leaves)
    forest_learner = learn.ParallelForestLearner(tree_learner, number_of_trees, dimension_of_y, number_of_jobs)
    return forest_learner
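The SumOfVariance* steps above score a candidate split by the reduction in summed per-dimension variance. One common convention, sketched in plain numpy (the exact weighting inside rftk may differ; node_ys, left_ys, right_ys are illustrative (n, dimension_of_y) arrays):

def sum_of_variance(ys):
    # total variance across output dimensions, scaled by point count
    return len(ys) * ys.var(axis=0).sum()

impurity_reduction = (sum_of_variance(node_ys)
                      - sum_of_variance(left_ys)
                      - sum_of_variance(right_ys))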
def create_scaled_depth_delta_learner_32f(**kwargs):
    ux = float(kwargs.get('ux'))
    uy = float(kwargs.get('uy'))
    vx = float(kwargs.get('vx'))
    vy = float(kwargs.get('vy'))

    number_of_trees = int(kwargs.get('number_of_trees', 10))
    number_of_features = int(kwargs.get('number_of_features', 1))
    feature_ordering = int(
        kwargs.get('feature_ordering', pipeline.FEATURES_BY_DATAPOINTS))
    number_of_jobs = int(kwargs.get('number_of_jobs', 1))
    number_of_classes = int(kwargs['classes'].GetMax() + 1)

    try_split_criteria = create_try_split_criteria(**kwargs)

    if 'bootstrap' in kwargs and kwargs.get('bootstrap'):
        sample_data_step = pipeline.BootstrapSamplesStep_i32f32i32(
            buffers.PIXEL_INDICES)
    else:
        sample_data_step = pipeline.AllSamplesStep_i32f32i32(
            buffers.PIXEL_INDICES)

    number_of_features_buffer = buffers.as_vector_buffer(
        np.array([number_of_features], dtype=np.int32))
    set_number_features_step = pipeline.SetInt32VectorBufferStep(
        number_of_features_buffer, pipeline.WHEN_NEW)
    tree_steps_pipeline = pipeline.Pipeline(
        [sample_data_step, set_number_features_step])

    feature_params_step = image_features.PixelPairGaussianOffsetsStep_f32i32(
        set_number_features_step.OutputBufferId, ux, uy, vx, vy)
    depth_delta_feature = image_features.ScaledDepthDeltaFeature_f32i32(
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        sample_data_step.IndicesBufferId, buffers.PIXEL_INDICES,
        buffers.DEPTH_IMAGES, buffers.OFFSET_SCALES)
    depth_delta_feature_extractor_step = image_features.ScaledDepthDeltaFeatureExtractorStep_f32i32(
        depth_delta_feature, feature_ordering)
    slice_classes_step = pipeline.SliceInt32VectorBufferStep_i32(
        buffers.CLASS_LABELS, sample_data_step.IndicesBufferId)
    slice_weights_step = pipeline.SliceFloat32VectorBufferStep_i32(
        sample_data_step.WeightsBufferId, sample_data_step.IndicesBufferId)
    class_infogain_walker = classification.ClassInfoGainWalker_f32i32(
        slice_weights_step.SlicedBufferId, slice_classes_step.SlicedBufferId,
        number_of_classes)
    best_splitpoint_step = classification.ClassInfoGainBestSplitpointsWalkingSortedStep_f32i32(
        class_infogain_walker,
        depth_delta_feature_extractor_step.FeatureValuesBufferId,
        feature_ordering)
    node_steps_pipeline = pipeline.Pipeline([
        feature_params_step, depth_delta_feature_extractor_step,
        slice_classes_step, slice_weights_step, best_splitpoint_step
    ])

    split_buffers = splitpoints.SplitSelectorBuffers(
        best_splitpoint_step.ImpurityBufferId,
        best_splitpoint_step.SplitpointBufferId,
        best_splitpoint_step.SplitpointCountsBufferId,
        best_splitpoint_step.ChildCountsBufferId,
        best_splitpoint_step.LeftYsBufferId,
        best_splitpoint_step.RightYsBufferId,
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        depth_delta_feature_extractor_step.FeatureValuesBufferId,
        feature_ordering, depth_delta_feature_extractor_step)
    should_split_criteria = create_should_split_criteria(**kwargs)
    finalizer = classification.ClassEstimatorFinalizer_f32()
    split_indices = splitpoints.SplitIndices_f32i32(
        sample_data_step.IndicesBufferId)
    split_selector = splitpoints.SplitSelector_f32i32([split_buffers],
                                                      should_split_criteria,
                                                      finalizer, split_indices)

    tree_learner = learn.DepthFirstTreeLearner_f32i32(try_split_criteria,
                                                      tree_steps_pipeline,
                                                      node_steps_pipeline,
                                                      split_selector)
    forest_learner = learn.ParallelForestLearner(tree_learner, number_of_trees,
                                                 number_of_classes,
                                                 number_of_jobs)
    return forest_learner
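ClassInfoGainWalker_f32i32 walks the sorted feature values and scores each candidate splitpoint by information gain over the class histograms. The quantity being maximized, in plain numpy (left_counts and right_counts are illustrative float histograms):

def entropy(counts):
    p = counts[counts > 0] / float(counts.sum())
    return -np.sum(p * np.log(p))

n_left, n_right = left_counts.sum(), right_counts.sum()
info_gain = (entropy(left_counts + right_counts)
             - (n_left * entropy(left_counts)
                + n_right * entropy(right_counts)) / (n_left + n_right))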
def create_biau2008_regression_scaled_depth_delta_learner_32f(**kwargs):
    ux = float( kwargs.get('ux') )
    uy = float( kwargs.get('uy') )
    vx = float( kwargs.get('vx') )
    vy = float( kwargs.get('vy') )

    number_of_trees = int( kwargs.get('number_of_trees', 10) )
    number_of_leaves = int( kwargs.get('number_of_leaves', kwargs['y'].GetM() / 5 + 1) )
    number_of_split_retries = int( kwargs.get('number_of_split_retries', 10) )
    number_of_features = 1
    feature_ordering = int( kwargs.get('feature_ordering', pipeline.FEATURES_BY_DATAPOINTS) )
    number_of_jobs = int( kwargs.get('number_of_jobs', 1) )
    dimension_of_y = int( kwargs['y'].GetN() )

    try_split_criteria = try_split.MinNodeSizeCriteria(2)

    sample_data_step = pipeline.AllSamplesStep_i32f32i32(buffers.PIXEL_INDICES)

    number_of_features_buffer = buffers.as_vector_buffer(np.array([number_of_features], dtype=np.int32))
    set_number_features_step = pipeline.SetInt32VectorBufferStep(number_of_features_buffer, pipeline.WHEN_NEW)
    tree_steps_pipeline = pipeline.Pipeline([sample_data_step, set_number_features_step])

    feature_params_step = image_features.PixelPairGaussianOffsetsStep_f32i32(set_number_features_step.OutputBufferId, ux, uy, vx, vy )
    depth_delta_feature = image_features.ScaledDepthDeltaFeature_f32i32(feature_params_step.FloatParamsBufferId,
                                                                      feature_params_step.IntParamsBufferId,
                                                                      sample_data_step.IndicesBufferId,
                                                                      buffers.PIXEL_INDICES,
                                                                      buffers.DEPTH_IMAGES,
                                                                      buffers.OFFSET_SCALES)
    depth_delta_feature_extractor_step = image_features.ScaledDepthDeltaFeatureExtractorStep_f32i32(depth_delta_feature, feature_ordering)
    slice_ys_step = pipeline.SliceFloat32MatrixBufferStep_i32(buffers.YS, sample_data_step.IndicesBufferId)
    slice_weights_step = pipeline.SliceFloat32VectorBufferStep_i32(sample_data_step.WeightsBufferId, sample_data_step.IndicesBufferId)

    impurity_walker = regression.SumOfVarianceWalker_f32i32(slice_weights_step.SlicedBufferId,
                                                            slice_ys_step.SlicedBufferId,
                                                            dimension_of_y)

    best_splitpoint_step = regression.SumOfVarianceRandomGapSplitpointsStep_f32i32(impurity_walker,
                                                                depth_delta_feature_extractor_step.FeatureValuesBufferId,
                                                                feature_ordering)

    node_steps_pipeline = pipeline.Pipeline([feature_params_step, depth_delta_feature_extractor_step,
                                            slice_ys_step, slice_weights_step, best_splitpoint_step])

    split_buffers = splitpoints.SplitSelectorBuffers(best_splitpoint_step.ImpurityBufferId,
                                                          best_splitpoint_step.SplitpointBufferId,
                                                          best_splitpoint_step.SplitpointCountsBufferId,
                                                          best_splitpoint_step.ChildCountsBufferId,
                                                          best_splitpoint_step.LeftYsBufferId,
                                                          best_splitpoint_step.RightYsBufferId,
                                                          feature_params_step.FloatParamsBufferId,
                                                          feature_params_step.IntParamsBufferId,
                                                          depth_delta_feature_extractor_step.FeatureValuesBufferId,
                                                          feature_ordering,
                                                          depth_delta_feature_extractor_step)
    should_split_criteria = should_split.MinChildSizeCriteria(1)
    finalizer = regression.MeanVarianceEstimatorFinalizer_f32()
    split_indices = splitpoints.SplitIndices_f32i32(sample_data_step.IndicesBufferId)
    split_selector = splitpoints.SplitSelector_f32i32([split_buffers], should_split_criteria, finalizer, split_indices )

    tree_learner = learn.Biau2008TreeLearner_f32i32(try_split_criteria, tree_steps_pipeline, node_steps_pipeline, split_selector, number_of_leaves, number_of_split_retries)
    forest_learner = learn.ParallelForestLearner(tree_learner, number_of_trees, dimension_of_y, number_of_jobs)
    return forest_learner
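A guess at the semantics behind SumOfVarianceRandomGapSplitpointsStep, consistent with Biau-style randomized splitting but not confirmed by this snippet: draw the splitpoint uniformly inside a randomly chosen gap between consecutive sorted feature values.

v = np.sort(feature_values)             # feature_values: illustrative 1-D array
gap = np.random.randint(0, len(v) - 1)  # pick one of the len(v) - 1 gaps
splitpoint = np.random.uniform(v[gap], v[gap + 1])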
예제 #26
0
def tree_weights_for_all_trees(forest_size):
    tree_weights = buffers.as_vector_buffer(np.ones(forest_size, dtype=np.float64))
    return tree_weights
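For example, uniform weighting over a 10-tree forest (a Float64 vector buffer of ten 1.0 entries):

tree_weights = tree_weights_for_all_trees(10)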
def create_axis_aligned_matrix_walking_learner_32f(**kwargs):
    number_of_trees = int(kwargs.get("number_of_trees", 10))
    number_of_features = int(kwargs.get("number_of_features", np.sqrt(kwargs["x"].shape[1])))
    feature_ordering = int(kwargs.get("feature_ordering", pipeline.FEATURES_BY_DATAPOINTS))
    number_of_jobs = int(kwargs.get("number_of_jobs", 1))
    number_of_classes = int(np.max(kwargs["classes"]) + 1)

    try_split_criteria = create_try_split_criteria(**kwargs)

    if "bootstrap" in kwargs and kwargs.get("bootstrap"):
        sample_data_step = pipeline.BootstrapSamplesStep_f32f32i32(buffers.X_FLOAT_DATA)
    else:
        sample_data_step = pipeline.AllSamplesStep_f32f32i32(buffers.X_FLOAT_DATA)

    number_of_features_buffer = buffers.as_vector_buffer(np.array([number_of_features], dtype=np.int32))
    set_number_features_step = pipeline.SetInt32VectorBufferStep(number_of_features_buffer, pipeline.WHEN_NEW)
    tree_steps_pipeline = pipeline.Pipeline([sample_data_step, set_number_features_step])

    feature_params_step = matrix_features.AxisAlignedParamsStep_f32i32(
        set_number_features_step.OutputBufferId, buffers.X_FLOAT_DATA
    )
    matrix_feature = matrix_features.LinearFloat32MatrixFeature_f32i32(
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        sample_data_step.IndicesBufferId,
        buffers.X_FLOAT_DATA,
    )
    matrix_feature_extractor_step = matrix_features.LinearFloat32MatrixFeatureExtractorStep_f32i32(
        matrix_feature, feature_ordering
    )
    slice_classes_step = pipeline.SliceInt32VectorBufferStep_i32(buffers.CLASS_LABELS, sample_data_step.IndicesBufferId)
    slice_weights_step = pipeline.SliceFloat32VectorBufferStep_i32(
        sample_data_step.WeightsBufferId, sample_data_step.IndicesBufferId
    )
    class_infogain_walker = classification.ClassInfoGainWalker_f32i32(
        slice_weights_step.SlicedBufferId, slice_classes_step.SlicedBufferId, number_of_classes
    )
    best_splitpoint_step = classification.ClassInfoGainBestSplitpointsWalkingSortedStep_f32i32(
        class_infogain_walker, matrix_feature_extractor_step.FeatureValuesBufferId, feature_ordering
    )
    node_steps_pipeline = pipeline.Pipeline(
        [
            feature_params_step,
            matrix_feature_extractor_step,
            slice_classes_step,
            slice_weights_step,
            best_splitpoint_step,
        ]
    )

    split_buffers = splitpoints.SplitSelectorBuffers(
        best_splitpoint_step.ImpurityBufferId,
        best_splitpoint_step.SplitpointBufferId,
        best_splitpoint_step.SplitpointCountsBufferId,
        best_splitpoint_step.ChildCountsBufferId,
        best_splitpoint_step.LeftYsBufferId,
        best_splitpoint_step.RightYsBufferId,
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        matrix_feature_extractor_step.FeatureValuesBufferId,
        feature_ordering,
        matrix_feature_extractor_step,
    )
    should_split_criteria = create_should_split_criteria(**kwargs)
    finalizer = classification.ClassEstimatorFinalizer_f32()
    split_indices = splitpoints.SplitIndices_f32i32(sample_data_step.IndicesBufferId)
    split_selector = splitpoints.SplitSelector_f32i32([split_buffers], should_split_criteria, finalizer, split_indices)

    if "tree_order" in kwargs and kwargs.get("tree_order") == "breadth_first":
        tree_learner = learn.BreadthFirstTreeLearner_f32i32(
            try_split_criteria, tree_steps_pipeline, node_steps_pipeline, split_selector
        )
    else:
        tree_learner = learn.DepthFirstTreeLearner_f32i32(
            try_split_criteria, tree_steps_pipeline, node_steps_pipeline, split_selector
        )

    forest_learner = learn.ParallelForestLearner(tree_learner, number_of_trees, number_of_classes, number_of_jobs)
    return forest_learner
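A usage sketch for the factory above with made-up data. `x` must expose `.shape`, because the feature-count default reads `kwargs["x"].shape[1]`, and `classes` must work with `np.max`; the criteria helpers are assumed to tolerate default kwargs. How the returned learner is then fit lies outside this snippet:

x_train = np.random.uniform(size=(500, 16)).astype(np.float32)
class_labels = np.random.randint(0, 3, size=500).astype(np.int32)
learner = create_axis_aligned_matrix_walking_learner_32f(
    x=x_train, classes=class_labels,
    number_of_trees=25, tree_order="breadth_first")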
    number_of_datapoints = pixel_indices_buffer.GetM()
    offset_scales = np.array(np.random.uniform(0.8, 1.2, (number_of_datapoints, 2)), dtype=np.float32)
    offset_scales_buffer = rftk.buffers.as_matrix_buffer(offset_scales)

    # On the first pass through data learn for each sample counts
    list_of_sample_counts = eval(args.list_of_sample_counts)
    clipped_list_of_sample_counts = [min(s, pixel_labels_buffer.GetN()) for s in list_of_sample_counts]
    clipped_list_of_sample_ranges = zip([0] + clipped_list_of_sample_counts[:-1], clipped_list_of_sample_counts)
    print clipped_list_of_sample_ranges
    pass_id = 0
    for (start_index, end_index) in clipped_list_of_sample_ranges:
        print start_index
        print end_index

        # Slice data
        datapoint_indices = buffers.as_vector_buffer(np.array(np.arange(start_index, end_index), dtype=np.int32))
        sliced_pixel_indices_buffer = pixel_indices_buffer.Slice(datapoint_indices)
        sliced_offset_scales_buffer = offset_scales_buffer.Slice(datapoint_indices)
        sliced_pixel_labels_buffer = pixel_labels_buffer.Slice(datapoint_indices)

        # online_learner.Train(bufferCollection, buffers.Int32Vector(datapoint_indices))
        predictor = forest_learner.fit(depth_images=depths_buffer, 
                                      pixel_indices=sliced_pixel_indices_buffer,
                                      offset_scales=sliced_offset_scales_buffer,
                                      classes=sliced_pixel_labels_buffer)

        # Pickle forest and data used for training
        forest_pickle_filename = "%s/forest-%d-%d.pkl" % (online_run_folder, pass_id, end_index)
        pickle.dump(predictor.get_forest(), gzip.open(forest_pickle_filename, 'wb'))

        # Print forest stats
        forest_stats = predictor.get_forest().GetForestStats()
        forest_stats.Print()
def create_online_axis_aligned_matrix_one_stream_learner_32f(**kwargs):
    number_of_trees = int(kwargs.get("number_of_trees", 10))
    number_of_features = int(kwargs.get("number_of_features", np.sqrt(kwargs["x"].shape[1])))
    feature_ordering = int(kwargs.get("feature_ordering", pipeline.FEATURES_BY_DATAPOINTS))
    number_of_splitpoints = int(kwargs.get("number_of_splitpoints", 1))
    number_of_classes = int(np.max(kwargs["classes"]) + 1)
    max_frontier_size = int(kwargs.get("max_frontier_size", 10000000))
    impurity_update_period = int(kwargs.get("impurity_update_period", 1))

    try_split_criteria = create_try_split_criteria(**kwargs)

    if "bootstrap" in kwargs and kwargs.get("bootstrap"):
        sample_data_step = pipeline.BootstrapSamplesStep_f32f32i32(buffers.X_FLOAT_DATA)
    elif "poisson_sample" in kwargs:
        poisson_sample_mean = float(kwargs.get("poisson_sample"))
        sample_data_step = pipeline.PoissonSamplesStep_f32i32(buffers.X_FLOAT_DATA, poisson_sample_mean)
    else:
        sample_data_step = pipeline.AllSamplesStep_f32f32i32(buffers.X_FLOAT_DATA)

    number_of_features_buffer = buffers.as_vector_buffer(np.array([number_of_features], dtype=np.int32))
    set_number_features_step = pipeline.SetInt32VectorBufferStep(number_of_features_buffer, pipeline.WHEN_NEW)
    tree_steps_pipeline = pipeline.Pipeline([sample_data_step, set_number_features_step])

    feature_params_step = matrix_features.AxisAlignedParamsStep_f32i32(
        set_number_features_step.OutputBufferId, buffers.X_FLOAT_DATA
    )

    matrix_feature = matrix_features.LinearFloat32MatrixFeature_f32i32(
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        sample_data_step.IndicesBufferId,
        buffers.X_FLOAT_DATA,
    )
    matrix_feature_extractor_step = matrix_features.LinearFloat32MatrixFeatureExtractorStep_f32i32(
        matrix_feature, feature_ordering
    )
    slice_classes_step = pipeline.SliceInt32VectorBufferStep_i32(buffers.CLASS_LABELS, sample_data_step.IndicesBufferId)
    slice_weights_step = pipeline.SliceFloat32VectorBufferStep_i32(
        sample_data_step.WeightsBufferId, sample_data_step.IndicesBufferId
    )

    random_splitpoint_selection_step = splitpoints.RandomSplitpointsStep_f32i32(
        matrix_feature_extractor_step.FeatureValuesBufferId, number_of_splitpoints, feature_ordering
    )

    class_stats_updater = classification.ClassStatsUpdater_f32i32(
        slice_weights_step.SlicedBufferId, slice_classes_step.SlicedBufferId, number_of_classes
    )
    one_stream_split_stats_step = classification.ClassStatsUpdaterOneStreamStep_f32i32(
        random_splitpoint_selection_step.SplitpointsBufferId,
        random_splitpoint_selection_step.SplitpointsCountsBufferId,
        matrix_feature_extractor_step.FeatureValuesBufferId,
        feature_ordering,
        class_stats_updater,
    )

    impurity_step = classification.ClassInfoGainSplitpointsImpurity_f32i32(
        random_splitpoint_selection_step.SplitpointsCountsBufferId,
        one_stream_split_stats_step.ChildCountsBufferId,
        one_stream_split_stats_step.LeftStatsBufferId,
        one_stream_split_stats_step.RightStatsBufferId,
    )

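    # Group the steps by phase: run once per new node, on every statistics
    # update, and on each (periodic) impurity refresh.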
    init_node_steps_pipeline = pipeline.Pipeline([feature_params_step])
    update_stats_node_steps_pipeline = pipeline.Pipeline(
        [
            matrix_feature_extractor_step,
            slice_classes_step,
            slice_weights_step,
            random_splitpoint_selection_step,
            one_stream_split_stats_step,
        ]
    )
    update_impurity_node_steps_pipeline = pipeline.Pipeline([impurity_step])

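    # Once should_split_criteria is satisfied, select the best-scoring
    # candidate split and partition the node's datapoint indices.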
    split_buffers = splitpoints.SplitSelectorBuffers(
        impurity_step.ImpurityBufferId,
        random_splitpoint_selection_step.SplitpointsBufferId,
        random_splitpoint_selection_step.SplitpointsCountsBufferId,
        one_stream_split_stats_step.ChildCountsBufferId,
        one_stream_split_stats_step.LeftStatsBufferId,
        one_stream_split_stats_step.RightStatsBufferId,
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        matrix_feature_extractor_step.FeatureValuesBufferId,
        feature_ordering,
        matrix_feature_extractor_step,
    )
    should_split_criteria = create_should_split_criteria(**kwargs)
    finalizer = classification.ClassEstimatorFinalizer_f32()
    split_indices = splitpoints.SplitIndices_f32i32(sample_data_step.IndicesBufferId)
    split_selector = splitpoints.WaitForBestSplitSelector_f32i32(
        [split_buffers], should_split_criteria, finalizer, split_indices
    )

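    # Prediction-time feature and leaf-estimator updater, wired into the
    # online forest learner along with the pipelines above.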
    matrix_feature_prediction = matrix_features.LinearFloat32MatrixFeature_f32i32(
        sample_data_step.IndicesBufferId, buffers.X_FLOAT_DATA
    )
    estimator_params_updater = classification.ClassEstimatorUpdater_f32i32(
        sample_data_step.WeightsBufferId, buffers.CLASS_LABELS, number_of_classes
    )
    forest_learner = learn.OnlineForestMatrixClassLearner_f32i32(
        try_split_criteria,
        tree_steps_pipeline,
        init_node_steps_pipeline,
        update_stats_node_steps_pipeline,
        update_impurity_node_steps_pipeline,
        impurity_update_period,
        split_selector,
        max_frontier_size,
        number_of_trees,
        5,
        5,
        number_of_classes,
        sample_data_step.IndicesBufferId,
        sample_data_step.WeightsBufferId,
        matrix_feature_prediction,
        estimator_params_updater,
    )
    return forest_learner
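
# A minimal usage sketch (hypothetical data): the factory reads the feature
# count from kwargs["x"] and the number of classes from kwargs["classes"];
# create_try_split_criteria and create_should_split_criteria must accept the
# same kwargs.
x = np.array([[2.0, -3.0], [6.0, 1.0], [1.0, 4.0]], dtype=np.float32)
classes = np.array([0, 1, 2], dtype=np.int32)
learner = create_online_axis_aligned_matrix_one_stream_learner_32f(
    x=x, classes=classes, number_of_trees=5)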
    # On the first pass through the data, train on each sample-count range in turn
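    # Note: the argument is expected to be a Python list literal such as
    # "[1000, 2000]"; eval therefore assumes a trusted command line.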
    list_of_sample_counts = eval(args.list_of_sample_counts)
    clipped_list_of_sample_counts = [
        min(s, pixel_labels_buffer.GetN()) for s in list_of_sample_counts
    ]
    clipped_list_of_sample_ranges = list(zip([0] + clipped_list_of_sample_counts[:-1],
                                             clipped_list_of_sample_counts))
    print(clipped_list_of_sample_ranges)
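    # e.g. "[1000, 2000]" yields ranges [(0, 1000), (1000, 2000)], so each
    # pass trains on the next contiguous block of datapoints.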
    pass_id = 0
    for (start_index, end_index) in clipped_list_of_sample_ranges:
        print(start_index)
        print(end_index)

        # Slice data
        datapoint_indices = buffers.as_vector_buffer(
            np.array(np.arange(start_index, end_index), dtype=np.int32))
        sliced_pixel_indices_buffer = pixel_indices_buffer.Slice(
            datapoint_indices)
        sliced_offset_scales_buffer = offset_scales_buffer.Slice(
            datapoint_indices)
        sliced_pixel_labels_buffer = pixel_labels_buffer.Slice(
            datapoint_indices)

        # online_learner.Train(bufferCollection, buffers.Int32Vector(datapoint_indices))
        predictor = forest_learner.fit(
            depth_images=depths_buffer,
            pixel_indices=sliced_pixel_indices_buffer,
            offset_scales=sliced_offset_scales_buffer,
            classes=sliced_pixel_labels_buffer)

        # Pickle forest and data used for training