def construct_axis_aligned_forest(self):
    """Build a hand-written forest of two five-node trees.

    Both trees share the same child table, depths and counts; they differ
    only in their int params, float params and per-node ``ys`` rows.

    Returns:
        A ``forest_data.Forest`` containing the two trees sketched below.
    """
    # Tree 1
    #                 (0) X[0] > 2.2
    #                /              \
    #       (1) X[1] > -5        (2) [0.7 0.1 0.2]
    #        /           \
    # (3) [0.3 0.3 0.4]  (4) [0.3 0.6 0.1]
    #
    # Tree 2
    #                 (0) X[0] > 5.0
    #                /              \
    #       (1) X[0] > 2.5       (2) [0.8 0.1 0.1]
    #        /           \
    # (3) [0.2 0.2 0.6]  (4) [0.2 0.7 0.1]

    def matrix(rows, dtype):
        return buffers.as_matrix_buffer(np.array(rows, dtype=dtype))

    def vector(values, dtype):
        return buffers.as_vector_buffer(np.array(values, dtype=dtype))

    def make_tree(int_rows, float_rows, y_rows):
        # Rows are indexed by node id; -1 in the child table presumably
        # marks "no child" (confirm against forest_data.Tree).
        children = matrix(
            [[1, 2], [3, 4], [-1, -1], [-1, -1], [-1, -1]], np.int32)
        int_params = matrix(int_rows, np.int32)
        float_params = matrix(float_rows, np.float32)
        ys = matrix(y_rows, np.float32)
        depths = vector([0, 1, 1, 2, 2], np.int32)
        counts = vector([5, 5, 5, 5, 5], np.float32)
        return forest_data.Tree(
            children, int_params, float_params, depths, counts, ys)

    first_tree = make_tree(
        [[1, 0], [1, 1], [1, 0], [1, 0], [1, 0]],
        [[2.2], [-5], [0], [0], [0]],
        [[0, 0, 0], [0, 0, 0], [0.7, 0.1, 0.2], [0.3, 0.3, 0.4],
         [0.3, 0.6, 0.1]],
    )
    second_tree = make_tree(
        [[1, 0], [1, 0], [1, 0], [1, 0], [1, 0]],
        [[5.0], [2.5], [0], [0], [0]],
        [[0, 0, 0], [0, 0, 0], [0.8, 0.1, 0.1], [0.2, 0.2, 0.6],
         [0.2, 0.7, 0.1]],
    )
    return forest_data.Forest([first_tree, second_tree])
def construct_axis_aligned_forest(self):
    """Build a two-tree forest whose second tree has an extra info buffer.

    Both trees use the same child table, depths and counts. After the second
    tree is created, a float32 array is attached to its extra info under the
    name ``"first"``.

    Returns:
        A ``forest_data.Forest`` containing the two trees sketched below.
    """
    # Tree 1
    #                 (0) X[0] > 2.2
    #                /              \
    #       (1) X[1] > -5        (2) [0.7 0.1 0.2]
    #        /           \
    # (3) [0.3 0.3 0.4]  (4) [0.3 0.6 0.1]
    #
    # Tree 2
    #                 (0) X[0] > 5.0
    #                /              \
    #       (1) X[0] > 2.5       (2) [0.8 0.1 0.1]
    #        /           \
    # (3) [0.2 0.2 0.6]  (4) [0.2 0.7 0.1]

    # One (int params, float params, ys) triple per tree, rows indexed by node.
    tree_specs = [
        (
            [[1, 0], [1, 1], [1, 0], [1, 0], [1, 0]],
            [[2.2], [-5], [0], [0], [0]],
            [[0, 0, 0], [0, 0, 0], [0.7, 0.1, 0.2], [0.3, 0.3, 0.4],
             [0.3, 0.6, 0.1]],
        ),
        (
            [[1, 0], [1, 0], [1, 0], [1, 0], [1, 0]],
            [[5.0], [2.5], [0], [0], [0]],
            [[0, 0, 0], [0, 0, 0], [0.8, 0.1, 0.1], [0.2, 0.2, 0.6],
             [0.2, 0.7, 0.1]],
        ),
    ]

    trees = []
    for int_rows, float_rows, y_rows in tree_specs:
        children = buffers.as_matrix_buffer(np.array(
            [[1, 2], [3, 4], [-1, -1], [-1, -1], [-1, -1]], dtype=np.int32))
        int_params = buffers.as_matrix_buffer(
            np.array(int_rows, dtype=np.int32))
        float_params = buffers.as_matrix_buffer(
            np.array(float_rows, dtype=np.float32))
        ys = buffers.as_matrix_buffer(np.array(y_rows, dtype=np.float32))
        depths = buffers.as_vector_buffer(
            np.array([0, 1, 1, 2, 2], dtype=np.int32))
        counts = buffers.as_vector_buffer(
            np.array([5, 5, 5, 5, 5], dtype=np.float32))
        trees.append(forest_data.Tree(
            children, int_params, float_params, depths, counts, ys))

    # Only the second tree carries extra info.
    extra_values = np.array([3, 21, 1, 22, 1, 5], dtype=np.float32)
    trees[1].GetExtraInfo().AddBuffer("first", extra_values)

    return forest_data.Forest(trees)
def create_scaled_depth_delta_learner_32f(**kwargs):
    """Assemble a parallel forest learner over scaled depth-delta features.

    Keyword arguments read here:
        ux, uy, vx, vy: converted with ``float``; a missing value raises
            ``TypeError`` from ``float(None)``.
        number_of_trees: default 10.
        number_of_features: default 1.
        feature_ordering: default ``pipeline.FEATURES_BY_DATAPOINTS``.
        number_of_jobs: default 1.
        classes: required; must provide ``GetMax()``.
        bootstrap: when truthy, bootstrap sampling replaces all-samples.

    All keyword arguments are also forwarded to ``create_try_split_criteria``
    and ``create_should_split_criteria``.

    Returns:
        The configured ``learn.ParallelForestLearner``.
    """
    ux = float(kwargs.get('ux'))
    uy = float(kwargs.get('uy'))
    vx = float(kwargs.get('vx'))
    vy = float(kwargs.get('vy'))
    n_trees = int(kwargs.get('number_of_trees', 10))
    n_features = int(kwargs.get('number_of_features', 1))
    ordering = int(
        kwargs.get('feature_ordering', pipeline.FEATURES_BY_DATAPOINTS))
    n_jobs = int(kwargs.get('number_of_jobs', 1))
    n_classes = int(kwargs['classes'].GetMax() + 1)
    try_split = create_try_split_criteria(**kwargs)

    # --- steps run once per tree ---
    sampler_cls = (pipeline.BootstrapSamplesStep_i32f32i32
                   if kwargs.get('bootstrap')
                   else pipeline.AllSamplesStep_i32f32i32)
    sampler = sampler_cls(buffers.PIXEL_INDICES)
    n_features_buffer = buffers.as_vector_buffer(
        np.array([n_features], dtype=np.int32))
    set_n_features = pipeline.SetInt32VectorBufferStep(
        n_features_buffer, pipeline.WHEN_NEW)
    tree_steps = pipeline.Pipeline([sampler, set_n_features])

    # --- steps run once per node ---
    offsets = image_features.PixelPairGaussianOffsetsStep_f32i32(
        set_n_features.OutputBufferId, ux, uy, vx, vy)
    feature = image_features.ScaledDepthDeltaFeature_f32i32(
        offsets.FloatParamsBufferId,
        offsets.IntParamsBufferId,
        sampler.IndicesBufferId,
        buffers.PIXEL_INDICES,
        buffers.DEPTH_IMAGES,
        buffers.OFFSET_SCALES)
    extractor = image_features.ScaledDepthDeltaFeatureExtractorStep_f32i32(
        feature, ordering)
    class_slice = pipeline.SliceInt32VectorBufferStep_i32(
        buffers.CLASS_LABELS, sampler.IndicesBufferId)
    weight_slice = pipeline.SliceFloat32VectorBufferStep_i32(
        sampler.WeightsBufferId, sampler.IndicesBufferId)
    walker = classification.ClassInfoGainWalker_f32i32(
        weight_slice.SlicedBufferId, class_slice.SlicedBufferId, n_classes)
    best_split = (
        classification.ClassInfoGainBestSplitpointsWalkingSortedStep_f32i32(
            walker, extractor.FeatureValuesBufferId, ordering))
    node_steps = pipeline.Pipeline(
        [offsets, extractor, class_slice, weight_slice, best_split])

    # --- split selection ---
    selector_buffers = splitpoints.SplitSelectorBuffers(
        best_split.ImpurityBufferId,
        best_split.SplitpointBufferId,
        best_split.SplitpointCountsBufferId,
        best_split.ChildCountsBufferId,
        best_split.LeftYsBufferId,
        best_split.RightYsBufferId,
        offsets.FloatParamsBufferId,
        offsets.IntParamsBufferId,
        extractor.FeatureValuesBufferId,
        ordering,
        sampler.IndicesBufferId)
    should_split = create_should_split_criteria(**kwargs)
    finalizer = classification.ClassEstimatorFinalizer_f32()
    selector = splitpoints.SplitSelector_f32i32(
        [selector_buffers], should_split, finalizer)

    tree_learner = learn.DepthFirstTreeLearner_f32i32(
        try_split, tree_steps, node_steps, selector)
    # NOTE(review): the meaning of the two literal 5s is not visible in this
    # function; confirm against ParallelForestLearner's signature.
    return learn.ParallelForestLearner(
        tree_learner, n_trees, 5, 5, n_classes, n_jobs)
def test_pickle(self):
    """Check that tensor, matrix and vector buffers survive a pickle round trip.

    For each of float32, float64, int32 and int64, a buffer is built from a
    numpy array, pickled to ``tmp.pkl``, loaded back, converted to a numpy
    array and compared element-wise with the original.

    Files are opened in ``with`` blocks so each handle is closed (and the
    dump flushed) before the file is reopened for reading.
    """
    dtypes = [np.float32, np.float64, np.int32, np.int64]

    # test tensor
    for dtype in dtypes:
        array = np.array(
            [[[3, 21, 1], [22, 1, 5]], [[2, 2, 2], [7, 7, 7]]], dtype=dtype)
        b1 = buffers.as_tensor_buffer(array)
        with open('tmp.pkl', 'wb') as out_file:
            pickle.dump(b1, out_file)
        with open('tmp.pkl', 'rb') as in_file:
            b2 = pickle.load(in_file)
        array2 = buffers.as_numpy_array(b2)
        self.assertTrue((array == array2).all())

    # test matrix
    for dtype in dtypes:
        array = np.array(
            [[3, 21, 1], [22, 1, 5], [2, 2, 2], [7, 7, 7]], dtype=dtype)
        b1 = buffers.as_matrix_buffer(array)
        with open('tmp.pkl', 'wb') as out_file:
            pickle.dump(b1, out_file)
        with open('tmp.pkl', 'rb') as in_file:
            b2 = pickle.load(in_file)
        array2 = buffers.as_numpy_array(b2)
        self.assertTrue((array == array2).all())

    # test vector
    for dtype in dtypes:
        array = np.array([3, 21, 1], dtype=dtype)
        b1 = buffers.as_vector_buffer(array)
        with open('tmp.pkl', 'wb') as out_file:
            pickle.dump(b1, out_file)
        with open('tmp.pkl', 'rb') as in_file:
            b2 = pickle.load(in_file)
        array2 = buffers.as_numpy_array(b2)
        self.assertTrue((array == array2).all())
def test_pickle(self):
    """Check that tensor, matrix and vector buffers survive a pickle round trip.

    Each buffer kind is exercised with float32, float64, int32 and int64
    data: the buffer is pickled to ``tmp.pkl``, loaded back, converted to a
    numpy array and compared element-wise with the source array.

    The file is opened in ``with`` blocks so every handle is closed (and the
    dump flushed) before the file is read back.
    """
    dtypes = [np.float32, np.float64, np.int32, np.int64]
    # (buffer factory, nested-list data), in the order: tensor, matrix, vector
    cases = [
        (buffers.as_tensor_buffer,
         [[[3, 21, 1], [22, 1, 5]], [[2, 2, 2], [7, 7, 7]]]),
        (buffers.as_matrix_buffer,
         [[3, 21, 1], [22, 1, 5], [2, 2, 2], [7, 7, 7]]),
        (buffers.as_vector_buffer,
         [3, 21, 1]),
    ]

    for make_buffer, data in cases:
        for dtype in dtypes:
            array = np.array(data, dtype=dtype)
            b1 = make_buffer(array)
            with open("tmp.pkl", "wb") as out_file:
                pickle.dump(b1, out_file)
            with open("tmp.pkl", "rb") as in_file:
                b2 = pickle.load(in_file)
            array2 = buffers.as_numpy_array(b2)
            self.assertTrue((array == array2).all())
def create_biau2008_regression_axis_aligned_matrix_learner_32f(**kwargs):
    """Assemble a Biau2008 regression forest learner over axis-aligned features.

    Keyword arguments read here:
        y: required; ``y.shape[0]`` feeds the default leaf count and
            ``y.shape[1]`` is used as the dimension of y.
        number_of_trees: default 10.
        number_of_leaves: default ``y.shape[0] / 5 + 1`` (truncated by int).
        number_of_split_retries: default 10.
        feature_ordering: default ``pipeline.FEATURES_BY_DATAPOINTS``.
        number_of_jobs: default 1.

    The number of features is fixed at 1, the try-split criterion at
    ``MinNodeSizeCriteria(2)`` and the should-split criterion at
    ``MinChildSizeCriteria(1)``.

    Returns:
        The configured ``learn.ParallelForestLearner``.
    """
    number_of_trees = int(kwargs.get('number_of_trees', 10))
    number_of_leaves = int(
        kwargs.get('number_of_leaves', kwargs['y'].shape[0] / 5 + 1))
    number_of_split_retries = int(kwargs.get('number_of_split_retries', 10))
    number_of_features = 1
    feature_ordering = int(
        kwargs.get('feature_ordering', pipeline.FEATURES_BY_DATAPOINTS))
    number_of_jobs = int(kwargs.get('number_of_jobs', 1))
    dimension_of_y = int(kwargs['y'].shape[1])

    try_split_criteria = try_split.MinNodeSizeCriteria(2)

    # Steps run once per tree.
    sample_data_step = pipeline.AllSamplesStep_f32f32i32(buffers.X_FLOAT_DATA)
    number_of_features_buffer = buffers.as_vector_buffer(
        np.array([number_of_features], dtype=np.int32))
    set_number_features_step = pipeline.SetInt32VectorBufferStep(
        number_of_features_buffer, pipeline.WHEN_NEW)
    tree_steps_pipeline = pipeline.Pipeline(
        [sample_data_step, set_number_features_step])

    # Steps run once per node.
    feature_params_step = matrix_features.AxisAlignedParamsStep_f32i32(
        set_number_features_step.OutputBufferId, buffers.X_FLOAT_DATA)
    matrix_feature = matrix_features.LinearFloat32MatrixFeature_f32i32(
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        sample_data_step.IndicesBufferId,
        buffers.X_FLOAT_DATA)
    matrix_feature_extractor_step = (
        matrix_features.LinearFloat32MatrixFeatureExtractorStep_f32i32(
            matrix_feature, feature_ordering))
    slice_ys_step = pipeline.SliceFloat32MatrixBufferStep_i32(
        buffers.YS, sample_data_step.IndicesBufferId)
    slice_weights_step = pipeline.SliceFloat32VectorBufferStep_i32(
        sample_data_step.WeightsBufferId, sample_data_step.IndicesBufferId)
    impurity_walker = regression.SumOfVarianceWalker_f32i32(
        slice_weights_step.SlicedBufferId,
        slice_ys_step.SlicedBufferId,
        dimension_of_y)
    best_splitpoint_step = (
        regression.SumOfVarianceRandomGapSplitpointsStep_f32i32(
            impurity_walker,
            matrix_feature_extractor_step.FeatureValuesBufferId,
            feature_ordering))
    node_steps_pipeline = pipeline.Pipeline([
        feature_params_step,
        matrix_feature_extractor_step,
        slice_ys_step,
        slice_weights_step,
        best_splitpoint_step,
    ])

    # Split selection.
    split_buffers = splitpoints.SplitSelectorBuffers(
        best_splitpoint_step.ImpurityBufferId,
        best_splitpoint_step.SplitpointBufferId,
        best_splitpoint_step.SplitpointCountsBufferId,
        best_splitpoint_step.ChildCountsBufferId,
        best_splitpoint_step.LeftYsBufferId,
        best_splitpoint_step.RightYsBufferId,
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        matrix_feature_extractor_step.FeatureValuesBufferId,
        feature_ordering,
        matrix_feature_extractor_step)
    should_split_criteria = should_split.MinChildSizeCriteria(1)
    finalizer = regression.MeanVarianceEstimatorFinalizer_f32()
    split_indices = splitpoints.SplitIndices_f32i32(
        sample_data_step.IndicesBufferId)
    split_selector = splitpoints.SplitSelector_f32i32(
        [split_buffers], should_split_criteria, finalizer, split_indices)

    tree_learner = learn.Biau2008TreeLearner_f32i32(
        try_split_criteria,
        tree_steps_pipeline,
        node_steps_pipeline,
        split_selector,
        number_of_leaves,
        number_of_split_retries)
    forest_learner = learn.ParallelForestLearner(
        tree_learner, number_of_trees, dimension_of_y, number_of_jobs)
    return forest_learner
def load_and_sample(pose_path, list_of_poses, number_of_pixels_per_image):
    """Load pose data, sample pixels from the label images and wrap as buffers.

    Args:
        pose_path: forwarded to ``load_data``.
        list_of_poses: forwarded to ``load_data``.
        number_of_pixels_per_image: forwarded to ``sample_pixels_from_images``.

    Returns:
        A tuple ``(depths_buffer, pixel_indices_buffer, pixel_labels_buffer)``:
        a tensor buffer, a matrix buffer and a vector buffer respectively.
    """
    depth_images, label_images = load_data(pose_path, list_of_poses)

    # Drop the local reference to the depth array as soon as it is wrapped,
    # before pixel sampling runs.
    depths_buffer = buffers.as_tensor_buffer(depth_images)
    del depth_images

    sampled_indices, sampled_labels = sample_pixels_from_images(
        label_images, number_of_pixels_per_image)
    del label_images

    return (
        depths_buffer,
        buffers.as_matrix_buffer(sampled_indices),
        buffers.as_vector_buffer(sampled_labels),
    )
def load_training_data(numpy_filename):
    """Load previously saved training arrays and wrap them as buffers.

    The file is read as four consecutive ``np.load`` records, in order:
    depths, labels, pixel indices, pixel labels. The labels record is read
    only to advance the file position and is discarded.

    The file is opened in a ``with`` block so the handle is closed even if
    one of the loads raises.

    Args:
        numpy_filename: path of the file to read (opened in binary mode).

    Returns:
        A tuple ``(depths_buffer, pixel_indices_buffer, pixel_labels_buffer)``:
        a tensor buffer, a matrix buffer and a vector buffer respectively.
    """
    with open(numpy_filename, 'rb') as f:
        depths = np.load(f)
        np.load(f)  # skip the labels record; it is not returned
        pixel_indices = np.load(f)
        pixel_labels = np.load(f)

    # Drop each array reference as soon as its buffer exists.
    depths_buffer = buffers.as_tensor_buffer(depths)
    del depths
    pixel_indices_buffer = buffers.as_matrix_buffer(pixel_indices)
    del pixel_indices
    pixel_labels_buffer = buffers.as_vector_buffer(pixel_labels)
    del pixel_labels
    return depths_buffer, pixel_indices_buffer, pixel_labels_buffer
def create_axis_aligned_matrix_two_stream_learner_32f(**kwargs):
    """Assemble a two-stream classification forest learner on axis-aligned features.

    Keyword arguments read here:
        x: required; ``sqrt(x.shape[1])`` is the default feature count.
        classes: required; ``np.max(classes) + 1`` is the class count.
        number_of_trees: default 10.
        number_of_features: default ``sqrt(x.shape[1])`` (truncated by int).
        feature_ordering: default ``pipeline.FEATURES_BY_DATAPOINTS``.
        number_of_splitpoints: default 1.
        number_of_jobs: default 1.
        probability_of_impurity_stream: default 0.5.
        bootstrap: when truthy, bootstrap sampling replaces all-samples.
        tree_order: ``'breadth_first'`` selects the breadth-first tree
            learner; anything else selects depth-first.

    All keyword arguments are also forwarded to ``create_try_split_criteria``
    and ``create_should_split_criteria``.

    Returns:
        The configured ``learn.ParallelForestLearner``.
    """
    n_trees = int(kwargs.get('number_of_trees', 10))
    n_features = int(
        kwargs.get('number_of_features', np.sqrt(kwargs['x'].shape[1])))
    ordering = int(
        kwargs.get('feature_ordering', pipeline.FEATURES_BY_DATAPOINTS))
    n_splitpoints = int(kwargs.get('number_of_splitpoints', 1))
    n_jobs = int(kwargs.get('number_of_jobs', 1))
    n_classes = int(np.max(kwargs['classes']) + 1)
    impurity_stream_prob = float(
        kwargs.get('probability_of_impurity_stream', 0.5))
    try_split = create_try_split_criteria(**kwargs)

    # --- steps run once per tree ---
    sampler_cls = (pipeline.BootstrapSamplesStep_f32f32i32
                   if kwargs.get('bootstrap')
                   else pipeline.AllSamplesStep_f32f32i32)
    sampler = sampler_cls(buffers.X_FLOAT_DATA)
    n_features_buffer = buffers.as_vector_buffer(
        np.array([n_features], dtype=np.int32))
    set_n_features = pipeline.SetInt32VectorBufferStep(
        n_features_buffer, pipeline.WHEN_NEW)
    assign_stream = splitpoints.AssignStreamStep_f32i32(
        sampler.WeightsBufferId, impurity_stream_prob)
    tree_steps = pipeline.Pipeline([sampler, set_n_features, assign_stream])

    # --- steps run once per node ---
    axis_params = matrix_features.AxisAlignedParamsStep_f32i32(
        set_n_features.OutputBufferId, buffers.X_FLOAT_DATA)
    feature = matrix_features.LinearFloat32MatrixFeature_f32i32(
        axis_params.FloatParamsBufferId,
        axis_params.IntParamsBufferId,
        sampler.IndicesBufferId,
        buffers.X_FLOAT_DATA)
    extractor = matrix_features.LinearFloat32MatrixFeatureExtractorStep_f32i32(
        feature, ordering)
    class_slice = pipeline.SliceInt32VectorBufferStep_i32(
        buffers.CLASS_LABELS, sampler.IndicesBufferId)
    weight_slice = pipeline.SliceFloat32VectorBufferStep_i32(
        sampler.WeightsBufferId, sampler.IndicesBufferId)
    stream_slice = pipeline.SliceInt32VectorBufferStep_i32(
        assign_stream.StreamTypeBufferId, sampler.IndicesBufferId)
    random_splits = splitpoints.RandomSplitpointsStep_f32i32(
        extractor.FeatureValuesBufferId,
        n_splitpoints,
        ordering,
        stream_slice.SlicedBufferId)
    stats_updater = classification.ClassStatsUpdater_f32i32(
        weight_slice.SlicedBufferId, class_slice.SlicedBufferId, n_classes)
    two_stream_stats = classification.ClassStatsUpdaterTwoStreamStep_f32i32(
        random_splits.SplitpointsBufferId,
        random_splits.SplitpointsCountsBufferId,
        stream_slice.SlicedBufferId,
        extractor.FeatureValuesBufferId,
        ordering,
        stats_updater)
    impurity = classification.ClassInfoGainSplitpointsImpurity_f32i32(
        random_splits.SplitpointsCountsBufferId,
        two_stream_stats.ChildCountsImpurityBufferId,
        two_stream_stats.LeftImpurityStatsBufferId,
        two_stream_stats.RightImpurityStatsBufferId)
    node_steps = pipeline.Pipeline([
        axis_params,
        extractor,
        class_slice,
        weight_slice,
        stream_slice,
        random_splits,
        two_stream_stats,
        impurity,
    ])

    # --- split selection: impurity from one set of stats buffers, estimator
    # stats from the other ---
    selector_buffers = splitpoints.SplitSelectorBuffers(
        impurity.ImpurityBufferId,
        random_splits.SplitpointsBufferId,
        random_splits.SplitpointsCountsBufferId,
        two_stream_stats.ChildCountsEstimatorBufferId,
        two_stream_stats.LeftEstimatorStatsBufferId,
        two_stream_stats.RightEstimatorStatsBufferId,
        axis_params.FloatParamsBufferId,
        axis_params.IntParamsBufferId,
        extractor.FeatureValuesBufferId,
        ordering,
        extractor)
    should_split = create_should_split_criteria(**kwargs)
    finalizer = classification.ClassEstimatorFinalizer_f32()
    index_splitter = splitpoints.SplitIndices_f32i32(sampler.IndicesBufferId)
    selector = splitpoints.SplitSelector_f32i32(
        [selector_buffers], should_split, finalizer, index_splitter)

    tree_learner_cls = (learn.BreadthFirstTreeLearner_f32i32
                        if kwargs.get('tree_order') == 'breadth_first'
                        else learn.DepthFirstTreeLearner_f32i32)
    tree_learner = tree_learner_cls(
        try_split, tree_steps, node_steps, selector)
    return learn.ParallelForestLearner(
        tree_learner, n_trees, n_classes, n_jobs)
def create_class_pair_difference_matrix_walking_learner_32f(**kwargs):
    """Assemble a classification forest learner on class-pair-difference features.

    Keyword arguments read here:
        x: required; ``sqrt(x.shape[1])`` is the default feature count.
        classes: required; ``np.max(classes) + 1`` is the class count.
        number_of_trees: default 10.
        number_of_features: default ``sqrt(x.shape[1])`` (truncated by int).
        feature_ordering: default ``pipeline.FEATURES_BY_DATAPOINTS``.
        number_of_jobs: default 1.
        bootstrap: when truthy, bootstrap sampling replaces all-samples.
        tree_order: ``'breadth_first'`` selects the breadth-first tree
            learner; anything else selects depth-first.

    All keyword arguments are also forwarded to ``create_try_split_criteria``
    and ``create_should_split_criteria``.

    Returns:
        The configured ``learn.ParallelForestLearner``.
    """
    n_trees = int(kwargs.get('number_of_trees', 10))
    n_features = int(
        kwargs.get('number_of_features', np.sqrt(kwargs['x'].shape[1])))
    ordering = int(
        kwargs.get('feature_ordering', pipeline.FEATURES_BY_DATAPOINTS))
    n_jobs = int(kwargs.get('number_of_jobs', 1))
    n_classes = int(np.max(kwargs['classes']) + 1)
    try_split = create_try_split_criteria(**kwargs)

    # --- steps run once per tree ---
    sampler_cls = (pipeline.BootstrapSamplesStep_f32f32i32
                   if kwargs.get('bootstrap')
                   else pipeline.AllSamplesStep_f32f32i32)
    sampler = sampler_cls(buffers.X_FLOAT_DATA)
    n_features_buffer = buffers.as_vector_buffer(
        np.array([n_features], dtype=np.int32))
    set_n_features = pipeline.SetInt32VectorBufferStep(
        n_features_buffer, pipeline.WHEN_NEW)
    tree_steps = pipeline.Pipeline([sampler, set_n_features])

    # --- steps run once per node ---
    pair_params = matrix_features.ClassPairDifferenceParamsStep_f32i32(
        set_n_features.OutputBufferId,
        buffers.X_FLOAT_DATA,
        buffers.CLASS_LABELS,
        sampler.IndicesBufferId)
    feature = matrix_features.LinearFloat32MatrixFeature_f32i32(
        pair_params.FloatParamsBufferId,
        pair_params.IntParamsBufferId,
        sampler.IndicesBufferId,
        buffers.X_FLOAT_DATA)
    extractor = matrix_features.LinearFloat32MatrixFeatureExtractorStep_f32i32(
        feature, ordering)
    class_slice = pipeline.SliceInt32VectorBufferStep_i32(
        buffers.CLASS_LABELS, sampler.IndicesBufferId)
    weight_slice = pipeline.SliceFloat32VectorBufferStep_i32(
        sampler.WeightsBufferId, sampler.IndicesBufferId)
    walker = classification.ClassInfoGainWalker_f32i32(
        weight_slice.SlicedBufferId, class_slice.SlicedBufferId, n_classes)
    best_split = (
        classification.ClassInfoGainBestSplitpointsWalkingSortedStep_f32i32(
            walker, extractor.FeatureValuesBufferId, ordering))
    node_steps = pipeline.Pipeline(
        [pair_params, extractor, class_slice, weight_slice, best_split])

    # --- split selection ---
    selector_buffers = splitpoints.SplitSelectorBuffers(
        best_split.ImpurityBufferId,
        best_split.SplitpointBufferId,
        best_split.SplitpointCountsBufferId,
        best_split.ChildCountsBufferId,
        best_split.LeftYsBufferId,
        best_split.RightYsBufferId,
        pair_params.FloatParamsBufferId,
        pair_params.IntParamsBufferId,
        extractor.FeatureValuesBufferId,
        ordering,
        extractor)
    should_split = create_should_split_criteria(**kwargs)
    finalizer = classification.ClassEstimatorFinalizer_f32()
    index_splitter = splitpoints.SplitIndices_f32i32(sampler.IndicesBufferId)
    selector = splitpoints.SplitSelector_f32i32(
        [selector_buffers], should_split, finalizer, index_splitter)

    tree_learner_cls = (learn.BreadthFirstTreeLearner_f32i32
                        if kwargs.get('tree_order') == 'breadth_first'
                        else learn.DepthFirstTreeLearner_f32i32)
    tree_learner = tree_learner_cls(
        try_split, tree_steps, node_steps, selector)
    return learn.ParallelForestLearner(
        tree_learner, n_trees, n_classes, n_jobs)
def create_biau2012_regression_axis_aligned_matrix_learner_32f(**kwargs):
    """Assemble a Biau2012 two-stream regression forest learner with midpoint splits.

    Keyword arguments read here:
        x: required; ``x.shape[1] / 3 + 0.5`` is the default feature count,
            and every entry must satisfy ``abs(x) <= 1`` (checked by assert).
        y: required; ``y.shape[0] / 5 + 1`` is the default leaf count and
            ``y.shape[1]`` is the dimension of y.
        number_of_trees: default 10.
        number_of_leaves: see ``y`` (truncated by int).
        number_of_features: see ``x`` (truncated by int).
        feature_ordering: default ``pipeline.FEATURES_BY_DATAPOINTS``.
        number_of_jobs: default 1.
        probability_of_impurity_stream: default 0.5.

    All keyword arguments are also forwarded to ``create_try_split_criteria``
    and ``no_split_criteria``.

    Returns:
        The configured ``learn.ParallelForestLearner``.
    """
    n_trees = int(kwargs.get('number_of_trees', 10))
    n_leaves = int(
        kwargs.get('number_of_leaves', kwargs['y'].shape[0] / 5 + 1))
    n_features = int(
        kwargs.get('number_of_features', (kwargs['x'].shape[1]) / 3 + 0.5))
    ordering = int(
        kwargs.get('feature_ordering', pipeline.FEATURES_BY_DATAPOINTS))
    n_jobs = int(kwargs.get('number_of_jobs', 1))
    y_dim = int(kwargs['y'].shape[1])
    impurity_stream_prob = float(
        kwargs.get('probability_of_impurity_stream', 0.5))
    try_split = create_try_split_criteria(**kwargs)

    # --- steps handed to the forest learner; the per-tree pipeline is empty ---
    sampler = pipeline.AllSamplesStep_f32f32i32(buffers.X_FLOAT_DATA)
    n_features_buffer = buffers.as_vector_buffer(
        np.array([n_features], dtype=np.int32))
    set_n_features = pipeline.SetInt32VectorBufferStep(
        n_features_buffer, pipeline.WHEN_NEW)

    # Double check that the data has been scaled into a -1,1 hypercube,
    # matching the fixed feature range set just below.
    assert np.max(np.abs(kwargs['x'])) <= 1.00
    range_buffer = buffers.as_vector_buffer(
        np.array([-1, 1], dtype=np.float32))
    set_feature_range = pipeline.SetFloat32VectorBufferStep(
        range_buffer, pipeline.WHEN_NEW)
    assign_stream = splitpoints.AssignStreamStep_f32i32(
        sampler.WeightsBufferId, impurity_stream_prob, False)
    forest_steps = pipeline.Pipeline(
        [sampler, set_n_features, set_feature_range, assign_stream])
    tree_steps = pipeline.Pipeline([])

    # --- steps run once per node ---
    axis_params = matrix_features.AxisAlignedParamsStep_f32i32(
        set_n_features.OutputBufferId, buffers.X_FLOAT_DATA)
    feature = matrix_features.LinearFloat32MatrixFeature_f32i32(
        axis_params.FloatParamsBufferId,
        axis_params.IntParamsBufferId,
        sampler.IndicesBufferId,
        buffers.X_FLOAT_DATA)
    extractor = matrix_features.LinearFloat32MatrixFeatureExtractorStep_f32i32(
        feature, ordering)
    ys_slice = pipeline.SliceFloat32MatrixBufferStep_i32(
        buffers.YS, sampler.IndicesBufferId)
    weight_slice = pipeline.SliceFloat32VectorBufferStep_i32(
        sampler.WeightsBufferId, sampler.IndicesBufferId)
    stream_slice = pipeline.SliceInt32VectorBufferStep_i32(
        assign_stream.StreamTypeBufferId, sampler.IndicesBufferId)
    feature_equal = pipeline.FeatureEqualQuantized_f32i32(1.0)
    midpoints = splitpoints.RangeMidpointStep_f32i32(
        axis_params.FloatParamsBufferId,
        axis_params.IntParamsBufferId,
        set_feature_range.OutputBufferId,
        feature_equal)
    stats_updater = regression.MeanVarianceStatsUpdater_f32i32(
        weight_slice.SlicedBufferId, ys_slice.SlicedBufferId, y_dim)
    two_stream_stats = regression.SumOfVarianceTwoStreamStep_f32i32(
        midpoints.SplitpointsBufferId,
        midpoints.SplitpointsCountsBufferId,
        stream_slice.SlicedBufferId,
        extractor.FeatureValuesBufferId,
        ordering,
        stats_updater)
    impurity = regression.SumOfVarianceSplitpointsImpurity_f32i32(
        midpoints.SplitpointsCountsBufferId,
        two_stream_stats.ChildCountsImpurityBufferId,
        two_stream_stats.LeftImpurityStatsBufferId,
        two_stream_stats.RightImpurityStatsBufferId)
    node_steps = pipeline.Pipeline([
        axis_params,
        extractor,
        ys_slice,
        weight_slice,
        stream_slice,
        midpoints,
        two_stream_stats,
        impurity,
    ])

    # --- split selection ---
    selector_buffers = splitpoints.SplitSelectorBuffers(
        impurity.ImpurityBufferId,
        midpoints.SplitpointsBufferId,
        midpoints.SplitpointsCountsBufferId,
        two_stream_stats.ChildCountsEstimatorBufferId,
        two_stream_stats.LeftEstimatorStatsBufferId,
        two_stream_stats.RightEstimatorStatsBufferId,
        axis_params.FloatParamsBufferId,
        axis_params.IntParamsBufferId,
        extractor.FeatureValuesBufferId,
        ordering,
        extractor)
    should_split = no_split_criteria(**kwargs)
    finalizer = regression.MeanVarianceEstimatorFinalizer_f32()
    index_splitter = splitpoints.SplitIndices_f32i32(sampler.IndicesBufferId)
    range_splitter = splitpoints.SplitBuffersFeatureRange_f32i32(
        midpoints.PastFloatParamsBufferId,
        midpoints.PastIntParamsBufferId,
        midpoints.PastRangesBufferId,
        set_feature_range.OutputBufferId,
        feature_equal)
    splitters = splitpoints.SplitBuffersList([index_splitter, range_splitter])
    selector = splitpoints.SplitSelector_f32i32(
        [selector_buffers], should_split, finalizer, splitters)

    tree_learner = learn.BreadthFirstTreeLearner_f32i32(
        try_split, tree_steps, node_steps, selector, n_leaves)
    return learn.ParallelForestLearner(
        tree_learner, forest_steps, n_trees, y_dim, n_jobs)
def create_axis_aligned_matrix_two_stream_learner_32f(**kwargs):
    """Build a two-stream classification forest learner over axis-aligned features.

    Options read from ``kwargs``:
        x: required; ``sqrt(x.shape[1])`` is the default feature count.
        classes: required; ``np.max(classes) + 1`` is the class count.
        number_of_trees (default 10), number_of_features (default
        ``sqrt(x.shape[1])``, truncated by int), feature_ordering (default
        ``pipeline.FEATURES_BY_DATAPOINTS``), number_of_splitpoints
        (default 1), number_of_jobs (default 1),
        probability_of_impurity_stream (default 0.5).
        bootstrap: truthy selects bootstrap sampling over all-samples.
        tree_order: ``"breadth_first"`` selects the breadth-first tree
            learner; any other value selects depth-first.

    ``kwargs`` is also passed whole to ``create_try_split_criteria`` and
    ``create_should_split_criteria``.

    Returns:
        The configured ``learn.ParallelForestLearner``.
    """
    tree_count = int(kwargs.get("number_of_trees", 10))
    feature_count = int(
        kwargs.get("number_of_features", np.sqrt(kwargs["x"].shape[1])))
    ordering = int(
        kwargs.get("feature_ordering", pipeline.FEATURES_BY_DATAPOINTS))
    splitpoint_count = int(kwargs.get("number_of_splitpoints", 1))
    job_count = int(kwargs.get("number_of_jobs", 1))
    class_count = int(np.max(kwargs["classes"]) + 1)
    impurity_prob = float(kwargs.get("probability_of_impurity_stream", 0.5))
    try_criteria = create_try_split_criteria(**kwargs)

    # Per-tree steps: sample data, publish the feature count, assign streams.
    if kwargs.get("bootstrap"):
        sample_step = pipeline.BootstrapSamplesStep_f32f32i32(
            buffers.X_FLOAT_DATA)
    else:
        sample_step = pipeline.AllSamplesStep_f32f32i32(buffers.X_FLOAT_DATA)
    feature_count_buffer = buffers.as_vector_buffer(
        np.array([feature_count], dtype=np.int32))
    feature_count_step = pipeline.SetInt32VectorBufferStep(
        feature_count_buffer, pipeline.WHEN_NEW)
    stream_step = splitpoints.AssignStreamStep_f32i32(
        sample_step.WeightsBufferId, impurity_prob)
    per_tree = pipeline.Pipeline(
        [sample_step, feature_count_step, stream_step])

    # Per-node steps, constructed in the order they appear in the pipeline.
    params_step = matrix_features.AxisAlignedParamsStep_f32i32(
        feature_count_step.OutputBufferId, buffers.X_FLOAT_DATA)
    linear_feature = matrix_features.LinearFloat32MatrixFeature_f32i32(
        params_step.FloatParamsBufferId,
        params_step.IntParamsBufferId,
        sample_step.IndicesBufferId,
        buffers.X_FLOAT_DATA,
    )
    extract_step = (
        matrix_features.LinearFloat32MatrixFeatureExtractorStep_f32i32(
            linear_feature, ordering))
    sliced_classes = pipeline.SliceInt32VectorBufferStep_i32(
        buffers.CLASS_LABELS, sample_step.IndicesBufferId)
    sliced_weights = pipeline.SliceFloat32VectorBufferStep_i32(
        sample_step.WeightsBufferId, sample_step.IndicesBufferId)
    sliced_streams = pipeline.SliceInt32VectorBufferStep_i32(
        stream_step.StreamTypeBufferId, sample_step.IndicesBufferId)
    splitpoint_step = splitpoints.RandomSplitpointsStep_f32i32(
        extract_step.FeatureValuesBufferId,
        splitpoint_count,
        ordering,
        sliced_streams.SlicedBufferId,
    )
    class_stats = classification.ClassStatsUpdater_f32i32(
        sliced_weights.SlicedBufferId,
        sliced_classes.SlicedBufferId,
        class_count,
    )
    stats_step = classification.ClassStatsUpdaterTwoStreamStep_f32i32(
        splitpoint_step.SplitpointsBufferId,
        splitpoint_step.SplitpointsCountsBufferId,
        sliced_streams.SlicedBufferId,
        extract_step.FeatureValuesBufferId,
        ordering,
        class_stats,
    )
    impurity_calc = classification.ClassInfoGainSplitpointsImpurity_f32i32(
        splitpoint_step.SplitpointsCountsBufferId,
        stats_step.ChildCountsImpurityBufferId,
        stats_step.LeftImpurityStatsBufferId,
        stats_step.RightImpurityStatsBufferId,
    )
    per_node = pipeline.Pipeline(
        [
            params_step,
            extract_step,
            sliced_classes,
            sliced_weights,
            sliced_streams,
            splitpoint_step,
            stats_step,
            impurity_calc,
        ]
    )

    # Split selection.
    buffers_for_selector = splitpoints.SplitSelectorBuffers(
        impurity_calc.ImpurityBufferId,
        splitpoint_step.SplitpointsBufferId,
        splitpoint_step.SplitpointsCountsBufferId,
        stats_step.ChildCountsEstimatorBufferId,
        stats_step.LeftEstimatorStatsBufferId,
        stats_step.RightEstimatorStatsBufferId,
        params_step.FloatParamsBufferId,
        params_step.IntParamsBufferId,
        extract_step.FeatureValuesBufferId,
        ordering,
        extract_step,
    )
    should_criteria = create_should_split_criteria(**kwargs)
    estimator_finalizer = classification.ClassEstimatorFinalizer_f32()
    indices_splitter = splitpoints.SplitIndices_f32i32(
        sample_step.IndicesBufferId)
    selector = splitpoints.SplitSelector_f32i32(
        [buffers_for_selector],
        should_criteria,
        estimator_finalizer,
        indices_splitter,
    )

    learner_args = (try_criteria, per_tree, per_node, selector)
    if kwargs.get("tree_order") == "breadth_first":
        single_tree_learner = learn.BreadthFirstTreeLearner_f32i32(
            *learner_args)
    else:
        single_tree_learner = learn.DepthFirstTreeLearner_f32i32(
            *learner_args)

    return learn.ParallelForestLearner(
        single_tree_learner, tree_count, class_count, job_count)
def create_biau2008_regression_scaled_depth_delta_learner_32f(**kwargs):
    """Assemble a parallel forest learner around ``Biau2008TreeLearner_f32i32``.

    Pixel-pair offsets come from ``PixelPairGaussianOffsetsStep_f32i32``
    (the number-of-features buffer is fixed at 1), feature values from
    ``ScaledDepthDeltaFeatureExtractorStep_f32i32`` and splitpoints from
    ``SumOfVarianceRandomGapSplitpointsStep_f32i32``.

    Keyword arguments:
        ux, uy, vx, vy: required; each is converted with ``float`` and
            forwarded to the pixel-pair offset step.
        y: required; must provide ``GetM()`` and ``GetN()``.  ``GetN()`` is
            passed on as the dimension of y.
        number_of_trees: default 10.
        number_of_leaves: default ``y.GetM() / 5 + 1``.
        number_of_split_retries: default 10.
        feature_ordering: default ``pipeline.FEATURES_BY_DATAPOINTS``.
        number_of_jobs: default 1.

    Returns:
        The ``learn.ParallelForestLearner`` wrapping the tree learner.
    """
    # --- keyword parsing -------------------------------------------------
    offset_ux, offset_uy, offset_vx, offset_vy = [
        float(kwargs.get(name)) for name in ('ux', 'uy', 'vx', 'vy')]
    n_trees = int(kwargs.get('number_of_trees', 10))
    targets = kwargs['y']
    n_leaves = int(kwargs.get('number_of_leaves', targets.GetM() / 5 + 1))
    n_split_retries = int(kwargs.get('number_of_split_retries', 10))
    ordering = int(
        kwargs.get('feature_ordering', pipeline.FEATURES_BY_DATAPOINTS))
    n_jobs = int(kwargs.get('number_of_jobs', 1))
    y_dim = int(targets.GetN())

    try_split_criteria = try_split.MinNodeSizeCriteria(2)

    # --- per-tree steps --------------------------------------------------
    sampler = pipeline.AllSamplesStep_i32f32i32(buffers.PIXEL_INDICES)
    feature_count_buffer = buffers.as_vector_buffer(
        np.array([1], dtype=np.int32))
    set_feature_count = pipeline.SetInt32VectorBufferStep(
        feature_count_buffer, pipeline.WHEN_NEW)
    tree_steps = pipeline.Pipeline([sampler, set_feature_count])

    # --- per-node steps --------------------------------------------------
    offsets_step = image_features.PixelPairGaussianOffsetsStep_f32i32(
        set_feature_count.OutputBufferId,
        offset_ux, offset_uy, offset_vx, offset_vy)
    depth_feature = image_features.ScaledDepthDeltaFeature_f32i32(
        offsets_step.FloatParamsBufferId,
        offsets_step.IntParamsBufferId,
        sampler.IndicesBufferId,
        buffers.PIXEL_INDICES,
        buffers.DEPTH_IMAGES,
        buffers.OFFSET_SCALES)
    extractor = image_features.ScaledDepthDeltaFeatureExtractorStep_f32i32(
        depth_feature, ordering)
    ys_slice = pipeline.SliceFloat32MatrixBufferStep_i32(
        buffers.YS, sampler.IndicesBufferId)
    weights_slice = pipeline.SliceFloat32VectorBufferStep_i32(
        sampler.WeightsBufferId, sampler.IndicesBufferId)
    variance_walker = regression.SumOfVarianceWalker_f32i32(
        weights_slice.SlicedBufferId, ys_slice.SlicedBufferId, y_dim)
    splitpoint_step = regression.SumOfVarianceRandomGapSplitpointsStep_f32i32(
        variance_walker, extractor.FeatureValuesBufferId, ordering)
    node_steps = pipeline.Pipeline([
        offsets_step,
        extractor,
        ys_slice,
        weights_slice,
        splitpoint_step,
    ])

    # --- split selection -------------------------------------------------
    selector_buffers = splitpoints.SplitSelectorBuffers(
        splitpoint_step.ImpurityBufferId,
        splitpoint_step.SplitpointBufferId,
        splitpoint_step.SplitpointCountsBufferId,
        splitpoint_step.ChildCountsBufferId,
        splitpoint_step.LeftYsBufferId,
        splitpoint_step.RightYsBufferId,
        offsets_step.FloatParamsBufferId,
        offsets_step.IntParamsBufferId,
        extractor.FeatureValuesBufferId,
        ordering,
        extractor)
    should_split_criteria = should_split.MinChildSizeCriteria(1)
    estimator_finalizer = regression.MeanVarianceEstimatorFinalizer_f32()
    index_splitter = splitpoints.SplitIndices_f32i32(sampler.IndicesBufferId)
    selector = splitpoints.SplitSelector_f32i32(
        [selector_buffers],
        should_split_criteria,
        estimator_finalizer,
        index_splitter)

    # --- learners --------------------------------------------------------
    tree_learner = learn.Biau2008TreeLearner_f32i32(
        try_split_criteria,
        tree_steps,
        node_steps,
        selector,
        n_leaves,
        n_split_retries)
    return learn.ParallelForestLearner(tree_learner, n_trees, y_dim, n_jobs)
def create_regression_axis_aligned_matrix_learner_32f(**kwargs):
    """Assemble a parallel regression forest learner on axis-aligned features.

    Trees are grown by ``BreadthFirstTreeLearner_f32i32``; splitpoints come
    from ``SumOfVarianceBestSplitpointsWalkingSortedStep_f32i32`` applied to
    the values of ``LinearFloat32MatrixFeatureExtractorStep_f32i32``.

    Keyword arguments:
        x: required; ``x.shape[1]`` feeds the default feature count.
        y: required; ``y.shape[0]`` feeds the default leaf count and
            ``y.shape[1]`` is passed on as the dimension of y.
        number_of_trees: default 10.
        number_of_leaves: default ``y.shape[0] / 5 + 1``.
        number_of_features: default ``x.shape[1] / 3 + 0.5`` (then ``int``).
        feature_ordering: default ``pipeline.FEATURES_BY_DATAPOINTS``.
        number_of_jobs: default 1.
        bootstrap: when truthy, ``BootstrapSamplesStep_f32f32i32`` is used
            instead of ``AllSamplesStep_f32f32i32``.

    The full keyword set is also forwarded to ``create_try_split_criteria``
    and ``create_should_split_criteria``.

    Returns:
        The configured ``learn.ParallelForestLearner``.
    """
    # --- keyword parsing -------------------------------------------------
    n_trees = int(kwargs.get('number_of_trees', 10))
    default_leaves = kwargs['y'].shape[0] / 5 + 1
    n_leaves = int(kwargs.get('number_of_leaves', default_leaves))
    # An earlier, disabled default used np.sqrt of the column count instead.
    default_features = (kwargs['x'].shape[1]) / 3 + 0.5
    n_features = int(kwargs.get('number_of_features', default_features))
    ordering = int(
        kwargs.get('feature_ordering', pipeline.FEATURES_BY_DATAPOINTS))
    n_jobs = int(kwargs.get('number_of_jobs', 1))
    y_dim = int(kwargs['y'].shape[1])

    try_split_criteria = create_try_split_criteria(**kwargs)

    # --- per-tree steps --------------------------------------------------
    sampler_factory = (pipeline.BootstrapSamplesStep_f32f32i32
                       if kwargs.get('bootstrap')
                       else pipeline.AllSamplesStep_f32f32i32)
    sampler = sampler_factory(buffers.X_FLOAT_DATA)
    feature_count_buffer = buffers.as_vector_buffer(
        np.array([n_features], dtype=np.int32))
    set_feature_count = pipeline.SetInt32VectorBufferStep(
        feature_count_buffer, pipeline.WHEN_NEW)
    tree_steps = pipeline.Pipeline([sampler, set_feature_count])

    # --- per-node steps --------------------------------------------------
    params_step = matrix_features.AxisAlignedParamsStep_f32i32(
        set_feature_count.OutputBufferId, buffers.X_FLOAT_DATA)
    linear_feature = matrix_features.LinearFloat32MatrixFeature_f32i32(
        params_step.FloatParamsBufferId,
        params_step.IntParamsBufferId,
        sampler.IndicesBufferId,
        buffers.X_FLOAT_DATA)
    extractor = matrix_features.LinearFloat32MatrixFeatureExtractorStep_f32i32(
        linear_feature, ordering)
    ys_slice = pipeline.SliceFloat32MatrixBufferStep_i32(
        buffers.YS, sampler.IndicesBufferId)
    weights_slice = pipeline.SliceFloat32VectorBufferStep_i32(
        sampler.WeightsBufferId, sampler.IndicesBufferId)
    variance_walker = regression.SumOfVarianceWalker_f32i32(
        weights_slice.SlicedBufferId, ys_slice.SlicedBufferId, y_dim)
    splitpoint_step = (
        regression.SumOfVarianceBestSplitpointsWalkingSortedStep_f32i32(
            variance_walker, extractor.FeatureValuesBufferId, ordering))
    node_steps = pipeline.Pipeline([
        params_step,
        extractor,
        ys_slice,
        weights_slice,
        splitpoint_step,
    ])

    # --- split selection -------------------------------------------------
    selector_buffers = splitpoints.SplitSelectorBuffers(
        splitpoint_step.ImpurityBufferId,
        splitpoint_step.SplitpointBufferId,
        splitpoint_step.SplitpointCountsBufferId,
        splitpoint_step.ChildCountsBufferId,
        splitpoint_step.LeftYsBufferId,
        splitpoint_step.RightYsBufferId,
        params_step.FloatParamsBufferId,
        params_step.IntParamsBufferId,
        extractor.FeatureValuesBufferId,
        ordering,
        extractor)
    should_split_criteria = create_should_split_criteria(**kwargs)
    estimator_finalizer = regression.MeanVarianceEstimatorFinalizer_f32()
    index_splitter = splitpoints.SplitIndices_f32i32(sampler.IndicesBufferId)
    selector = splitpoints.SplitSelector_f32i32(
        [selector_buffers],
        should_split_criteria,
        estimator_finalizer,
        index_splitter)

    # --- learners --------------------------------------------------------
    tree_learner = learn.BreadthFirstTreeLearner_f32i32(
        try_split_criteria, tree_steps, node_steps, selector, n_leaves)
    return learn.ParallelForestLearner(tree_learner, n_trees, y_dim, n_jobs)
def create_biau2012_regression_axis_aligned_matrix_learner_32f(**kwargs):
    """Assemble a two-stream, range-midpoint regression forest learner.

    Splitpoints come from ``RangeMidpointStep_f32i32`` over a fixed feature
    range of ``[-1, 1]``; statistics are gathered by
    ``SumOfVarianceTwoStreamStep_f32i32`` and trees are grown by
    ``BreadthFirstTreeLearner_f32i32``.

    Keyword arguments:
        x: required; every entry must satisfy ``abs(x) <= 1`` because the
            feature range is hard-coded to ``[-1, 1]``.  ``x.shape[1]`` feeds
            the default feature count.
        y: required; ``y.shape[0]`` feeds the default leaf count and
            ``y.shape[1]`` is passed on as the dimension of y.
        number_of_trees: default 10.
        number_of_leaves: default ``y.shape[0] / 5 + 1``.
        number_of_features: default ``x.shape[1] / 3 + 0.5`` (then ``int``).
        feature_ordering: default ``pipeline.FEATURES_BY_DATAPOINTS``.
        number_of_jobs: default 1.
        probability_of_impurity_stream: default 0.5; forwarded to
            ``AssignStreamStep_f32i32``.

    The full keyword set is also forwarded to ``create_try_split_criteria``
    and ``no_split_criteria``.

    Returns:
        The configured ``learn.ParallelForestLearner``.

    Raises:
        AssertionError: if ``x`` is not contained in the ``[-1, 1]``
            hypercube.  Unlike a bare ``assert`` this check is still
            performed when Python runs with ``-O``.
    """
    # --- keyword parsing -------------------------------------------------
    number_of_trees = int(kwargs.get('number_of_trees', 10))
    number_of_leaves = int(
        kwargs.get('number_of_leaves', kwargs['y'].shape[0] / 5 + 1))
    # An earlier, disabled default used np.sqrt of the column count instead.
    number_of_features = int(
        kwargs.get('number_of_features', (kwargs['x'].shape[1]) / 3 + 0.5))
    feature_ordering = int(
        kwargs.get('feature_ordering', pipeline.FEATURES_BY_DATAPOINTS))
    number_of_jobs = int(kwargs.get('number_of_jobs', 1))
    dimension_of_y = int(kwargs['y'].shape[1])
    probability_of_impurity_stream = float(
        kwargs.get('probability_of_impurity_stream', 0.5))

    try_split_criteria = create_try_split_criteria(**kwargs)

    # --- per-forest steps ------------------------------------------------
    sample_data_step = pipeline.AllSamplesStep_f32f32i32(buffers.X_FLOAT_DATA)
    number_of_features_buffer = buffers.as_vector_buffer(
        np.array([number_of_features], dtype=np.int32))
    set_number_features_step = pipeline.SetInt32VectorBufferStep(
        number_of_features_buffer, pipeline.WHEN_NEW)

    # The feature range below is fixed to [-1, 1], so the data must already
    # be scaled into that hypercube.  This is an explicit check rather than
    # ``assert`` so it is not stripped under ``python -O``; AssertionError is
    # kept so callers see the same exception type as before.
    if not (np.max(np.abs(kwargs['x'])) <= 1.00):
        raise AssertionError(
            "x must be scaled into the [-1, 1] hypercube before training")
    feature_range_buffer = buffers.as_vector_buffer(
        np.array([-1, 1], dtype=np.float32))
    set_feature_range_buffer_step = pipeline.SetFloat32VectorBufferStep(
        feature_range_buffer, pipeline.WHEN_NEW)
    assign_stream_step = splitpoints.AssignStreamStep_f32i32(
        sample_data_step.WeightsBufferId,
        probability_of_impurity_stream,
        False)
    forest_steps_pipeline = pipeline.Pipeline([
        sample_data_step,
        set_number_features_step,
        set_feature_range_buffer_step,
        assign_stream_step,
    ])
    # Sampling and buffer setup all run in the forest pipeline above, so the
    # per-tree pipeline is intentionally empty.
    tree_steps_pipeline = pipeline.Pipeline([])

    # --- per-node steps --------------------------------------------------
    feature_params_step = matrix_features.AxisAlignedParamsStep_f32i32(
        set_number_features_step.OutputBufferId, buffers.X_FLOAT_DATA)
    matrix_feature = matrix_features.LinearFloat32MatrixFeature_f32i32(
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        sample_data_step.IndicesBufferId,
        buffers.X_FLOAT_DATA)
    matrix_feature_extractor_step = (
        matrix_features.LinearFloat32MatrixFeatureExtractorStep_f32i32(
            matrix_feature, feature_ordering))
    slice_ys_step = pipeline.SliceFloat32MatrixBufferStep_i32(
        buffers.YS, sample_data_step.IndicesBufferId)
    slice_weights_step = pipeline.SliceFloat32VectorBufferStep_i32(
        sample_data_step.WeightsBufferId, sample_data_step.IndicesBufferId)
    slice_assign_stream_step = pipeline.SliceInt32VectorBufferStep_i32(
        assign_stream_step.StreamTypeBufferId,
        sample_data_step.IndicesBufferId)
    quantized_feature_equal = pipeline.FeatureEqualQuantized_f32i32(1.0)
    midpoint_step = splitpoints.RangeMidpointStep_f32i32(
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        set_feature_range_buffer_step.OutputBufferId,
        quantized_feature_equal)
    mean_variance_stats_updater = regression.MeanVarianceStatsUpdater_f32i32(
        slice_weights_step.SlicedBufferId,
        slice_ys_step.SlicedBufferId,
        dimension_of_y)
    two_stream_split_stats_step = regression.SumOfVarianceTwoStreamStep_f32i32(
        midpoint_step.SplitpointsBufferId,
        midpoint_step.SplitpointsCountsBufferId,
        slice_assign_stream_step.SlicedBufferId,
        matrix_feature_extractor_step.FeatureValuesBufferId,
        feature_ordering,
        mean_variance_stats_updater)
    impurity_step = regression.SumOfVarianceSplitpointsImpurity_f32i32(
        midpoint_step.SplitpointsCountsBufferId,
        two_stream_split_stats_step.ChildCountsImpurityBufferId,
        two_stream_split_stats_step.LeftImpurityStatsBufferId,
        two_stream_split_stats_step.RightImpurityStatsBufferId)
    node_steps_pipeline = pipeline.Pipeline([
        feature_params_step,
        matrix_feature_extractor_step,
        slice_ys_step,
        slice_weights_step,
        slice_assign_stream_step,
        midpoint_step,
        two_stream_split_stats_step,
        impurity_step,
    ])

    # --- split selection -------------------------------------------------
    split_buffers = splitpoints.SplitSelectorBuffers(
        impurity_step.ImpurityBufferId,
        midpoint_step.SplitpointsBufferId,
        midpoint_step.SplitpointsCountsBufferId,
        two_stream_split_stats_step.ChildCountsEstimatorBufferId,
        two_stream_split_stats_step.LeftEstimatorStatsBufferId,
        two_stream_split_stats_step.RightEstimatorStatsBufferId,
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        matrix_feature_extractor_step.FeatureValuesBufferId,
        feature_ordering,
        matrix_feature_extractor_step)
    should_split_criteria = no_split_criteria(**kwargs)
    finalizer = regression.MeanVarianceEstimatorFinalizer_f32()
    split_indices = splitpoints.SplitIndices_f32i32(
        sample_data_step.IndicesBufferId)
    split_midpoint_ranges = splitpoints.SplitBuffersFeatureRange_f32i32(
        midpoint_step.PastFloatParamsBufferId,
        midpoint_step.PastIntParamsBufferId,
        midpoint_step.PastRangesBufferId,
        set_feature_range_buffer_step.OutputBufferId,
        quantized_feature_equal)
    split_steps = splitpoints.SplitBuffersList(
        [split_indices, split_midpoint_ranges])
    split_selector = splitpoints.SplitSelector_f32i32(
        [split_buffers], should_split_criteria, finalizer, split_steps)

    # --- learners --------------------------------------------------------
    tree_learner = learn.BreadthFirstTreeLearner_f32i32(
        try_split_criteria,
        tree_steps_pipeline,
        node_steps_pipeline,
        split_selector,
        number_of_leaves)
    forest_learner = learn.ParallelForestLearner(
        tree_learner,
        forest_steps_pipeline,
        number_of_trees,
        dimension_of_y,
        number_of_jobs)
    return forest_learner
# Load single pose depth and class labels
depths = kinect_utils.load_depth_from_exr(
    "%s%s.exr" % (args.pose_files_input_path, pose_filename))
labels = kinect_utils.load_labels_from_png(
    "%s%s.png" % (args.pose_files_input_path, pose_filename))
pixel_indices, pixel_labels = kinect_utils.sample_pixels_from_image(
    labels[0, :, :], config.number_of_pixels_per_image)

# Randomly sample pixels and offset scales
(number_of_datapoints, _) = pixel_indices.shape
offset_scales = np.array(
    np.random.uniform(0.8, 1.2, (number_of_datapoints, 2)),
    dtype=np.float32)
datapoint_indices = np.array(np.arange(number_of_datapoints), dtype=np.int32)

# Package buffers for learner
bufferCollection = buffers.BufferCollection()
bufferCollection.AddFloat32Tensor3Buffer(
    buffers.DEPTH_IMAGES, buffers.as_tensor_buffer(depths))
bufferCollection.AddFloat32MatrixBuffer(
    buffers.OFFSET_SCALES, buffers.as_matrix_buffer(offset_scales))
bufferCollection.AddInt32MatrixBuffer(
    buffers.PIXEL_INDICES, buffers.as_matrix_buffer(pixel_indices))
bufferCollection.AddInt32VectorBuffer(
    buffers.CLASS_LABELS, buffers.as_vector_buffer(pixel_labels))

# Update learner
online_learner.Train(bufferCollection, buffers.Int32Vector(datapoint_indices))

# Pickle the forest every N-th image; N is currently 1.
if (i + 1) % 1 == 0:
    forest_pickle_filename = "%s/forest-%d-%d.pkl" % (
        online_run_folder, pass_id, i + 1)
    # Use a context manager so the gzip stream is flushed and closed even if
    # pickling raises; the original left the file object unclosed.
    with gzip.open(forest_pickle_filename, 'wb') as forest_pickle_file:
        pickle.dump(online_learner.GetForest(), forest_pickle_file)

# Print forest stats
# NOTE(review): the original layout was lost; confirm whether this printout
# belongs under the periodic guard above (equivalent while the period is 1).
forestStats = online_learner.GetForest().GetForestStats()
forestStats.Print()
offset_scales = np.array(
    np.random.uniform(0.8, 1.2, (number_of_datapoints, 2)),
    dtype=np.float32)
datapoint_indices = np.array(np.arange(number_of_datapoints), dtype=np.int32)

# Package buffers for learner
bufferCollection = buffers.BufferCollection()
bufferCollection.AddFloat32Tensor3Buffer(
    buffers.DEPTH_IMAGES, buffers.as_tensor_buffer(depths))
bufferCollection.AddFloat32MatrixBuffer(
    buffers.OFFSET_SCALES, buffers.as_matrix_buffer(offset_scales))
bufferCollection.AddInt32MatrixBuffer(
    buffers.PIXEL_INDICES, buffers.as_matrix_buffer(pixel_indices))
bufferCollection.AddInt32VectorBuffer(
    buffers.CLASS_LABELS, buffers.as_vector_buffer(pixel_labels))

# Update learner
online_learner.Train(bufferCollection, buffers.Int32Vector(datapoint_indices))

# Pickle the forest every N-th image; N is currently 1.
if (i + 1) % 1 == 0:
    forest_pickle_filename = "%s/forest-%d-%d.pkl" % (
        online_run_folder, pass_id, i + 1)
    # Use a context manager so the gzip stream is flushed and closed even if
    # pickling raises; the original left the file object unclosed.
    with gzip.open(forest_pickle_filename, 'wb') as forest_pickle_file:
        pickle.dump(online_learner.GetForest(), forest_pickle_file)

# Print forest stats
# NOTE(review): the original layout was lost; confirm whether this statement
# belongs under the periodic guard above (equivalent while the period is 1).
forestStats = online_learner.GetForest().GetForestStats()
def create_biau2012_regression_scaled_depth_delta_learner_32f(**kwargs):
    """Assemble a two-stream, range-midpoint forest learner on depth deltas.

    Pixel-pair offsets come from ``PixelPairGaussianOffsetsStep_f32i32``,
    splitpoints from ``RangeMidpointStep_f32i32`` over a fixed feature range
    of ``[-6, 6]`` and statistics from ``SumOfVarianceTwoStreamStep_f32i32``.
    Trees are grown by ``BreadthFirstTreeLearner_f32i32``.

    Keyword arguments:
        ux, uy, vx, vy: required; each is converted with ``float`` and
            forwarded to the pixel-pair offset step.
        y: required; must provide ``GetM()`` and ``GetN()``.  ``GetN()`` is
            passed on as the dimension of y.
        number_of_trees: default 10.
        number_of_leaves: default ``y.GetM() / 5 + 1``.
        number_of_features: default 1.
        feature_ordering: default ``pipeline.FEATURES_BY_DATAPOINTS``.
        number_of_jobs: default 1.
        probability_of_impurity_stream: default 0.5; forwarded to
            ``AssignStreamStep_f32i32``.

    The full keyword set is also forwarded to ``create_try_split_criteria``
    and ``no_split_criteria``.

    Returns:
        The configured ``learn.ParallelForestLearner``.
    """
    # --- keyword parsing -------------------------------------------------
    offset_ux, offset_uy, offset_vx, offset_vy = [
        float(kwargs.get(name)) for name in ('ux', 'uy', 'vx', 'vy')]
    n_trees = int(kwargs.get('number_of_trees', 10))
    targets = kwargs['y']
    n_leaves = int(kwargs.get('number_of_leaves', targets.GetM() / 5 + 1))
    n_features = int(kwargs.get('number_of_features', 1))
    ordering = int(
        kwargs.get('feature_ordering', pipeline.FEATURES_BY_DATAPOINTS))
    n_jobs = int(kwargs.get('number_of_jobs', 1))
    y_dim = int(targets.GetN())
    impurity_stream_probability = float(
        kwargs.get('probability_of_impurity_stream', 0.5))

    try_split_criteria = create_try_split_criteria(**kwargs)

    # --- per-forest steps ------------------------------------------------
    sampler = pipeline.AllSamplesStep_i32f32i32(buffers.PIXEL_INDICES)
    feature_count_buffer = buffers.as_vector_buffer(
        np.array([n_features], dtype=np.int32))
    set_feature_count = pipeline.SetInt32VectorBufferStep(
        feature_count_buffer, pipeline.WHEN_NEW)
    range_buffer = buffers.as_vector_buffer(
        np.array([-6, 6], dtype=np.float32))
    set_feature_range = pipeline.SetFloat32VectorBufferStep(
        range_buffer, pipeline.WHEN_NEW)
    stream_assigner = splitpoints.AssignStreamStep_f32i32(
        sampler.WeightsBufferId, impurity_stream_probability, False)
    forest_steps = pipeline.Pipeline([
        sampler,
        set_feature_count,
        set_feature_range,
        stream_assigner,
    ])
    # Everything per-tree already lives in the forest pipeline above.
    tree_steps = pipeline.Pipeline([])

    # --- per-node steps --------------------------------------------------
    offsets_step = image_features.PixelPairGaussianOffsetsStep_f32i32(
        set_feature_count.OutputBufferId,
        offset_ux, offset_uy, offset_vx, offset_vy)
    depth_feature = image_features.ScaledDepthDeltaFeature_f32i32(
        offsets_step.FloatParamsBufferId,
        offsets_step.IntParamsBufferId,
        sampler.IndicesBufferId,
        buffers.PIXEL_INDICES,
        buffers.DEPTH_IMAGES,
        buffers.OFFSET_SCALES)
    extractor = image_features.ScaledDepthDeltaFeatureExtractorStep_f32i32(
        depth_feature, ordering)
    ys_slice = pipeline.SliceFloat32MatrixBufferStep_i32(
        buffers.YS, sampler.IndicesBufferId)
    weights_slice = pipeline.SliceFloat32VectorBufferStep_i32(
        sampler.WeightsBufferId, sampler.IndicesBufferId)
    stream_slice = pipeline.SliceInt32VectorBufferStep_i32(
        stream_assigner.StreamTypeBufferId, sampler.IndicesBufferId)
    feature_equal = pipeline.FeatureEqualQuantized_f32i32(1.0)
    midpoints = splitpoints.RangeMidpointStep_f32i32(
        offsets_step.FloatParamsBufferId,
        offsets_step.IntParamsBufferId,
        set_feature_range.OutputBufferId,
        feature_equal)
    stats_updater = regression.MeanVarianceStatsUpdater_f32i32(
        weights_slice.SlicedBufferId, ys_slice.SlicedBufferId, y_dim)
    two_stream_stats = regression.SumOfVarianceTwoStreamStep_f32i32(
        midpoints.SplitpointsBufferId,
        midpoints.SplitpointsCountsBufferId,
        stream_slice.SlicedBufferId,
        extractor.FeatureValuesBufferId,
        ordering,
        stats_updater)
    impurity = regression.SumOfVarianceSplitpointsImpurity_f32i32(
        midpoints.SplitpointsCountsBufferId,
        two_stream_stats.ChildCountsImpurityBufferId,
        two_stream_stats.LeftImpurityStatsBufferId,
        two_stream_stats.RightImpurityStatsBufferId)
    node_steps = pipeline.Pipeline([
        offsets_step,
        extractor,
        ys_slice,
        weights_slice,
        stream_slice,
        midpoints,
        two_stream_stats,
        impurity,
    ])

    # --- split selection -------------------------------------------------
    selector_buffers = splitpoints.SplitSelectorBuffers(
        impurity.ImpurityBufferId,
        midpoints.SplitpointsBufferId,
        midpoints.SplitpointsCountsBufferId,
        two_stream_stats.ChildCountsEstimatorBufferId,
        two_stream_stats.LeftEstimatorStatsBufferId,
        two_stream_stats.RightEstimatorStatsBufferId,
        offsets_step.FloatParamsBufferId,
        offsets_step.IntParamsBufferId,
        extractor.FeatureValuesBufferId,
        ordering,
        extractor)
    should_split_criteria = no_split_criteria(**kwargs)
    estimator_finalizer = regression.MeanVarianceEstimatorFinalizer_f32()
    index_splitter = splitpoints.SplitIndices_f32i32(sampler.IndicesBufferId)
    range_splitter = splitpoints.SplitBuffersFeatureRange_f32i32(
        midpoints.PastFloatParamsBufferId,
        midpoints.PastIntParamsBufferId,
        midpoints.PastRangesBufferId,
        set_feature_range.OutputBufferId,
        feature_equal)
    splitters = splitpoints.SplitBuffersList([index_splitter, range_splitter])
    selector = splitpoints.SplitSelector_f32i32(
        [selector_buffers],
        should_split_criteria,
        estimator_finalizer,
        splitters)

    # --- learners --------------------------------------------------------
    tree_learner = learn.BreadthFirstTreeLearner_f32i32(
        try_split_criteria, tree_steps, node_steps, selector, n_leaves)
    return learn.ParallelForestLearner(
        tree_learner, forest_steps, n_trees, y_dim, n_jobs)
def create_online_axis_aligned_matrix_one_stream_learner_32f(**kwargs):
    """Assemble an online, one-stream classification forest on matrix data.

    Features are axis-aligned (``AxisAlignedParamsStep_f32i32``), splitpoints
    come from ``RandomSplitpointsStep_f32i32`` and split statistics from
    ``ClassStatsUpdaterOneStreamStep_f32i32``.  The pieces are handed to
    ``learn.OnlineForestMatrixClassLearner_f32i32``.

    Keyword arguments:
        x: required; ``np.sqrt(x.shape[1])`` is the default feature count.
        classes: required; ``np.max(classes) + 1`` is the class count.
        number_of_trees: default 10.
        number_of_features: default ``np.sqrt(x.shape[1])`` (then ``int``).
        feature_ordering: default ``pipeline.FEATURES_BY_DATAPOINTS``.
        number_of_splitpoints: default 1.
        max_frontier_size: default 10000000.
        impurity_update_period: default 1.
        bootstrap: when truthy, ``BootstrapSamplesStep_f32f32i32`` is used.
        poisson_sample: when present (and ``bootstrap`` is not truthy), its
            ``float`` value is passed to ``PoissonSamplesStep_f32i32``.

    The full keyword set is also forwarded to ``create_try_split_criteria``
    and ``create_should_split_criteria``.

    Returns:
        The configured ``learn.OnlineForestMatrixClassLearner_f32i32``.
    """
    # --- keyword parsing -------------------------------------------------
    n_trees = int(kwargs.get('number_of_trees', 10))
    default_features = np.sqrt(kwargs['x'].shape[1])
    n_features = int(kwargs.get('number_of_features', default_features))
    ordering = int(
        kwargs.get('feature_ordering', pipeline.FEATURES_BY_DATAPOINTS))
    n_splitpoints = int(kwargs.get('number_of_splitpoints', 1))
    n_classes = int(np.max(kwargs['classes']) + 1)
    frontier_limit = int(kwargs.get('max_frontier_size', 10000000))
    impurity_period = int(kwargs.get('impurity_update_period', 1))

    try_split_criteria = create_try_split_criteria(**kwargs)

    # --- per-tree steps --------------------------------------------------
    if kwargs.get('bootstrap'):
        sampler = pipeline.BootstrapSamplesStep_f32f32i32(
            buffers.X_FLOAT_DATA)
    elif 'poisson_sample' in kwargs:
        poisson_mean = float(kwargs.get('poisson_sample'))
        sampler = pipeline.PoissonSamplesStep_f32i32(
            buffers.X_FLOAT_DATA, poisson_mean)
    else:
        sampler = pipeline.AllSamplesStep_f32f32i32(buffers.X_FLOAT_DATA)
    feature_count_buffer = buffers.as_vector_buffer(
        np.array([n_features], dtype=np.int32))
    set_feature_count = pipeline.SetInt32VectorBufferStep(
        feature_count_buffer, pipeline.WHEN_NEW)
    tree_steps = pipeline.Pipeline([sampler, set_feature_count])

    # --- per-node steps --------------------------------------------------
    params_step = matrix_features.AxisAlignedParamsStep_f32i32(
        set_feature_count.OutputBufferId, buffers.X_FLOAT_DATA)
    linear_feature = matrix_features.LinearFloat32MatrixFeature_f32i32(
        params_step.FloatParamsBufferId,
        params_step.IntParamsBufferId,
        sampler.IndicesBufferId,
        buffers.X_FLOAT_DATA)
    extractor = matrix_features.LinearFloat32MatrixFeatureExtractorStep_f32i32(
        linear_feature, ordering)
    classes_slice = pipeline.SliceInt32VectorBufferStep_i32(
        buffers.CLASS_LABELS, sampler.IndicesBufferId)
    weights_slice = pipeline.SliceFloat32VectorBufferStep_i32(
        sampler.WeightsBufferId, sampler.IndicesBufferId)
    random_splitpoints = splitpoints.RandomSplitpointsStep_f32i32(
        extractor.FeatureValuesBufferId, n_splitpoints, ordering)
    stats_updater = classification.ClassStatsUpdater_f32i32(
        weights_slice.SlicedBufferId,
        classes_slice.SlicedBufferId,
        n_classes)
    one_stream_stats = classification.ClassStatsUpdaterOneStreamStep_f32i32(
        random_splitpoints.SplitpointsBufferId,
        random_splitpoints.SplitpointsCountsBufferId,
        extractor.FeatureValuesBufferId,
        ordering,
        stats_updater)
    impurity = classification.ClassInfoGainSplitpointsImpurity_f32i32(
        random_splitpoints.SplitpointsCountsBufferId,
        one_stream_stats.ChildCountsBufferId,
        one_stream_stats.LeftStatsBufferId,
        one_stream_stats.RightStatsBufferId)

    # The online learner takes three separate node pipelines.
    init_node_steps = pipeline.Pipeline([params_step])
    update_stats_node_steps = pipeline.Pipeline([
        extractor,
        classes_slice,
        weights_slice,
        random_splitpoints,
        one_stream_stats,
    ])
    update_impurity_node_steps = pipeline.Pipeline([impurity])

    # --- split selection -------------------------------------------------
    selector_buffers = splitpoints.SplitSelectorBuffers(
        impurity.ImpurityBufferId,
        random_splitpoints.SplitpointsBufferId,
        random_splitpoints.SplitpointsCountsBufferId,
        one_stream_stats.ChildCountsBufferId,
        one_stream_stats.LeftStatsBufferId,
        one_stream_stats.RightStatsBufferId,
        params_step.FloatParamsBufferId,
        params_step.IntParamsBufferId,
        extractor.FeatureValuesBufferId,
        ordering,
        extractor)
    should_split_criteria = create_should_split_criteria(**kwargs)
    estimator_finalizer = classification.ClassEstimatorFinalizer_f32()
    index_splitter = splitpoints.SplitIndices_f32i32(sampler.IndicesBufferId)
    selector = splitpoints.WaitForBestSplitSelector_f32i32(
        [selector_buffers],
        should_split_criteria,
        estimator_finalizer,
        index_splitter)

    # --- prediction-side helpers and learner -----------------------------
    prediction_feature = matrix_features.LinearFloat32MatrixFeature_f32i32(
        sampler.IndicesBufferId, buffers.X_FLOAT_DATA)
    estimator_updater = classification.ClassEstimatorUpdater_f32i32(
        sampler.WeightsBufferId, buffers.CLASS_LABELS, n_classes)
    # NOTE(review): the two literal 5s are passed positionally as in the
    # original; their meaning is not visible here -- confirm against the
    # OnlineForestMatrixClassLearner_f32i32 signature.
    return learn.OnlineForestMatrixClassLearner_f32i32(
        try_split_criteria,
        tree_steps,
        init_node_steps,
        update_stats_node_steps,
        update_impurity_node_steps,
        impurity_period,
        selector,
        frontier_limit,
        n_trees,
        5,
        5,
        n_classes,
        sampler.IndicesBufferId,
        sampler.WeightsBufferId,
        prediction_feature,
        estimator_updater)
def create_biau2012_regression_scaled_depth_delta_learner_32f(**kwargs):
    """Assemble a two-stream, range-midpoint forest learner on depth deltas.

    Pixel-pair offsets come from ``PixelPairGaussianOffsetsStep_f32i32``,
    splitpoints from ``RangeMidpointStep_f32i32`` over a fixed feature range
    of ``[-6, 6]`` and statistics from ``SumOfVarianceTwoStreamStep_f32i32``.
    Trees are grown by ``BreadthFirstTreeLearner_f32i32``.

    Keyword arguments:
        ux, uy, vx, vy: required; each is converted with ``float`` and
            forwarded to the pixel-pair offset step.
        y: required; must provide ``GetM()`` and ``GetN()``.  ``GetN()`` is
            passed on as the dimension of y.
        number_of_trees: default 10.
        number_of_leaves: default ``y.GetM() / 5 + 1``.
        number_of_features: default 1.
        feature_ordering: default ``pipeline.FEATURES_BY_DATAPOINTS``.
        number_of_jobs: default 1.
        probability_of_impurity_stream: default 0.5; forwarded to
            ``AssignStreamStep_f32i32``.

    The full keyword set is also forwarded to ``create_try_split_criteria``
    and ``no_split_criteria``.

    Returns:
        The configured ``learn.ParallelForestLearner``.
    """
    # --- keyword parsing -------------------------------------------------
    offset_ux, offset_uy, offset_vx, offset_vy = [
        float(kwargs.get(name)) for name in ('ux', 'uy', 'vx', 'vy')]
    n_trees = int(kwargs.get('number_of_trees', 10))
    targets = kwargs['y']
    n_leaves = int(kwargs.get('number_of_leaves', targets.GetM() / 5 + 1))
    n_features = int(kwargs.get('number_of_features', 1))
    ordering = int(
        kwargs.get('feature_ordering', pipeline.FEATURES_BY_DATAPOINTS))
    n_jobs = int(kwargs.get('number_of_jobs', 1))
    y_dim = int(targets.GetN())
    impurity_stream_probability = float(
        kwargs.get('probability_of_impurity_stream', 0.5))

    try_split_criteria = create_try_split_criteria(**kwargs)

    # --- per-forest steps ------------------------------------------------
    sampler = pipeline.AllSamplesStep_i32f32i32(buffers.PIXEL_INDICES)
    feature_count_buffer = buffers.as_vector_buffer(
        np.array([n_features], dtype=np.int32))
    set_feature_count = pipeline.SetInt32VectorBufferStep(
        feature_count_buffer, pipeline.WHEN_NEW)
    range_buffer = buffers.as_vector_buffer(
        np.array([-6, 6], dtype=np.float32))
    set_feature_range = pipeline.SetFloat32VectorBufferStep(
        range_buffer, pipeline.WHEN_NEW)
    stream_assigner = splitpoints.AssignStreamStep_f32i32(
        sampler.WeightsBufferId, impurity_stream_probability, False)
    forest_steps = pipeline.Pipeline([
        sampler,
        set_feature_count,
        set_feature_range,
        stream_assigner,
    ])
    # Everything per-tree already lives in the forest pipeline above.
    tree_steps = pipeline.Pipeline([])

    # --- per-node steps --------------------------------------------------
    offsets_step = image_features.PixelPairGaussianOffsetsStep_f32i32(
        set_feature_count.OutputBufferId,
        offset_ux, offset_uy, offset_vx, offset_vy)
    depth_feature = image_features.ScaledDepthDeltaFeature_f32i32(
        offsets_step.FloatParamsBufferId,
        offsets_step.IntParamsBufferId,
        sampler.IndicesBufferId,
        buffers.PIXEL_INDICES,
        buffers.DEPTH_IMAGES,
        buffers.OFFSET_SCALES)
    extractor = image_features.ScaledDepthDeltaFeatureExtractorStep_f32i32(
        depth_feature, ordering)
    ys_slice = pipeline.SliceFloat32MatrixBufferStep_i32(
        buffers.YS, sampler.IndicesBufferId)
    weights_slice = pipeline.SliceFloat32VectorBufferStep_i32(
        sampler.WeightsBufferId, sampler.IndicesBufferId)
    stream_slice = pipeline.SliceInt32VectorBufferStep_i32(
        stream_assigner.StreamTypeBufferId, sampler.IndicesBufferId)
    feature_equal = pipeline.FeatureEqualQuantized_f32i32(1.0)
    midpoints = splitpoints.RangeMidpointStep_f32i32(
        offsets_step.FloatParamsBufferId,
        offsets_step.IntParamsBufferId,
        set_feature_range.OutputBufferId,
        feature_equal)
    stats_updater = regression.MeanVarianceStatsUpdater_f32i32(
        weights_slice.SlicedBufferId, ys_slice.SlicedBufferId, y_dim)
    two_stream_stats = regression.SumOfVarianceTwoStreamStep_f32i32(
        midpoints.SplitpointsBufferId,
        midpoints.SplitpointsCountsBufferId,
        stream_slice.SlicedBufferId,
        extractor.FeatureValuesBufferId,
        ordering,
        stats_updater)
    impurity = regression.SumOfVarianceSplitpointsImpurity_f32i32(
        midpoints.SplitpointsCountsBufferId,
        two_stream_stats.ChildCountsImpurityBufferId,
        two_stream_stats.LeftImpurityStatsBufferId,
        two_stream_stats.RightImpurityStatsBufferId)
    node_steps = pipeline.Pipeline([
        offsets_step,
        extractor,
        ys_slice,
        weights_slice,
        stream_slice,
        midpoints,
        two_stream_stats,
        impurity,
    ])

    # --- split selection -------------------------------------------------
    selector_buffers = splitpoints.SplitSelectorBuffers(
        impurity.ImpurityBufferId,
        midpoints.SplitpointsBufferId,
        midpoints.SplitpointsCountsBufferId,
        two_stream_stats.ChildCountsEstimatorBufferId,
        two_stream_stats.LeftEstimatorStatsBufferId,
        two_stream_stats.RightEstimatorStatsBufferId,
        offsets_step.FloatParamsBufferId,
        offsets_step.IntParamsBufferId,
        extractor.FeatureValuesBufferId,
        ordering,
        extractor)
    should_split_criteria = no_split_criteria(**kwargs)
    estimator_finalizer = regression.MeanVarianceEstimatorFinalizer_f32()
    index_splitter = splitpoints.SplitIndices_f32i32(sampler.IndicesBufferId)
    range_splitter = splitpoints.SplitBuffersFeatureRange_f32i32(
        midpoints.PastFloatParamsBufferId,
        midpoints.PastIntParamsBufferId,
        midpoints.PastRangesBufferId,
        set_feature_range.OutputBufferId,
        feature_equal)
    splitters = splitpoints.SplitBuffersList([index_splitter, range_splitter])
    selector = splitpoints.SplitSelector_f32i32(
        [selector_buffers],
        should_split_criteria,
        estimator_finalizer,
        splitters)

    # --- learners --------------------------------------------------------
    tree_learner = learn.BreadthFirstTreeLearner_f32i32(
        try_split_criteria, tree_steps, node_steps, selector, n_leaves)
    return learn.ParallelForestLearner(
        tree_learner, forest_steps, n_trees, y_dim, n_jobs)
def create_online_scaled_depth_delta_one_stream_learner_32f(**kwargs):
    """Assemble an online, one-stream classification forest on depth deltas.

    Pixel-pair offsets come from ``PixelPairGaussianOffsetsStep_f32i32``,
    splitpoints from ``RandomSplitpointsStep_f32i32`` and split statistics
    from ``ClassStatsUpdaterOneStreamStep_f32i32``.  The pieces are handed to
    ``learn.OnlineForestScaledDepthDeltaClassLearner_f32i32``.

    Keyword arguments:
        classes: required; must provide ``GetMax()``; ``GetMax() + 1`` is the
            class count.
        ux, uy, vx, vy: required; each is converted with ``float`` and
            forwarded to the pixel-pair offset step.
        number_of_trees: default 10.
        number_of_features: default 1.
        feature_ordering: default ``pipeline.FEATURES_BY_DATAPOINTS``.
        number_of_splitpoints: default 1.
        max_frontier_size: default 10000000.
        impurity_update_period: default 1.
        bootstrap: when truthy, ``BootstrapSamplesStep_i32f32i32`` is used.
        poisson_sample: when present (and ``bootstrap`` is not truthy), its
            ``float`` value is passed to ``PoissonSamplesStep_i32i32``.

    The full keyword set is also forwarded to ``create_try_split_criteria``
    and ``create_should_split_criteria``.

    Returns:
        The configured ``learn.OnlineForestScaledDepthDeltaClassLearner_f32i32``.
    """
    # --- keyword parsing -------------------------------------------------
    n_trees = int(kwargs.get('number_of_trees', 10))
    n_features = int(kwargs.get('number_of_features', 1))
    ordering = int(
        kwargs.get('feature_ordering', pipeline.FEATURES_BY_DATAPOINTS))
    n_splitpoints = int(kwargs.get('number_of_splitpoints', 1))
    n_classes = int(kwargs['classes'].GetMax() + 1)
    frontier_limit = int(kwargs.get('max_frontier_size', 10000000))
    impurity_period = int(kwargs.get('impurity_update_period', 1))
    offset_ux, offset_uy, offset_vx, offset_vy = [
        float(kwargs.get(name)) for name in ('ux', 'uy', 'vx', 'vy')]

    try_split_criteria = create_try_split_criteria(**kwargs)

    # --- per-tree steps --------------------------------------------------
    if kwargs.get('bootstrap'):
        sampler = pipeline.BootstrapSamplesStep_i32f32i32(
            buffers.PIXEL_INDICES)
    elif 'poisson_sample' in kwargs:
        poisson_mean = float(kwargs.get('poisson_sample'))
        sampler = pipeline.PoissonSamplesStep_i32i32(
            buffers.PIXEL_INDICES, poisson_mean)
    else:
        sampler = pipeline.AllSamplesStep_i32f32i32(buffers.PIXEL_INDICES)
    feature_count_buffer = buffers.as_vector_buffer(
        np.array([n_features], dtype=np.int32))
    set_feature_count = pipeline.SetInt32VectorBufferStep(
        feature_count_buffer, pipeline.WHEN_NEW)
    tree_steps = pipeline.Pipeline([sampler, set_feature_count])

    # --- node initialisation ---------------------------------------------
    offsets_step = image_features.PixelPairGaussianOffsetsStep_f32i32(
        set_feature_count.OutputBufferId,
        offset_ux, offset_uy, offset_vx, offset_vy)
    init_node_steps = pipeline.Pipeline([offsets_step])

    # --- node statistics update ------------------------------------------
    depth_feature = image_features.ScaledDepthDeltaFeature_f32i32(
        offsets_step.FloatParamsBufferId,
        offsets_step.IntParamsBufferId,
        sampler.IndicesBufferId,
        buffers.PIXEL_INDICES,
        buffers.DEPTH_IMAGES,
        buffers.OFFSET_SCALES)
    extractor = image_features.ScaledDepthDeltaFeatureExtractorStep_f32i32(
        depth_feature, ordering)
    classes_slice = pipeline.SliceInt32VectorBufferStep_i32(
        buffers.CLASS_LABELS, sampler.IndicesBufferId)
    weights_slice = pipeline.SliceFloat32VectorBufferStep_i32(
        sampler.WeightsBufferId, sampler.IndicesBufferId)
    random_splitpoints = splitpoints.RandomSplitpointsStep_f32i32(
        extractor.FeatureValuesBufferId, n_splitpoints, ordering)
    stats_updater = classification.ClassStatsUpdater_f32i32(
        weights_slice.SlicedBufferId,
        classes_slice.SlicedBufferId,
        n_classes)
    one_stream_stats = classification.ClassStatsUpdaterOneStreamStep_f32i32(
        random_splitpoints.SplitpointsBufferId,
        random_splitpoints.SplitpointsCountsBufferId,
        extractor.FeatureValuesBufferId,
        ordering,
        stats_updater)
    update_stats_node_steps = pipeline.Pipeline([
        extractor,
        classes_slice,
        weights_slice,
        random_splitpoints,
        one_stream_stats,
    ])

    # --- node impurity update --------------------------------------------
    impurity = classification.ClassInfoGainSplitpointsImpurity_f32i32(
        random_splitpoints.SplitpointsCountsBufferId,
        one_stream_stats.ChildCountsBufferId,
        one_stream_stats.LeftStatsBufferId,
        one_stream_stats.RightStatsBufferId)
    update_impurity_node_steps = pipeline.Pipeline([impurity])

    # --- split selection -------------------------------------------------
    selector_buffers = splitpoints.SplitSelectorBuffers(
        impurity.ImpurityBufferId,
        random_splitpoints.SplitpointsBufferId,
        random_splitpoints.SplitpointsCountsBufferId,
        one_stream_stats.ChildCountsBufferId,
        one_stream_stats.LeftStatsBufferId,
        one_stream_stats.RightStatsBufferId,
        offsets_step.FloatParamsBufferId,
        offsets_step.IntParamsBufferId,
        extractor.FeatureValuesBufferId,
        ordering,
        extractor)
    should_split_criteria = create_should_split_criteria(**kwargs)
    estimator_finalizer = classification.ClassEstimatorFinalizer_f32()
    # The selector is built from three arguments here, as in the original.
    selector = splitpoints.WaitForBestSplitSelector_f32i32(
        [selector_buffers], should_split_criteria, estimator_finalizer)

    # --- prediction-side helpers and learner -----------------------------
    prediction_feature = image_features.ScaledDepthDeltaFeature_f32i32(
        sampler.IndicesBufferId,
        buffers.PIXEL_INDICES,
        buffers.DEPTH_IMAGES)
    estimator_updater = classification.ClassEstimatorUpdater_f32i32(
        sampler.WeightsBufferId, buffers.CLASS_LABELS, n_classes)
    # NOTE(review): the two literal 5s are passed positionally as in the
    # original; their meaning is not visible here -- confirm against the
    # OnlineForestScaledDepthDeltaClassLearner_f32i32 signature.
    return learn.OnlineForestScaledDepthDeltaClassLearner_f32i32(
        try_split_criteria,
        tree_steps,
        init_node_steps,
        update_stats_node_steps,
        update_impurity_node_steps,
        impurity_period,
        selector,
        frontier_limit,
        n_trees,
        5,
        5,
        n_classes,
        sampler.IndicesBufferId,
        sampler.WeightsBufferId,
        prediction_feature,
        estimator_updater)
def create_online_scaled_depth_delta_one_stream_learner_32f(**kwargs):
    """Build an online one-stream class learner over scaled depth-delta features.

    Keyword arguments read by this factory:
        classes: required; ``classes.GetMax() + 1`` is used as the class count.
        ux, uy, vx, vy: converted with ``float`` and handed to
            ``PixelPairGaussianOffsetsStep_f32i32``; no default is supplied.
        number_of_trees: default 10.
        number_of_features: default 1.
        feature_ordering: default ``pipeline.FEATURES_BY_DATAPOINTS``.
        number_of_splitpoints: default 1.
        max_frontier_size: default 10000000.
        impurity_update_period: default 1.
        bootstrap: when truthy, ``BootstrapSamplesStep_i32f32i32`` is used.
        poisson_sample: when present (and bootstrap is not truthy), its float
            value is passed to ``PoissonSamplesStep_i32i32``.

    All keyword arguments are additionally forwarded to
    ``create_try_split_criteria`` and ``create_should_split_criteria``.

    Returns:
        The constructed ``learn.OnlineForestScaledDepthDeltaClassLearner_f32i32``.
    """
    # --- scalar configuration ---------------------------------------------
    tree_count = int(kwargs.get('number_of_trees', 10))
    feature_count = int(kwargs.get('number_of_features', 1))
    ordering = int(kwargs.get('feature_ordering',
                              pipeline.FEATURES_BY_DATAPOINTS))
    splitpoint_count = int(kwargs.get('number_of_splitpoints', 1))
    class_count = int(kwargs['classes'].GetMax() + 1)
    frontier_limit = int(kwargs.get('max_frontier_size', 10000000))
    impurity_period = int(kwargs.get('impurity_update_period', 1))
    ux = float(kwargs.get('ux'))
    uy = float(kwargs.get('uy'))
    vx = float(kwargs.get('vx'))
    vy = float(kwargs.get('vy'))

    try_split_criteria = create_try_split_criteria(**kwargs)

    # --- per-tree steps: sampling and the number-of-features buffer --------
    if kwargs.get('bootstrap'):
        sampler = pipeline.BootstrapSamplesStep_i32f32i32(
            buffers.PIXEL_INDICES)
    elif 'poisson_sample' in kwargs:
        poisson_mean = float(kwargs.get('poisson_sample'))
        sampler = pipeline.PoissonSamplesStep_i32i32(
            buffers.PIXEL_INDICES, poisson_mean)
    else:
        sampler = pipeline.AllSamplesStep_i32f32i32(buffers.PIXEL_INDICES)
    indices_id = sampler.IndicesBufferId
    weights_id = sampler.WeightsBufferId

    feature_count_step = pipeline.SetInt32VectorBufferStep(
        buffers.as_vector_buffer(np.array([feature_count], dtype=np.int32)),
        pipeline.WHEN_NEW)
    tree_steps = pipeline.Pipeline([sampler, feature_count_step])

    # --- node initialisation: draw feature parameters ----------------------
    params_step = image_features.PixelPairGaussianOffsetsStep_f32i32(
        feature_count_step.OutputBufferId, ux, uy, vx, vy)
    init_node_steps = pipeline.Pipeline([params_step])

    # --- statistics update steps -------------------------------------------
    feature = image_features.ScaledDepthDeltaFeature_f32i32(
        params_step.FloatParamsBufferId,
        params_step.IntParamsBufferId,
        indices_id,
        buffers.PIXEL_INDICES,
        buffers.DEPTH_IMAGES,
        buffers.OFFSET_SCALES)
    extractor = image_features.ScaledDepthDeltaFeatureExtractorStep_f32i32(
        feature, ordering)
    classes_slice = pipeline.SliceInt32VectorBufferStep_i32(
        buffers.CLASS_LABELS, indices_id)
    weights_slice = pipeline.SliceFloat32VectorBufferStep_i32(
        weights_id, indices_id)
    splitpoint_step = splitpoints.RandomSplitpointsStep_f32i32(
        extractor.FeatureValuesBufferId, splitpoint_count, ordering)
    stats_updater = classification.ClassStatsUpdater_f32i32(
        weights_slice.SlicedBufferId,
        classes_slice.SlicedBufferId,
        class_count)
    stats_step = classification.ClassStatsUpdaterOneStreamStep_f32i32(
        splitpoint_step.SplitpointsBufferId,
        splitpoint_step.SplitpointsCountsBufferId,
        extractor.FeatureValuesBufferId,
        ordering,
        stats_updater)
    update_stats_steps = pipeline.Pipeline([
        extractor,
        classes_slice,
        weights_slice,
        splitpoint_step,
        stats_step,
    ])

    # --- impurity update ----------------------------------------------------
    impurity = classification.ClassInfoGainSplitpointsImpurity_f32i32(
        splitpoint_step.SplitpointsCountsBufferId,
        stats_step.ChildCountsBufferId,
        stats_step.LeftStatsBufferId,
        stats_step.RightStatsBufferId)
    update_impurity_steps = pipeline.Pipeline([impurity])

    # --- split selection ----------------------------------------------------
    selector_buffers = splitpoints.SplitSelectorBuffers(
        impurity.ImpurityBufferId,
        splitpoint_step.SplitpointsBufferId,
        splitpoint_step.SplitpointsCountsBufferId,
        stats_step.ChildCountsBufferId,
        stats_step.LeftStatsBufferId,
        stats_step.RightStatsBufferId,
        params_step.FloatParamsBufferId,
        params_step.IntParamsBufferId,
        extractor.FeatureValuesBufferId,
        ordering,
        indices_id)
    should_split_criteria = create_should_split_criteria(**kwargs)
    finalizer = classification.ClassEstimatorFinalizer_f32()
    selector = splitpoints.WaitForBestSplitSelector_f32i32(
        [selector_buffers], should_split_criteria, finalizer)

    # --- prediction-side feature and estimator updater ----------------------
    prediction_feature = image_features.ScaledDepthDeltaFeature_f32i32(
        indices_id, buffers.PIXEL_INDICES, buffers.DEPTH_IMAGES)
    estimator_updater = classification.ClassEstimatorUpdater_f32i32(
        weights_id, buffers.CLASS_LABELS, class_count)

    # NOTE(review): the two literal 5s are positional arguments whose meaning
    # is not visible from this file -- confirm against the learner's signature.
    return learn.OnlineForestScaledDepthDeltaClassLearner_f32i32(
        try_split_criteria,
        tree_steps,
        init_node_steps,
        update_stats_steps,
        update_impurity_steps,
        impurity_period,
        selector,
        frontier_limit,
        tree_count,
        5,
        5,
        class_count,
        indices_id,
        weights_id,
        prediction_feature,
        estimator_updater)
def create_consistent_two_stream_regression_axis_aligned_matrix_learner_32f(**kwargs):
    """Build a two-stream regression forest learner over axis-aligned matrix features.

    Keyword arguments read by this factory:
        x: required; ``x.shape[1]`` feeds the default number of features.
        y: required; ``y.shape[0]`` feeds the default leaf count and the
            default in-bounds point count, ``y.shape[1]`` is the y dimension.
        number_of_trees: default 10.
        number_of_leaves: default ``y.shape[0] / 5 + 1``.
        number_of_features: default ``x.shape[1] / 3 + 0.5``.
        feature_ordering: default ``pipeline.FEATURES_BY_DATAPOINTS``.
        number_of_jobs: default 1.
        probability_of_impurity_stream: default 0.5.
        in_bounds_number_of_points: default ``y.shape[0] / 2``.
        bootstrap: when truthy, ``BootstrapSamplesStep_f32f32i32`` is used.
        poisson_number_of_features: when truthy, the feature count comes from
            ``PoissonStep_f32i32`` instead of a fixed buffer.

    All keyword arguments are additionally forwarded to
    ``create_try_split_criteria`` and ``create_should_split_criteria``.

    Returns:
        A ``learn.ParallelForestLearner`` wrapping a
        ``learn.BreadthFirstTreeLearner_f32i32``.
    """
    # --- scalar configuration ---------------------------------------------
    tree_count = int(kwargs.get('number_of_trees', 10))
    leaf_count = int(kwargs.get('number_of_leaves',
                                kwargs['y'].shape[0] / 5 + 1))
    feature_count = int(kwargs.get('number_of_features',
                                   (kwargs['x'].shape[1]) / 3 + 0.5))
    # number_of_features = int( kwargs.get('number_of_features', np.sqrt(kwargs['x'].shape[1])))
    ordering = int(kwargs.get('feature_ordering',
                              pipeline.FEATURES_BY_DATAPOINTS))
    job_count = int(kwargs.get('number_of_jobs', 1))
    y_dim = int(kwargs['y'].shape[1])
    impurity_stream_probability = float(
        kwargs.get('probability_of_impurity_stream', 0.5))
    in_bounds_points = int(kwargs.get('in_bounds_number_of_points',
                                      kwargs['y'].shape[0] / 2))

    try_split_criteria = create_try_split_criteria(**kwargs)

    # --- per-tree steps: sampling and stream assignment ---------------------
    if kwargs.get('bootstrap'):
        sampler = pipeline.BootstrapSamplesStep_f32f32i32(buffers.X_FLOAT_DATA)
    else:
        sampler = pipeline.AllSamplesStep_f32f32i32(buffers.X_FLOAT_DATA)
    stream_assigner = splitpoints.AssignStreamStep_f32i32(
        sampler.WeightsBufferId, impurity_stream_probability)
    tree_steps = pipeline.Pipeline([sampler, stream_assigner])

    # --- per-node steps -----------------------------------------------------
    if kwargs.get('poisson_number_of_features'):
        feature_count_step = pipeline.PoissonStep_f32i32(feature_count, 1)
    else:
        feature_count_step = pipeline.SetInt32VectorBufferStep(
            buffers.as_vector_buffer(
                np.array([feature_count], dtype=np.int32)),
            pipeline.WHEN_NEW)
    params_step = matrix_features.AxisAlignedParamsStep_f32i32(
        feature_count_step.OutputBufferId, buffers.X_FLOAT_DATA)
    feature = matrix_features.LinearFloat32MatrixFeature_f32i32(
        params_step.FloatParamsBufferId,
        params_step.IntParamsBufferId,
        sampler.IndicesBufferId,
        buffers.X_FLOAT_DATA)
    extractor = matrix_features.LinearFloat32MatrixFeatureExtractorStep_f32i32(
        feature, ordering)
    ys_slice = pipeline.SliceFloat32MatrixBufferStep_i32(
        buffers.YS, sampler.IndicesBufferId)
    weights_slice = pipeline.SliceFloat32VectorBufferStep_i32(
        sampler.WeightsBufferId, sampler.IndicesBufferId)
    stream_slice = pipeline.SliceInt32VectorBufferStep_i32(
        stream_assigner.StreamTypeBufferId, sampler.IndicesBufferId)
    walker = regression.SumOfVarianceTwoStreamWalker_f32i32(
        weights_slice.SlicedBufferId,
        stream_slice.SlicedBufferId,
        ys_slice.SlicedBufferId,
        y_dim)
    best_split_step = regression.SumOfVarianceTwoStreamBestSplitpointsWalkingSortedStep_f32i32(
        walker,
        stream_slice.SlicedBufferId,
        extractor.FeatureValuesBufferId,
        ordering,
        splitpoints.AT_MIDPOINT,
        in_bounds_points)
    node_steps = pipeline.Pipeline([
        feature_count_step,
        params_step,
        extractor,
        ys_slice,
        weights_slice,
        stream_slice,
        best_split_step,
    ])

    # --- split selection ----------------------------------------------------
    selector_buffers = splitpoints.SplitSelectorBuffers(
        best_split_step.ImpurityBufferId,
        best_split_step.SplitpointBufferId,
        best_split_step.SplitpointCountsBufferId,
        best_split_step.ChildCountsEstimationBufferId,
        best_split_step.LeftEstimationYsBufferId,
        best_split_step.RightEstimationYsBufferId,
        params_step.FloatParamsBufferId,
        params_step.IntParamsBufferId,
        extractor.FeatureValuesBufferId,
        ordering,
        extractor)
    should_split_criteria = create_should_split_criteria(**kwargs)
    finalizer = regression.MeanVarianceEstimatorFinalizer_f32()
    index_splitter = splitpoints.SplitIndices_f32i32(sampler.IndicesBufferId)
    selector = splitpoints.SplitSelector_f32i32(
        [selector_buffers], should_split_criteria, finalizer, index_splitter)

    # --- tree and forest learners -------------------------------------------
    tree_learner = learn.BreadthFirstTreeLearner_f32i32(
        try_split_criteria, tree_steps, node_steps, selector, leaf_count)
    return learn.ParallelForestLearner(
        tree_learner, tree_count, y_dim, job_count)
def create_scaled_depth_delta_learner_32f(**kwargs):
    """Build a depth-first classification forest learner over scaled depth-delta features.

    Keyword arguments read by this factory:
        classes: required; ``classes.GetMax() + 1`` is used as the class count.
        ux, uy, vx, vy: converted with ``float`` and handed to
            ``PixelPairGaussianOffsetsStep_f32i32``; no default is supplied.
        number_of_trees: default 10.
        number_of_features: default 1.
        feature_ordering: default ``pipeline.FEATURES_BY_DATAPOINTS``.
        number_of_jobs: default 1.
        bootstrap: when truthy, ``BootstrapSamplesStep_i32f32i32`` is used,
            otherwise ``AllSamplesStep_i32f32i32``.

    All keyword arguments are additionally forwarded to
    ``create_try_split_criteria`` and ``create_should_split_criteria``.

    Returns:
        A ``learn.ParallelForestLearner`` wrapping a
        ``learn.DepthFirstTreeLearner_f32i32``.
    """
    # --- scalar configuration ---------------------------------------------
    ux = float(kwargs.get('ux'))
    uy = float(kwargs.get('uy'))
    vx = float(kwargs.get('vx'))
    vy = float(kwargs.get('vy'))
    tree_count = int(kwargs.get('number_of_trees', 10))
    feature_count = int(kwargs.get('number_of_features', 1))
    ordering = int(kwargs.get('feature_ordering',
                              pipeline.FEATURES_BY_DATAPOINTS))
    job_count = int(kwargs.get('number_of_jobs', 1))
    class_count = int(kwargs['classes'].GetMax() + 1)

    try_split_criteria = create_try_split_criteria(**kwargs)

    # --- per-tree steps ------------------------------------------------------
    if kwargs.get('bootstrap'):
        sampler = pipeline.BootstrapSamplesStep_i32f32i32(
            buffers.PIXEL_INDICES)
    else:
        sampler = pipeline.AllSamplesStep_i32f32i32(buffers.PIXEL_INDICES)
    feature_count_step = pipeline.SetInt32VectorBufferStep(
        buffers.as_vector_buffer(np.array([feature_count], dtype=np.int32)),
        pipeline.WHEN_NEW)
    tree_steps = pipeline.Pipeline([sampler, feature_count_step])

    # --- per-node steps ------------------------------------------------------
    params_step = image_features.PixelPairGaussianOffsetsStep_f32i32(
        feature_count_step.OutputBufferId, ux, uy, vx, vy)
    feature = image_features.ScaledDepthDeltaFeature_f32i32(
        params_step.FloatParamsBufferId,
        params_step.IntParamsBufferId,
        sampler.IndicesBufferId,
        buffers.PIXEL_INDICES,
        buffers.DEPTH_IMAGES,
        buffers.OFFSET_SCALES)
    extractor = image_features.ScaledDepthDeltaFeatureExtractorStep_f32i32(
        feature, ordering)
    classes_slice = pipeline.SliceInt32VectorBufferStep_i32(
        buffers.CLASS_LABELS, sampler.IndicesBufferId)
    weights_slice = pipeline.SliceFloat32VectorBufferStep_i32(
        sampler.WeightsBufferId, sampler.IndicesBufferId)
    walker = classification.ClassInfoGainWalker_f32i32(
        weights_slice.SlicedBufferId,
        classes_slice.SlicedBufferId,
        class_count)
    best_split_step = classification.ClassInfoGainBestSplitpointsWalkingSortedStep_f32i32(
        walker, extractor.FeatureValuesBufferId, ordering)
    node_steps = pipeline.Pipeline([
        params_step,
        extractor,
        classes_slice,
        weights_slice,
        best_split_step,
    ])

    # --- split selection ------------------------------------------------------
    selector_buffers = splitpoints.SplitSelectorBuffers(
        best_split_step.ImpurityBufferId,
        best_split_step.SplitpointBufferId,
        best_split_step.SplitpointCountsBufferId,
        best_split_step.ChildCountsBufferId,
        best_split_step.LeftYsBufferId,
        best_split_step.RightYsBufferId,
        params_step.FloatParamsBufferId,
        params_step.IntParamsBufferId,
        extractor.FeatureValuesBufferId,
        ordering,
        extractor)
    should_split_criteria = create_should_split_criteria(**kwargs)
    finalizer = classification.ClassEstimatorFinalizer_f32()
    index_splitter = splitpoints.SplitIndices_f32i32(sampler.IndicesBufferId)
    selector = splitpoints.SplitSelector_f32i32(
        [selector_buffers], should_split_criteria, finalizer, index_splitter)

    # --- tree and forest learners ---------------------------------------------
    tree_learner = learn.DepthFirstTreeLearner_f32i32(
        try_split_criteria, tree_steps, node_steps, selector)
    return learn.ParallelForestLearner(
        tree_learner, tree_count, class_count, job_count)
def create_biau2008_regression_scaled_depth_delta_learner_32f(**kwargs):
    """Build a Biau2008 regression forest learner over scaled depth-delta features.

    Keyword arguments read by this factory:
        y: required; ``y.GetM()`` feeds the default leaf count and
            ``y.GetN()`` is used as the y dimension.
        ux, uy, vx, vy: converted with ``float`` and handed to
            ``PixelPairGaussianOffsetsStep_f32i32``; no default is supplied.
        number_of_trees: default 10.
        number_of_leaves: default ``y.GetM() / 5 + 1``.
        number_of_split_retries: default 10.
        feature_ordering: default ``pipeline.FEATURES_BY_DATAPOINTS``.
        number_of_jobs: default 1.

    The number of features is fixed at 1, the try-split criterion is
    ``MinNodeSizeCriteria(2)`` and the should-split criterion is
    ``MinChildSizeCriteria(1)``; none of these are read from kwargs.

    Returns:
        A ``learn.ParallelForestLearner`` wrapping a
        ``learn.Biau2008TreeLearner_f32i32``.
    """
    # --- scalar configuration ---------------------------------------------
    ux = float(kwargs.get('ux'))
    uy = float(kwargs.get('uy'))
    vx = float(kwargs.get('vx'))
    vy = float(kwargs.get('vy'))
    tree_count = int(kwargs.get('number_of_trees', 10))
    leaf_count = int(kwargs.get('number_of_leaves',
                                kwargs['y'].GetM() / 5 + 1))
    split_retries = int(kwargs.get('number_of_split_retries', 10))
    feature_count = 1
    ordering = int(kwargs.get('feature_ordering',
                              pipeline.FEATURES_BY_DATAPOINTS))
    job_count = int(kwargs.get('number_of_jobs', 1))
    y_dim = int(kwargs['y'].GetN())

    try_split_criteria = try_split.MinNodeSizeCriteria(2)

    # --- per-tree steps ------------------------------------------------------
    sampler = pipeline.AllSamplesStep_i32f32i32(buffers.PIXEL_INDICES)
    feature_count_step = pipeline.SetInt32VectorBufferStep(
        buffers.as_vector_buffer(np.array([feature_count], dtype=np.int32)),
        pipeline.WHEN_NEW)
    tree_steps = pipeline.Pipeline([sampler, feature_count_step])

    # --- per-node steps ------------------------------------------------------
    params_step = image_features.PixelPairGaussianOffsetsStep_f32i32(
        feature_count_step.OutputBufferId, ux, uy, vx, vy)
    feature = image_features.ScaledDepthDeltaFeature_f32i32(
        params_step.FloatParamsBufferId,
        params_step.IntParamsBufferId,
        sampler.IndicesBufferId,
        buffers.PIXEL_INDICES,
        buffers.DEPTH_IMAGES,
        buffers.OFFSET_SCALES)
    extractor = image_features.ScaledDepthDeltaFeatureExtractorStep_f32i32(
        feature, ordering)
    ys_slice = pipeline.SliceFloat32MatrixBufferStep_i32(
        buffers.YS, sampler.IndicesBufferId)
    weights_slice = pipeline.SliceFloat32VectorBufferStep_i32(
        sampler.WeightsBufferId, sampler.IndicesBufferId)
    walker = regression.SumOfVarianceWalker_f32i32(
        weights_slice.SlicedBufferId, ys_slice.SlicedBufferId, y_dim)
    best_split_step = regression.SumOfVarianceRandomGapSplitpointsStep_f32i32(
        walker, extractor.FeatureValuesBufferId, ordering)
    node_steps = pipeline.Pipeline([
        params_step,
        extractor,
        ys_slice,
        weights_slice,
        best_split_step,
    ])

    # --- split selection ------------------------------------------------------
    selector_buffers = splitpoints.SplitSelectorBuffers(
        best_split_step.ImpurityBufferId,
        best_split_step.SplitpointBufferId,
        best_split_step.SplitpointCountsBufferId,
        best_split_step.ChildCountsBufferId,
        best_split_step.LeftYsBufferId,
        best_split_step.RightYsBufferId,
        params_step.FloatParamsBufferId,
        params_step.IntParamsBufferId,
        extractor.FeatureValuesBufferId,
        ordering,
        extractor)
    should_split_criteria = should_split.MinChildSizeCriteria(1)
    finalizer = regression.MeanVarianceEstimatorFinalizer_f32()
    index_splitter = splitpoints.SplitIndices_f32i32(sampler.IndicesBufferId)
    selector = splitpoints.SplitSelector_f32i32(
        [selector_buffers], should_split_criteria, finalizer, index_splitter)

    # --- tree and forest learners ---------------------------------------------
    tree_learner = learn.Biau2008TreeLearner_f32i32(
        try_split_criteria,
        tree_steps,
        node_steps,
        selector,
        leaf_count,
        split_retries)
    return learn.ParallelForestLearner(
        tree_learner, tree_count, y_dim, job_count)
def tree_weights_for_all_trees(forest_size):
    """Return a vector buffer holding ``forest_size`` float64 ones.

    Args:
        forest_size: length passed to ``np.ones``.

    Returns:
        The result of ``buffers.as_vector_buffer`` applied to the ones array.
    """
    uniform_weights = np.ones(forest_size, dtype=np.float64)
    return buffers.as_vector_buffer(uniform_weights)
def create_axis_aligned_matrix_walking_learner_32f(**kwargs):
    """Build a classification forest learner over axis-aligned matrix features.

    Keyword arguments read by this factory:
        x: required; ``sqrt(x.shape[1])`` is the default number of features.
        classes: required; ``np.max(classes) + 1`` is used as the class count.
        number_of_trees: default 10.
        number_of_features: default ``np.sqrt(x.shape[1])`` (truncated by int).
        feature_ordering: default ``pipeline.FEATURES_BY_DATAPOINTS``.
        number_of_jobs: default 1.
        bootstrap: when truthy, ``BootstrapSamplesStep_f32f32i32`` is used,
            otherwise ``AllSamplesStep_f32f32i32``.
        tree_order: ``"breadth_first"`` selects
            ``BreadthFirstTreeLearner_f32i32``; anything else (or absent)
            selects ``DepthFirstTreeLearner_f32i32``.

    All keyword arguments are additionally forwarded to
    ``create_try_split_criteria`` and ``create_should_split_criteria``.

    Returns:
        A ``learn.ParallelForestLearner`` wrapping the chosen tree learner.
    """
    # --- scalar configuration ---------------------------------------------
    tree_count = int(kwargs.get("number_of_trees", 10))
    feature_count = int(
        kwargs.get("number_of_features", np.sqrt(kwargs["x"].shape[1])))
    ordering = int(
        kwargs.get("feature_ordering", pipeline.FEATURES_BY_DATAPOINTS))
    job_count = int(kwargs.get("number_of_jobs", 1))
    class_count = int(np.max(kwargs["classes"]) + 1)

    try_split_criteria = create_try_split_criteria(**kwargs)

    # --- per-tree steps ------------------------------------------------------
    if kwargs.get("bootstrap"):
        sampler = pipeline.BootstrapSamplesStep_f32f32i32(buffers.X_FLOAT_DATA)
    else:
        sampler = pipeline.AllSamplesStep_f32f32i32(buffers.X_FLOAT_DATA)
    feature_count_step = pipeline.SetInt32VectorBufferStep(
        buffers.as_vector_buffer(np.array([feature_count], dtype=np.int32)),
        pipeline.WHEN_NEW)
    tree_steps = pipeline.Pipeline([sampler, feature_count_step])

    # --- per-node steps ------------------------------------------------------
    params_step = matrix_features.AxisAlignedParamsStep_f32i32(
        feature_count_step.OutputBufferId, buffers.X_FLOAT_DATA)
    feature = matrix_features.LinearFloat32MatrixFeature_f32i32(
        params_step.FloatParamsBufferId,
        params_step.IntParamsBufferId,
        sampler.IndicesBufferId,
        buffers.X_FLOAT_DATA)
    extractor = matrix_features.LinearFloat32MatrixFeatureExtractorStep_f32i32(
        feature, ordering)
    classes_slice = pipeline.SliceInt32VectorBufferStep_i32(
        buffers.CLASS_LABELS, sampler.IndicesBufferId)
    weights_slice = pipeline.SliceFloat32VectorBufferStep_i32(
        sampler.WeightsBufferId, sampler.IndicesBufferId)
    walker = classification.ClassInfoGainWalker_f32i32(
        weights_slice.SlicedBufferId,
        classes_slice.SlicedBufferId,
        class_count)
    best_split_step = classification.ClassInfoGainBestSplitpointsWalkingSortedStep_f32i32(
        walker, extractor.FeatureValuesBufferId, ordering)
    node_steps = pipeline.Pipeline([
        params_step,
        extractor,
        classes_slice,
        weights_slice,
        best_split_step,
    ])

    # --- split selection ------------------------------------------------------
    selector_buffers = splitpoints.SplitSelectorBuffers(
        best_split_step.ImpurityBufferId,
        best_split_step.SplitpointBufferId,
        best_split_step.SplitpointCountsBufferId,
        best_split_step.ChildCountsBufferId,
        best_split_step.LeftYsBufferId,
        best_split_step.RightYsBufferId,
        params_step.FloatParamsBufferId,
        params_step.IntParamsBufferId,
        extractor.FeatureValuesBufferId,
        ordering,
        extractor)
    should_split_criteria = create_should_split_criteria(**kwargs)
    finalizer = classification.ClassEstimatorFinalizer_f32()
    index_splitter = splitpoints.SplitIndices_f32i32(sampler.IndicesBufferId)
    selector = splitpoints.SplitSelector_f32i32(
        [selector_buffers], should_split_criteria, finalizer, index_splitter)

    # --- tree and forest learners ---------------------------------------------
    # Both learner classes are called with the same four arguments, so pick
    # the class first and construct once.
    if kwargs.get("tree_order") == "breadth_first":
        tree_learner_factory = learn.BreadthFirstTreeLearner_f32i32
    else:
        tree_learner_factory = learn.DepthFirstTreeLearner_f32i32
    tree_learner = tree_learner_factory(
        try_split_criteria, tree_steps, node_steps, selector)
    return learn.ParallelForestLearner(
        tree_learner, tree_count, class_count, job_count)
number_of_datapoints = pixel_indices_buffer.GetM() offset_scales = np.array(np.random.uniform(0.8, 1.2, (number_of_datapoints, 2)), dtype=np.float32) offset_scales_buffer = rftk.buffers.as_matrix_buffer(offset_scales) # On the first pass through data learn for each sample counts list_of_sample_counts = eval(args.list_of_sample_counts) clipped_list_of_sample_counts = [min(s, pixel_labels_buffer.GetN()) for s in list_of_sample_counts] clipped_list_of_sample_ranges = zip([0] + clipped_list_of_sample_counts[:-1], clipped_list_of_sample_counts) print clipped_list_of_sample_ranges pass_id = 0 for (start_index, end_index) in clipped_list_of_sample_ranges: print start_index print end_index # Slice data datapoint_indices = buffers.as_vector_buffer(np.array(np.arange(start_index, end_index), dtype=np.int32)) sliced_pixel_indices_buffer = pixel_indices_buffer.Slice(datapoint_indices) sliced_offset_scales_buffer = offset_scales_buffer.Slice(datapoint_indices) sliced_pixel_labels_buffer = pixel_labels_buffer.Slice(datapoint_indices) # online_learner.Train(bufferCollection, buffers.Int32Vector(datapoint_indices)) predictor = forest_learner.fit(depth_images=depths_buffer, pixel_indices=sliced_pixel_indices_buffer, offset_scales=sliced_offset_scales_buffer, classes=sliced_pixel_labels_buffer) #pickle forest and data used for training forest_pickle_filename = "%s/forest-%d-%d.pkl" % (online_run_folder, pass_id, end_index) pickle.dump(predictor.get_forest(), gzip.open(forest_pickle_filename, 'wb')) # Print forest stats
def create_online_axis_aligned_matrix_one_stream_learner_32f(**kwargs):
    """Build an online one-stream class learner over axis-aligned matrix features.

    Keyword arguments read by this factory:
        x: required; ``sqrt(x.shape[1])`` is the default number of features.
        classes: required; ``np.max(classes) + 1`` is used as the class count.
        number_of_trees: default 10.
        number_of_features: default ``np.sqrt(x.shape[1])`` (truncated by int).
        feature_ordering: default ``pipeline.FEATURES_BY_DATAPOINTS``.
        number_of_splitpoints: default 1.
        max_frontier_size: default 10000000.
        impurity_update_period: default 1.
        bootstrap: when truthy, ``BootstrapSamplesStep_f32f32i32`` is used.
        poisson_sample: when present (and bootstrap is not truthy), its float
            value is passed to ``PoissonSamplesStep_f32i32``.

    All keyword arguments are additionally forwarded to
    ``create_try_split_criteria`` and ``create_should_split_criteria``.

    Returns:
        The constructed ``learn.OnlineForestMatrixClassLearner_f32i32``.
    """
    # --- scalar configuration ---------------------------------------------
    tree_count = int(kwargs.get("number_of_trees", 10))
    feature_count = int(
        kwargs.get("number_of_features", np.sqrt(kwargs["x"].shape[1])))
    ordering = int(
        kwargs.get("feature_ordering", pipeline.FEATURES_BY_DATAPOINTS))
    splitpoint_count = int(kwargs.get("number_of_splitpoints", 1))
    class_count = int(np.max(kwargs["classes"]) + 1)
    frontier_limit = int(kwargs.get("max_frontier_size", 10000000))
    impurity_period = int(kwargs.get("impurity_update_period", 1))

    try_split_criteria = create_try_split_criteria(**kwargs)

    # --- per-tree steps: sampling and the number-of-features buffer --------
    if kwargs.get("bootstrap"):
        sampler = pipeline.BootstrapSamplesStep_f32f32i32(buffers.X_FLOAT_DATA)
    elif "poisson_sample" in kwargs:
        poisson_mean = float(kwargs.get("poisson_sample"))
        sampler = pipeline.PoissonSamplesStep_f32i32(
            buffers.X_FLOAT_DATA, poisson_mean)
    else:
        sampler = pipeline.AllSamplesStep_f32f32i32(buffers.X_FLOAT_DATA)

    feature_count_step = pipeline.SetInt32VectorBufferStep(
        buffers.as_vector_buffer(np.array([feature_count], dtype=np.int32)),
        pipeline.WHEN_NEW)
    tree_steps = pipeline.Pipeline([sampler, feature_count_step])

    # --- feature parameters and extraction ---------------------------------
    params_step = matrix_features.AxisAlignedParamsStep_f32i32(
        feature_count_step.OutputBufferId, buffers.X_FLOAT_DATA)
    feature = matrix_features.LinearFloat32MatrixFeature_f32i32(
        params_step.FloatParamsBufferId,
        params_step.IntParamsBufferId,
        sampler.IndicesBufferId,
        buffers.X_FLOAT_DATA)
    extractor = matrix_features.LinearFloat32MatrixFeatureExtractorStep_f32i32(
        feature, ordering)

    # --- statistics and impurity steps --------------------------------------
    classes_slice = pipeline.SliceInt32VectorBufferStep_i32(
        buffers.CLASS_LABELS, sampler.IndicesBufferId)
    weights_slice = pipeline.SliceFloat32VectorBufferStep_i32(
        sampler.WeightsBufferId, sampler.IndicesBufferId)
    splitpoint_step = splitpoints.RandomSplitpointsStep_f32i32(
        extractor.FeatureValuesBufferId, splitpoint_count, ordering)
    stats_updater = classification.ClassStatsUpdater_f32i32(
        weights_slice.SlicedBufferId,
        classes_slice.SlicedBufferId,
        class_count)
    stats_step = classification.ClassStatsUpdaterOneStreamStep_f32i32(
        splitpoint_step.SplitpointsBufferId,
        splitpoint_step.SplitpointsCountsBufferId,
        extractor.FeatureValuesBufferId,
        ordering,
        stats_updater)
    impurity = classification.ClassInfoGainSplitpointsImpurity_f32i32(
        splitpoint_step.SplitpointsCountsBufferId,
        stats_step.ChildCountsBufferId,
        stats_step.LeftStatsBufferId,
        stats_step.RightStatsBufferId)

    # --- the three node pipelines handed to the online learner --------------
    init_node_steps = pipeline.Pipeline([params_step])
    update_stats_steps = pipeline.Pipeline([
        extractor,
        classes_slice,
        weights_slice,
        splitpoint_step,
        stats_step,
    ])
    update_impurity_steps = pipeline.Pipeline([impurity])

    # --- split selection ----------------------------------------------------
    selector_buffers = splitpoints.SplitSelectorBuffers(
        impurity.ImpurityBufferId,
        splitpoint_step.SplitpointsBufferId,
        splitpoint_step.SplitpointsCountsBufferId,
        stats_step.ChildCountsBufferId,
        stats_step.LeftStatsBufferId,
        stats_step.RightStatsBufferId,
        params_step.FloatParamsBufferId,
        params_step.IntParamsBufferId,
        extractor.FeatureValuesBufferId,
        ordering,
        extractor)
    should_split_criteria = create_should_split_criteria(**kwargs)
    finalizer = classification.ClassEstimatorFinalizer_f32()
    index_splitter = splitpoints.SplitIndices_f32i32(sampler.IndicesBufferId)
    selector = splitpoints.WaitForBestSplitSelector_f32i32(
        [selector_buffers], should_split_criteria, finalizer, index_splitter)

    # --- prediction-side feature and estimator updater ----------------------
    prediction_feature = matrix_features.LinearFloat32MatrixFeature_f32i32(
        sampler.IndicesBufferId, buffers.X_FLOAT_DATA)
    estimator_updater = classification.ClassEstimatorUpdater_f32i32(
        sampler.WeightsBufferId, buffers.CLASS_LABELS, class_count)

    # NOTE(review): the two literal 5s are positional arguments whose meaning
    # is not visible from this file -- confirm against the learner's signature.
    return learn.OnlineForestMatrixClassLearner_f32i32(
        try_split_criteria,
        tree_steps,
        init_node_steps,
        update_stats_steps,
        update_impurity_steps,
        impurity_period,
        selector,
        frontier_limit,
        tree_count,
        5,
        5,
        class_count,
        sampler.IndicesBufferId,
        sampler.WeightsBufferId,
        prediction_feature,
        estimator_updater)
# On the first pass through data learn for each sample counts list_of_sample_counts = eval(args.list_of_sample_counts) clipped_list_of_sample_counts = [ min(s, pixel_labels_buffer.GetN()) for s in list_of_sample_counts ] clipped_list_of_sample_ranges = zip([0] + clipped_list_of_sample_counts[:-1], clipped_list_of_sample_counts) print clipped_list_of_sample_ranges pass_id = 0 for (start_index, end_index) in clipped_list_of_sample_ranges: print start_index print end_index # Slice data datapoint_indices = buffers.as_vector_buffer( np.array(np.arange(start_index, end_index), dtype=np.int32)) sliced_pixel_indices_buffer = pixel_indices_buffer.Slice( datapoint_indices) sliced_offset_scales_buffer = offset_scales_buffer.Slice( datapoint_indices) sliced_pixel_labels_buffer = pixel_labels_buffer.Slice( datapoint_indices) # online_learner.Train(bufferCollection, buffers.Int32Vector(datapoint_indices)) predictor = forest_learner.fit( depth_images=depths_buffer, pixel_indices=sliced_pixel_indices_buffer, offset_scales=sliced_offset_scales_buffer, classes=sliced_pixel_labels_buffer) #pickle forest and data used for training