コード例 #1
0
def create_axis_aligned_matrix_two_stream_learner_32f(**kwargs):
    number_of_trees = int(kwargs.get('number_of_trees', 10))
    number_of_features = int(
        kwargs.get('number_of_features', np.sqrt(kwargs['x'].shape[1])))
    feature_ordering = int(
        kwargs.get('feature_ordering', pipeline.FEATURES_BY_DATAPOINTS))
    number_of_splitpoints = int(kwargs.get('number_of_splitpoints', 1))
    number_of_jobs = int(kwargs.get('number_of_jobs', 1))
    number_of_classes = int(np.max(kwargs['classes']) + 1)
    probability_of_impurity_stream = float(
        kwargs.get('probability_of_impurity_stream', 0.5))

    try_split_criteria = create_try_split_criteria(**kwargs)

    if 'bootstrap' in kwargs and kwargs.get('bootstrap'):
        sample_data_step = pipeline.BootstrapSamplesStep_f32f32i32(
            buffers.X_FLOAT_DATA)
    else:
        sample_data_step = pipeline.AllSamplesStep_f32f32i32(
            buffers.X_FLOAT_DATA)

    number_of_features_buffer = buffers.as_vector_buffer(
        np.array([number_of_features], dtype=np.int32))
    set_number_features_step = pipeline.SetInt32VectorBufferStep(
        number_of_features_buffer, pipeline.WHEN_NEW)
    assign_stream_step = splitpoints.AssignStreamStep_f32i32(
        sample_data_step.WeightsBufferId, probability_of_impurity_stream)
    tree_steps_pipeline = pipeline.Pipeline(
        [sample_data_step, set_number_features_step, assign_stream_step])

    feature_params_step = matrix_features.AxisAlignedParamsStep_f32i32(
        set_number_features_step.OutputBufferId, buffers.X_FLOAT_DATA)
    matrix_feature = matrix_features.LinearFloat32MatrixFeature_f32i32(
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        sample_data_step.IndicesBufferId, buffers.X_FLOAT_DATA)
    matrix_feature_extractor_step = matrix_features.LinearFloat32MatrixFeatureExtractorStep_f32i32(
        matrix_feature, feature_ordering)
    slice_classes_step = pipeline.SliceInt32VectorBufferStep_i32(
        buffers.CLASS_LABELS, sample_data_step.IndicesBufferId)
    slice_weights_step = pipeline.SliceFloat32VectorBufferStep_i32(
        sample_data_step.WeightsBufferId, sample_data_step.IndicesBufferId)
    slice_assign_stream_step = pipeline.SliceInt32VectorBufferStep_i32(
        assign_stream_step.StreamTypeBufferId,
        sample_data_step.IndicesBufferId)

    random_splitpoint_selection_step = splitpoints.RandomSplitpointsStep_f32i32(
        matrix_feature_extractor_step.FeatureValuesBufferId,
        number_of_splitpoints, feature_ordering,
        slice_assign_stream_step.SlicedBufferId)

    class_stats_updater = classification.ClassStatsUpdater_f32i32(
        slice_weights_step.SlicedBufferId, slice_classes_step.SlicedBufferId,
        number_of_classes)
    two_stream_split_stats_step = classification.ClassStatsUpdaterTwoStreamStep_f32i32(
        random_splitpoint_selection_step.SplitpointsBufferId,
        random_splitpoint_selection_step.SplitpointsCountsBufferId,
        slice_assign_stream_step.SlicedBufferId,
        matrix_feature_extractor_step.FeatureValuesBufferId, feature_ordering,
        class_stats_updater)

    impurity_step = classification.ClassInfoGainSplitpointsImpurity_f32i32(
        random_splitpoint_selection_step.SplitpointsCountsBufferId,
        two_stream_split_stats_step.ChildCountsImpurityBufferId,
        two_stream_split_stats_step.LeftImpurityStatsBufferId,
        two_stream_split_stats_step.RightImpurityStatsBufferId)

    node_steps_pipeline = pipeline.Pipeline([
        feature_params_step, matrix_feature_extractor_step, slice_classes_step,
        slice_weights_step, slice_assign_stream_step,
        random_splitpoint_selection_step, two_stream_split_stats_step,
        impurity_step
    ])

    split_buffers = splitpoints.SplitSelectorBuffers(
        impurity_step.ImpurityBufferId,
        random_splitpoint_selection_step.SplitpointsBufferId,
        random_splitpoint_selection_step.SplitpointsCountsBufferId,
        two_stream_split_stats_step.ChildCountsEstimatorBufferId,
        two_stream_split_stats_step.LeftEstimatorStatsBufferId,
        two_stream_split_stats_step.RightEstimatorStatsBufferId,
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        matrix_feature_extractor_step.FeatureValuesBufferId, feature_ordering,
        matrix_feature_extractor_step)
    should_split_criteria = create_should_split_criteria(**kwargs)
    finalizer = classification.ClassEstimatorFinalizer_f32()
    split_indices = splitpoints.SplitIndices_f32i32(
        sample_data_step.IndicesBufferId)
    split_selector = splitpoints.SplitSelector_f32i32([split_buffers],
                                                      should_split_criteria,
                                                      finalizer, split_indices)

    if 'tree_order' in kwargs and kwargs.get('tree_order') == 'breadth_first':
        tree_learner = learn.BreadthFirstTreeLearner_f32i32(
            try_split_criteria, tree_steps_pipeline, node_steps_pipeline,
            split_selector)
    else:
        tree_learner = learn.DepthFirstTreeLearner_f32i32(
            try_split_criteria, tree_steps_pipeline, node_steps_pipeline,
            split_selector)
    forest_learner = learn.ParallelForestLearner(tree_learner, number_of_trees,
                                                 number_of_classes,
                                                 number_of_jobs)
    return forest_learner
コード例 #2
0
def create_online_axis_aligned_matrix_two_stream_consistent_learner_32f(
        **kwargs):
    number_of_trees = int(kwargs.get('number_of_trees', 10))
    number_of_features = int(
        kwargs.get('number_of_features', np.sqrt(kwargs['x'].shape[1])))
    feature_ordering = int(
        kwargs.get('feature_ordering', pipeline.FEATURES_BY_DATAPOINTS))
    number_of_splitpoints = int(kwargs.get('number_of_splitpoints', 1))
    number_of_classes = int(np.max(kwargs['classes']) + 1)
    max_frontier_size = int(kwargs.get('max_frontier_size', 10000000))
    impurity_update_period = int(kwargs.get('impurity_update_period', 1))
    probability_of_impurity_stream = float(
        kwargs.get('probability_of_impurity_stream', 0.5))

    try_split_criteria = create_try_split_criteria(**kwargs)

    if 'bootstrap' in kwargs and kwargs.get('bootstrap'):
        sample_data_step = pipeline.BootstrapSamplesStep_f32f32i32(
            buffers.X_FLOAT_DATA)
    elif 'poisson_sample' in kwargs:
        poisson_sample_mean = float(kwargs.get('poisson_sample'))
        sample_data_step = pipeline.PoissonSamplesStep_f32i32(
            buffers.X_FLOAT_DATA, poisson_sample_mean)
    else:
        sample_data_step = pipeline.AllSamplesStep_f32f32i32(
            buffers.X_FLOAT_DATA)

    assign_stream_step = splitpoints.AssignStreamStep_f32i32(
        sample_data_step.WeightsBufferId, probability_of_impurity_stream)
    tree_steps_pipeline = pipeline.Pipeline(
        [sample_data_step, assign_stream_step])

    # On init
    set_number_features_step = pipeline.PoissonStep_f32i32(
        number_of_features, 1)
    feature_params_step = matrix_features.AxisAlignedParamsStep_f32i32(
        set_number_features_step.OutputBufferId, buffers.X_FLOAT_DATA)
    init_node_steps_pipeline = pipeline.Pipeline(
        [set_number_features_step, feature_params_step])

    # On update
    matrix_feature = matrix_features.LinearFloat32MatrixFeature_f32i32(
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        sample_data_step.IndicesBufferId, buffers.X_FLOAT_DATA)
    matrix_feature_extractor_step = matrix_features.LinearFloat32MatrixFeatureExtractorStep_f32i32(
        matrix_feature, feature_ordering)
    slice_classes_step = pipeline.SliceInt32VectorBufferStep_i32(
        buffers.CLASS_LABELS, sample_data_step.IndicesBufferId)
    slice_weights_step = pipeline.SliceFloat32VectorBufferStep_i32(
        sample_data_step.WeightsBufferId, sample_data_step.IndicesBufferId)
    slice_assign_stream_step = pipeline.SliceInt32VectorBufferStep_i32(
        assign_stream_step.StreamTypeBufferId,
        sample_data_step.IndicesBufferId)

    random_splitpoint_selection_step = splitpoints.RandomSplitpointsStep_f32i32(
        matrix_feature_extractor_step.FeatureValuesBufferId,
        number_of_splitpoints, feature_ordering,
        slice_assign_stream_step.SlicedBufferId)

    class_stats_updater = classification.ClassStatsUpdater_f32i32(
        slice_weights_step.SlicedBufferId, slice_classes_step.SlicedBufferId,
        number_of_classes)

    two_stream_split_stats_step = classification.ClassStatsUpdaterTwoStreamStep_f32i32(
        random_splitpoint_selection_step.SplitpointsBufferId,
        random_splitpoint_selection_step.SplitpointsCountsBufferId,
        slice_assign_stream_step.SlicedBufferId,
        matrix_feature_extractor_step.FeatureValuesBufferId, feature_ordering,
        class_stats_updater)

    update_stats_node_steps_pipeline = pipeline.Pipeline([
        matrix_feature_extractor_step, slice_classes_step, slice_weights_step,
        slice_assign_stream_step, random_splitpoint_selection_step,
        two_stream_split_stats_step
    ])

    # On impurity
    impurity_step = classification.ClassInfoGainSplitpointsImpurity_f32i32(
        random_splitpoint_selection_step.SplitpointsCountsBufferId,
        two_stream_split_stats_step.ChildCountsImpurityBufferId,
        two_stream_split_stats_step.LeftImpurityStatsBufferId,
        two_stream_split_stats_step.RightImpurityStatsBufferId)

    update_impurity_node_steps_pipeline = pipeline.Pipeline([impurity_step])

    split_buffers = splitpoints.SplitSelectorBuffers(
        impurity_step.ImpurityBufferId,
        random_splitpoint_selection_step.SplitpointsBufferId,
        random_splitpoint_selection_step.SplitpointsCountsBufferId,
        two_stream_split_stats_step.ChildCountsEstimatorBufferId,
        two_stream_split_stats_step.LeftEstimatorStatsBufferId,
        two_stream_split_stats_step.RightEstimatorStatsBufferId,
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        matrix_feature_extractor_step.FeatureValuesBufferId, feature_ordering,
        matrix_feature_extractor_step)
    should_split_criteria = create_should_split_consistent_criteria(**kwargs)
    finalizer = classification.ClassEstimatorFinalizer_f32()
    split_indices = splitpoints.SplitIndices_f32i32(
        sample_data_step.IndicesBufferId)
    split_selector = splitpoints.WaitForBestSplitSelector_f32i32(
        [split_buffers], should_split_criteria, finalizer, split_indices)

    matrix_feature_prediction = matrix_features.LinearFloat32MatrixFeature_f32i32(
        sample_data_step.IndicesBufferId, buffers.X_FLOAT_DATA)
    estimator_params_updater = classification.ClassEstimatorUpdater_f32i32(
        sample_data_step.WeightsBufferId, buffers.CLASS_LABELS,
        number_of_classes)
    forest_learner = learn.OnlineForestMatrixClassLearner_f32i32(
        try_split_criteria, tree_steps_pipeline, init_node_steps_pipeline,
        update_stats_node_steps_pipeline, update_impurity_node_steps_pipeline,
        impurity_update_period, split_selector, max_frontier_size,
        number_of_trees, 5, 5, number_of_classes,
        sample_data_step.IndicesBufferId, sample_data_step.WeightsBufferId,
        matrix_feature_prediction, estimator_params_updater)
    return forest_learner
コード例 #3
0
def create_scaled_depth_delta_learner_32f(**kwargs):
    ux = float(kwargs.get('ux'))
    uy = float(kwargs.get('uy'))
    vx = float(kwargs.get('vx'))
    vy = float(kwargs.get('vy'))

    number_of_trees = int(kwargs.get('number_of_trees', 10))
    number_of_features = int(kwargs.get('number_of_features', 1))
    feature_ordering = int(
        kwargs.get('feature_ordering', pipeline.FEATURES_BY_DATAPOINTS))
    number_of_jobs = int(kwargs.get('number_of_jobs', 1))
    number_of_classes = int(kwargs['classes'].GetMax() + 1)

    try_split_criteria = create_try_split_criteria(**kwargs)

    if 'bootstrap' in kwargs and kwargs.get('bootstrap'):
        sample_data_step = pipeline.BootstrapSamplesStep_i32f32i32(
            buffers.PIXEL_INDICES)
    else:
        sample_data_step = pipeline.AllSamplesStep_i32f32i32(
            buffers.PIXEL_INDICES)

    number_of_features_buffer = buffers.as_vector_buffer(
        np.array([number_of_features], dtype=np.int32))
    set_number_features_step = pipeline.SetInt32VectorBufferStep(
        number_of_features_buffer, pipeline.WHEN_NEW)
    tree_steps_pipeline = pipeline.Pipeline(
        [sample_data_step, set_number_features_step])

    feature_params_step = image_features.PixelPairGaussianOffsetsStep_f32i32(
        set_number_features_step.OutputBufferId, ux, uy, vx, vy)
    depth_delta_feature = image_features.ScaledDepthDeltaFeature_f32i32(
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        sample_data_step.IndicesBufferId, buffers.PIXEL_INDICES,
        buffers.DEPTH_IMAGES, buffers.OFFSET_SCALES)
    depth_delta_feature_extractor_step = image_features.ScaledDepthDeltaFeatureExtractorStep_f32i32(
        depth_delta_feature, feature_ordering)
    slice_classes_step = pipeline.SliceInt32VectorBufferStep_i32(
        buffers.CLASS_LABELS, sample_data_step.IndicesBufferId)
    slice_weights_step = pipeline.SliceFloat32VectorBufferStep_i32(
        sample_data_step.WeightsBufferId, sample_data_step.IndicesBufferId)
    class_infogain_walker = classification.ClassInfoGainWalker_f32i32(
        slice_weights_step.SlicedBufferId, slice_classes_step.SlicedBufferId,
        number_of_classes)
    best_splitpint_step = classification.ClassInfoGainBestSplitpointsWalkingSortedStep_f32i32(
        class_infogain_walker,
        depth_delta_feature_extractor_step.FeatureValuesBufferId,
        feature_ordering)
    node_steps_pipeline = pipeline.Pipeline([
        feature_params_step, depth_delta_feature_extractor_step,
        slice_classes_step, slice_weights_step, best_splitpint_step
    ])

    split_buffers = splitpoints.SplitSelectorBuffers(
        best_splitpint_step.ImpurityBufferId,
        best_splitpint_step.SplitpointBufferId,
        best_splitpint_step.SplitpointCountsBufferId,
        best_splitpint_step.ChildCountsBufferId,
        best_splitpint_step.LeftYsBufferId,
        best_splitpint_step.RightYsBufferId,
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        depth_delta_feature_extractor_step.FeatureValuesBufferId,
        feature_ordering, depth_delta_feature_extractor_step)
    should_split_criteria = create_should_split_criteria(**kwargs)
    finalizer = classification.ClassEstimatorFinalizer_f32()
    split_indices = splitpoints.SplitIndices_f32i32(
        sample_data_step.IndicesBufferId)
    split_selector = splitpoints.SplitSelector_f32i32([split_buffers],
                                                      should_split_criteria,
                                                      finalizer, split_indices)

    tree_learner = learn.DepthFirstTreeLearner_f32i32(try_split_criteria,
                                                      tree_steps_pipeline,
                                                      node_steps_pipeline,
                                                      split_selector)
    forest_learner = learn.ParallelForestLearner(tree_learner, number_of_trees,
                                                 number_of_classes,
                                                 number_of_jobs)
    return forest_learner
コード例 #4
0
def create_class_pair_difference_matrix_walking_learner_32f(**kwargs):
    number_of_trees = int(kwargs.get('number_of_trees', 10))
    number_of_features = int(
        kwargs.get('number_of_features', np.sqrt(kwargs['x'].shape[1])))
    feature_ordering = int(
        kwargs.get('feature_ordering', pipeline.FEATURES_BY_DATAPOINTS))
    number_of_jobs = int(kwargs.get('number_of_jobs', 1))
    number_of_classes = int(np.max(kwargs['classes']) + 1)

    try_split_criteria = create_try_split_criteria(**kwargs)

    if 'bootstrap' in kwargs and kwargs.get('bootstrap'):
        sample_data_step = pipeline.BootstrapSamplesStep_f32f32i32(
            buffers.X_FLOAT_DATA)
    else:
        sample_data_step = pipeline.AllSamplesStep_f32f32i32(
            buffers.X_FLOAT_DATA)

    number_of_features_buffer = buffers.as_vector_buffer(
        np.array([number_of_features], dtype=np.int32))
    set_number_features_step = pipeline.SetInt32VectorBufferStep(
        number_of_features_buffer, pipeline.WHEN_NEW)
    tree_steps_pipeline = pipeline.Pipeline(
        [sample_data_step, set_number_features_step])

    feature_params_step = matrix_features.ClassPairDifferenceParamsStep_f32i32(
        set_number_features_step.OutputBufferId, buffers.X_FLOAT_DATA,
        buffers.CLASS_LABELS, sample_data_step.IndicesBufferId)

    matrix_feature = matrix_features.LinearFloat32MatrixFeature_f32i32(
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        sample_data_step.IndicesBufferId, buffers.X_FLOAT_DATA)

    matrix_feature_extractor_step = matrix_features.LinearFloat32MatrixFeatureExtractorStep_f32i32(
        matrix_feature, feature_ordering)
    slice_classes_step = pipeline.SliceInt32VectorBufferStep_i32(
        buffers.CLASS_LABELS, sample_data_step.IndicesBufferId)
    slice_weights_step = pipeline.SliceFloat32VectorBufferStep_i32(
        sample_data_step.WeightsBufferId, sample_data_step.IndicesBufferId)
    class_infogain_walker = classification.ClassInfoGainWalker_f32i32(
        slice_weights_step.SlicedBufferId, slice_classes_step.SlicedBufferId,
        number_of_classes)
    best_splitpint_step = classification.ClassInfoGainBestSplitpointsWalkingSortedStep_f32i32(
        class_infogain_walker,
        matrix_feature_extractor_step.FeatureValuesBufferId, feature_ordering)
    node_steps_pipeline = pipeline.Pipeline([
        feature_params_step, matrix_feature_extractor_step, slice_classes_step,
        slice_weights_step, best_splitpint_step
    ])

    split_buffers = splitpoints.SplitSelectorBuffers(
        best_splitpint_step.ImpurityBufferId,
        best_splitpint_step.SplitpointBufferId,
        best_splitpint_step.SplitpointCountsBufferId,
        best_splitpint_step.ChildCountsBufferId,
        best_splitpint_step.LeftYsBufferId,
        best_splitpint_step.RightYsBufferId,
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        matrix_feature_extractor_step.FeatureValuesBufferId, feature_ordering,
        matrix_feature_extractor_step)
    should_split_criteria = create_should_split_criteria(**kwargs)
    finalizer = classification.ClassEstimatorFinalizer_f32()
    split_indices = splitpoints.SplitIndices_f32i32(
        sample_data_step.IndicesBufferId)
    split_selector = splitpoints.SplitSelector_f32i32([split_buffers],
                                                      should_split_criteria,
                                                      finalizer, split_indices)

    if 'tree_order' in kwargs and kwargs.get('tree_order') == 'breadth_first':
        tree_learner = learn.BreadthFirstTreeLearner_f32i32(
            try_split_criteria, tree_steps_pipeline, node_steps_pipeline,
            split_selector)
    else:
        tree_learner = learn.DepthFirstTreeLearner_f32i32(
            try_split_criteria, tree_steps_pipeline, node_steps_pipeline,
            split_selector)

    forest_learner = learn.ParallelForestLearner(tree_learner, number_of_trees,
                                                 number_of_classes,
                                                 number_of_jobs)
    return forest_learner
コード例 #5
0
def create_consistent_two_stream_regression_scaled_depth_delta_learner_32f(
        **kwargs):
    ux = float(kwargs.get('ux'))
    uy = float(kwargs.get('uy'))
    vx = float(kwargs.get('vx'))
    vy = float(kwargs.get('vy'))

    number_of_trees = int(kwargs.get('number_of_trees', 10))
    number_of_leaves = int(
        kwargs.get('number_of_leaves', kwargs['y'].GetM() / 5 + 1))
    number_of_features = int(kwargs.get('number_of_features', 1))
    feature_ordering = int(
        kwargs.get('feature_ordering', pipeline.FEATURES_BY_DATAPOINTS))
    number_of_jobs = int(kwargs.get('number_of_jobs', 1))
    dimension_of_y = int(kwargs['y'].GetN())
    probability_of_impurity_stream = float(
        kwargs.get('probability_of_impurity_stream', 0.5))
    in_bounds_number_of_points = int(
        kwargs.get('in_bounds_number_of_points', kwargs['y'].GetM() / 2))

    try_split_criteria = create_try_split_criteria(**kwargs)

    if 'bootstrap' in kwargs and kwargs.get('bootstrap'):
        sample_data_step = pipeline.BootstrapSamplesStep_i32f32i32(
            buffers.PIXEL_INDICES)
    else:
        sample_data_step = pipeline.AllSamplesStep_i32f32i32(
            buffers.PIXEL_INDICES)

    assign_stream_step = splitpoints.AssignStreamStep_f32i32(
        sample_data_step.WeightsBufferId, probability_of_impurity_stream)
    tree_steps_pipeline = pipeline.Pipeline(
        [sample_data_step, assign_stream_step])

    # if 'poisson_number_of_features' in kwargs and kwargs.get('poisson_number_of_features'):
    #     set_number_features_step = pipeline.PoissonStep_f32i32(number_of_features, 1)
    # else:
    #     number_of_features_buffer = buffers.as_vector_buffer(np.array([number_of_features], dtype=np.int32))
    #     set_number_features_step = pipeline.SetInt32VectorBufferStep(number_of_features_buffer, pipeline.WHEN_NEW)
    set_number_features_step = pipeline.PoissonStep_f32i32(
        number_of_features, 1)
    feature_params_step = image_features.PixelPairGaussianOffsetsStep_f32i32(
        set_number_features_step.OutputBufferId, ux, uy, vx, vy)
    depth_delta_feature = image_features.ScaledDepthDeltaFeature_f32i32(
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        sample_data_step.IndicesBufferId, buffers.PIXEL_INDICES,
        buffers.DEPTH_IMAGES, buffers.OFFSET_SCALES)
    depth_delta_feature_extractor_step = image_features.ScaledDepthDeltaFeatureExtractorStep_f32i32(
        depth_delta_feature, feature_ordering)
    slice_ys_step = pipeline.SliceFloat32MatrixBufferStep_i32(
        buffers.YS, sample_data_step.IndicesBufferId)
    slice_weights_step = pipeline.SliceFloat32VectorBufferStep_i32(
        sample_data_step.WeightsBufferId, sample_data_step.IndicesBufferId)
    slice_stream_step = pipeline.SliceInt32VectorBufferStep_i32(
        assign_stream_step.StreamTypeBufferId,
        sample_data_step.IndicesBufferId)

    impurity_walker = regression.SumOfVarianceTwoStreamWalker_f32i32(
        slice_weights_step.SlicedBufferId, slice_stream_step.SlicedBufferId,
        slice_ys_step.SlicedBufferId, dimension_of_y)

    best_splitpint_step = regression.SumOfVarianceTwoStreamBestSplitpointsWalkingSortedStep_f32i32(
        impurity_walker, slice_stream_step.SlicedBufferId,
        depth_delta_feature_extractor_step.FeatureValuesBufferId,
        feature_ordering, in_bounds_number_of_points)

    node_steps_pipeline = pipeline.Pipeline([
        set_number_features_step, feature_params_step,
        depth_delta_feature_extractor_step, slice_ys_step, slice_weights_step,
        slice_stream_step, best_splitpint_step
    ])

    split_buffers = splitpoints.SplitSelectorBuffers(
        best_splitpint_step.ImpurityBufferId,
        best_splitpint_step.SplitpointBufferId,
        best_splitpint_step.SplitpointCountsBufferId,
        best_splitpint_step.ChildCountsEstimationBufferId,
        best_splitpint_step.LeftEstimationYsBufferId,
        best_splitpint_step.RightEstimationYsBufferId,
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        depth_delta_feature_extractor_step.FeatureValuesBufferId,
        feature_ordering, depth_delta_feature_extractor_step)
    should_split_criteria = create_should_split_criteria(**kwargs)
    finalizer = regression.MeanVarianceEstimatorFinalizer_f32()
    split_indices = splitpoints.SplitIndices_f32i32(
        sample_data_step.IndicesBufferId)
    split_selector = splitpoints.SplitSelector_f32i32([split_buffers],
                                                      should_split_criteria,
                                                      finalizer, split_indices)

    tree_learner = learn.BreadthFirstTreeLearner_f32i32(
        try_split_criteria, tree_steps_pipeline, node_steps_pipeline,
        split_selector, number_of_leaves)
    forest_learner = learn.ParallelForestLearner(tree_learner, number_of_trees,
                                                 dimension_of_y,
                                                 number_of_jobs)
    return forest_learner
コード例 #6
0
def create_biau2012_regression_scaled_depth_delta_learner_32f(**kwargs):
    ux = float(kwargs.get('ux'))
    uy = float(kwargs.get('uy'))
    vx = float(kwargs.get('vx'))
    vy = float(kwargs.get('vy'))

    number_of_trees = int(kwargs.get('number_of_trees', 10))
    number_of_leaves = int(
        kwargs.get('number_of_leaves', kwargs['y'].GetM() / 5 + 1))
    number_of_features = int(kwargs.get('number_of_features', 1))
    feature_ordering = int(
        kwargs.get('feature_ordering', pipeline.FEATURES_BY_DATAPOINTS))
    number_of_jobs = int(kwargs.get('number_of_jobs', 1))
    dimension_of_y = int(kwargs['y'].GetN())
    probability_of_impurity_stream = float(
        kwargs.get('probability_of_impurity_stream', 0.5))

    try_split_criteria = create_try_split_criteria(**kwargs)

    sample_data_step = pipeline.AllSamplesStep_i32f32i32(buffers.PIXEL_INDICES)

    number_of_features_buffer = buffers.as_vector_buffer(
        np.array([number_of_features], dtype=np.int32))
    set_number_features_step = pipeline.SetInt32VectorBufferStep(
        number_of_features_buffer, pipeline.WHEN_NEW)
    feature_range_buffer = buffers.as_vector_buffer(
        np.array([-6, 6], dtype=np.float32))
    set_feature_range_buffer_step = pipeline.SetFloat32VectorBufferStep(
        feature_range_buffer, pipeline.WHEN_NEW)

    assign_stream_step = splitpoints.AssignStreamStep_f32i32(
        sample_data_step.WeightsBufferId, probability_of_impurity_stream,
        False)
    forest_steps_pipeline = pipeline.Pipeline([
        sample_data_step, set_number_features_step,
        set_feature_range_buffer_step, assign_stream_step
    ])
    tree_steps_pipeline = pipeline.Pipeline([])

    feature_params_step = image_features.PixelPairGaussianOffsetsStep_f32i32(
        set_number_features_step.OutputBufferId, ux, uy, vx, vy)
    depth_delta_feature = image_features.ScaledDepthDeltaFeature_f32i32(
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        sample_data_step.IndicesBufferId, buffers.PIXEL_INDICES,
        buffers.DEPTH_IMAGES, buffers.OFFSET_SCALES)
    depth_delta_feature_extractor_step = image_features.ScaledDepthDeltaFeatureExtractorStep_f32i32(
        depth_delta_feature, feature_ordering)
    slice_ys_step = pipeline.SliceFloat32MatrixBufferStep_i32(
        buffers.YS, sample_data_step.IndicesBufferId)
    slice_weights_step = pipeline.SliceFloat32VectorBufferStep_i32(
        sample_data_step.WeightsBufferId, sample_data_step.IndicesBufferId)
    slice_assign_stream_step = pipeline.SliceInt32VectorBufferStep_i32(
        assign_stream_step.StreamTypeBufferId,
        sample_data_step.IndicesBufferId)

    quantized_feature_equal = pipeline.FeatureEqualQuantized_f32i32(1.0)

    midpoint_step = splitpoints.RangeMidpointStep_f32i32(
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        set_feature_range_buffer_step.OutputBufferId, quantized_feature_equal)

    mean_variance_stats_updater = regression.MeanVarianceStatsUpdater_f32i32(
        slice_weights_step.SlicedBufferId, slice_ys_step.SlicedBufferId,
        dimension_of_y)

    two_stream_split_stats_step = regression.SumOfVarianceTwoStreamStep_f32i32(
        midpoint_step.SplitpointsBufferId,
        midpoint_step.SplitpointsCountsBufferId,
        slice_assign_stream_step.SlicedBufferId,
        depth_delta_feature_extractor_step.FeatureValuesBufferId,
        feature_ordering, mean_variance_stats_updater)

    impurity_step = regression.SumOfVarianceSplitpointsImpurity_f32i32(
        midpoint_step.SplitpointsCountsBufferId,
        two_stream_split_stats_step.ChildCountsImpurityBufferId,
        two_stream_split_stats_step.LeftImpurityStatsBufferId,
        two_stream_split_stats_step.RightImpurityStatsBufferId)

    node_steps_pipeline = pipeline.Pipeline([
        feature_params_step, depth_delta_feature_extractor_step, slice_ys_step,
        slice_weights_step, slice_assign_stream_step, midpoint_step,
        two_stream_split_stats_step, impurity_step
    ])

    split_buffers = splitpoints.SplitSelectorBuffers(
        impurity_step.ImpurityBufferId, midpoint_step.SplitpointsBufferId,
        midpoint_step.SplitpointsCountsBufferId,
        two_stream_split_stats_step.ChildCountsEstimatorBufferId,
        two_stream_split_stats_step.LeftEstimatorStatsBufferId,
        two_stream_split_stats_step.RightEstimatorStatsBufferId,
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        depth_delta_feature_extractor_step.FeatureValuesBufferId,
        feature_ordering, depth_delta_feature_extractor_step)
    should_split_criteria = no_split_criteria(**kwargs)
    finalizer = regression.MeanVarianceEstimatorFinalizer_f32()
    split_indices = splitpoints.SplitIndices_f32i32(
        sample_data_step.IndicesBufferId)
    split_midpoint_ranges = splitpoints.SplitBuffersFeatureRange_f32i32(
        midpoint_step.PastFloatParamsBufferId,
        midpoint_step.PastIntParamsBufferId, midpoint_step.PastRangesBufferId,
        set_feature_range_buffer_step.OutputBufferId, quantized_feature_equal)
    split_steps = splitpoints.SplitBuffersList(
        [split_indices, split_midpoint_ranges])

    split_selector = splitpoints.SplitSelector_f32i32([split_buffers],
                                                      should_split_criteria,
                                                      finalizer, split_steps)

    tree_learner = learn.BreadthFirstTreeLearner_f32i32(
        try_split_criteria, tree_steps_pipeline, node_steps_pipeline,
        split_selector, number_of_leaves)
    forest_learner = learn.ParallelForestLearner(tree_learner,
                                                 forest_steps_pipeline,
                                                 number_of_trees,
                                                 dimension_of_y,
                                                 number_of_jobs)
    return forest_learner
コード例 #7
0
def create_biau2008_regression_scaled_depth_delta_learner_32f(**kwargs):
    ux = float(kwargs.get('ux'))
    uy = float(kwargs.get('uy'))
    vx = float(kwargs.get('vx'))
    vy = float(kwargs.get('vy'))

    number_of_trees = int(kwargs.get('number_of_trees', 10))
    number_of_leaves = int(
        kwargs.get('number_of_leaves', kwargs['y'].GetM() / 5 + 1))
    number_of_split_retries = int(kwargs.get('number_of_split_retries', 10))
    number_of_features = 1
    feature_ordering = int(
        kwargs.get('feature_ordering', pipeline.FEATURES_BY_DATAPOINTS))
    number_of_jobs = int(kwargs.get('number_of_jobs', 1))
    dimension_of_y = int(kwargs['y'].GetN())

    try_split_criteria = try_split.MinNodeSizeCriteria(2)

    sample_data_step = pipeline.AllSamplesStep_i32f32i32(buffers.PIXEL_INDICES)

    number_of_features_buffer = buffers.as_vector_buffer(
        np.array([number_of_features], dtype=np.int32))
    set_number_features_step = pipeline.SetInt32VectorBufferStep(
        number_of_features_buffer, pipeline.WHEN_NEW)
    tree_steps_pipeline = pipeline.Pipeline(
        [sample_data_step, set_number_features_step])

    feature_params_step = image_features.PixelPairGaussianOffsetsStep_f32i32(
        set_number_features_step.OutputBufferId, ux, uy, vx, vy)
    depth_delta_feature = image_features.ScaledDepthDeltaFeature_f32i32(
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        sample_data_step.IndicesBufferId, buffers.PIXEL_INDICES,
        buffers.DEPTH_IMAGES, buffers.OFFSET_SCALES)
    depth_delta_feature_extractor_step = image_features.ScaledDepthDeltaFeatureExtractorStep_f32i32(
        depth_delta_feature, feature_ordering)
    slice_ys_step = pipeline.SliceFloat32MatrixBufferStep_i32(
        buffers.YS, sample_data_step.IndicesBufferId)
    slice_weights_step = pipeline.SliceFloat32VectorBufferStep_i32(
        sample_data_step.WeightsBufferId, sample_data_step.IndicesBufferId)

    impurity_walker = regression.SumOfVarianceWalker_f32i32(
        slice_weights_step.SlicedBufferId, slice_ys_step.SlicedBufferId,
        dimension_of_y)

    best_splitpint_step = regression.SumOfVarianceRandomGapSplitpointsStep_f32i32(
        impurity_walker,
        depth_delta_feature_extractor_step.FeatureValuesBufferId,
        feature_ordering)

    node_steps_pipeline = pipeline.Pipeline([
        feature_params_step, depth_delta_feature_extractor_step, slice_ys_step,
        slice_weights_step, best_splitpint_step
    ])

    split_buffers = splitpoints.SplitSelectorBuffers(
        best_splitpint_step.ImpurityBufferId,
        best_splitpint_step.SplitpointBufferId,
        best_splitpint_step.SplitpointCountsBufferId,
        best_splitpint_step.ChildCountsBufferId,
        best_splitpint_step.LeftYsBufferId,
        best_splitpint_step.RightYsBufferId,
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        depth_delta_feature_extractor_step.FeatureValuesBufferId,
        feature_ordering, depth_delta_feature_extractor_step)
    should_split_criteria = min_child_size_criteria = should_split.MinChildSizeCriteria(
        1)
    finalizer = regression.MeanVarianceEstimatorFinalizer_f32()
    split_indices = splitpoints.SplitIndices_f32i32(
        sample_data_step.IndicesBufferId)
    split_selector = splitpoints.SplitSelector_f32i32([split_buffers],
                                                      should_split_criteria,
                                                      finalizer, split_indices)

    tree_learner = learn.Biau2008TreeLearner_f32i32(
        try_split_criteria, tree_steps_pipeline, node_steps_pipeline,
        split_selector, number_of_leaves, number_of_split_retries)
    forest_learner = learn.ParallelForestLearner(tree_learner, number_of_trees,
                                                 dimension_of_y,
                                                 number_of_jobs)
    return forest_learner
コード例 #8
0
def create_regression_axis_aligned_matrix_learner_32f(**kwargs):
    number_of_trees = int(kwargs.get('number_of_trees', 10))
    number_of_leaves = int(
        kwargs.get('number_of_leaves', kwargs['y'].shape[0] / 5 + 1))
    number_of_features = int(
        kwargs.get('number_of_features', (kwargs['x'].shape[1]) / 3 + 0.5))
    # number_of_features = int( kwargs.get('number_of_features', np.sqrt(kwargs['x'].shape[1])))
    feature_ordering = int(
        kwargs.get('feature_ordering', pipeline.FEATURES_BY_DATAPOINTS))
    number_of_jobs = int(kwargs.get('number_of_jobs', 1))
    dimension_of_y = int(kwargs['y'].shape[1])

    try_split_criteria = create_try_split_criteria(**kwargs)

    if 'bootstrap' in kwargs and kwargs.get('bootstrap'):
        sample_data_step = pipeline.BootstrapSamplesStep_f32f32i32(
            buffers.X_FLOAT_DATA)
    else:
        sample_data_step = pipeline.AllSamplesStep_f32f32i32(
            buffers.X_FLOAT_DATA)

    number_of_features_buffer = buffers.as_vector_buffer(
        np.array([number_of_features], dtype=np.int32))
    set_number_features_step = pipeline.SetInt32VectorBufferStep(
        number_of_features_buffer, pipeline.WHEN_NEW)
    tree_steps_pipeline = pipeline.Pipeline(
        [sample_data_step, set_number_features_step])

    feature_params_step = matrix_features.AxisAlignedParamsStep_f32i32(
        set_number_features_step.OutputBufferId, buffers.X_FLOAT_DATA)
    matrix_feature = matrix_features.LinearFloat32MatrixFeature_f32i32(
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        sample_data_step.IndicesBufferId, buffers.X_FLOAT_DATA)
    matrix_feature_extractor_step = matrix_features.LinearFloat32MatrixFeatureExtractorStep_f32i32(
        matrix_feature, feature_ordering)
    slice_ys_step = pipeline.SliceFloat32MatrixBufferStep_i32(
        buffers.YS, sample_data_step.IndicesBufferId)
    slice_weights_step = pipeline.SliceFloat32VectorBufferStep_i32(
        sample_data_step.WeightsBufferId, sample_data_step.IndicesBufferId)
    impurity_walker = regression.SumOfVarianceWalker_f32i32(
        slice_weights_step.SlicedBufferId, slice_ys_step.SlicedBufferId,
        dimension_of_y)
    best_splitpint_step = regression.SumOfVarianceBestSplitpointsWalkingSortedStep_f32i32(
        impurity_walker, matrix_feature_extractor_step.FeatureValuesBufferId,
        feature_ordering)
    node_steps_pipeline = pipeline.Pipeline([
        feature_params_step, matrix_feature_extractor_step, slice_ys_step,
        slice_weights_step, best_splitpint_step
    ])

    split_buffers = splitpoints.SplitSelectorBuffers(
        best_splitpint_step.ImpurityBufferId,
        best_splitpint_step.SplitpointBufferId,
        best_splitpint_step.SplitpointCountsBufferId,
        best_splitpint_step.ChildCountsBufferId,
        best_splitpint_step.LeftYsBufferId,
        best_splitpint_step.RightYsBufferId,
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        matrix_feature_extractor_step.FeatureValuesBufferId, feature_ordering,
        matrix_feature_extractor_step)
    should_split_criteria = create_should_split_criteria(**kwargs)
    finalizer = regression.MeanVarianceEstimatorFinalizer_f32()
    split_indices = splitpoints.SplitIndices_f32i32(
        sample_data_step.IndicesBufferId)
    split_selector = splitpoints.SplitSelector_f32i32([split_buffers],
                                                      should_split_criteria,
                                                      finalizer, split_indices)

    tree_learner = learn.BreadthFirstTreeLearner_f32i32(
        try_split_criteria, tree_steps_pipeline, node_steps_pipeline,
        split_selector, number_of_leaves)
    forest_learner = learn.ParallelForestLearner(tree_learner, number_of_trees,
                                                 dimension_of_y,
                                                 number_of_jobs)
    return forest_learner
コード例 #9
0
def create_biau2012_regression_axis_aligned_matrix_learner_32f(**kwargs):
    number_of_trees = int(kwargs.get('number_of_trees', 10))
    number_of_leaves = int(
        kwargs.get('number_of_leaves', kwargs['y'].shape[0] / 5 + 1))
    number_of_features = int(
        kwargs.get('number_of_features', (kwargs['x'].shape[1]) / 3 + 0.5))
    # number_of_features = int( kwargs.get('number_of_features', np.sqrt(kwargs['x'].shape[1])))
    feature_ordering = int(
        kwargs.get('feature_ordering', pipeline.FEATURES_BY_DATAPOINTS))
    number_of_jobs = int(kwargs.get('number_of_jobs', 1))
    dimension_of_y = int(kwargs['y'].shape[1])
    probability_of_impurity_stream = float(
        kwargs.get('probability_of_impurity_stream', 0.5))

    try_split_criteria = create_try_split_criteria(**kwargs)

    sample_data_step = pipeline.AllSamplesStep_f32f32i32(buffers.X_FLOAT_DATA)

    number_of_features_buffer = buffers.as_vector_buffer(
        np.array([number_of_features], dtype=np.int32))
    set_number_features_step = pipeline.SetInt32VectorBufferStep(
        number_of_features_buffer, pipeline.WHEN_NEW)
    assert (
        np.max(np.abs(kwargs['x'])) <= 1.00
    )  # double check that the data has been scaled into a -1,1 hypercube
    feature_range_buffer = buffers.as_vector_buffer(
        np.array([-1, 1], dtype=np.float32))
    set_feature_range_buffer_step = pipeline.SetFloat32VectorBufferStep(
        feature_range_buffer, pipeline.WHEN_NEW)

    assign_stream_step = splitpoints.AssignStreamStep_f32i32(
        sample_data_step.WeightsBufferId, probability_of_impurity_stream,
        False)
    forest_steps_pipeline = pipeline.Pipeline([
        sample_data_step, set_number_features_step,
        set_feature_range_buffer_step, assign_stream_step
    ])
    tree_steps_pipeline = pipeline.Pipeline([])

    feature_params_step = matrix_features.AxisAlignedParamsStep_f32i32(
        set_number_features_step.OutputBufferId, buffers.X_FLOAT_DATA)
    matrix_feature = matrix_features.LinearFloat32MatrixFeature_f32i32(
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        sample_data_step.IndicesBufferId, buffers.X_FLOAT_DATA)
    matrix_feature_extractor_step = matrix_features.LinearFloat32MatrixFeatureExtractorStep_f32i32(
        matrix_feature, feature_ordering)
    slice_ys_step = pipeline.SliceFloat32MatrixBufferStep_i32(
        buffers.YS, sample_data_step.IndicesBufferId)
    slice_weights_step = pipeline.SliceFloat32VectorBufferStep_i32(
        sample_data_step.WeightsBufferId, sample_data_step.IndicesBufferId)
    slice_assign_stream_step = pipeline.SliceInt32VectorBufferStep_i32(
        assign_stream_step.StreamTypeBufferId,
        sample_data_step.IndicesBufferId)

    quantized_feature_equal = pipeline.FeatureEqualQuantized_f32i32(1.0)

    midpoint_step = splitpoints.RangeMidpointStep_f32i32(
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        set_feature_range_buffer_step.OutputBufferId, quantized_feature_equal)

    mean_variance_stats_updater = regression.MeanVarianceStatsUpdater_f32i32(
        slice_weights_step.SlicedBufferId, slice_ys_step.SlicedBufferId,
        dimension_of_y)

    two_stream_split_stats_step = regression.SumOfVarianceTwoStreamStep_f32i32(
        midpoint_step.SplitpointsBufferId,
        midpoint_step.SplitpointsCountsBufferId,
        slice_assign_stream_step.SlicedBufferId,
        matrix_feature_extractor_step.FeatureValuesBufferId, feature_ordering,
        mean_variance_stats_updater)

    impurity_step = regression.SumOfVarianceSplitpointsImpurity_f32i32(
        midpoint_step.SplitpointsCountsBufferId,
        two_stream_split_stats_step.ChildCountsImpurityBufferId,
        two_stream_split_stats_step.LeftImpurityStatsBufferId,
        two_stream_split_stats_step.RightImpurityStatsBufferId)

    node_steps_pipeline = pipeline.Pipeline([
        feature_params_step, matrix_feature_extractor_step, slice_ys_step,
        slice_weights_step, slice_assign_stream_step, midpoint_step,
        two_stream_split_stats_step, impurity_step
    ])

    split_buffers = splitpoints.SplitSelectorBuffers(
        impurity_step.ImpurityBufferId, midpoint_step.SplitpointsBufferId,
        midpoint_step.SplitpointsCountsBufferId,
        two_stream_split_stats_step.ChildCountsEstimatorBufferId,
        two_stream_split_stats_step.LeftEstimatorStatsBufferId,
        two_stream_split_stats_step.RightEstimatorStatsBufferId,
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        matrix_feature_extractor_step.FeatureValuesBufferId, feature_ordering,
        matrix_feature_extractor_step)
    should_split_criteria = no_split_criteria(**kwargs)
    finalizer = regression.MeanVarianceEstimatorFinalizer_f32()
    split_indices = splitpoints.SplitIndices_f32i32(
        sample_data_step.IndicesBufferId)
    split_midpoint_ranges = splitpoints.SplitBuffersFeatureRange_f32i32(
        midpoint_step.PastFloatParamsBufferId,
        midpoint_step.PastIntParamsBufferId, midpoint_step.PastRangesBufferId,
        set_feature_range_buffer_step.OutputBufferId, quantized_feature_equal)
    split_steps = splitpoints.SplitBuffersList(
        [split_indices, split_midpoint_ranges])

    split_selector = splitpoints.SplitSelector_f32i32([split_buffers],
                                                      should_split_criteria,
                                                      finalizer, split_steps)

    tree_learner = learn.BreadthFirstTreeLearner_f32i32(
        try_split_criteria, tree_steps_pipeline, node_steps_pipeline,
        split_selector, number_of_leaves)
    forest_learner = learn.ParallelForestLearner(tree_learner,
                                                 forest_steps_pipeline,
                                                 number_of_trees,
                                                 dimension_of_y,
                                                 number_of_jobs)
    return forest_learner