def create_online_axis_aligned_matrix_two_stream_consistent_learner_32f(
        **kwargs):
    """Build an online two-stream forest learner using axis-aligned features.

    Constructs the sampling, stream-assignment, feature-extraction,
    split-statistics, impurity and split-selection steps and passes them to
    ``learn.OnlineForestMatrixClassLearner_f32i32``.

    Keyword Args:
        classes: Class labels. ``np.max(classes) + 1`` is used as the number
            of classes. Required.
        x: Input matrix. Only read (``x.shape[1]``) when
            ``number_of_features`` is not supplied.
        number_of_trees: Defaults to 10.
        number_of_features: Defaults to ``int(np.sqrt(x.shape[1]))``.
        feature_ordering: Defaults to ``pipeline.FEATURES_BY_DATAPOINTS``.
        number_of_splitpoints: Defaults to 1.
        max_frontier_size: Defaults to 10000000.
        impurity_update_period: Defaults to 1.
        probability_of_impurity_stream: Defaults to 0.5.
        bootstrap: If truthy, samples come from
            ``pipeline.BootstrapSamplesStep_f32f32i32``.
        poisson_sample: If present (and ``bootstrap`` is not truthy), its
            float value is passed to ``pipeline.PoissonSamplesStep_f32i32``.

    All keyword arguments are also forwarded unchanged to
    ``create_try_split_criteria`` and
    ``create_should_split_consistent_criteria``, which may read further keys.

    Returns:
        The constructed ``learn.OnlineForestMatrixClassLearner_f32i32``.

    Raises:
        KeyError: If ``classes`` is missing, or if ``x`` is missing while
            ``number_of_features`` was not given.
    """
    number_of_trees = int(kwargs.get('number_of_trees', 10))
    # Resolve the default lazily: a dict.get() default is evaluated before
    # the lookup, which would demand 'x' even when the caller already chose
    # the feature count explicitly.
    if 'number_of_features' in kwargs:
        number_of_features = int(kwargs['number_of_features'])
    else:
        number_of_features = int(np.sqrt(kwargs['x'].shape[1]))
    feature_ordering = int(
        kwargs.get('feature_ordering', pipeline.FEATURES_BY_DATAPOINTS))
    number_of_splitpoints = int(kwargs.get('number_of_splitpoints', 1))
    number_of_classes = int(np.max(kwargs['classes']) + 1)
    max_frontier_size = int(kwargs.get('max_frontier_size', 10000000))
    impurity_update_period = int(kwargs.get('impurity_update_period', 1))
    probability_of_impurity_stream = float(
        kwargs.get('probability_of_impurity_stream', 0.5))

    try_split_criteria = create_try_split_criteria(**kwargs)

    # Sampling strategy: bootstrap takes precedence over Poisson sampling;
    # with neither option every sample is used.
    if kwargs.get('bootstrap'):
        sample_data_step = pipeline.BootstrapSamplesStep_f32f32i32(
            buffers.X_FLOAT_DATA)
    elif 'poisson_sample' in kwargs:
        poisson_sample_mean = float(kwargs['poisson_sample'])
        sample_data_step = pipeline.PoissonSamplesStep_f32i32(
            buffers.X_FLOAT_DATA, poisson_sample_mean)
    else:
        sample_data_step = pipeline.AllSamplesStep_f32f32i32(
            buffers.X_FLOAT_DATA)

    # Steps handed to the learner as its tree-level pipeline.
    assign_stream_step = splitpoints.AssignStreamStep_f32i32(
        sample_data_step.WeightsBufferId, probability_of_impurity_stream)
    tree_steps_pipeline = pipeline.Pipeline(
        [sample_data_step, assign_stream_step])

    # On init
    # NOTE(review): the meaning of the literal 1 passed to PoissonStep is not
    # visible in this file -- confirm against the step's constructor.
    set_number_features_step = pipeline.PoissonStep_f32i32(
        number_of_features, 1)
    feature_params_step = matrix_features.AxisAlignedParamsStep_f32i32(
        set_number_features_step.OutputBufferId, buffers.X_FLOAT_DATA)
    init_node_steps_pipeline = pipeline.Pipeline(
        [set_number_features_step, feature_params_step])

    # On update
    matrix_feature = matrix_features.LinearFloat32MatrixFeature_f32i32(
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        sample_data_step.IndicesBufferId, buffers.X_FLOAT_DATA)
    matrix_feature_extractor_step = matrix_features.LinearFloat32MatrixFeatureExtractorStep_f32i32(
        matrix_feature, feature_ordering)
    slice_classes_step = pipeline.SliceInt32VectorBufferStep_i32(
        buffers.CLASS_LABELS, sample_data_step.IndicesBufferId)
    slice_weights_step = pipeline.SliceFloat32VectorBufferStep_i32(
        sample_data_step.WeightsBufferId, sample_data_step.IndicesBufferId)
    slice_assign_stream_step = pipeline.SliceInt32VectorBufferStep_i32(
        assign_stream_step.StreamTypeBufferId,
        sample_data_step.IndicesBufferId)

    random_splitpoint_selection_step = splitpoints.RandomSplitpointsStep_f32i32(
        matrix_feature_extractor_step.FeatureValuesBufferId,
        number_of_splitpoints, feature_ordering,
        slice_assign_stream_step.SlicedBufferId)

    class_stats_updater = classification.ClassStatsUpdater_f32i32(
        slice_weights_step.SlicedBufferId, slice_classes_step.SlicedBufferId,
        number_of_classes)

    two_stream_split_stats_step = classification.ClassStatsUpdaterTwoStreamStep_f32i32(
        random_splitpoint_selection_step.SplitpointsBufferId,
        random_splitpoint_selection_step.SplitpointsCountsBufferId,
        slice_assign_stream_step.SlicedBufferId,
        matrix_feature_extractor_step.FeatureValuesBufferId, feature_ordering,
        class_stats_updater)

    update_stats_node_steps_pipeline = pipeline.Pipeline([
        matrix_feature_extractor_step, slice_classes_step, slice_weights_step,
        slice_assign_stream_step, random_splitpoint_selection_step,
        two_stream_split_stats_step
    ])

    # On impurity
    impurity_step = classification.ClassInfoGainSplitpointsImpurity_f32i32(
        random_splitpoint_selection_step.SplitpointsCountsBufferId,
        two_stream_split_stats_step.ChildCountsImpurityBufferId,
        two_stream_split_stats_step.LeftImpurityStatsBufferId,
        two_stream_split_stats_step.RightImpurityStatsBufferId)

    update_impurity_node_steps_pipeline = pipeline.Pipeline([impurity_step])

    # Split selection: the impurity step is fed the *Impurity* buffers of the
    # two-stream stats step, while the selector is fed its *Estimator*
    # buffers.
    split_buffers = splitpoints.SplitSelectorBuffers(
        impurity_step.ImpurityBufferId,
        random_splitpoint_selection_step.SplitpointsBufferId,
        random_splitpoint_selection_step.SplitpointsCountsBufferId,
        two_stream_split_stats_step.ChildCountsEstimatorBufferId,
        two_stream_split_stats_step.LeftEstimatorStatsBufferId,
        two_stream_split_stats_step.RightEstimatorStatsBufferId,
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        matrix_feature_extractor_step.FeatureValuesBufferId, feature_ordering,
        matrix_feature_extractor_step)
    should_split_criteria = create_should_split_consistent_criteria(**kwargs)
    finalizer = classification.ClassEstimatorFinalizer_f32()
    split_indices = splitpoints.SplitIndices_f32i32(
        sample_data_step.IndicesBufferId)
    split_selector = splitpoints.WaitForBestSplitSelector_f32i32(
        [split_buffers], should_split_criteria, finalizer, split_indices)

    # Objects the learner receives in addition to the pipelines above.
    matrix_feature_prediction = matrix_features.LinearFloat32MatrixFeature_f32i32(
        sample_data_step.IndicesBufferId, buffers.X_FLOAT_DATA)
    estimator_params_updater = classification.ClassEstimatorUpdater_f32i32(
        sample_data_step.WeightsBufferId, buffers.CLASS_LABELS,
        number_of_classes)
    # NOTE(review): the two literal 5s are positional arguments between
    # number_of_trees and number_of_classes; their meaning is defined by the
    # learner's constructor and is not visible here -- confirm before
    # changing them.
    forest_learner = learn.OnlineForestMatrixClassLearner_f32i32(
        try_split_criteria, tree_steps_pipeline, init_node_steps_pipeline,
        update_stats_node_steps_pipeline, update_impurity_node_steps_pipeline,
        impurity_update_period, split_selector, max_frontier_size,
        number_of_trees, 5, 5, number_of_classes,
        sample_data_step.IndicesBufferId, sample_data_step.WeightsBufferId,
        matrix_feature_prediction, estimator_params_updater)
    return forest_learner
def create_class_pair_difference_matrix_walking_learner_32f(**kwargs):
    """Build a forest learner using class-pair-difference features.

    Constructs the sampling, feature-parameter, feature-extraction and
    best-splitpoint steps, wraps them in a depth-first or breadth-first tree
    learner, and returns a ``learn.ParallelForestLearner`` around it.

    Keyword Args:
        classes: Class labels. ``np.max(classes) + 1`` is used as the number
            of classes. Required.
        x: Input matrix. Only read (``x.shape[1]``) when
            ``number_of_features`` is not supplied.
        number_of_trees: Defaults to 10.
        number_of_features: Defaults to ``int(np.sqrt(x.shape[1]))``.
        feature_ordering: Defaults to ``pipeline.FEATURES_BY_DATAPOINTS``.
        number_of_jobs: Defaults to 1.
        bootstrap: If truthy, samples come from
            ``pipeline.BootstrapSamplesStep_f32f32i32``; otherwise
            ``pipeline.AllSamplesStep_f32f32i32`` is used.
        tree_order: ``'breadth_first'`` selects
            ``learn.BreadthFirstTreeLearner_f32i32``; any other value (or
            none) selects ``learn.DepthFirstTreeLearner_f32i32``.

    All keyword arguments are also forwarded unchanged to
    ``create_try_split_criteria`` and ``create_should_split_criteria``, which
    may read further keys.

    Returns:
        The constructed ``learn.ParallelForestLearner``.

    Raises:
        KeyError: If ``classes`` is missing, or if ``x`` is missing while
            ``number_of_features`` was not given.
    """
    number_of_trees = int(kwargs.get('number_of_trees', 10))
    # Resolve the default lazily: a dict.get() default is evaluated before
    # the lookup, which would demand 'x' even when the caller already chose
    # the feature count explicitly.
    if 'number_of_features' in kwargs:
        number_of_features = int(kwargs['number_of_features'])
    else:
        number_of_features = int(np.sqrt(kwargs['x'].shape[1]))
    feature_ordering = int(
        kwargs.get('feature_ordering', pipeline.FEATURES_BY_DATAPOINTS))
    number_of_jobs = int(kwargs.get('number_of_jobs', 1))
    number_of_classes = int(np.max(kwargs['classes']) + 1)

    try_split_criteria = create_try_split_criteria(**kwargs)

    if kwargs.get('bootstrap'):
        sample_data_step = pipeline.BootstrapSamplesStep_f32f32i32(
            buffers.X_FLOAT_DATA)
    else:
        sample_data_step = pipeline.AllSamplesStep_f32f32i32(
            buffers.X_FLOAT_DATA)

    # Tree-level steps: sampling plus publishing the feature count in a
    # one-element int32 buffer.
    number_of_features_buffer = buffers.as_vector_buffer(
        np.array([number_of_features], dtype=np.int32))
    set_number_features_step = pipeline.SetInt32VectorBufferStep(
        number_of_features_buffer, pipeline.WHEN_NEW)
    tree_steps_pipeline = pipeline.Pipeline(
        [sample_data_step, set_number_features_step])

    # Node-level steps: feature parameters, feature values, sliced labels
    # and weights, then the best-splitpoint search.
    feature_params_step = matrix_features.ClassPairDifferenceParamsStep_f32i32(
        set_number_features_step.OutputBufferId, buffers.X_FLOAT_DATA,
        buffers.CLASS_LABELS, sample_data_step.IndicesBufferId)

    matrix_feature = matrix_features.LinearFloat32MatrixFeature_f32i32(
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        sample_data_step.IndicesBufferId, buffers.X_FLOAT_DATA)

    matrix_feature_extractor_step = matrix_features.LinearFloat32MatrixFeatureExtractorStep_f32i32(
        matrix_feature, feature_ordering)
    slice_classes_step = pipeline.SliceInt32VectorBufferStep_i32(
        buffers.CLASS_LABELS, sample_data_step.IndicesBufferId)
    slice_weights_step = pipeline.SliceFloat32VectorBufferStep_i32(
        sample_data_step.WeightsBufferId, sample_data_step.IndicesBufferId)
    class_infogain_walker = classification.ClassInfoGainWalker_f32i32(
        slice_weights_step.SlicedBufferId, slice_classes_step.SlicedBufferId,
        number_of_classes)
    best_splitpoint_step = classification.ClassInfoGainBestSplitpointsWalkingSortedStep_f32i32(
        class_infogain_walker,
        matrix_feature_extractor_step.FeatureValuesBufferId, feature_ordering)
    node_steps_pipeline = pipeline.Pipeline([
        feature_params_step, matrix_feature_extractor_step, slice_classes_step,
        slice_weights_step, best_splitpoint_step
    ])

    # Split selection reads its inputs from the buffers written by the
    # best-splitpoint and feature-parameter steps.
    split_buffers = splitpoints.SplitSelectorBuffers(
        best_splitpoint_step.ImpurityBufferId,
        best_splitpoint_step.SplitpointBufferId,
        best_splitpoint_step.SplitpointCountsBufferId,
        best_splitpoint_step.ChildCountsBufferId,
        best_splitpoint_step.LeftYsBufferId,
        best_splitpoint_step.RightYsBufferId,
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        matrix_feature_extractor_step.FeatureValuesBufferId, feature_ordering,
        matrix_feature_extractor_step)
    should_split_criteria = create_should_split_criteria(**kwargs)
    finalizer = classification.ClassEstimatorFinalizer_f32()
    split_indices = splitpoints.SplitIndices_f32i32(
        sample_data_step.IndicesBufferId)
    split_selector = splitpoints.SplitSelector_f32i32([split_buffers],
                                                      should_split_criteria,
                                                      finalizer, split_indices)

    # Both tree learners take the same arguments, so pick the class first
    # and construct it once.
    if kwargs.get('tree_order') == 'breadth_first':
        tree_learner_class = learn.BreadthFirstTreeLearner_f32i32
    else:
        tree_learner_class = learn.DepthFirstTreeLearner_f32i32
    tree_learner = tree_learner_class(
        try_split_criteria, tree_steps_pipeline, node_steps_pipeline,
        split_selector)

    forest_learner = learn.ParallelForestLearner(tree_learner, number_of_trees,
                                                 number_of_classes,
                                                 number_of_jobs)
    return forest_learner
def create_axis_aligned_matrix_two_stream_learner_32f(**kwargs):
    """Build a two-stream forest learner using axis-aligned features.

    Constructs the sampling, stream-assignment, feature-extraction, random
    splitpoint, two-stream statistics and impurity steps, wraps them in a
    depth-first or breadth-first tree learner, and returns a
    ``learn.ParallelForestLearner`` around it.

    Keyword Args:
        classes: Class labels. ``np.max(classes) + 1`` is used as the number
            of classes. Required.
        x: Input matrix. Only read (``x.shape[1]``) when
            ``number_of_features`` is not supplied.
        number_of_trees: Defaults to 10.
        number_of_features: Defaults to ``int(np.sqrt(x.shape[1]))``.
        feature_ordering: Defaults to ``pipeline.FEATURES_BY_DATAPOINTS``.
        number_of_splitpoints: Defaults to 1.
        number_of_jobs: Defaults to 1.
        probability_of_impurity_stream: Defaults to 0.5.
        bootstrap: If truthy, samples come from
            ``pipeline.BootstrapSamplesStep_f32f32i32``; otherwise
            ``pipeline.AllSamplesStep_f32f32i32`` is used.
        tree_order: ``'breadth_first'`` selects
            ``learn.BreadthFirstTreeLearner_f32i32``; any other value (or
            none) selects ``learn.DepthFirstTreeLearner_f32i32``.

    All keyword arguments are also forwarded unchanged to
    ``create_try_split_criteria`` and ``create_should_split_criteria``, which
    may read further keys.

    Returns:
        The constructed ``learn.ParallelForestLearner``.

    Raises:
        KeyError: If ``classes`` is missing, or if ``x`` is missing while
            ``number_of_features`` was not given.
    """
    number_of_trees = int(kwargs.get('number_of_trees', 10))
    # Resolve the default lazily: a dict.get() default is evaluated before
    # the lookup, which would demand 'x' even when the caller already chose
    # the feature count explicitly.
    if 'number_of_features' in kwargs:
        number_of_features = int(kwargs['number_of_features'])
    else:
        number_of_features = int(np.sqrt(kwargs['x'].shape[1]))
    feature_ordering = int(
        kwargs.get('feature_ordering', pipeline.FEATURES_BY_DATAPOINTS))
    number_of_splitpoints = int(kwargs.get('number_of_splitpoints', 1))
    number_of_jobs = int(kwargs.get('number_of_jobs', 1))
    number_of_classes = int(np.max(kwargs['classes']) + 1)
    probability_of_impurity_stream = float(
        kwargs.get('probability_of_impurity_stream', 0.5))

    try_split_criteria = create_try_split_criteria(**kwargs)

    if kwargs.get('bootstrap'):
        sample_data_step = pipeline.BootstrapSamplesStep_f32f32i32(
            buffers.X_FLOAT_DATA)
    else:
        sample_data_step = pipeline.AllSamplesStep_f32f32i32(
            buffers.X_FLOAT_DATA)

    # Tree-level steps: sampling, publishing the feature count in a
    # one-element int32 buffer, and assigning each sample to a stream.
    number_of_features_buffer = buffers.as_vector_buffer(
        np.array([number_of_features], dtype=np.int32))
    set_number_features_step = pipeline.SetInt32VectorBufferStep(
        number_of_features_buffer, pipeline.WHEN_NEW)
    assign_stream_step = splitpoints.AssignStreamStep_f32i32(
        sample_data_step.WeightsBufferId, probability_of_impurity_stream)
    tree_steps_pipeline = pipeline.Pipeline(
        [sample_data_step, set_number_features_step, assign_stream_step])

    # Node-level steps: feature parameters and values, then labels, weights
    # and stream assignments sliced by the sample indices.
    feature_params_step = matrix_features.AxisAlignedParamsStep_f32i32(
        set_number_features_step.OutputBufferId, buffers.X_FLOAT_DATA)
    matrix_feature = matrix_features.LinearFloat32MatrixFeature_f32i32(
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        sample_data_step.IndicesBufferId, buffers.X_FLOAT_DATA)
    matrix_feature_extractor_step = matrix_features.LinearFloat32MatrixFeatureExtractorStep_f32i32(
        matrix_feature, feature_ordering)
    slice_classes_step = pipeline.SliceInt32VectorBufferStep_i32(
        buffers.CLASS_LABELS, sample_data_step.IndicesBufferId)
    slice_weights_step = pipeline.SliceFloat32VectorBufferStep_i32(
        sample_data_step.WeightsBufferId, sample_data_step.IndicesBufferId)
    slice_assign_stream_step = pipeline.SliceInt32VectorBufferStep_i32(
        assign_stream_step.StreamTypeBufferId,
        sample_data_step.IndicesBufferId)

    random_splitpoint_selection_step = splitpoints.RandomSplitpointsStep_f32i32(
        matrix_feature_extractor_step.FeatureValuesBufferId,
        number_of_splitpoints, feature_ordering,
        slice_assign_stream_step.SlicedBufferId)

    class_stats_updater = classification.ClassStatsUpdater_f32i32(
        slice_weights_step.SlicedBufferId, slice_classes_step.SlicedBufferId,
        number_of_classes)
    two_stream_split_stats_step = classification.ClassStatsUpdaterTwoStreamStep_f32i32(
        random_splitpoint_selection_step.SplitpointsBufferId,
        random_splitpoint_selection_step.SplitpointsCountsBufferId,
        slice_assign_stream_step.SlicedBufferId,
        matrix_feature_extractor_step.FeatureValuesBufferId, feature_ordering,
        class_stats_updater)

    impurity_step = classification.ClassInfoGainSplitpointsImpurity_f32i32(
        random_splitpoint_selection_step.SplitpointsCountsBufferId,
        two_stream_split_stats_step.ChildCountsImpurityBufferId,
        two_stream_split_stats_step.LeftImpurityStatsBufferId,
        two_stream_split_stats_step.RightImpurityStatsBufferId)

    node_steps_pipeline = pipeline.Pipeline([
        feature_params_step, matrix_feature_extractor_step, slice_classes_step,
        slice_weights_step, slice_assign_stream_step,
        random_splitpoint_selection_step, two_stream_split_stats_step,
        impurity_step
    ])

    # Split selection: the impurity step is fed the *Impurity* buffers of the
    # two-stream stats step, while the selector is fed its *Estimator*
    # buffers.
    split_buffers = splitpoints.SplitSelectorBuffers(
        impurity_step.ImpurityBufferId,
        random_splitpoint_selection_step.SplitpointsBufferId,
        random_splitpoint_selection_step.SplitpointsCountsBufferId,
        two_stream_split_stats_step.ChildCountsEstimatorBufferId,
        two_stream_split_stats_step.LeftEstimatorStatsBufferId,
        two_stream_split_stats_step.RightEstimatorStatsBufferId,
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        matrix_feature_extractor_step.FeatureValuesBufferId, feature_ordering,
        matrix_feature_extractor_step)
    should_split_criteria = create_should_split_criteria(**kwargs)
    finalizer = classification.ClassEstimatorFinalizer_f32()
    split_indices = splitpoints.SplitIndices_f32i32(
        sample_data_step.IndicesBufferId)
    split_selector = splitpoints.SplitSelector_f32i32([split_buffers],
                                                      should_split_criteria,
                                                      finalizer, split_indices)

    # Both tree learners take the same arguments, so pick the class first
    # and construct it once.
    if kwargs.get('tree_order') == 'breadth_first':
        tree_learner_class = learn.BreadthFirstTreeLearner_f32i32
    else:
        tree_learner_class = learn.DepthFirstTreeLearner_f32i32
    tree_learner = tree_learner_class(
        try_split_criteria, tree_steps_pipeline, node_steps_pipeline,
        split_selector)
    forest_learner = learn.ParallelForestLearner(tree_learner, number_of_trees,
                                                 number_of_classes,
                                                 number_of_jobs)
    return forest_learner
# Example #4
def create_regression_axis_aligned_matrix_learner_32f(**kwargs):
    """Build a regression forest learner using axis-aligned features.

    Constructs the sampling, feature-extraction and sum-of-variance
    best-splitpoint steps, wraps them in a
    ``learn.BreadthFirstTreeLearner_f32i32`` that is also given
    ``number_of_leaves``, and returns a ``learn.ParallelForestLearner``
    around it.

    Keyword Args:
        y: Regression targets. Must expose ``shape[0]`` and ``shape[1]``;
            ``shape[1]`` is used as the target dimension. Required.
        x: Input matrix. Only read (``x.shape[1]``) when
            ``number_of_features`` is not supplied.
        number_of_trees: Defaults to 10.
        number_of_leaves: Defaults to ``int(y.shape[0] / 5 + 1)``.
        number_of_features: Defaults to ``int(x.shape[1] / 3 + 0.5)``.
        feature_ordering: Defaults to ``pipeline.FEATURES_BY_DATAPOINTS``.
        number_of_jobs: Defaults to 1.
        bootstrap: If truthy, samples come from
            ``pipeline.BootstrapSamplesStep_f32f32i32``; otherwise
            ``pipeline.AllSamplesStep_f32f32i32`` is used.

    All keyword arguments are also forwarded unchanged to
    ``create_try_split_criteria`` and ``create_should_split_criteria``, which
    may read further keys.

    Returns:
        The constructed ``learn.ParallelForestLearner``.

    Raises:
        KeyError: If ``y`` is missing, or if ``x`` is missing while
            ``number_of_features`` was not given.
    """
    number_of_trees = int(kwargs.get('number_of_trees', 10))
    number_of_leaves = int(
        kwargs.get('number_of_leaves', kwargs['y'].shape[0] / 5 + 1))
    # Resolve the default lazily: a dict.get() default is evaluated before
    # the lookup, which would demand 'x' even when the caller already chose
    # the feature count explicitly. The default here is derived from a third
    # of the input columns, unlike the sqrt-based default used by the
    # classification learners.
    if 'number_of_features' in kwargs:
        number_of_features = int(kwargs['number_of_features'])
    else:
        number_of_features = int((kwargs['x'].shape[1]) / 3 + 0.5)
    feature_ordering = int(
        kwargs.get('feature_ordering', pipeline.FEATURES_BY_DATAPOINTS))
    number_of_jobs = int(kwargs.get('number_of_jobs', 1))
    dimension_of_y = int(kwargs['y'].shape[1])

    try_split_criteria = create_try_split_criteria(**kwargs)

    if kwargs.get('bootstrap'):
        sample_data_step = pipeline.BootstrapSamplesStep_f32f32i32(
            buffers.X_FLOAT_DATA)
    else:
        sample_data_step = pipeline.AllSamplesStep_f32f32i32(
            buffers.X_FLOAT_DATA)

    # Tree-level steps: sampling plus publishing the feature count in a
    # one-element int32 buffer.
    number_of_features_buffer = buffers.as_vector_buffer(
        np.array([number_of_features], dtype=np.int32))
    set_number_features_step = pipeline.SetInt32VectorBufferStep(
        number_of_features_buffer, pipeline.WHEN_NEW)
    tree_steps_pipeline = pipeline.Pipeline(
        [sample_data_step, set_number_features_step])

    # Node-level steps: feature parameters and values, sliced targets and
    # weights, then the best-splitpoint search.
    feature_params_step = matrix_features.AxisAlignedParamsStep_f32i32(
        set_number_features_step.OutputBufferId, buffers.X_FLOAT_DATA)
    matrix_feature = matrix_features.LinearFloat32MatrixFeature_f32i32(
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        sample_data_step.IndicesBufferId, buffers.X_FLOAT_DATA)
    matrix_feature_extractor_step = matrix_features.LinearFloat32MatrixFeatureExtractorStep_f32i32(
        matrix_feature, feature_ordering)
    slice_ys_step = pipeline.SliceFloat32MatrixBufferStep_i32(
        buffers.YS, sample_data_step.IndicesBufferId)
    slice_weights_step = pipeline.SliceFloat32VectorBufferStep_i32(
        sample_data_step.WeightsBufferId, sample_data_step.IndicesBufferId)
    impurity_walker = regression.SumOfVarianceWalker_f32i32(
        slice_weights_step.SlicedBufferId, slice_ys_step.SlicedBufferId,
        dimension_of_y)
    best_splitpoint_step = regression.SumOfVarianceBestSplitpointsWalkingSortedStep_f32i32(
        impurity_walker, matrix_feature_extractor_step.FeatureValuesBufferId,
        feature_ordering)
    node_steps_pipeline = pipeline.Pipeline([
        feature_params_step, matrix_feature_extractor_step, slice_ys_step,
        slice_weights_step, best_splitpoint_step
    ])

    # Split selection reads its inputs from the buffers written by the
    # best-splitpoint and feature-parameter steps.
    split_buffers = splitpoints.SplitSelectorBuffers(
        best_splitpoint_step.ImpurityBufferId,
        best_splitpoint_step.SplitpointBufferId,
        best_splitpoint_step.SplitpointCountsBufferId,
        best_splitpoint_step.ChildCountsBufferId,
        best_splitpoint_step.LeftYsBufferId,
        best_splitpoint_step.RightYsBufferId,
        feature_params_step.FloatParamsBufferId,
        feature_params_step.IntParamsBufferId,
        matrix_feature_extractor_step.FeatureValuesBufferId, feature_ordering,
        matrix_feature_extractor_step)
    should_split_criteria = create_should_split_criteria(**kwargs)
    finalizer = regression.MeanVarianceEstimatorFinalizer_f32()
    split_indices = splitpoints.SplitIndices_f32i32(
        sample_data_step.IndicesBufferId)
    split_selector = splitpoints.SplitSelector_f32i32([split_buffers],
                                                      should_split_criteria,
                                                      finalizer, split_indices)

    # Always breadth-first here; number_of_leaves is passed as an extra
    # trailing argument to the tree learner.
    tree_learner = learn.BreadthFirstTreeLearner_f32i32(
        try_split_criteria, tree_steps_pipeline, node_steps_pipeline,
        split_selector, number_of_leaves)
    forest_learner = learn.ParallelForestLearner(tree_learner, number_of_trees,
                                                 dimension_of_y,
                                                 number_of_jobs)
    return forest_learner