コード例 #1
0
def check_evaluation_pairs(class_probability_matrix, observed_labels):
    """Validates a set of (forecast probabilities, observed label) pairs.

    P = number of evaluation pairs
    K = number of classes

    :param class_probability_matrix: P-by-K numpy array of floats, where
        element [i, k] is the predicted probability that the [i]th example
        belongs to the [k]th class.  Must be 2-D with all values in 0...1.
    :param observed_labels: length-P numpy array of integers in 0...[K - 1].
        observed_labels[i] is the true class of the [i]th example.
    """

    # Probabilities: must be a 2-D array bounded by [0, 1].
    error_checking.assert_is_numpy_array(
        class_probability_matrix, num_dimensions=2
    )
    error_checking.assert_is_geq_numpy_array(class_probability_matrix, 0.)
    error_checking.assert_is_leq_numpy_array(class_probability_matrix, 1.)

    # The probability matrix dictates how many labels (P) and which label
    # values (0...K-1) are acceptable.
    num_pairs, num_classes = class_probability_matrix.shape
    expected_label_dimensions = numpy.array([num_pairs])

    error_checking.assert_is_numpy_array(
        observed_labels, exact_dimensions=expected_label_dimensions
    )
    error_checking.assert_is_integer_numpy_array(observed_labels)
    error_checking.assert_is_geq_numpy_array(observed_labels, 0)
    error_checking.assert_is_less_than_numpy_array(
        observed_labels, num_classes
    )
コード例 #2
0
def extract_radar_grid_points(field_matrix, row_indices, column_indices):
    """Pulls the values at selected grid points out of one radar field.

    M = number of rows (unique grid-point latitudes)
    N = number of columns (unique grid-point longitudes)
    P = number of points to extract

    :param field_matrix: M-by-N numpy array with values of a single radar
        field.
    :param row_indices: length-P numpy array of integer row indices, each in
        0...[M - 1].
    :param column_indices: length-P numpy array of integer column indices,
        each in 0...[N - 1].
    :return: extracted_values: numpy array where the [i]th value is
        field_matrix[row_indices[i], column_indices[i]].
    """

    error_checking.assert_is_real_numpy_array(field_matrix)
    error_checking.assert_is_numpy_array(field_matrix, num_dimensions=2)
    num_rows, num_columns = field_matrix.shape

    # Rows are validated first, then columns; each index array must hold
    # integers that fall inside the corresponding grid axis.
    for these_indices, this_axis_length in (
            (row_indices, num_rows), (column_indices, num_columns)
    ):
        error_checking.assert_is_integer_numpy_array(these_indices)
        error_checking.assert_is_geq_numpy_array(these_indices, 0)
        error_checking.assert_is_less_than_numpy_array(
            these_indices, this_axis_length
        )

    # Paired integer-array indexing: one value per (row, column) pair.
    return field_matrix[row_indices, column_indices]
コード例 #3
0
def _check_evaluation_pairs(class_probability_matrix, observed_labels):
    """Validates a set of (forecast probabilities, observed label) pairs.

    P = number of evaluation pairs
    K = number of classes

    :param class_probability_matrix: P-by-K numpy array of floats, where
        element [i, k] is the predicted probability that the [i]th example
        belongs to the [k]th class.  Must be 2-D with all values in 0...1.
    :param observed_labels: length-P numpy array of integers in 0...[K - 1].
        observed_labels[i] is the true class of the [i]th example.
    """

    # TODO(thunderhoser): This method is duplicated from evaluation_utils.py.  I
    # can't just import evaluation_utils.py, because this leads to a circular
    # import chain.  The answer is to put this method somewhere more general.

    # Probabilities: must be a 2-D array bounded by [0, 1].
    error_checking.assert_is_numpy_array(
        class_probability_matrix, num_dimensions=2
    )
    error_checking.assert_is_geq_numpy_array(class_probability_matrix, 0.)
    error_checking.assert_is_leq_numpy_array(class_probability_matrix, 1.)

    # The probability matrix dictates how many labels (P) and which label
    # values (0...K-1) are acceptable.
    num_pairs, num_classes = class_probability_matrix.shape
    expected_label_dimensions = numpy.array([num_pairs])

    error_checking.assert_is_numpy_array(
        observed_labels, exact_dimensions=expected_label_dimensions
    )
    error_checking.assert_is_integer_numpy_array(observed_labels)
    error_checking.assert_is_geq_numpy_array(observed_labels, 0)
    error_checking.assert_is_less_than_numpy_array(
        observed_labels, num_classes
    )
コード例 #4
0
    def test_assert_is_negative_numpy_array_true_with_nan_allowed(self):
        """Checks assert_is_less_than_numpy_array with base value 0.

        The input is NEGATIVE_NUMPY_ARRAY_WITH_NANS and allow_nan = True.  The
        test passes if the call raises no exception.
        """

        base_value = 0
        error_checking.assert_is_less_than_numpy_array(
            NEGATIVE_NUMPY_ARRAY_WITH_NANS, base_value, allow_nan=True
        )
コード例 #5
0
def get_events_in_hours(desired_hours,
                        verbose,
                        event_hours=None,
                        event_times_unix_sec=None):
    """Finds events in desired hours.

    If `event_hours is None`, the hours are computed from
    `event_times_unix_sec`.

    :param desired_hours: 1-D numpy array of desired hours (integers in range
        0...23).
    :param verbose: Boolean flag.  If True, will print a summary message to
        the command window.
    :param event_hours: 1-D numpy array of event hours (integers in range
        0...23).
    :param event_times_unix_sec: 1-D numpy array of event times.  Used only if
        `event_hours is None`.
    :return: desired_event_indices: 1-D numpy array with indices of events in
        desired hours.
    :return: event_hours: 1-D numpy array of event hours (the input array if
        one was given, otherwise the hours computed from
        `event_times_unix_sec`).
    """

    if event_hours is None:
        error_checking.assert_is_numpy_array(event_times_unix_sec,
                                             num_dimensions=1)

        # '%H' yields the two-digit hour of each time as a string.
        event_hours = numpy.array(
            [
                int(time_conversion.unix_sec_to_string(t, '%H'))
                for t in event_times_unix_sec
            ],
            dtype=int
        )

    error_checking.assert_is_integer_numpy_array(event_hours)
    error_checking.assert_is_numpy_array(event_hours, num_dimensions=1)
    error_checking.assert_is_geq_numpy_array(event_hours, 0)
    error_checking.assert_is_less_than_numpy_array(event_hours,
                                                   NUM_HOURS_IN_DAY)

    error_checking.assert_is_integer_numpy_array(desired_hours)
    error_checking.assert_is_numpy_array(desired_hours, num_dimensions=1)
    error_checking.assert_is_geq_numpy_array(desired_hours, 0)
    error_checking.assert_is_less_than_numpy_array(desired_hours,
                                                   NUM_HOURS_IN_DAY)

    error_checking.assert_is_boolean(verbose)

    # Vectorized membership test; replaces a Python-level loop that scanned
    # `desired_hours` once per event.
    desired_event_flags = numpy.isin(event_hours, desired_hours)
    desired_event_indices = numpy.where(desired_event_flags)[0]

    if verbose:
        print('{0:d} of {1:d} events are in hours {2:s}!'.format(
            len(desired_event_indices), len(event_hours), str(desired_hours)))

    return desired_event_indices, event_hours
コード例 #6
0
def check_target_array(target_array, num_dimensions, num_classes):
    """Error-checks target values.

    :param target_array: numpy array in one of two formats.
    [1] length-E integer numpy array of target values.  The documented
        contract is that values are the "dead storm" flag
        (`target_val_utils.DEAD_STORM_INTEGER`) or 0...[K - 1], where K =
        number of classes.  The check actually enforced is looser: every value
        must be >= DEAD_STORM_INTEGER and < K.
    [2] E-by-K numpy array, where each value is in 0...1.  If
        target_array[i, k] = 1, the [i]th storm object belongs to the [k]th
        class.  Row sums are not checked here.

    :param num_dimensions: Number of dimensions expected in `target_array`
        (1 or 2).
    :param num_classes: Number of classes that should be represented in
        `target_array` (at least 2).
    """

    error_checking.assert_is_integer(num_dimensions)
    error_checking.assert_is_geq(num_dimensions, 1)
    error_checking.assert_is_leq(num_dimensions, 2)
    error_checking.assert_is_integer(num_classes)
    error_checking.assert_is_geq(num_classes, 2)

    num_examples = target_array.shape[0]

    if num_dimensions != 1:
        # Format [2]: bounded values first, then the exact E-by-K shape.
        error_checking.assert_is_geq_numpy_array(target_array, 0)
        error_checking.assert_is_leq_numpy_array(target_array, 1)

        expected_dimensions = numpy.array(
            [num_examples, num_classes], dtype=int
        )
        error_checking.assert_is_numpy_array(
            target_array, exact_dimensions=expected_dimensions)
        return

    # Format [1]: integer vector with one entry per example.
    error_checking.assert_is_integer_numpy_array(target_array)

    expected_dimensions = numpy.array([num_examples], dtype=int)
    error_checking.assert_is_numpy_array(
        target_array, exact_dimensions=expected_dimensions)

    # TODO(thunderhoser): This is a HACK.  Should do better input-checking.
    # A stricter check would find the entries that differ from
    # target_val_utils.DEAD_STORM_INTEGER and require only those to be >= 0.
    # For now, anything from DEAD_STORM_INTEGER up to K - 1 is accepted.
    error_checking.assert_is_geq_numpy_array(
        target_array, target_val_utils.DEAD_STORM_INTEGER)
    error_checking.assert_is_less_than_numpy_array(target_array, num_classes)
コード例 #7
0
    def test_get_random_sample_points_full_size(self):
        """Ensures correct output from _get_random_sample_points.

        In this case, for_downsized_examples = False.  The returned row and
        column indices must be integers in 0...[NUM_ROWS_FOR_FCN_INPUT - 1]
        and 0...[NUM_COLUMNS_FOR_FCN_INPUT - 1], respectively.
        """

        row_indices, column_indices = (
            evaluation_utils._get_random_sample_points(
                num_points=NUM_POINTS_TO_SAMPLE, for_downsized_examples=False
            )
        )

        # Same three checks for each axis: rows first, then columns.
        for these_indices, this_upper_bound in (
                (row_indices, NUM_ROWS_FOR_FCN_INPUT),
                (column_indices, NUM_COLUMNS_FOR_FCN_INPUT)
        ):
            error_checking.assert_is_integer_numpy_array(these_indices)
            error_checking.assert_is_geq_numpy_array(these_indices, 0)
            error_checking.assert_is_less_than_numpy_array(
                these_indices, this_upper_bound
            )
コード例 #8
0
def _check_polygons(polygon_objects_grid_coords, num_panel_rows,
                    num_panel_columns, panel_row_by_polygon,
                    panel_column_by_polygon):
    """Error-checks list of polygons.

    Panel counts are always validated.  If there are no polygons, the
    per-polygon panel indices are not checked at all.

    :param polygon_objects_grid_coords: See doc for
        `polygons_from_pixel_to_grid_coords`.
    :param num_panel_rows: Same.
    :param num_panel_columns: Same.
    :param panel_row_by_polygon: Same.
    :param panel_column_by_polygon: Same.
    """

    error_checking.assert_is_integer(num_panel_rows)
    error_checking.assert_is_greater(num_panel_rows, 0)
    error_checking.assert_is_integer(num_panel_columns)
    error_checking.assert_is_greater(num_panel_columns, 0)

    num_polygons = len(polygon_objects_grid_coords)
    if num_polygons == 0:
        return

    # The polygon collection must convert to a 1-D object array.
    polygon_object_array = numpy.array(
        polygon_objects_grid_coords, dtype=object
    )
    error_checking.assert_is_numpy_array(
        polygon_object_array, num_dimensions=1
    )

    expected_dimensions = numpy.array([num_polygons], dtype=int)

    # One panel row and one panel column per polygon, each a valid index
    # into the panel grid.  Rows are checked before columns.
    for these_panel_indices, this_num_panels in (
            (panel_row_by_polygon, num_panel_rows),
            (panel_column_by_polygon, num_panel_columns)
    ):
        error_checking.assert_is_integer_numpy_array(these_panel_indices)
        error_checking.assert_is_numpy_array(
            these_panel_indices, exact_dimensions=expected_dimensions
        )
        error_checking.assert_is_geq_numpy_array(these_panel_indices, 0)
        error_checking.assert_is_less_than_numpy_array(
            these_panel_indices, this_num_panels
        )
コード例 #9
0
def get_contingency_table(predicted_labels, observed_labels, num_classes):
    """Creates either binary or multi-class contingency table.

    P = number of evaluation pairs
    K = number of classes

    :param predicted_labels: length-P numpy array of predicted class labels
        (integers in 0...[K - 1]).
    :param observed_labels: length-P numpy array of true class labels
        (integers in 0...[K - 1]).
    :param num_classes: Number of classes (integer >= 2; 2 gives a binary
        table).
    :return: contingency_table_as_matrix: K-by-K numpy array of integers.
        contingency_table_as_matrix[i, j] is the number of examples for which
        the predicted label is i and the true label is j.
    """

    error_checking.assert_is_integer(num_classes)
    # The bound is inclusive: a strict "> 2" check would reject the binary
    # case that this method is documented to support.
    error_checking.assert_is_geq(num_classes, 2)

    error_checking.assert_is_numpy_array(predicted_labels, num_dimensions=1)
    error_checking.assert_is_integer_numpy_array(predicted_labels)
    error_checking.assert_is_geq_numpy_array(predicted_labels, 0)
    error_checking.assert_is_less_than_numpy_array(
        predicted_labels, num_classes)

    # Observed labels must pair one-to-one with predicted labels.
    num_evaluation_pairs = len(predicted_labels)
    error_checking.assert_is_numpy_array(
        observed_labels, exact_dimensions=numpy.array([num_evaluation_pairs]))
    error_checking.assert_is_integer_numpy_array(observed_labels)
    error_checking.assert_is_geq_numpy_array(observed_labels, 0)
    error_checking.assert_is_less_than_numpy_array(observed_labels, num_classes)

    # Every cell is overwritten below; -1 is only a placeholder.
    contingency_table_as_matrix = numpy.full(
        (num_classes, num_classes), -1, dtype=int)

    for i in range(num_classes):
        for j in range(num_classes):
            contingency_table_as_matrix[i, j] = numpy.sum(
                numpy.logical_and(predicted_labels == i, observed_labels == j))

    return contingency_table_as_matrix
コード例 #10
0
    def test_get_random_sample_points_downsized_no_mask(self):
        """Ensures correct output from _get_random_sample_points.

        In this case,
        `for_downsized_examples = True and narr_mask_matrix is None`.  The
        returned row and column indices must be integers that fall inside the
        corresponding axis of NARR_MASK_MATRIX.
        """

        row_indices, column_indices = (
            evaluation_utils._get_random_sample_points(
                num_points=NUM_POINTS_TO_SAMPLE,
                for_downsized_examples=True,
                narr_mask_matrix=None
            )
        )

        # Same three checks for each axis: rows (axis 0) first, then columns
        # (axis 1).
        for this_axis, these_indices in enumerate(
                (row_indices, column_indices)
        ):
            error_checking.assert_is_integer_numpy_array(these_indices)
            error_checking.assert_is_geq_numpy_array(these_indices, 0)
            error_checking.assert_is_less_than_numpy_array(
                these_indices, NARR_MASK_MATRIX.shape[this_axis]
            )
コード例 #11
0
def subset_by_index(example_dict, desired_indices):
    """Subsets examples by index.

    The input dictionary is modified in place and also returned.

    :param example_dict: See doc for `example_io.read_file`.
    :param desired_indices: 1-D numpy array of desired indices (integers in
        0...[E - 1], where E is the length of `example_dict[VALID_TIMES_KEY]`).
    :return: example_dict: Same as input but with fewer examples.
    """

    error_checking.assert_is_numpy_array(desired_indices, num_dimensions=1)
    error_checking.assert_is_integer_numpy_array(desired_indices)
    error_checking.assert_is_geq_numpy_array(desired_indices, 0)

    num_examples = len(example_dict[VALID_TIMES_KEY])
    error_checking.assert_is_less_than_numpy_array(
        desired_indices, num_examples
    )

    for this_key in ONE_PER_EXAMPLE_KEYS:
        these_values = example_dict[this_key]

        # Lists do not accept an index array, so pick items one at a time.
        if isinstance(these_values, list):
            example_dict[this_key] = [these_values[k] for k in desired_indices]
            continue

        # Anything else is indexed along its first axis.
        example_dict[this_key] = these_values[desired_indices, ...]

    return example_dict
コード例 #12
0
def _run(example_file_name, example_indices, num_radar_rows, num_radar_columns,
         normalization_file_name, output_dir_name):
    """Plots data augmentation.

    This is effectively the main method.  It reads examples, keeps the
    requested ones, normalizes, augments, denormalizes, and plots one figure
    per kept example.

    :param example_file_name: See documentation at top of file.
    :param example_indices: Same.
    :param num_radar_rows: Same.
    :param num_radar_columns: Same.
    :param normalization_file_name: Same.
    :param output_dir_name: Same.
    """

    # Non-positive grid sizes are passed to the reader as None.
    num_radar_rows = None if num_radar_rows <= 0 else num_radar_rows
    num_radar_columns = None if num_radar_columns <= 0 else num_radar_columns

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    print('Reading data from: "{0:s}"...'.format(example_file_name))
    heights_to_keep_m_agl = numpy.array([RADAR_HEIGHT_M_AGL], dtype=int)

    example_dict = input_examples.read_example_file(
        netcdf_file_name=example_file_name,
        read_all_target_vars=True,
        include_soundings=False,
        num_rows_to_keep=num_radar_rows,
        num_columns_to_keep=num_radar_columns,
        radar_field_names_to_keep=[RADAR_FIELD_NAME],
        radar_heights_to_keep_m_agl=heights_to_keep_m_agl
    )

    # Prefer the reflectivity matrix when the file contains one.
    matrix_key = (
        input_examples.REFL_IMAGE_MATRIX_KEY
        if input_examples.REFL_IMAGE_MATRIX_KEY in example_dict
        else input_examples.RADAR_IMAGE_MATRIX_KEY
    )
    all_radar_matrix = example_dict[matrix_key]

    num_examples_in_file = all_radar_matrix.shape[0]
    error_checking.assert_is_geq_numpy_array(example_indices, 0)
    error_checking.assert_is_less_than_numpy_array(
        example_indices, num_examples_in_file
    )

    # Keep only the requested examples (and their metadata).
    selected_radar_matrix = all_radar_matrix[example_indices, ...]
    full_storm_id_strings = [
        example_dict[input_examples.FULL_IDS_KEY][k] for k in example_indices
    ]
    storm_times_unix_sec = example_dict[
        input_examples.STORM_TIMES_KEY
    ][example_indices]

    normalized_radar_matrix = dl_utils.normalize_radar_images(
        radar_image_matrix=selected_radar_matrix,
        field_names=[RADAR_FIELD_NAME],
        normalization_type_string=NORMALIZATION_TYPE_STRING,
        normalization_param_file_name=normalization_file_name
    )

    num_examples = normalized_radar_matrix.shape[0]
    dummy_target_values = numpy.full(num_examples, 0, dtype=int)

    augmentation_output = trainval_io._augment_radar_images(
        list_of_predictor_matrices=[normalized_radar_matrix],
        target_array=dummy_target_values,
        x_translations_pixels=X_TRANSLATIONS_PX,
        y_translations_pixels=Y_TRANSLATIONS_PX,
        ccw_rotation_angles_deg=CCW_ROTATION_ANGLES_DEG,
        noise_standard_deviation=NOISE_STANDARD_DEVIATION,
        num_noisings=1,
        flip_in_x=False,
        flip_in_y=False
    )
    augmented_radar_matrix = augmentation_output[0][0]

    augmented_radar_matrix = dl_utils.denormalize_radar_images(
        radar_image_matrix=augmented_radar_matrix,
        field_names=[RADAR_FIELD_NAME],
        normalization_type_string=NORMALIZATION_TYPE_STRING,
        normalization_param_file_name=normalization_file_name
    )

    # The augmented matrix is split along the first axis into consecutive
    # groups of `num_examples`: original, translated, rotated, and then
    # everything left over (treated as the noised images).
    orig_radar_matrix = augmented_radar_matrix[:num_examples, ...]
    translated_radar_matrix = augmented_radar_matrix[
        num_examples:(2 * num_examples), ...
    ]
    rotated_radar_matrix = augmented_radar_matrix[
        (2 * num_examples):(3 * num_examples), ...
    ]
    noised_radar_matrix = augmented_radar_matrix[(3 * num_examples):, ...]

    for i in range(num_examples):
        _plot_one_example(
            orig_radar_matrix=orig_radar_matrix[i, ...],
            translated_radar_matrix=translated_radar_matrix[i, ...],
            rotated_radar_matrix=rotated_radar_matrix[i, ...],
            noised_radar_matrix=noised_radar_matrix[i, ...],
            output_dir_name=output_dir_name,
            full_storm_id_string=full_storm_id_strings[i],
            storm_time_unix_sec=storm_times_unix_sec[i]
        )
コード例 #13
0
def _run(example_file_name, example_indices, num_radar_rows, num_radar_columns,
         normalization_file_name, output_dir_name):
    """Makes figure to explain one convolution block.

    This is effectively the main method.  It reads examples, runs all of them
    through convolution -> activation -> batch normalization -> max-pooling,
    and then plots one figure for each index in `example_indices`.

    :param example_file_name: See documentation at top of file.
    :param example_indices: Same.
    :param num_radar_rows: Same.
    :param num_radar_columns: Same.
    :param normalization_file_name: Same.
    :param output_dir_name: Same.
    """

    # Non-positive grid sizes are passed to the reader as None.
    if num_radar_rows <= 0:
        num_radar_rows = None
    if num_radar_columns <= 0:
        num_radar_columns = None

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    print('Reading data from: "{0:s}"...'.format(example_file_name))
    example_dict = input_examples.read_example_file(
        netcdf_file_name=example_file_name, read_all_target_vars=False,
        target_name=DUMMY_TARGET_NAME, include_soundings=False,
        num_rows_to_keep=num_radar_rows, num_columns_to_keep=num_radar_columns,
        radar_heights_to_keep_m_agl=numpy.array([RADAR_HEIGHT_M_AGL], dtype=int)
    )

    # Prefer the reflectivity matrix when the file contains one.  Otherwise,
    # pull the single desired field out of the generic radar matrix.
    if input_examples.REFL_IMAGE_MATRIX_KEY in example_dict:
        input_feature_matrix = example_dict[
            input_examples.REFL_IMAGE_MATRIX_KEY]
    else:
        field_index = example_dict[input_examples.RADAR_FIELDS_KEY].index(
            RADAR_FIELD_NAME
        )

        # Indexing with a one-element list keeps the last axis (length 1).
        input_feature_matrix = example_dict[
            input_examples.RADAR_IMAGE_MATRIX_KEY
        ][..., [field_index]]

    # Indices are validated against the number of examples in the file.  Note
    # that the matrix itself is NOT subset here; all examples go through the
    # pipeline below and `example_indices` is used only for plotting.
    num_examples = input_feature_matrix.shape[0]
    error_checking.assert_is_geq_numpy_array(example_indices, 0)
    error_checking.assert_is_less_than_numpy_array(
        example_indices, num_examples)

    input_feature_matrix = dl_utils.normalize_radar_images(
        radar_image_matrix=input_feature_matrix, field_names=[RADAR_FIELD_NAME],
        normalization_type_string=NORMALIZATION_TYPE_STRING,
        normalization_param_file_name=normalization_file_name)

    # Drop the trailing axis (4-D input) or last two axes (otherwise), then
    # add one trailing axis of length 1 back.
    # NOTE(review): presumably the dropped axes are height and/or field, and
    # the re-added axis is the channel axis -- confirm against the reader.
    if len(input_feature_matrix.shape) == 4:
        input_feature_matrix = input_feature_matrix[..., 0]
    else:
        input_feature_matrix = input_feature_matrix[..., 0, 0]

    input_feature_matrix = numpy.expand_dims(input_feature_matrix, axis=-1)

    print('Doing convolution for all {0:d} examples...'.format(num_examples))
    feature_matrix_after_conv = None

    for i in range(num_examples):
        # `+ 0` creates a new array, so the helper never receives the stored
        # matrix itself (presumably to guard against in-place modification).
        this_feature_matrix = standalone_utils.do_2d_convolution(
            feature_matrix=input_feature_matrix[i, ...] + 0,
            kernel_matrix=KERNEL_MATRIX, pad_edges=False, stride_length_px=1
        )[0, ...]

        # Output size is not known until the first result arrives, so the
        # full array is allocated lazily (NaN-filled) on the first iteration.
        if feature_matrix_after_conv is None:
            feature_matrix_after_conv = numpy.full(
                (num_examples,) + this_feature_matrix.shape, numpy.nan
            )

        feature_matrix_after_conv[i, ...] = this_feature_matrix

    print('Doing activation for all {0:d} examples...'.format(num_examples))
    feature_matrix_after_activn = standalone_utils.do_activation(
        input_values=feature_matrix_after_conv + 0,
        function_name=architecture_utils.RELU_FUNCTION_STRING, alpha=0.2)

    # Batch normalization is applied to the whole set of examples at once.
    print('Doing batch norm for all {0:d} examples...'.format(num_examples))
    feature_matrix_after_bn = standalone_utils.do_batch_normalization(
        feature_matrix=feature_matrix_after_activn + 0
    )

    print('Doing max-pooling for all {0:d} examples...\n'.format(num_examples))
    feature_matrix_after_pooling = None

    for i in range(num_examples):
        this_feature_matrix = standalone_utils.do_2d_pooling(
            feature_matrix=feature_matrix_after_bn[i, ...], stride_length_px=2,
            pooling_type_string=standalone_utils.MAX_POOLING_TYPE_STRING
        )[0, ...]

        # Same lazy allocation as for the convolution output.
        if feature_matrix_after_pooling is None:
            feature_matrix_after_pooling = numpy.full(
                (num_examples,) + this_feature_matrix.shape, numpy.nan
            )

        feature_matrix_after_pooling[i, ...] = this_feature_matrix

    # Plot only the requested examples; the file name carries the example
    # index, zero-padded to six digits.
    for i in example_indices:
        this_output_file_name = '{0:s}/convolution_block{1:06d}.jpg'.format(
            output_dir_name, i)

        _plot_one_example(
            input_feature_matrix=input_feature_matrix[i, ...],
            feature_matrix_after_conv=feature_matrix_after_conv[i, ...],
            feature_matrix_after_activn=feature_matrix_after_activn[i, ...],
            feature_matrix_after_bn=feature_matrix_after_bn[i, ...],
            feature_matrix_after_pooling=feature_matrix_after_pooling[i, ...],
            output_file_name=this_output_file_name)
コード例 #14
0
def fit_mvn_for_each_class(feature_table,
                           class_labels,
                           num_classes,
                           assume_diagonal_covar_matrix=False):
    """For each class, fits data to a multivariate normal distribution.

    N = number of examples
    M = number of features (input variables)
    K = number of classes

    :param feature_table: pandas DataFrame with N rows and M columns.  Column
        names are feature names.
    :param class_labels: length-N numpy array of class labels.  Must be
        integers ranging from 0...[num_classes - 1].
    :param num_classes: Number of classes (integer >= 2).
    :param assume_diagonal_covar_matrix: See documentation for
        fit_multivariate_normal.
    :return: list_of_mvn_dictionaries: length-K list of dictionaries.  The
        [k]th dictionary is the output of `fit_multivariate_normal` for the
        examples with label k (see that method for its keys), plus the
        following two keys.
    list_of_mvn_dictionaries[k][PRIOR_CLASS_PROBABILITY_KEY]: Prior probability
        of the class, i.e., the fraction of examples in `class_labels` with
        label k.
    list_of_mvn_dictionaries[k][ORIG_FEATURE_TABLE_KEY]: Rows of
        `feature_table` for the examples with label k.
    :raises: ValueError: if any class is not represented in `class_labels`.
    """

    num_examples = len(feature_table.index)
    expected_label_dimensions = numpy.array([num_examples])

    error_checking.assert_is_integer(num_classes)
    error_checking.assert_is_geq(num_classes, 2)
    error_checking.assert_is_integer_numpy_array(class_labels)
    error_checking.assert_is_numpy_array(
        class_labels, exact_dimensions=expected_label_dimensions
    )
    error_checking.assert_is_geq_numpy_array(class_labels, 0)
    error_checking.assert_is_less_than_numpy_array(class_labels, num_classes)

    list_of_mvn_dictionaries = []

    for this_label in range(num_classes):
        this_class_flags = class_labels == this_label

        # Every class needs at least one example to be fitted.
        if not numpy.any(this_class_flags):
            error_string = (
                'Class {0:d} (label {1:d}) does not exist in the '
                'input data.'
            ).format(this_label + 1, this_label)

            raise ValueError(error_string)

        this_class_indices = numpy.where(this_class_flags)[0]

        this_mvn_dict = fit_multivariate_normal(
            feature_table.iloc[this_class_indices],
            assume_diagonal_covar_matrix=assume_diagonal_covar_matrix
        )

        # Attach the class frequency and the class's own feature rows.
        this_prior_probability = float(len(this_class_indices)) / num_examples
        this_mvn_dict.update({
            PRIOR_CLASS_PROBABILITY_KEY: this_prior_probability
        })
        this_mvn_dict.update({
            ORIG_FEATURE_TABLE_KEY: feature_table.iloc[this_class_indices]
        })

        list_of_mvn_dictionaries.append(this_mvn_dict)

    return list_of_mvn_dictionaries
コード例 #15
0
    def test_assert_is_negative_numpy_array_true(self):
        """Checks assert_is_less_than_numpy_array with base value 0.

        The input is NEGATIVE_NUMPY_ARRAY.  The test passes if the call raises
        no exception.
        """

        base_value = 0
        error_checking.assert_is_less_than_numpy_array(
            NEGATIVE_NUMPY_ARRAY, base_value
        )
コード例 #16
0
    def test_assert_is_negative_numpy_array_non_positive(self):
        """Checks assert_is_less_than_numpy_array with base value 0.

        The input is NON_POSITIVE_NUMPY_ARRAY, for which the call is expected
        to raise a ValueError.
        """

        base_value = 0

        with self.assertRaises(ValueError):
            error_checking.assert_is_less_than_numpy_array(
                NON_POSITIVE_NUMPY_ARRAY, base_value
            )
コード例 #17
0
    def test_assert_is_negative_numpy_array_mixed_sign(self):
        """Checks assert_is_less_than_numpy_array with base value 0.

        The input is MIXED_SIGN_NUMPY_ARRAY, for which the call is expected to
        raise a ValueError.
        """

        base_value = 0

        with self.assertRaises(ValueError):
            error_checking.assert_is_less_than_numpy_array(
                MIXED_SIGN_NUMPY_ARRAY, base_value
            )