def fields_and_heights_to_names(field_names,
                                heights_m_agl,
                                include_units=True):
    """Converts list of radar field/height pairs to panel names.

    P = number of panels

    :param field_names: length-P list with names of radar fields.  Each must be
        accepted by `radar_utils.check_field_name`.
    :param heights_m_agl: length-P numpy array of heights (metres above ground
        level).
    :param include_units: Boolean flag.  If True, panel names will include
        units.
    :return: panel_names: length-P list of panel names (to be printed at bottoms
        of panels).
    """

    error_checking.assert_is_boolean(include_units)

    error_checking.assert_is_string_list(field_names)
    error_checking.assert_is_numpy_array(numpy.array(field_names),
                                         num_dimensions=1)
    num_panels = len(field_names)

    error_checking.assert_is_numpy_array(heights_m_agl,
                                         exact_dimensions=numpy.array(
                                             [num_panels]))
    error_checking.assert_is_geq_numpy_array(heights_m_agl, 0.)
    heights_m_agl = numpy.round(heights_m_agl).astype(int)

    panel_names = [''] * num_panels

    for i in range(num_panels):
        this_field_name_verbose = field_name_to_verbose(
            field_name=field_names[i], include_units=include_units)

        panel_names[i] = '{0:s}\nat {1:d} km AGL'.format(
            this_field_name_verbose,
            int(numpy.round(heights_m_agl[i] * METRES_TO_KM)))

    return panel_names
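
# Hypothetical usage sketch (not part of the original source).  It assumes
# 'reflectivity_dbz' is a field name accepted by
# `radar_utils.check_field_name`; substitute real field names as needed.
demo_panel_names = fields_and_heights_to_names(
    field_names=['reflectivity_dbz', 'reflectivity_dbz'],
    heights_m_agl=numpy.array([1000., 3000.]),
    include_units=True)
# Each element looks like "<verbose field name>\nat <height> km AGL".
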
def _check_statistic_names(statistic_names):
    """Ensures that statistic names are valid.

    :param statistic_names: 1-D list of statistic names.
    :raises: ValueError: if any element of `statistic_names` is not in
        `STATISTIC_NAMES`.
    """

    error_checking.assert_is_string_list(statistic_names)
    error_checking.assert_is_numpy_array(numpy.array(statistic_names),
                                         num_dimensions=1)

    for this_name in statistic_names:
        if this_name in STATISTIC_NAMES:
            continue

        error_string = (
            '\n\n{0:s}\n\nValid statistic names (listed above) do not include '
            'the following: "{1:s}"'
        ).format(str(STATISTIC_NAMES), this_name)
        raise ValueError(error_string)
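
# Hypothetical usage sketch (not part of the original source), assuming the
# default basic-stat names are a subset of `STATISTIC_NAMES`.  Valid names
# pass silently; unknown names raise ValueError.
_check_statistic_names(DEFAULT_BASIC_STAT_NAMES)
# _check_statistic_names(['not_a_real_statistic'])  # would raise ValueError
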
def get_region_properties(binary_image_matrix,
                          property_names=DEFAULT_REGION_PROP_NAMES):
    """Computes region properties for one shape (polygon).

    M = number of rows in grid
    N = number of columns in grid

    :param binary_image_matrix: M-by-N Boolean numpy array.  If
        binary_image_matrix[i, j] = True, grid point [i, j] is inside the
        polygon.  Otherwise, grid point [i, j] is outside the polygon.
    :param property_names: 1-D list of region properties to compute.
    :return: property_dict: Dictionary, where each key is a string from
        `property_names` and each item is the corresponding value.
    """

    error_checking.assert_is_boolean_numpy_array(binary_image_matrix)
    error_checking.assert_is_numpy_array(binary_image_matrix, num_dimensions=2)

    error_checking.assert_is_string_list(property_names)
    error_checking.assert_is_numpy_array(numpy.array(property_names),
                                         num_dimensions=1)

    regionprops_object = skimage.measure.regionprops(
        binary_image_matrix.astype(int))[0]
    property_dict = {}

    for this_name in property_names:
        if this_name == ORIENTATION_NAME:
            property_dict.update({
                this_name:
                RADIANS_TO_DEGREES *
                getattr(regionprops_object, _stat_name_new_to_orig(this_name))
            })
        else:
            property_dict.update({
                this_name:
                getattr(regionprops_object, _stat_name_new_to_orig(this_name))
            })

    return property_dict
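
# Hypothetical usage sketch (not part of the original source): computes the
# default region properties for a 6-by-6 grid containing one filled rectangle.
demo_binary_image_matrix = numpy.full((6, 6), False, dtype=bool)
demo_binary_image_matrix[1:4, 2:5] = True
demo_property_dict = get_region_properties(demo_binary_image_matrix)
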
def fields_and_refl_heights_to_dict(field_names,
                                    data_source,
                                    refl_heights_m_asl=None):
    """Converts two arrays (field names and reflectivity heights) to dictionary.

    :param field_names: 1-D list with names of radar fields in GewitterGefahr
        format.
    :param data_source: Data source (string).
    :param refl_heights_m_asl: 1-D numpy array of reflectivity heights (metres
        above sea level).
    :return: field_to_heights_dict_m_asl: Dictionary, where each key is a field
        name and each value is a 1-D numpy array of heights (metres above sea
        level).
    """

    check_data_source(data_source)
    error_checking.assert_is_string_list(field_names)
    error_checking.assert_is_numpy_array(numpy.array(field_names),
                                         num_dimensions=1)

    field_to_heights_dict_m_asl = {}

    for this_field_name in field_names:
        if this_field_name == radar_utils.REFL_NAME:
            radar_utils.check_heights(data_source=data_source,
                                      heights_m_asl=refl_heights_m_asl,
                                      field_name=this_field_name)

            field_to_heights_dict_m_asl.update(
                {this_field_name: refl_heights_m_asl})

        else:
            field_to_heights_dict_m_asl.update({
                this_field_name:
                radar_utils.get_valid_heights(data_source=data_source,
                                              field_name=this_field_name)
            })

    return field_to_heights_dict_m_asl
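
# Hypothetical usage sketch (not part of the original source).  The data
# source and reflectivity heights are placeholders; `check_data_source` and
# `radar_utils.check_heights` will reject values invalid for the real source.
demo_field_to_heights_dict_m_asl = fields_and_refl_heights_to_dict(
    field_names=[radar_utils.REFL_NAME],
    data_source='myrorss',
    refl_heights_m_asl=numpy.array([250.]))
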
def get_curvature_based_stats(
        polygon_object_xy, statistic_names=DEFAULT_CURVATURE_BASED_STAT_NAMES):
    """Computes curvature-based statistics for one shape (polygon).

    :param polygon_object_xy: Instance of `shapely.geometry.Polygon`, where x-
        and y-coordinates are in metres.  If the polygon is a storm object (or
        anything else with only 90-degree angles), we recommend (but do not
        enforce) that it be smoothed -- using, for example,
        `smoothing_via_iterative_averaging.sia_for_closed_polygon`.
    :param statistic_names: 1-D list of curvature-based statistics to compute.
    :return: statistic_dict: Dictionary, where each key is a string from
        `statistic_names` and each item is the corresponding value.
    """

    error_checking.assert_is_string_list(statistic_names)
    error_checking.assert_is_numpy_array(
        numpy.array(statistic_names), num_dimensions=1)

    vertex_curvatures_metres01 = shape_utils.get_curvature_for_closed_polygon(
        polygon_object_xy)

    statistic_dict = {}
    if MEAN_ABS_CURVATURE_NAME in statistic_names:
        statistic_dict.update(
            {MEAN_ABS_CURVATURE_NAME:
                 numpy.mean(numpy.absolute(vertex_curvatures_metres01))})

    if BENDING_ENERGY_NAME in statistic_names:
        statistic_dict.update(
            {BENDING_ENERGY_NAME: numpy.sum(
                vertex_curvatures_metres01 ** 2) / polygon_object_xy.length})

    if COMPACTNESS_NAME in statistic_names:
        statistic_dict.update(
            {COMPACTNESS_NAME: polygon_object_xy.length ** 2 / (
                4 * numpy.pi * polygon_object_xy.area)})

    return statistic_dict
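
# Hypothetical usage sketch (not part of the original source), assuming
# `shapely.geometry` is imported.  Computes the default curvature-based
# statistics for a 1-km square; as the docstring notes, smoothing such a
# polygon first is recommended (but not required).
demo_polygon_object_xy = shapely.geometry.Polygon(
    [(0., 0.), (1000., 0.), (1000., 1000.), (0., 1000.)])
demo_curvature_stat_dict = get_curvature_based_stats(demo_polygon_object_xy)
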
def unzip_tar(tar_file_name, target_directory_name=None,
              file_and_dir_names_to_unzip=None):
    """Unzips tar file.

    :param tar_file_name: Path to input file.
    :param target_directory_name: Path to output directory.
    :param file_and_dir_names_to_unzip: List of files and directories to extract
        from the tar file.  Each list element should be a relative path inside
        the tar file.  After unzipping, the same relative path will exist inside
        `target_directory_name`.
    """

    error_checking.assert_is_string(tar_file_name)
    error_checking.assert_is_string_list(file_and_dir_names_to_unzip)
    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=target_directory_name)

    unix_command_string = 'tar -C "{0:s}" -xvf "{1:s}"'.format(
        target_directory_name, tar_file_name)
    for this_relative_path in file_and_dir_names_to_unzip:
        unix_command_string += ' "' + this_relative_path + '"'

    os.system(unix_command_string)
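
# Hypothetical usage sketch (not part of the original source).  All paths are
# placeholders; each relative path must exist inside the tar file.
unzip_tar(
    tar_file_name='/data/radar/20190529.tar',
    target_directory_name='/data/radar/unzipped/20190529',
    file_and_dir_names_to_unzip=['reflectivity/00.25/', 'reflectivity/00.50/'])
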
def get_basic_statistics(polygon_object_xy,
                         statistic_names=DEFAULT_BASIC_STAT_NAMES):
    """Computes basic statistics for simple polygon.

    A "basic statistic" is one stored in the `shapely.geometry.Polygon` object.

    :param polygon_object_xy: Instance of `shapely.geometry.Polygon`.
    :param statistic_names: 1-D list of basic stats to compute.
    :return: basic_stat_dict: Dictionary, where each key is a string from
        `statistic_names` and each item is the corresponding value.
    """

    error_checking.assert_is_string_list(statistic_names)
    error_checking.assert_is_numpy_array(numpy.array(statistic_names),
                                         num_dimensions=1)

    basic_stat_dict = {}
    if AREA_NAME in statistic_names:
        basic_stat_dict.update({AREA_NAME: polygon_object_xy.area})
    if PERIMETER_NAME in statistic_names:
        basic_stat_dict.update({PERIMETER_NAME: polygon_object_xy.length})

    return basic_stat_dict
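
# Hypothetical usage sketch (not part of the original source), assuming
# `shapely.geometry` is imported: area and perimeter of a 2000-by-1000-metre
# rectangle.
demo_rectangle_object_xy = shapely.geometry.Polygon(
    [(0., 0.), (2000., 0.), (2000., 1000.), (0., 1000.)])
demo_basic_stat_dict = get_basic_statistics(demo_rectangle_object_xy)
# Expected: {AREA_NAME: 2000000.0, PERIMETER_NAME: 6000.0}
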
def fields_and_refl_heights_to_pairs(field_names, heights_m_asl):
    """Converts unique arrays (field names and heights) to non-unique ones.

    F = number of fields
    H = number of heights
    N = F * H = number of field/height pairs

    :param field_names: length-F list with names of radar fields in
        GewitterGefahr format.
    :param heights_m_asl: length-H numpy array of heights (metres above sea
        level).
    :return: field_name_by_pair: length-N list of field names.
    :return: height_by_pair_m_asl: length-N numpy array of corresponding heights
        (metres above sea level).
    """

    error_checking.assert_is_string_list(field_names)
    error_checking.assert_is_numpy_array(numpy.array(field_names),
                                         num_dimensions=1)

    radar_utils.check_heights(data_source=radar_utils.GRIDRAD_SOURCE_ID,
                              heights_m_asl=heights_m_asl)

    field_name_by_pair = []
    height_by_pair_m_asl = numpy.array([], dtype=int)

    for this_field_name in field_names:
        radar_utils.field_name_new_to_orig(
            field_name=this_field_name,
            data_source_name=radar_utils.GRIDRAD_SOURCE_ID)

        field_name_by_pair += [this_field_name] * len(heights_m_asl)
        height_by_pair_m_asl = numpy.concatenate(
            (height_by_pair_m_asl, heights_m_asl))

    return field_name_by_pair, height_by_pair_m_asl
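
# Hypothetical usage sketch (not part of the original source).  The heights
# are placeholders; `radar_utils.check_heights` will reject heights that are
# not valid for GridRad data.
demo_field_name_by_pair, demo_height_by_pair_m_asl = (
    fields_and_refl_heights_to_pairs(
        field_names=[radar_utils.REFL_NAME],
        heights_m_asl=numpy.array([1000., 2000., 3000.])))
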
def add_metadata(novelty_dict, baseline_full_id_strings,
                 baseline_storm_times_unix_sec, trial_full_id_strings,
                 trial_storm_times_unix_sec, cnn_file_name,
                 upconvnet_file_name):
    """Adds metadata to novelty-detection results.

    B = number of baseline examples
    T = number of trial examples

    :param novelty_dict: Dictionary created by `do_novelty_detection`.
    :param baseline_full_id_strings: length-B list of full storm IDs for
        baseline examples.
    :param baseline_storm_times_unix_sec: length-B numpy array of valid times
        for baseline examples.
    :param trial_full_id_strings: length-T list of full storm IDs for trial
        examples.
    :param trial_storm_times_unix_sec: length-T numpy array of valid times for
        trial examples.
    :param cnn_file_name: Path to file with CNN used for novelty detection
        (readable by `cnn.read_model`).
    :param upconvnet_file_name: Path to file with upconvnet used for novelty
        detection (readable by `cnn.read_model`).

    :return: novelty_dict: Dictionary with the following keys.
    novelty_dict['list_of_baseline_input_matrices']: See doc for
        `do_novelty_detection`.
    novelty_dict['list_of_trial_input_matrices']: Same.
    novelty_dict['novel_indices']: Same.
    novelty_dict['novel_image_matrix_upconv']: Same.
    novelty_dict['novel_image_matrix_upconv_svd']: Same.
    novelty_dict['percent_svd_variance_to_keep']: Same.
    novelty_dict['cnn_feature_layer_name']: Same.
    novelty_dict['multipass']: Same.
    novelty_dict['baseline_full_id_strings']: See input doc for this method.
    novelty_dict['baseline_storm_times_unix_sec']: Same.
    novelty_dict['trial_full_id_strings']: Same.
    novelty_dict['trial_storm_times_unix_sec']: Same.
    novelty_dict['cnn_file_name']: Same.
    novelty_dict['upconvnet_file_name']: Same.
    """

    num_baseline_examples = novelty_dict[BASELINE_INPUTS_KEY][0].shape[0]
    these_expected_dim = numpy.array([num_baseline_examples], dtype=int)

    error_checking.assert_is_string_list(baseline_full_id_strings)
    error_checking.assert_is_numpy_array(numpy.array(baseline_full_id_strings),
                                         exact_dimensions=these_expected_dim)

    error_checking.assert_is_integer_numpy_array(baseline_storm_times_unix_sec)
    error_checking.assert_is_numpy_array(baseline_storm_times_unix_sec,
                                         exact_dimensions=these_expected_dim)

    num_trial_examples = novelty_dict[TRIAL_INPUTS_KEY][0].shape[0]
    these_expected_dim = numpy.array([num_trial_examples], dtype=int)

    error_checking.assert_is_string_list(trial_full_id_strings)
    error_checking.assert_is_numpy_array(numpy.array(trial_full_id_strings),
                                         exact_dimensions=these_expected_dim)

    error_checking.assert_is_integer_numpy_array(trial_storm_times_unix_sec)
    error_checking.assert_is_numpy_array(trial_storm_times_unix_sec,
                                         exact_dimensions=these_expected_dim)

    error_checking.assert_is_string(cnn_file_name)
    error_checking.assert_is_string(upconvnet_file_name)

    novelty_dict.update({
        BASELINE_IDS_KEY: baseline_full_id_strings,
        BASELINE_STORM_TIMES_KEY: baseline_storm_times_unix_sec,
        TRIAL_IDS_KEY: trial_full_id_strings,
        TRIAL_STORM_TIMES_KEY: trial_storm_times_unix_sec,
        CNN_FILE_KEY: cnn_file_name,
        UPCONVNET_FILE_KEY: upconvnet_file_name
    })

    return novelty_dict
def write_standard_file(pickle_file_name,
                        list_of_input_matrices,
                        list_of_saliency_matrices,
                        storm_ids,
                        storm_times_unix_sec,
                        model_file_name,
                        saliency_metadata_dict,
                        sounding_pressure_matrix_pascals=None):
    """Writes saliency maps (one per example) to Pickle file.

    T = number of input tensors to the model
    E = number of examples (storm objects)
    H = number of height levels per sounding

    :param pickle_file_name: Path to output file.
    :param list_of_input_matrices: length-T list of numpy arrays, containing
        predictors (inputs to the model).  The first dimension of each array
        must have length E.
    :param list_of_saliency_matrices: length-T list of numpy arrays, containing
        saliency values.  list_of_saliency_matrices[i] must have the same
        dimensions as list_of_input_matrices[i].
    :param storm_ids: length-E list of storm IDs (strings).
    :param storm_times_unix_sec: length-E numpy array of storm times.
    :param model_file_name: Path to file with trained model (readable by
        `cnn.read_model`).
    :param saliency_metadata_dict: Dictionary created by `check_metadata`.
    :param sounding_pressure_matrix_pascals: E-by-H numpy array of pressure
        levels in soundings.  Useful only when the model input contains
        soundings without pressure, since pressure is needed to plot soundings.
    :raises: ValueError: if `list_of_input_matrices` and
        `list_of_saliency_matrices` have different lengths.
    """

    error_checking.assert_is_string(model_file_name)
    error_checking.assert_is_string_list(storm_ids)
    error_checking.assert_is_numpy_array(numpy.array(storm_ids),
                                         num_dimensions=1)

    num_storm_objects = len(storm_ids)
    these_expected_dim = numpy.array([num_storm_objects], dtype=int)

    error_checking.assert_is_integer_numpy_array(storm_times_unix_sec)
    error_checking.assert_is_numpy_array(storm_times_unix_sec,
                                         exact_dimensions=these_expected_dim)

    error_checking.assert_is_list(list_of_input_matrices)
    error_checking.assert_is_list(list_of_saliency_matrices)
    num_input_matrices = len(list_of_input_matrices)
    num_saliency_matrices = len(list_of_saliency_matrices)

    if num_input_matrices != num_saliency_matrices:
        error_string = (
            'Number of input matrices ({0:d}) should equal number of saliency '
            'matrices ({1:d}).').format(num_input_matrices,
                                        num_saliency_matrices)

        raise ValueError(error_string)

    for i in range(num_input_matrices):
        error_checking.assert_is_numpy_array_without_nan(
            list_of_input_matrices[i])
        error_checking.assert_is_numpy_array_without_nan(
            list_of_saliency_matrices[i])

        these_expected_dim = numpy.array(
            (num_storm_objects, ) + list_of_input_matrices[i].shape[1:],
            dtype=int)
        error_checking.assert_is_numpy_array(
            list_of_input_matrices[i], exact_dimensions=these_expected_dim)

        these_expected_dim = numpy.array(list_of_input_matrices[i].shape,
                                         dtype=int)
        error_checking.assert_is_numpy_array(
            list_of_saliency_matrices[i], exact_dimensions=these_expected_dim)

    if sounding_pressure_matrix_pascals is not None:
        error_checking.assert_is_numpy_array_without_nan(
            sounding_pressure_matrix_pascals)
        error_checking.assert_is_greater_numpy_array(
            sounding_pressure_matrix_pascals, 0.)
        error_checking.assert_is_numpy_array(sounding_pressure_matrix_pascals,
                                             num_dimensions=2)

        these_expected_dim = numpy.array(
            (num_storm_objects, ) + sounding_pressure_matrix_pascals.shape[1:],
            dtype=int)
        error_checking.assert_is_numpy_array(
            sounding_pressure_matrix_pascals,
            exact_dimensions=these_expected_dim)

    saliency_dict = {
        INPUT_MATRICES_KEY: list_of_input_matrices,
        SALIENCY_MATRICES_KEY: list_of_saliency_matrices,
        STORM_IDS_KEY: storm_ids,
        STORM_TIMES_KEY: storm_times_unix_sec,
        MODEL_FILE_NAME_KEY: model_file_name,
        COMPONENT_TYPE_KEY: saliency_metadata_dict[COMPONENT_TYPE_KEY],
        TARGET_CLASS_KEY: saliency_metadata_dict[TARGET_CLASS_KEY],
        LAYER_NAME_KEY: saliency_metadata_dict[LAYER_NAME_KEY],
        IDEAL_ACTIVATION_KEY: saliency_metadata_dict[IDEAL_ACTIVATION_KEY],
        NEURON_INDICES_KEY: saliency_metadata_dict[NEURON_INDICES_KEY],
        CHANNEL_INDEX_KEY: saliency_metadata_dict[CHANNEL_INDEX_KEY],
        SOUNDING_PRESSURES_KEY: sounding_pressure_matrix_pascals
    }

    file_system_utils.mkdir_recursive_if_necessary(file_name=pickle_file_name)
    pickle_file_handle = open(pickle_file_name, 'wb')
    pickle.dump(saliency_dict, pickle_file_handle)
    pickle_file_handle.close()
def plot_many_2d_grids(data_matrix,
                       field_names,
                       axes_objects,
                       panel_names=None,
                       plot_grid_lines=True,
                       colour_map_objects=None,
                       colour_norm_objects=None,
                       refl_opacity=DEFAULT_OPACITY,
                       plot_colour_bar_flags=None,
                       panel_name_font_size=DEFAULT_FONT_SIZE,
                       colour_bar_font_size=DEFAULT_FONT_SIZE,
                       colour_bar_length=DEFAULT_COLOUR_BAR_LENGTH):
    """Plots many 2-D grids in paneled figure.

    M = number of rows in grid
    N = number of columns in grid
    C = number of fields

    :param data_matrix: M-by-N-by-C numpy array of radar values.
    :param field_names: length-C list of field names.
    :param axes_objects: length-C list of axes handles (instances of
        `matplotlib.axes._subplots.AxesSubplot`).
    :param panel_names: length-C list of panel names (to be printed at bottom of
        each panel).  If None, panel names will not be printed.
    :param plot_grid_lines: Boolean flag.  If True, will plot grid lines over
        radar images.
    :param colour_map_objects: length-C list of colour schemes (instances of
        `matplotlib.pyplot.cm` or similar).  If None, will use default colour
        scheme for each field.
    :param colour_norm_objects: length-C list of colour-normalizers (instances
        of `matplotlib.colors.BoundaryNorm` or similar).  If None, will use
        default normalizer for each field.
    :param refl_opacity: Opacity for reflectivity colour scheme.  Used only if
        `colour_map_objects` or `colour_norm_objects` is None, i.e., when the
        default colour schemes are used.
    :param plot_colour_bar_flags: length-C numpy array of Boolean flags.  If
        `plot_colour_bar_flags[k] == True`, will plot colour bar for [k]th
        panel.  If None, will plot no colour bars.
    :param panel_name_font_size: Font size for panel names.
    :param colour_bar_font_size: Font size for colour-bar tick marks.
    :param colour_bar_length: Length of colour bars (as fraction of axis
        length).
    :return: colour_bar_objects: length-C list of colour bars.  If
        `plot_colour_bar_flags[k] == False`, colour_bar_objects[k] will be None.
    """

    error_checking.assert_is_numpy_array(data_matrix, num_dimensions=3)
    num_fields = data_matrix.shape[-1]
    these_expected_dim = numpy.array([num_fields], dtype=int)

    error_checking.assert_is_string_list(field_names)
    error_checking.assert_is_numpy_array(numpy.array(field_names),
                                         exact_dimensions=these_expected_dim)

    error_checking.assert_is_numpy_array(numpy.array(axes_objects),
                                         exact_dimensions=these_expected_dim)

    if panel_names is None:
        panel_names = [None] * num_fields
    else:
        error_checking.assert_is_string_list(panel_names)
        error_checking.assert_is_numpy_array(
            numpy.array(panel_names), exact_dimensions=these_expected_dim)

    if colour_map_objects is None or colour_norm_objects is None:
        colour_map_objects = [None] * num_fields
        colour_norm_objects = [None] * num_fields
    else:
        error_checking.assert_is_numpy_array(
            numpy.array(colour_map_objects),
            exact_dimensions=these_expected_dim)
        error_checking.assert_is_numpy_array(
            numpy.array(colour_norm_objects),
            exact_dimensions=these_expected_dim)

    if plot_colour_bar_flags is None:
        plot_colour_bar_flags = numpy.full(num_fields, 0, dtype=bool)

    error_checking.assert_is_boolean_numpy_array(plot_colour_bar_flags)
    error_checking.assert_is_numpy_array(plot_colour_bar_flags,
                                         exact_dimensions=these_expected_dim)

    colour_bar_objects = [None] * num_fields

    for k in range(num_fields):
        this_colour_map_object, this_colour_norm_object = (
            plot_2d_grid_without_coords(
                field_matrix=data_matrix[..., k],
                field_name=field_names[k],
                axes_object=axes_objects[k],
                annotation_string=panel_names[k],
                font_size=panel_name_font_size,
                plot_grid_lines=plot_grid_lines,
                colour_map_object=copy.deepcopy(colour_map_objects[k]),
                colour_norm_object=copy.deepcopy(colour_norm_objects[k]),
                refl_opacity=refl_opacity))

        if not plot_colour_bar_flags[k]:
            continue

        colour_bar_objects[k] = plotting_utils.plot_colour_bar(
            axes_object_or_matrix=axes_objects[k],
            data_matrix=data_matrix[..., k],
            colour_map_object=this_colour_map_object,
            colour_norm_object=this_colour_norm_object,
            orientation_string='horizontal',
            font_size=colour_bar_font_size,
            fraction_of_axis_length=colour_bar_length,
            extend_min=field_names[k] in SHEAR_VORT_DIV_NAMES,
            extend_max=True)

    return colour_bar_objects
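
# Hypothetical usage sketch (not part of the original source), assuming
# `matplotlib.pyplot` is imported as `pyplot` and that 'reflectivity_dbz' is a
# valid field name here.  Plots two panels of random reflectivity-like values
# with one colour bar per panel.
demo_figure_object, demo_axes_object_matrix = pyplot.subplots(1, 2)
demo_colour_bar_objects = plot_many_2d_grids(
    data_matrix=numpy.random.uniform(low=0., high=60., size=(32, 32, 2)),
    field_names=['reflectivity_dbz', 'reflectivity_dbz'],
    axes_objects=list(demo_axes_object_matrix.flatten()),
    panel_names=['Low level', 'Mid level'],
    plot_colour_bar_flags=numpy.full(2, True, dtype=bool))
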
def write_file(netcdf_file_name, init_scalar_predictor_matrix,
               final_scalar_predictor_matrix, init_vector_predictor_matrix,
               final_vector_predictor_matrix, initial_activations,
               final_activations, example_id_strings, model_file_name,
               layer_name, neuron_indices, ideal_activation, num_iterations,
               learning_rate, l2_weight):
    """Writes backwards-optimization results to file.

    E = number of examples
    H = number of heights
    P_s = number of scalar predictors
    P_v = number of vector predictors

    :param netcdf_file_name: Path to output file.
    :param init_scalar_predictor_matrix: numpy array (E x P_s) of initial
        predictor values.
    :param final_scalar_predictor_matrix: Same but with final values.
    :param init_vector_predictor_matrix: numpy array (E x H x P_v) of initial
        predictor values.
    :param final_vector_predictor_matrix: Same but with final values.
    :param initial_activations: length-E numpy array of initial activations,
        before optimization.
    :param final_activations: Same but with final activations, after
        optimization.
    :param example_id_strings: length-E list of example IDs.
    :param model_file_name: Path to file with the neural net used for this
        backwards optimization (readable by `neural_net.read_model`).
    :param layer_name: See doc for `check_metadata`.
    :param neuron_indices: Same.
    :param ideal_activation: Same.
    :param num_iterations: Same.
    :param learning_rate: Same.
    :param l2_weight: Same.
    """

    # Check input args.
    check_metadata(layer_name=layer_name,
                   neuron_indices=neuron_indices,
                   ideal_activation=ideal_activation,
                   num_iterations=num_iterations,
                   learning_rate=learning_rate,
                   l2_weight=l2_weight)

    error_checking.assert_is_numpy_array_without_nan(
        init_scalar_predictor_matrix)
    error_checking.assert_is_numpy_array(init_scalar_predictor_matrix,
                                         num_dimensions=2)

    error_checking.assert_is_numpy_array_without_nan(
        final_scalar_predictor_matrix)
    error_checking.assert_is_numpy_array(
        final_scalar_predictor_matrix,
        exact_dimensions=numpy.array(init_scalar_predictor_matrix.shape,
                                     dtype=int))

    error_checking.assert_is_numpy_array_without_nan(
        init_vector_predictor_matrix)
    error_checking.assert_is_numpy_array(init_vector_predictor_matrix,
                                         num_dimensions=3)

    error_checking.assert_is_numpy_array_without_nan(
        final_vector_predictor_matrix)
    error_checking.assert_is_numpy_array(
        final_vector_predictor_matrix,
        exact_dimensions=numpy.array(init_vector_predictor_matrix.shape,
                                     dtype=int))

    num_examples = init_vector_predictor_matrix.shape[0]
    expected_dim = numpy.array([num_examples], dtype=int)

    error_checking.assert_is_numpy_array_without_nan(initial_activations)
    error_checking.assert_is_numpy_array(initial_activations,
                                         exact_dimensions=expected_dim)

    error_checking.assert_is_numpy_array_without_nan(final_activations)
    error_checking.assert_is_numpy_array(final_activations,
                                         exact_dimensions=expected_dim)

    error_checking.assert_is_string_list(example_id_strings)
    error_checking.assert_is_numpy_array(numpy.array(example_id_strings),
                                         exact_dimensions=expected_dim)

    error_checking.assert_is_string(model_file_name)

    # Write to NetCDF file.
    file_system_utils.mkdir_recursive_if_necessary(file_name=netcdf_file_name)
    dataset_object = netCDF4.Dataset(netcdf_file_name,
                                     'w',
                                     format='NETCDF3_64BIT_OFFSET')

    dataset_object.setncattr(MODEL_FILE_KEY, model_file_name)
    dataset_object.setncattr(LAYER_NAME_KEY, layer_name)
    dataset_object.setncattr(NEURON_INDICES_KEY, neuron_indices)
    dataset_object.setncattr(IDEAL_ACTIVATION_KEY, ideal_activation)
    dataset_object.setncattr(NUM_ITERATIONS_KEY, num_iterations)
    dataset_object.setncattr(LEARNING_RATE_KEY, learning_rate)
    dataset_object.setncattr(L2_WEIGHT_KEY, l2_weight)

    dataset_object.createDimension(EXAMPLE_DIMENSION_KEY, num_examples)
    dataset_object.createDimension(SCALAR_PREDICTOR_DIM_KEY,
                                   init_scalar_predictor_matrix.shape[-1])
    dataset_object.createDimension(HEIGHT_DIMENSION_KEY,
                                   init_vector_predictor_matrix.shape[1])
    dataset_object.createDimension(VECTOR_PREDICTOR_DIM_KEY,
                                   init_vector_predictor_matrix.shape[2])

    if num_examples == 0:
        num_id_characters = 1
    else:
        num_id_characters = numpy.max(
            numpy.array([len(s) for s in example_id_strings]))

    dataset_object.createDimension(EXAMPLE_ID_CHAR_DIM_KEY, num_id_characters)

    this_string_format = 'S{0:d}'.format(num_id_characters)
    example_ids_char_array = netCDF4.stringtochar(
        numpy.array(example_id_strings, dtype=this_string_format))

    dataset_object.createVariable(EXAMPLE_IDS_KEY,
                                  datatype='S1',
                                  dimensions=(EXAMPLE_DIMENSION_KEY,
                                              EXAMPLE_ID_CHAR_DIM_KEY))
    dataset_object.variables[EXAMPLE_IDS_KEY][:] = numpy.array(
        example_ids_char_array)

    if init_scalar_predictor_matrix.size > 0:
        these_dim = (EXAMPLE_DIMENSION_KEY, SCALAR_PREDICTOR_DIM_KEY)

        dataset_object.createVariable(INIT_SCALAR_PREDICTORS_KEY,
                                      datatype=numpy.float32,
                                      dimensions=these_dim)
        dataset_object.variables[INIT_SCALAR_PREDICTORS_KEY][:] = (
            init_scalar_predictor_matrix)

        dataset_object.createVariable(FINAL_SCALAR_PREDICTORS_KEY,
                                      datatype=numpy.float32,
                                      dimensions=these_dim)
        dataset_object.variables[FINAL_SCALAR_PREDICTORS_KEY][:] = (
            final_scalar_predictor_matrix)

    if init_vector_predictor_matrix.size > 0:
        these_dim = (EXAMPLE_DIMENSION_KEY, HEIGHT_DIMENSION_KEY,
                     VECTOR_PREDICTOR_DIM_KEY)

        dataset_object.createVariable(INIT_VECTOR_PREDICTORS_KEY,
                                      datatype=numpy.float32,
                                      dimensions=these_dim)
        dataset_object.variables[INIT_VECTOR_PREDICTORS_KEY][:] = (
            init_vector_predictor_matrix)

        dataset_object.createVariable(FINAL_VECTOR_PREDICTORS_KEY,
                                      datatype=numpy.float32,
                                      dimensions=these_dim)
        dataset_object.variables[FINAL_VECTOR_PREDICTORS_KEY][:] = (
            final_vector_predictor_matrix)

    dataset_object.createVariable(INITIAL_ACTIVATIONS_KEY,
                                  datatype=numpy.float32,
                                  dimensions=EXAMPLE_DIMENSION_KEY)
    dataset_object.variables[INITIAL_ACTIVATIONS_KEY][:] = initial_activations

    dataset_object.createVariable(FINAL_ACTIVATIONS_KEY,
                                  datatype=numpy.float32,
                                  dimensions=EXAMPLE_DIMENSION_KEY)
    dataset_object.variables[FINAL_ACTIVATIONS_KEY][:] = final_activations

    dataset_object.close()
def soundings_to_metpy_dictionaries(sounding_matrix,
                                    field_names,
                                    height_levels_m_agl=None,
                                    storm_elevations_m_asl=None):
    """Converts soundings to format required by MetPy.

    If `sounding_matrix` contains pressures, `height_levels_m_agl` and
    `storm_elevations_m_asl` will not be used.

    Otherwise, `height_levels_m_agl` and `storm_elevations_m_asl` will be used
    to estimate the pressure levels for each sounding.

    :param sounding_matrix: numpy array (E x H_s x F_s) of soundings.
    :param field_names: list (length F_s) of field names, in the order that they
        appear in `sounding_matrix`.
    :param height_levels_m_agl: numpy array (length H_s) of height levels
        (metres above ground level), in the order that they appear in
        `sounding_matrix`.
    :param storm_elevations_m_asl: length-E numpy array of storm elevations
        (metres above sea level).
    :return: list_of_metpy_dictionaries: length-E list of dictionaries.  The
        format of each dictionary is described in the input doc for
        `sounding_plotting.plot_sounding`.
    """

    error_checking.assert_is_string_list(field_names)
    error_checking.assert_is_numpy_array(numpy.array(field_names),
                                         num_dimensions=1)
    check_soundings(sounding_matrix=sounding_matrix,
                    num_fields=len(field_names))

    try:
        pressure_index = field_names.index(soundings.PRESSURE_NAME)
        pressure_matrix_pascals = sounding_matrix[..., pressure_index]
    except ValueError:
        error_checking.assert_is_geq_numpy_array(height_levels_m_agl, 0)
        error_checking.assert_is_numpy_array(height_levels_m_agl,
                                             num_dimensions=1)

        error_checking.assert_is_numpy_array_without_nan(
            storm_elevations_m_asl)
        error_checking.assert_is_numpy_array(storm_elevations_m_asl,
                                             num_dimensions=1)

        num_height_levels = len(height_levels_m_agl)
        num_examples = len(storm_elevations_m_asl)
        check_soundings(sounding_matrix=sounding_matrix,
                        num_examples=num_examples,
                        num_height_levels=num_height_levels)

        height_matrix_m_asl = numpy.full((num_examples, num_height_levels),
                                         numpy.nan)
        for i in range(num_examples):
            height_matrix_m_asl[i, ...] = (height_levels_m_agl +
                                           storm_elevations_m_asl[i])

        pressure_matrix_pascals = standard_atmo.height_to_pressure(
            height_matrix_m_asl)

    try:
        temperature_index = field_names.index(soundings.TEMPERATURE_NAME)
        temperature_matrix_kelvins = sounding_matrix[..., temperature_index]
    except ValueError:
        virtual_pot_temp_index = field_names.index(
            soundings.VIRTUAL_POTENTIAL_TEMPERATURE_NAME)
        temperature_matrix_kelvins = (
            temperature_conversions.temperatures_from_potential_temperatures(
                potential_temperatures_kelvins=sounding_matrix[
                    ..., virtual_pot_temp_index],
                total_pressures_pascals=pressure_matrix_pascals))

    try:
        specific_humidity_index = field_names.index(
            soundings.SPECIFIC_HUMIDITY_NAME)
        dewpoint_matrix_kelvins = (
            moisture_conversions.specific_humidity_to_dewpoint(
                specific_humidities_kg_kg01=sounding_matrix[
                    ..., specific_humidity_index],
                total_pressures_pascals=pressure_matrix_pascals))
    except ValueError:
        relative_humidity_index = field_names.index(
            soundings.RELATIVE_HUMIDITY_NAME)
        dewpoint_matrix_kelvins = (
            moisture_conversions.relative_humidity_to_dewpoint(
                relative_humidities=sounding_matrix[...,
                                                    relative_humidity_index],
                temperatures_kelvins=temperature_matrix_kelvins,
                total_pressures_pascals=pressure_matrix_pascals))

    temperature_matrix_celsius = temperature_conversions.kelvins_to_celsius(
        temperature_matrix_kelvins)
    dewpoint_matrix_celsius = temperature_conversions.kelvins_to_celsius(
        dewpoint_matrix_kelvins)

    try:
        u_wind_index = field_names.index(soundings.U_WIND_NAME)
        v_wind_index = field_names.index(soundings.V_WIND_NAME)
        include_wind = True
    except ValueError:
        include_wind = False

    num_examples = sounding_matrix.shape[0]
    list_of_metpy_dictionaries = [None] * num_examples

    for i in range(num_examples):
        list_of_metpy_dictionaries[i] = {
            soundings.PRESSURE_COLUMN_METPY:
            pressure_matrix_pascals[i, :] * PASCALS_TO_MB,
            soundings.TEMPERATURE_COLUMN_METPY:
            temperature_matrix_celsius[i, :],
            soundings.DEWPOINT_COLUMN_METPY: dewpoint_matrix_celsius[i, :],
        }

        if include_wind:
            list_of_metpy_dictionaries[i].update({
                soundings.U_WIND_COLUMN_METPY:
                (sounding_matrix[i, ..., u_wind_index] *
                 METRES_PER_SECOND_TO_KT),
                soundings.V_WIND_COLUMN_METPY:
                (sounding_matrix[i, ..., v_wind_index] *
                 METRES_PER_SECOND_TO_KT)
            })

    return list_of_metpy_dictionaries
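
# Hypothetical usage sketch (not part of the original source): one sounding
# with three height levels and the fields pressure, temperature, and specific
# humidity (in that order).  The numbers are made up but physically plausible.
demo_sounding_matrix = numpy.stack((
    numpy.array([[100000., 85000., 70000.]]),   # pressures (Pa)
    numpy.array([[300., 290., 280.]]),          # temperatures (K)
    numpy.array([[0.012, 0.008, 0.004]])        # specific humidities (kg kg^-1)
), axis=-1)

demo_metpy_dictionaries = soundings_to_metpy_dictionaries(
    sounding_matrix=demo_sounding_matrix,
    field_names=[soundings.PRESSURE_NAME, soundings.TEMPERATURE_NAME,
                 soundings.SPECIFIC_HUMIDITY_NAME])
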
def denormalize_soundings(sounding_matrix,
                          field_names,
                          normalization_type_string,
                          normalization_param_file_name,
                          test_mode=False,
                          min_normalized_value=0.,
                          max_normalized_value=1.,
                          normalization_table=None):
    """Denormalizes soundings.

    This method is the inverse of `normalize_soundings`.

    :param sounding_matrix: See doc for `normalize_soundings`.
    :param field_names: Same.
    :param normalization_type_string: Same.
    :param normalization_param_file_name: Path to file with normalization
        params.  Will be read by `read_normalization_params_from_file`.
    :param test_mode: For testing only.  Leave this alone.
    :param min_normalized_value: Same.
    :param max_normalized_value: Same.
    :param normalization_table: For testing only.  Leave this alone.
    :return: sounding_matrix: Denormalized version of input, with the same
        dimensions.
    """

    error_checking.assert_is_boolean(test_mode)
    if not test_mode:
        normalization_table = read_normalization_params_from_file(
            normalization_param_file_name)[2]

    error_checking.assert_is_string_list(field_names)
    error_checking.assert_is_numpy_array(numpy.array(field_names),
                                         num_dimensions=1)

    num_fields = len(field_names)
    check_soundings(sounding_matrix=sounding_matrix, num_fields=num_fields)
    _check_normalization_type(normalization_type_string)

    if normalization_type_string == MINMAX_NORMALIZATION_TYPE_STRING:
        error_checking.assert_is_greater(max_normalized_value,
                                         min_normalized_value)
        # error_checking.assert_is_geq_numpy_array(
        #     sounding_matrix, min_normalized_value)
        # error_checking.assert_is_leq_numpy_array(
        #     sounding_matrix, max_normalized_value)

    for j in range(num_fields):
        if normalization_type_string == MINMAX_NORMALIZATION_TYPE_STRING:
            this_min_value = normalization_table[MIN_VALUE_COLUMN].loc[
                field_names[j]]
            this_max_value = normalization_table[MAX_VALUE_COLUMN].loc[
                field_names[j]]

            sounding_matrix[..., j] = (
                (sounding_matrix[..., j] - min_normalized_value) /
                (max_normalized_value - min_normalized_value))
            sounding_matrix[..., j] = this_min_value + (
                sounding_matrix[..., j] * (this_max_value - this_min_value))
        else:
            this_mean = normalization_table[MEAN_VALUE_COLUMN].loc[
                field_names[j]]
            this_standard_deviation = normalization_table[
                STANDARD_DEVIATION_COLUMN].loc[field_names[j]]

            sounding_matrix[..., j] = this_mean + (this_standard_deviation *
                                                   sounding_matrix[..., j])

    return sounding_matrix
def find_many_raw_files(
    desired_times_unix_sec,
    spc_date_strings,
    data_source,
    field_names,
    top_directory_name,
    reflectivity_heights_m_asl=None,
    max_time_offset_for_az_shear_sec=DEFAULT_MAX_TIME_OFFSET_FOR_AZ_SHEAR_SEC,
    max_time_offset_for_non_shear_sec=DEFAULT_MAX_TIME_OFFSET_FOR_NON_SHEAR_SEC
):
    """Finds raw file for each field/height pair and time step.

    N = number of input times
    T = number of unique input times
    F = number of field/height pairs

    :param desired_times_unix_sec: length-N numpy array with desired valid
        times.
    :param spc_date_strings: length-N list of corresponding SPC dates (format
        "yyyymmdd").
    :param data_source: Data source ("myrorss" or "mrms").
    :param field_names: 1-D list of field names.
    :param top_directory_name: Name of top-level directory with radar data from
        the given source.
    :param reflectivity_heights_m_asl: 1-D numpy array of heights (metres above
        sea level) for the field "reflectivity_dbz".  If "reflectivity_dbz" is
        not in `field_names`, leave this as None.
    :param max_time_offset_for_az_shear_sec: Max time offset (between desired
        and actual valid time) for azimuthal-shear fields.
    :param max_time_offset_for_non_shear_sec: Max time offset (between desired
        and actual valid time) for non-azimuthal-shear fields.
    :return: file_dictionary: Dictionary with the following keys.
    file_dictionary['radar_file_name_matrix']: T-by-F numpy array of paths to
        raw files.
    file_dictionary['unique_times_unix_sec']: length-T numpy array of unique
        valid times.
    file_dictionary['spc_date_strings_for_unique_times']: length-T numpy array
        of corresponding SPC dates.
    file_dictionary['field_name_by_pair']: length-F list of field names.
    file_dictionary['height_by_pair_m_asl']: length-F numpy array of heights
        (metres above sea level).
    """

    field_name_by_pair, height_by_pair_m_asl = (
        myrorss_and_mrms_utils.fields_and_refl_heights_to_pairs(
            field_names=field_names,
            data_source=data_source,
            refl_heights_m_asl=reflectivity_heights_m_asl))

    num_fields = len(field_name_by_pair)

    error_checking.assert_is_integer_numpy_array(desired_times_unix_sec)
    error_checking.assert_is_numpy_array(desired_times_unix_sec,
                                         num_dimensions=1)
    num_times = len(desired_times_unix_sec)

    error_checking.assert_is_string_list(spc_date_strings)
    error_checking.assert_is_numpy_array(numpy.array(spc_date_strings),
                                         exact_dimensions=numpy.array(
                                             [num_times]))

    spc_dates_unix_sec = numpy.array([
        time_conversion.spc_date_string_to_unix_sec(s)
        for s in spc_date_strings
    ])

    time_matrix = numpy.hstack(
        (numpy.reshape(desired_times_unix_sec, (num_times, 1)),
         numpy.reshape(spc_dates_unix_sec, (num_times, 1))))

    unique_time_matrix = numpy.unique(time_matrix, axis=0).astype(int)

    unique_times_unix_sec = unique_time_matrix[:, 0]
    spc_dates_at_unique_times_unix_sec = unique_time_matrix[:, 1]

    sort_indices = numpy.argsort(unique_times_unix_sec)
    unique_times_unix_sec = unique_times_unix_sec[sort_indices]
    spc_dates_at_unique_times_unix_sec = spc_dates_at_unique_times_unix_sec[
        sort_indices]

    num_unique_times = len(unique_times_unix_sec)
    radar_file_name_matrix = numpy.full((num_unique_times, num_fields),
                                        '',
                                        dtype=object)

    for i in range(num_unique_times):
        this_spc_date_string = time_conversion.time_to_spc_date_string(
            spc_dates_at_unique_times_unix_sec[i])

        for j in range(num_fields):
            if field_name_by_pair[j] in AZIMUTHAL_SHEAR_FIELD_NAMES:
                this_max_time_offset_sec = max_time_offset_for_az_shear_sec
                this_raise_error_flag = False
            else:
                this_max_time_offset_sec = max_time_offset_for_non_shear_sec
                this_raise_error_flag = True

            if this_max_time_offset_sec == 0:
                radar_file_name_matrix[i, j] = find_raw_file(
                    unix_time_sec=unique_times_unix_sec[i],
                    spc_date_string=this_spc_date_string,
                    field_name=field_name_by_pair[j],
                    data_source=data_source,
                    top_directory_name=top_directory_name,
                    height_m_asl=height_by_pair_m_asl[j],
                    raise_error_if_missing=this_raise_error_flag)
            else:
                radar_file_name_matrix[i, j] = find_raw_file_inexact_time(
                    desired_time_unix_sec=unique_times_unix_sec[i],
                    spc_date_string=this_spc_date_string,
                    field_name=field_name_by_pair[j],
                    data_source=data_source,
                    top_directory_name=top_directory_name,
                    height_m_asl=height_by_pair_m_asl[j],
                    max_time_offset_sec=this_max_time_offset_sec,
                    raise_error_if_missing=this_raise_error_flag)

            if radar_file_name_matrix[i, j] is None:
                this_time_string = time_conversion.unix_sec_to_string(
                    unique_times_unix_sec[i], TIME_FORMAT_FOR_LOG_MESSAGES)

                warning_string = (
                    'Cannot find file for "{0:s}" at {1:d} metres ASL and '
                    '{2:s}.').format(field_name_by_pair[j],
                                     int(height_by_pair_m_asl[j]),
                                     this_time_string)

                warnings.warn(warning_string)

    return {
        RADAR_FILE_NAMES_KEY: radar_file_name_matrix,
        UNIQUE_TIMES_KEY: unique_times_unix_sec,
        SPC_DATES_AT_UNIQUE_TIMES_KEY: spc_dates_at_unique_times_unix_sec,
        FIELD_NAME_BY_PAIR_KEY: field_name_by_pair,
        HEIGHT_BY_PAIR_KEY: numpy.round(height_by_pair_m_asl).astype(int)
    }
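
# Hypothetical usage sketch (not part of the original source).  The top-level
# directory is a placeholder and must actually contain MYRORSS-formatted
# files; otherwise missing non-shear fields will raise an error inside
# `find_raw_file` / `find_raw_file_inexact_time`.
demo_file_dictionary = find_many_raw_files(
    desired_times_unix_sec=numpy.array([1559170800, 1559171100], dtype=int),
    spc_date_strings=['20190529', '20190529'],
    data_source='myrorss',
    field_names=['reflectivity_dbz'],
    top_directory_name='/radar_data/myrorss',
    reflectivity_heights_m_asl=numpy.array([250.]))
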
def run_sfs_on_sklearn_model(
        training_predictor_matrix, training_target_values,
        validation_predictor_matrix, validation_target_values, predictor_names,
        model_object, cost_function, min_loss_decrease=None,
        min_percentage_loss_decrease=None,
        num_steps_for_loss_decrease=DEFAULT_NUM_STEPS_FOR_LOSS_DECREASE):
    """Runs sequential forward selection (SFS) on scikit-learn model.

    T = number of training examples
    V = number of validation examples
    P = number of predictors

    :param training_predictor_matrix: T-by-P numpy array of predictor values.
    :param training_target_values: length-T numpy array of target values
        (integer class labels, since this method supports only classification).
    :param validation_predictor_matrix: V-by-P numpy array of predictor values.
    :param validation_target_values: length-V numpy array of target values.
    :param predictor_names: length-P list with names of predictor variables.
    :param model_object: Instance of scikit-learn model.  Must implement the
        methods `fit` and `predict_proba`.

    :param cost_function: Cost function (used to assess model on validation
        data).  Should have the following inputs and outputs.
    Input: target_values: Same as input `validation_target_values` for this
        method.
    Input: class_probability_matrix: V-by-K matrix of class probabilities, where
        K = number of classes.  class_probability_matrix[i, k] is the predicted
        probability that the [i]th example belongs to the [k]th class.
    Output: cost: Scalar value.

    :param min_loss_decrease: Used to determine stopping criterion.  If the loss
        has decreased by less than `min_loss_decrease` over the last
        `num_steps_for_loss_decrease` steps of sequential selection, the
        algorithm will stop.
    :param min_percentage_loss_decrease:
        [used only if `min_loss_decrease is None`]
        Used to determine stopping criterion.  If the loss has decreased by less
        than `min_percentage_loss_decrease` over the last
        `num_steps_for_loss_decrease` steps of sequential selection, the
        algorithm will stop.
    :param num_steps_for_loss_decrease: See above.

    :return: result_dict: See documentation for `run_sfs`.
    """

    # TODO(thunderhoser): This method does not involve deep learning, so
    # shouldn't really be in this file.

    # Check input args.
    error_checking.assert_is_numpy_array_without_nan(training_predictor_matrix)
    error_checking.assert_is_numpy_array(
        training_predictor_matrix, num_dimensions=2)

    num_training_examples = training_predictor_matrix.shape[0]
    num_predictors = training_predictor_matrix.shape[1]

    error_checking.assert_is_integer_numpy_array(training_target_values)
    error_checking.assert_is_geq_numpy_array(training_target_values, 0)
    error_checking.assert_is_numpy_array(
        training_target_values,
        exact_dimensions=numpy.array([num_training_examples])
    )

    error_checking.assert_is_numpy_array_without_nan(
        validation_predictor_matrix)
    num_validation_examples = validation_predictor_matrix.shape[0]
    error_checking.assert_is_numpy_array(
        validation_predictor_matrix,
        exact_dimensions=numpy.array([num_validation_examples, num_predictors])
    )

    error_checking.assert_is_integer_numpy_array(validation_target_values)
    error_checking.assert_is_geq_numpy_array(validation_target_values, 0)
    error_checking.assert_is_numpy_array(
        validation_target_values,
        exact_dimensions=numpy.array([num_validation_examples])
    )

    error_checking.assert_is_string_list(predictor_names)
    error_checking.assert_is_numpy_array(
        numpy.array(predictor_names),
        exact_dimensions=numpy.array([num_predictors])
    )

    # Create climatological model.
    num_classes = 1 + max(
        [numpy.max(training_target_values), numpy.max(validation_target_values)]
    )

    climo_validation_prob_matrix = numpy.full(
        (num_validation_examples, num_classes), numpy.nan)
    for k in range(num_classes):
        climo_validation_prob_matrix[..., k] = numpy.mean(
            training_target_values == k)

    climo_cost = cost_function(validation_target_values,
                               climo_validation_prob_matrix)

    print('Cost of climatological model: {0:.4e}\n'.format(climo_cost))

    # Do dirty work.
    remaining_predictor_names = predictor_names + []
    selected_predictor_name_by_step = []
    lowest_cost_by_step = []

    step_num = 0

    while len(remaining_predictor_names) > 0:
        print('\n')
        step_num += 1

        lowest_cost = numpy.inf
        best_predictor_name = None

        for this_predictor_name in remaining_predictor_names:
            print((
                'Trying predictor "{0:s}" at step {1:d} of SFS... '
            ).format(this_predictor_name, step_num))

            these_indices = [
                predictor_names.index(s)
                for s in selected_predictor_name_by_step
            ]
            these_indices.append(predictor_names.index(this_predictor_name))
            these_indices = numpy.array(these_indices, dtype=int)

            this_training_matrix = training_predictor_matrix[..., these_indices]
            this_validation_matrix = validation_predictor_matrix[
                ..., these_indices]

            new_model_object = sklearn.base.clone(model_object)
            new_model_object.fit(this_training_matrix, training_target_values)

            this_validation_prob_matrix = new_model_object.predict_proba(
                this_validation_matrix)
            this_cost = cost_function(validation_target_values,
                                      this_validation_prob_matrix)

            print('Validation loss after adding "{0:s}" = {1:.4e}\n'.format(
                this_predictor_name, this_cost))

            if this_cost > lowest_cost:
                continue

            lowest_cost = this_cost + 0.
            best_predictor_name = this_predictor_name + ''

        stopping_criterion = _eval_sfs_stopping_criterion(
            min_loss_decrease=min_loss_decrease,
            min_percentage_loss_decrease=min_percentage_loss_decrease,
            num_steps_for_loss_decrease=num_steps_for_loss_decrease,
            lowest_cost_by_step=lowest_cost_by_step + [lowest_cost])

        if stopping_criterion:
            break

        selected_predictor_name_by_step.append(best_predictor_name)
        lowest_cost_by_step.append(lowest_cost)
        remaining_predictor_names.remove(best_predictor_name)

        print('Best predictor = "{0:s}" ... new cost = {1:.4e}'.format(
            best_predictor_name, lowest_cost))
        print(SEPARATOR_STRING)

    return {
        MIN_DECREASE_KEY: min_loss_decrease,
        MIN_PERCENT_DECREASE_KEY: min_percentage_loss_decrease,
        NUM_STEPS_FOR_DECREASE_KEY: num_steps_for_loss_decrease,
        SELECTED_PREDICTORS_KEY: selected_predictor_name_by_step,
        LOWEST_COSTS_KEY: lowest_cost_by_step
    }
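
# Hypothetical usage sketch (not part of the original source), assuming
# `sklearn.linear_model` and `sklearn.metrics` are imported.  Runs SFS with
# logistic regression on random binary-classification data, using log loss as
# the cost function.
demo_training_predictor_matrix = numpy.random.normal(size=(200, 3))
demo_training_target_values = numpy.random.randint(low=0, high=2, size=200)
demo_validation_predictor_matrix = numpy.random.normal(size=(100, 3))
demo_validation_target_values = numpy.random.randint(low=0, high=2, size=100)

demo_result_dict = run_sfs_on_sklearn_model(
    training_predictor_matrix=demo_training_predictor_matrix,
    training_target_values=demo_training_target_values,
    validation_predictor_matrix=demo_validation_predictor_matrix,
    validation_target_values=demo_validation_target_values,
    predictor_names=['predictor_a', 'predictor_b', 'predictor_c'],
    model_object=sklearn.linear_model.LogisticRegression(),
    cost_function=sklearn.metrics.log_loss,
    min_percentage_loss_decrease=1.)
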
def write_ungridded_predictions(
        netcdf_file_name, class_probability_matrix, storm_ids,
        storm_times_unix_sec, target_name, observed_labels=None):
    """Writes predictions to NetCDF file.

    K = number of classes
    E = number of examples (storm objects)

    :param netcdf_file_name: Path to output file.
    :param class_probability_matrix: E-by-K numpy array of forecast
        probabilities.
    :param storm_ids: length-E list of storm IDs (strings).
    :param storm_times_unix_sec: length-E numpy array of valid times.
    :param target_name: Name of target variable.
    :param observed_labels: [this may be None]
        length-E numpy array of observed labels (integers in 0...[K - 1]).
    """

    # Check input args.
    error_checking.assert_is_numpy_array(
        class_probability_matrix, num_dimensions=2)
    error_checking.assert_is_geq_numpy_array(class_probability_matrix, 0.)
    error_checking.assert_is_leq_numpy_array(class_probability_matrix, 1.)

    num_examples = class_probability_matrix.shape[0]
    these_expected_dim = numpy.array([num_examples], dtype=int)

    error_checking.assert_is_string_list(storm_ids)
    error_checking.assert_is_numpy_array(
        numpy.array(storm_ids), exact_dimensions=these_expected_dim)

    error_checking.assert_is_integer_numpy_array(storm_times_unix_sec)
    error_checking.assert_is_numpy_array(
        storm_times_unix_sec, exact_dimensions=these_expected_dim)

    target_val_utils.target_name_to_params(target_name)

    if observed_labels is not None:
        error_checking.assert_is_integer_numpy_array(observed_labels)
        error_checking.assert_is_numpy_array(
            observed_labels, exact_dimensions=these_expected_dim)

    # Write to NetCDF file.
    file_system_utils.mkdir_recursive_if_necessary(file_name=netcdf_file_name)
    dataset_object = netCDF4.Dataset(
        netcdf_file_name, 'w', format='NETCDF3_64BIT_OFFSET')

    dataset_object.setncattr(TARGET_NAME_KEY, target_name)
    dataset_object.createDimension(
        EXAMPLE_DIMENSION_KEY, class_probability_matrix.shape[0]
    )
    dataset_object.createDimension(
        CLASS_DIMENSION_KEY, class_probability_matrix.shape[1]
    )

    if num_examples == 0:
        num_id_characters = 1
    else:
        num_id_characters = 1 + numpy.max(numpy.array([
            len(s) for s in storm_ids
        ]))

    dataset_object.createDimension(STORM_ID_CHAR_DIM_KEY, num_id_characters)

    # Add storm IDs.
    this_string_format = 'S{0:d}'.format(num_id_characters)
    storm_ids_char_array = netCDF4.stringtochar(numpy.array(
        storm_ids, dtype=this_string_format
    ))

    dataset_object.createVariable(
        STORM_IDS_KEY, datatype='S1',
        dimensions=(EXAMPLE_DIMENSION_KEY, STORM_ID_CHAR_DIM_KEY)
    )
    dataset_object.variables[STORM_IDS_KEY][:] = numpy.array(
        storm_ids_char_array)

    # Add storm times.
    dataset_object.createVariable(
        STORM_TIMES_KEY, datatype=numpy.int32, dimensions=EXAMPLE_DIMENSION_KEY
    )
    dataset_object.variables[STORM_TIMES_KEY][:] = storm_times_unix_sec

    # Add probabilities.
    dataset_object.createVariable(
        PROBABILITY_MATRIX_KEY, datatype=numpy.float32,
        dimensions=(EXAMPLE_DIMENSION_KEY, CLASS_DIMENSION_KEY)
    )
    dataset_object.variables[PROBABILITY_MATRIX_KEY][:] = (
        class_probability_matrix
    )

    if observed_labels is not None:
        dataset_object.createVariable(
            OBSERVED_LABELS_KEY, datatype=numpy.int32,
            dimensions=EXAMPLE_DIMENSION_KEY
        )
        dataset_object.variables[OBSERVED_LABELS_KEY][:] = observed_labels

    dataset_object.close()
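
# NetCDF3 has no variable-length string type, so the writer above stores storm
# IDs as a 2-D character array via `netCDF4.stringtochar`.  Below is a minimal,
# self-contained sketch of that round trip (illustrative only; the variable and
# dimension names are hypothetical, and the module-level `netCDF4` and `numpy`
# imports used above are assumed).
def _demo_string_ids_in_netcdf3(netcdf_file_name, storm_ids):
    """Writes string IDs to a NetCDF3 file as characters, then reads them back."""

    num_id_characters = 1 + max([len(s) for s in storm_ids])

    dataset_object = netCDF4.Dataset(
        netcdf_file_name, 'w', format='NETCDF3_64BIT_OFFSET')
    dataset_object.createDimension('storm_object', len(storm_ids))
    dataset_object.createDimension('storm_id_character', num_id_characters)

    dataset_object.createVariable(
        'storm_ids', datatype='S1',
        dimensions=('storm_object', 'storm_id_character')
    )
    dataset_object.variables['storm_ids'][:] = netCDF4.stringtochar(numpy.array(
        storm_ids, dtype='S{0:d}'.format(num_id_characters)
    ))
    dataset_object.close()

    # Read the IDs back, collapsing the character axis into whole strings.
    dataset_object = netCDF4.Dataset(netcdf_file_name)
    id_strings = [
        str(s) for s in
        netCDF4.chartostring(dataset_object.variables['storm_ids'][:])
    ]
    dataset_object.close()

    return id_strings
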
def write_standard_file(pickle_file_name,
                        denorm_predictor_matrices,
                        cam_matrices,
                        guided_cam_matrices,
                        full_storm_id_strings,
                        storm_times_unix_sec,
                        model_file_name,
                        target_class,
                        target_layer_name,
                        sounding_pressure_matrix_pa=None):
    """Writes class-activation maps (one per storm object) to Pickle file.

    E = number of examples (storm objects)
    H = number of sounding heights

    :param pickle_file_name: Path to output file.
    :param denorm_predictor_matrices: See doc for `_check_in_and_out_matrices`.
    :param cam_matrices: Same.
    :param guided_cam_matrices: Same.
    :param full_storm_id_strings: length-E list of storm IDs.
    :param storm_times_unix_sec: length-E numpy array of storm times.
    :param model_file_name: Path to model that created the class-activation
        maps (readable by `cnn.read_model`).
    :param target_class: Target class.  `cam_matrices` and `guided_cam_matrices`
        contain activations for the [k + 1]th class, where k = `target_class`.
    :param target_layer_name: Name of target layer.
    :param sounding_pressure_matrix_pa: E-by-H numpy array of pressure
        levels.  Needed only if the model is trained with soundings but without
        pressure as a predictor.
    """

    error_checking.assert_is_string(model_file_name)
    error_checking.assert_is_integer(target_class)
    error_checking.assert_is_geq(target_class, 0)
    error_checking.assert_is_string(target_layer_name)

    error_checking.assert_is_string_list(full_storm_id_strings)
    error_checking.assert_is_numpy_array(numpy.array(full_storm_id_strings),
                                         num_dimensions=1)

    num_examples = len(full_storm_id_strings)
    these_expected_dim = numpy.array([num_examples], dtype=int)

    error_checking.assert_is_integer_numpy_array(storm_times_unix_sec)
    error_checking.assert_is_numpy_array(storm_times_unix_sec,
                                         exact_dimensions=these_expected_dim)

    _check_in_and_out_matrices(predictor_matrices=denorm_predictor_matrices,
                               num_examples=num_examples,
                               cam_matrices=cam_matrices,
                               guided_cam_matrices=guided_cam_matrices)

    if sounding_pressure_matrix_pa is not None:
        error_checking.assert_is_numpy_array_without_nan(
            sounding_pressure_matrix_pa)
        error_checking.assert_is_greater_numpy_array(
            sounding_pressure_matrix_pa, 0.)
        error_checking.assert_is_numpy_array(sounding_pressure_matrix_pa,
                                             num_dimensions=2)

        these_expected_dim = numpy.array(
            (num_examples, ) + sounding_pressure_matrix_pa.shape[1:],
            dtype=int)
        error_checking.assert_is_numpy_array(
            sounding_pressure_matrix_pa, exact_dimensions=these_expected_dim)

    gradcam_dict = {
        PREDICTOR_MATRICES_KEY: denorm_predictor_matrices,
        CAM_MATRICES_KEY: cam_matrices,
        GUIDED_CAM_MATRICES_KEY: guided_cam_matrices,
        MODEL_FILE_KEY: model_file_name,
        FULL_STORM_IDS_KEY: full_storm_id_strings,
        STORM_TIMES_KEY: storm_times_unix_sec,
        TARGET_CLASS_KEY: target_class,
        TARGET_LAYER_KEY: target_layer_name,
        SOUNDING_PRESSURES_KEY: sounding_pressure_matrix_pa
    }

    file_system_utils.mkdir_recursive_if_necessary(file_name=pickle_file_name)
    pickle_file_handle = open(pickle_file_name, 'wb')
    pickle.dump(gradcam_dict, pickle_file_handle)
    pickle_file_handle.close()
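
# The Pickle file written above can be read back with a single `pickle.load`.
# This helper is a sketch, not part of the original library, and assumes the
# module-level `pickle` import used by the writer above.
def _demo_read_gradcam_file(pickle_file_name):
    """Reads a Grad-CAM dictionary written by `write_standard_file`."""

    pickle_file_handle = open(pickle_file_name, 'rb')
    gradcam_dict = pickle.load(pickle_file_handle)
    pickle_file_handle.close()

    return gradcam_dict
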
def download_files_via_http(online_file_names,
                            local_file_names,
                            user_name=None,
                            password=None,
                            host_name=None,
                            raise_error_if_fails=True):
    """Downloads files via HTTP.
    N = number of files to download
    :param online_file_names: length-N list of URLs.  Example:
        "https://nomads.ncdc.noaa.gov/data/narr/201212/20121212/
        narr-a_221_20121212_1200_000.grb"
    :param local_file_names: length-N list of target paths on local machine (to
        which files will be downloaded).
    :param user_name: User name on HTTP server.  To login anonymously, leave
        this as None.
    :param password: Password on HTTP server.  To login anonymously, leave
        this as None.
    :param host_name: Host name (base URL name) for HTTP server.  Example:
        "https://nomads.ncdc.noaa.gov"
    :param raise_error_if_fails: Boolean flag.  If True and download fails, this
        method will raise an error.
    :return: local_file_names: Same as input, except that if download failed for
        the [i]th file, local_file_names[i] = None.
    :raises: ValueError: if download failed and raise_error_if_fails = True.
    :raises: urllib.error.HTTPError: if download failed for any reason not in
        `ACCEPTABLE_HTTP_ERROR_CODES` or `ACCEPTABLE_URL_ERROR_CODES`.  This
        error will be raised regardless of the flag `raise_error_if_fails`.
    """

    if not (user_name is None or password is None):
        error_checking.assert_is_string(user_name)
        error_checking.assert_is_string(password)
        error_checking.assert_is_string(host_name)

        manager_object = urllib.request.HTTPPasswordMgrWithDefaultRealm()
        manager_object.add_password(realm=None,
                                    uri=host_name,
                                    user=user_name,
                                    passwd=password)

        authentication_handler = urllib.request.HTTPBasicAuthHandler(
            manager_object)
        opener_object = urllib.request.build_opener(authentication_handler)
        urllib.request.install_opener(opener_object)

    error_checking.assert_is_string_list(online_file_names)
    error_checking.assert_is_numpy_array(numpy.asarray(online_file_names),
                                         num_dimensions=1)
    num_files = len(online_file_names)

    error_checking.assert_is_string_list(local_file_names)
    error_checking.assert_is_numpy_array(numpy.asarray(local_file_names),
                                         exact_dimensions=numpy.array(
                                             [num_files]))

    error_checking.assert_is_boolean(raise_error_if_fails)

    for i in range(num_files):
        this_download_succeeded = False
        this_response_object = None

        try:
            this_response_object = urllib.request.urlopen(online_file_names[i])
            this_download_succeeded = True

        except urllib.error.HTTPError as this_error:
            if (raise_error_if_fails
                    or this_error.code not in ACCEPTABLE_HTTP_ERROR_CODES):
                raise

        except urllib.error.URLError as this_error:
            error_words = this_error.reason.split()
            acceptable_error_flags = numpy.array(
                [w in str(ACCEPTABLE_URL_ERROR_CODES) for w in error_words],
                dtype=bool)

            if raise_error_if_fails or not numpy.any(acceptable_error_flags):
                raise

        if not this_download_succeeded:
            warnings.warn('Could not download file: {0:s}'.format(
                online_file_names[i]))

            local_file_names[i] = None
            continue

        file_system_utils.mkdir_recursive_if_necessary(
            file_name=local_file_names[i])

        with open(local_file_names[i], 'wb') as this_file_handle:
            while True:
                this_chunk = this_response_object.read(NUM_BYTES_PER_BLOCK)
                if not this_chunk:
                    break

                this_file_handle.write(this_chunk)

        if not os.path.isfile(local_file_names[i]):
            error_string = (
                'Could not download file.  Local file expected at: "{0:s}"'
            ).format(local_file_names[i])

            if raise_error_if_fails:
                raise ValueError(error_string)

            warnings.warn(error_string)
            local_file_names[i] = None

    return local_file_names
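
# The download loop above streams each HTTP response in fixed-size chunks
# rather than holding the whole file in memory.  Below is the core pattern as
# a standalone sketch (illustrative only; the chunk size is hypothetical, and
# the module-level `urllib.request` import used above is assumed).
def _demo_download_in_chunks(url, local_file_name, num_bytes_per_block=16384):
    """Downloads one file via HTTP, writing it to disk in fixed-size chunks."""

    response_object = urllib.request.urlopen(url)

    with open(local_file_name, 'wb') as local_file_handle:
        while True:
            this_chunk = response_object.read(num_bytes_per_block)
            if not this_chunk:
                break

            local_file_handle.write(this_chunk)
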
def write_standard_file(pickle_file_name,
                        init_function_name_or_matrices,
                        list_of_optimized_matrices,
                        model_file_name,
                        num_iterations,
                        learning_rate,
                        component_type_string,
                        target_class=None,
                        layer_name=None,
                        neuron_indices=None,
                        channel_index=None,
                        ideal_activation=None,
                        storm_ids=None,
                        storm_times_unix_sec=None):
    """Writes optimized learning examples to Pickle file.

    E = number of examples (storm objects)

    :param pickle_file_name: Path to output file.
    :param init_function_name_or_matrices: See doc for `_do_gradient_descent`.
        The only difference here is that, if a function was used, the input
        argument must be the function *name* rather than the function itself.
    :param list_of_optimized_matrices: List of numpy arrays created by
        `_do_gradient_descent`.
    :param model_file_name: Path to file with trained model (readable by
        `cnn.read_model`).
    :param num_iterations: See doc for `_do_gradient_descent`.
    :param learning_rate: Same.
    :param component_type_string: See doc for
        `model_interpretation.check_component_metadata`.
    :param target_class: Same.
    :param layer_name: Same.
    :param neuron_indices: Same.
    :param channel_index: Same.
    :param ideal_activation: See doc for `optimize_input_for_neuron` or
        `optimize_input_for_channel`.
    :param storm_ids:
        [used only if `init_function_name_or_matrices` is list of matrices]
        length-E list of storm IDs (strings).
    :param storm_times_unix_sec:
        [used only if `init_function_name_or_matrices` is list of matrices]
        length-E numpy array of storm times.
    :raises: ValueError: if `init_function_name_or_matrices` is a list of numpy
        arrays and has a different length than `list_of_optimized_matrices`.
    """

    model_interpretation.check_component_metadata(
        component_type_string=component_type_string,
        target_class=target_class,
        layer_name=layer_name,
        neuron_indices=neuron_indices,
        channel_index=channel_index)

    _check_input_args(num_iterations=num_iterations,
                      learning_rate=learning_rate,
                      ideal_activation=ideal_activation)

    error_checking.assert_is_string(model_file_name)
    error_checking.assert_is_list(list_of_optimized_matrices)

    if isinstance(init_function_name_or_matrices, str):
        num_storm_objects = None
    else:
        num_init_matrices = len(init_function_name_or_matrices)
        num_optimized_matrices = len(list_of_optimized_matrices)

        if num_init_matrices != num_optimized_matrices:
            error_string = (
                'Number of input matrices ({0:d}) should equal number of output'
                ' matrices ({1:d}).').format(num_init_matrices,
                                             num_optimized_matrices)

            raise ValueError(error_string)

        error_checking.assert_is_string_list(storm_ids)
        error_checking.assert_is_numpy_array(numpy.array(storm_ids),
                                             num_dimensions=1)

        num_storm_objects = len(storm_ids)
        these_expected_dim = numpy.array([num_storm_objects], dtype=int)

        error_checking.assert_is_integer_numpy_array(storm_times_unix_sec)
        error_checking.assert_is_numpy_array(
            storm_times_unix_sec, exact_dimensions=these_expected_dim)

    num_matrices = len(list_of_optimized_matrices)

    for i in range(num_matrices):
        error_checking.assert_is_numpy_array_without_nan(
            list_of_optimized_matrices[i])

        if num_storm_objects is not None:
            these_expected_dim = numpy.array(
                (num_storm_objects, ) +
                list_of_optimized_matrices[i].shape[1:],
                dtype=int)
            error_checking.assert_is_numpy_array(
                list_of_optimized_matrices[i],
                exact_dimensions=these_expected_dim)

        if not isinstance(init_function_name_or_matrices, str):
            error_checking.assert_is_numpy_array_without_nan(
                init_function_name_or_matrices[i])

            these_expected_dim = numpy.array(
                list_of_optimized_matrices[i].shape, dtype=int)

            error_checking.assert_is_numpy_array(
                init_function_name_or_matrices[i],
                exact_dimensions=these_expected_dim)

    optimization_dict = {
        INIT_FUNCTION_KEY: init_function_name_or_matrices,
        OPTIMIZED_MATRICES_KEY: list_of_optimized_matrices,
        MODEL_FILE_NAME_KEY: model_file_name,
        NUM_ITERATIONS_KEY: num_iterations,
        LEARNING_RATE_KEY: learning_rate,
        COMPONENT_TYPE_KEY: component_type_string,
        TARGET_CLASS_KEY: target_class,
        LAYER_NAME_KEY: layer_name,
        IDEAL_ACTIVATION_KEY: ideal_activation,
        NEURON_INDICES_KEY: neuron_indices,
        CHANNEL_INDEX_KEY: channel_index,
        STORM_IDS_KEY: storm_ids,
        STORM_TIMES_KEY: storm_times_unix_sec
    }

    file_system_utils.mkdir_recursive_if_necessary(file_name=pickle_file_name)
    pickle_file_handle = open(pickle_file_name, 'wb')
    pickle.dump(optimization_dict, pickle_file_handle)
    pickle_file_handle.close()
def _run(evaluation_file_names, line_styles, line_colour_strings,
         set_descriptions_verbose, confidence_level, use_log_scale,
         plot_by_height, output_dir_name):
    """Plots model evaluation.

    This is effectively the main method.

    :param evaluation_file_names: See documentation at top of file.
    :param line_styles: Same.
    :param line_colour_strings: Same.
    :param set_descriptions_verbose: Same.
    :param confidence_level: Same.
    :param use_log_scale: Same.
    :param plot_by_height: Same.
    :param output_dir_name: Same.
    """

    # Check input args.
    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    if confidence_level < 0:
        confidence_level = None

    if confidence_level is not None:
        error_checking.assert_is_geq(confidence_level, 0.9)
        error_checking.assert_is_less_than(confidence_level, 1.)

    num_evaluation_sets = len(evaluation_file_names)
    expected_dim = numpy.array([num_evaluation_sets], dtype=int)

    error_checking.assert_is_string_list(line_styles)
    error_checking.assert_is_numpy_array(numpy.array(line_styles),
                                         exact_dimensions=expected_dim)

    error_checking.assert_is_string_list(set_descriptions_verbose)
    error_checking.assert_is_numpy_array(numpy.array(set_descriptions_verbose),
                                         exact_dimensions=expected_dim)

    set_descriptions_verbose = [
        s.replace('_', ' ') for s in set_descriptions_verbose
    ]
    set_descriptions_abbrev = [
        s.lower().replace(' ', '-') for s in set_descriptions_verbose
    ]

    error_checking.assert_is_string_list(line_colour_strings)
    error_checking.assert_is_numpy_array(numpy.array(line_colour_strings),
                                         exact_dimensions=expected_dim)
    line_colours = [
        numpy.fromstring(s, dtype=float, sep='_') / 255
        for s in line_colour_strings
    ]

    for i in range(num_evaluation_sets):
        error_checking.assert_is_numpy_array(line_colours[i],
                                             exact_dimensions=numpy.array(
                                                 [3], dtype=int))
        error_checking.assert_is_geq_numpy_array(line_colours[i], 0.)
        error_checking.assert_is_leq_numpy_array(line_colours[i], 1.)

    # Read files.
    evaluation_tables_xarray = [xarray.Dataset()] * num_evaluation_sets
    prediction_dicts = [dict()] * num_evaluation_sets

    for i in range(num_evaluation_sets):
        print('Reading data from: "{0:s}"...'.format(evaluation_file_names[i]))
        evaluation_tables_xarray[i] = evaluation.read_file(
            evaluation_file_names[i])

        this_prediction_file_name = (
            evaluation_tables_xarray[i].attrs[evaluation.PREDICTION_FILE_KEY])

        print(
            'Reading data from: "{0:s}"...'.format(this_prediction_file_name))
        prediction_dicts[i] = prediction_io.read_file(
            this_prediction_file_name)

    model_file_name = (
        evaluation_tables_xarray[0].attrs[evaluation.MODEL_FILE_KEY])
    model_metafile_name = neural_net.find_metafile(
        model_dir_name=os.path.split(model_file_name)[0],
        raise_error_if_missing=True)

    print('Reading metadata from: "{0:s}"...'.format(model_metafile_name))
    model_metadata_dict = neural_net.read_metafile(model_metafile_name)
    generator_option_dict = model_metadata_dict[
        neural_net.TRAINING_OPTIONS_KEY]

    scalar_target_names = (
        generator_option_dict[neural_net.SCALAR_TARGET_NAMES_KEY])
    vector_target_names = (
        generator_option_dict[neural_net.VECTOR_TARGET_NAMES_KEY])
    heights_m_agl = generator_option_dict[neural_net.HEIGHTS_KEY]

    try:
        t = evaluation_tables_xarray[0]
        aux_target_names = t.coords[evaluation.AUX_TARGET_FIELD_DIM].values
    except KeyError:
        aux_target_names = []

    num_scalar_targets = len(scalar_target_names)
    num_vector_targets = len(vector_target_names)
    num_heights = len(heights_m_agl)
    num_aux_targets = len(aux_target_names)

    example_dict = {
        example_utils.SCALAR_TARGET_NAMES_KEY:
        scalar_target_names,
        example_utils.VECTOR_TARGET_NAMES_KEY:
        vector_target_names,
        example_utils.HEIGHTS_KEY:
        heights_m_agl,
        example_utils.SCALAR_PREDICTOR_NAMES_KEY:
        generator_option_dict[neural_net.SCALAR_PREDICTOR_NAMES_KEY],
        example_utils.VECTOR_PREDICTOR_NAMES_KEY:
        generator_option_dict[neural_net.VECTOR_PREDICTOR_NAMES_KEY]
    }

    normalization_file_name = (
        generator_option_dict[neural_net.NORMALIZATION_FILE_KEY])
    print(('Reading training examples (for climatology) from: "{0:s}"...'
           ).format(normalization_file_name))

    training_example_dict = example_io.read_file(normalization_file_name)
    training_example_dict = example_utils.subset_by_height(
        example_dict=training_example_dict, heights_m_agl=heights_m_agl)
    mean_training_example_dict = normalization.create_mean_example(
        new_example_dict=example_dict,
        training_example_dict=training_example_dict)

    print(SEPARATOR_STRING)

    # Do actual stuff.
    _plot_error_distributions(
        prediction_dicts=prediction_dicts,
        model_metadata_dict=model_metadata_dict,
        aux_target_names=aux_target_names,
        set_descriptions_abbrev=set_descriptions_abbrev,
        set_descriptions_verbose=set_descriptions_verbose,
        output_dir_name=output_dir_name)
    print(SEPARATOR_STRING)

    _plot_reliability_by_height(
        evaluation_tables_xarray=evaluation_tables_xarray,
        vector_target_names=vector_target_names,
        heights_m_agl=heights_m_agl,
        set_descriptions_abbrev=set_descriptions_abbrev,
        set_descriptions_verbose=set_descriptions_verbose,
        output_dir_name=output_dir_name)
    print(SEPARATOR_STRING)

    for k in range(num_vector_targets):
        for this_score_name in list(SCORE_NAME_TO_PROFILE_KEY.keys()):
            _plot_score_profile(
                evaluation_tables_xarray=evaluation_tables_xarray,
                line_styles=line_styles,
                line_colours=line_colours,
                set_descriptions_verbose=set_descriptions_verbose,
                confidence_level=confidence_level,
                target_name=vector_target_names[k],
                score_name=this_score_name,
                use_log_scale=use_log_scale,
                output_dir_name=output_dir_name)

    print(SEPARATOR_STRING)

    for k in range(num_scalar_targets):
        _plot_attributes_diagram(
            evaluation_tables_xarray=evaluation_tables_xarray,
            line_styles=line_styles,
            line_colours=line_colours,
            set_descriptions_abbrev=set_descriptions_abbrev,
            set_descriptions_verbose=set_descriptions_verbose,
            confidence_level=confidence_level,
            mean_training_example_dict=mean_training_example_dict,
            target_name=scalar_target_names[k],
            output_dir_name=output_dir_name)

    for k in range(num_aux_targets):
        _plot_attributes_diagram(
            evaluation_tables_xarray=evaluation_tables_xarray,
            line_styles=line_styles,
            line_colours=line_colours,
            set_descriptions_abbrev=set_descriptions_abbrev,
            set_descriptions_verbose=set_descriptions_verbose,
            confidence_level=confidence_level,
            mean_training_example_dict=mean_training_example_dict,
            target_name=aux_target_names[k],
            output_dir_name=output_dir_name)

    if not plot_by_height:
        return

    print(SEPARATOR_STRING)

    for k in range(num_vector_targets):
        for j in range(num_heights):
            _plot_attributes_diagram(
                evaluation_tables_xarray=evaluation_tables_xarray,
                line_styles=line_styles,
                line_colours=line_colours,
                set_descriptions_abbrev=set_descriptions_abbrev,
                set_descriptions_verbose=set_descriptions_verbose,
                confidence_level=confidence_level,
                mean_training_example_dict=mean_training_example_dict,
                height_m_agl=heights_m_agl[j],
                target_name=vector_target_names[k],
                output_dir_name=output_dir_name)

        if k != num_vector_targets - 1:
            print(SEPARATOR_STRING)
    def test_assert_is_string_list_true(self):
        """Checks assert_is_string_list when input is string list."""

        error_checking.assert_is_string_list(STRING_LIST)
def write_target_values(storm_to_events_table, target_names, netcdf_file_name):
    """Writes target values to NetCDF file.

    :param storm_to_events_table: pandas DataFrame created by
        `create_wind_regression_targets`, `create_wind_classification_targets`,
        or `create_tornado_targets`.
    :param target_names: 1-D list with names of target variables to write.  Each
        name must be a column in `storm_to_events_table`.
    :param netcdf_file_name: Path to output file.
    :raises: ValueError: if any item in `target_names` is not a valid name.
    """

    error_checking.assert_is_string_list(target_names)
    error_checking.assert_is_numpy_array(
        numpy.array(target_names), num_dimensions=1
    )

    for this_target_name in target_names:
        this_param_dict = target_name_to_params(this_target_name)
        if this_param_dict is not None:
            continue

        error_string = (
            '"{0:s}" is not a valid name for a target variable.'
        ).format(this_target_name)

        raise ValueError(error_string)

    file_system_utils.mkdir_recursive_if_necessary(file_name=netcdf_file_name)
    netcdf_dataset = netCDF4.Dataset(
        netcdf_file_name, 'w', format='NETCDF3_64BIT_OFFSET')

    full_id_strings = storm_to_events_table[
        tracking_utils.FULL_ID_COLUMN].values

    num_storm_objects = len(full_id_strings)
    num_id_characters = 0

    for i in range(num_storm_objects):
        num_id_characters = max([
            num_id_characters, len(full_id_strings[i])
        ])

    netcdf_dataset.createDimension(
        STORM_OBJECT_DIMENSION_KEY, num_storm_objects)
    netcdf_dataset.createDimension(CHARACTER_DIMENSION_KEY, num_id_characters)

    netcdf_dataset.createVariable(
        FULL_IDS_KEY, datatype='S1',
        dimensions=(STORM_OBJECT_DIMENSION_KEY, CHARACTER_DIMENSION_KEY)
    )

    string_type = 'S{0:d}'.format(num_id_characters)
    full_ids_char_array = netCDF4.stringtochar(numpy.array(
        full_id_strings, dtype=string_type
    ))
    netcdf_dataset.variables[FULL_IDS_KEY][:] = numpy.array(full_ids_char_array)

    netcdf_dataset.createVariable(
        VALID_TIMES_KEY, datatype=numpy.int32,
        dimensions=STORM_OBJECT_DIMENSION_KEY
    )
    netcdf_dataset.variables[VALID_TIMES_KEY][:] = storm_to_events_table[
        tracking_utils.VALID_TIME_COLUMN].values

    for this_target_name in target_names:
        netcdf_dataset.createVariable(
            this_target_name, datatype=numpy.float32,
            dimensions=STORM_OBJECT_DIMENSION_KEY
        )
        netcdf_dataset.variables[this_target_name][:] = storm_to_events_table[
            this_target_name].values

    netcdf_dataset.close()
def plot_predictors(
        example_dict, example_index, predictor_names, predictor_colours,
        predictor_line_widths, predictor_line_styles, use_log_scale,
        include_units=True, handle_dict=None):
    """Plots several predictors on the same set of axes.

    P = number of predictors to plot (must all be profiles)

    :param example_dict: See doc for `example_io.read_file`.
    :param example_index: Will plot the [i]th example, where
        i = `example_index`.
    :param predictor_names: length-P list with names of predictors to plot.
    :param predictor_colours: length-P list of colours (each colour in any
        format accepted by matplotlib).
    :param predictor_line_widths: length-P numpy array of line widths.
    :param predictor_line_styles: length-P list of line styles (each style in
        any format accepted by matplotlib).
    :param use_log_scale: Boolean flag.  If True, will plot height (y-axis) in
        logarithmic scale.  If False, will plot height in linear scale.
    :param include_units: Boolean flag.  If True, axis titles will include units
        and values will be converted from default to plotting units.  If False,
        axis titles will *not* include units and this method will *not* convert
        units.
    :param handle_dict: See output doc.  If None, will create new figure on the
        fly.
    :return: handle_dict: Dictionary with the following keys.
    handle_dict['figure_object']: Figure handle (instance of
        `matplotlib.figure.Figure`).
    handle_dict['axes_objects']: length-P list of axes handles (each an instance
        of `matplotlib.axes._subplots.AxesSubplot`).
    """

    # Check input args.
    error_checking.assert_is_integer(example_index)
    error_checking.assert_is_geq(example_index, 0)
    error_checking.assert_is_boolean(use_log_scale)
    error_checking.assert_is_boolean(include_units)

    error_checking.assert_is_string_list(predictor_names)
    num_predictors = len(predictor_names)
    error_checking.assert_is_leq(num_predictors, 4)

    for k in range(num_predictors):
        assert predictor_names[k] in example_utils.ALL_PREDICTOR_NAMES
        # assert predictor_names[k] in example_utils.ALL_VECTOR_PREDICTOR_NAMES

    assert len(predictor_colours) == num_predictors
    assert len(predictor_line_widths) == num_predictors
    assert len(predictor_line_styles) == num_predictors

    # Housekeeping.
    _set_font_size(FANCY_FONT_SIZE)

    if handle_dict is None:
        figure_object, first_axes_object = pyplot.subplots(
            1, 1,
            figsize=(FANCY_FIGURE_WIDTH_INCHES, FANCY_FIGURE_HEIGHT_INCHES)
        )

        axes_objects = [first_axes_object]
        figure_object.subplots_adjust(bottom=0.75)

        if use_log_scale:
            pyplot.yscale('log')

        for k in range(1, num_predictors):
            axes_objects.append(axes_objects[0].twiny())

            if k == 2:
                axes_objects[k].spines['top'].set_position(('axes', 1.15))
                _make_spines_invisible(axes_objects[k])
                axes_objects[k].spines['top'].set_visible(True)

            if k == 3:
                axes_objects[k].xaxis.set_ticks_position('bottom')
                axes_objects[k].xaxis.set_label_position('bottom')
                axes_objects[k].spines['bottom'].set_position(('axes', -0.15))
                _make_spines_invisible(axes_objects[k])
                axes_objects[k].spines['bottom'].set_visible(True)
    else:
        figure_object = handle_dict[FIGURE_HANDLE_KEY]
        axes_objects = handle_dict[AXES_OBJECTS_KEY]

    heights_km_agl = METRES_TO_KM * example_dict[example_utils.HEIGHTS_KEY]
    tick_mark_dict = dict(size=4, width=1.5)

    for k in range(num_predictors):
        if predictor_names[k] in example_utils.ALL_SCALAR_PREDICTOR_NAMES:

            # TODO(thunderhoser): This is a HACK to deal with saliency maps.
            j = example_dict[example_utils.SCALAR_PREDICTOR_NAMES_KEY].index(
                predictor_names[k]
            )
            these_predictor_values = (
                example_dict[example_utils.SCALAR_PREDICTOR_VALS_KEY][
                    example_index, :, j
                ]
            )
        else:
            these_predictor_values = example_utils.get_field_from_dict(
                example_dict=example_dict, field_name=predictor_names[k]
            )[example_index, ...]

        if include_units:
            if predictor_names[k] == example_utils.TEMPERATURE_NAME:
                these_predictor_values = temperature_conv.kelvins_to_celsius(
                    these_predictor_values
                )
            else:
                these_predictor_values = (
                    PREDICTOR_NAME_TO_CONV_FACTOR[predictor_names[k]] *
                    these_predictor_values
                )

        axes_objects[k].plot(
            these_predictor_values, heights_km_agl, color=predictor_colours[k],
            linewidth=predictor_line_widths[k],
            linestyle=predictor_line_styles[k]
        )

        x_label_string = copy.deepcopy(
            PREDICTOR_NAME_TO_VERBOSE[predictor_names[k]]
        )
        if not include_units:
            x_label_string = x_label_string.split(' (')[0]

        axes_objects[k].set_xlabel(x_label_string)
        axes_objects[k].xaxis.label.set_color(predictor_colours[k])
        axes_objects[k].tick_params(
            axis='x', colors=predictor_colours[k], **tick_mark_dict
        )

    axes_objects[0].set_ylabel('Height (km AGL)')
    axes_objects[0].set_ylim([
        numpy.min(heights_km_agl), numpy.max(heights_km_agl)
    ])

    height_strings = create_height_labels(
        tick_values_km_agl=axes_objects[0].get_yticks(),
        use_log_scale=use_log_scale
    )
    axes_objects[0].set_yticklabels(height_strings)
    axes_objects[0].tick_params(axis='y', **tick_mark_dict)

    return {
        FIGURE_HANDLE_KEY: figure_object,
        AXES_OBJECTS_KEY: axes_objects
    }
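
# The layout above relies on `twiny` plus offset spines, so that several
# predictors share one height axis (y) while keeping separate x-axes.  Below is
# a minimal standalone sketch of that matplotlib pattern with toy data
# (illustrative only; assumes the module-level `numpy` and `pyplot` imports
# used above).
def _demo_two_x_axes():
    """Plots two toy profiles sharing a y-axis but with separate x-axes."""

    heights_km_agl = numpy.linspace(0., 10., num=50)
    first_values = 300. - 6. * heights_km_agl                # temperature-like
    second_values = 100. * numpy.exp(-heights_km_agl / 2.)   # humidity-like

    figure_object, first_axes_object = pyplot.subplots(1, 1)
    second_axes_object = first_axes_object.twiny()

    first_axes_object.plot(first_values, heights_km_agl, color='r')
    second_axes_object.plot(second_values, heights_km_agl, color='b')

    first_axes_object.set_ylabel('Height (km AGL)')
    first_axes_object.set_xlabel('First predictor')
    second_axes_object.set_xlabel('Second predictor')

    return figure_object
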
def unzip_1day_tar_file(
        tar_file_name, field_names, spc_date_string, top_target_directory_name,
        refl_heights_m_asl=None):
    """Unzips 1-day tar file (containing raw MYRORSS data for one SPC date).

    :param tar_file_name: Path to input file.
    :param field_names: 1-D list with names of radar fields.
    :param spc_date_string: SPC date (format "yyyymmdd").
    :param top_target_directory_name: Name of top-level directory for unzipped
        MYRORSS files.  This method will create a subdirectory therein for the
        SPC date.
    :param refl_heights_m_asl: 1-D numpy array of reflectivity heights (metres
        above sea level).
    :return: target_directory_name: Path to output directory.
    """

    # Verification.
    _ = time_conversion.spc_date_string_to_unix_sec(spc_date_string)
    error_checking.assert_is_string_list(field_names)
    error_checking.assert_is_numpy_array(
        numpy.asarray(field_names), num_dimensions=1)
    error_checking.assert_is_string(top_target_directory_name)

    # Put azimuthal-shear fields (which are allowed to be missing) at the end.
    # This way, if the tar command errors out due to missing data, it will do so
    # after unzipping all the non-missing data.
    field_names_removed = []
    for this_field_name in AZIMUTHAL_RADAR_FIELD_NAMES:
        if this_field_name in field_names:
            field_names.remove(this_field_name)
            field_names_removed.append(this_field_name)

    for this_field_name in field_names_removed:
        field_names.append(this_field_name)

    field_to_heights_dict_m_asl = (
        myrorss_and_mrms_utils.fields_and_refl_heights_to_dict(
            field_names=field_names, data_source=radar_utils.MYRORSS_SOURCE_ID,
            refl_heights_m_asl=refl_heights_m_asl))

    target_directory_name = '{0:s}/{1:s}/{2:s}'.format(
        top_target_directory_name, spc_date_string[:4], spc_date_string
    )

    field_names = list(field_to_heights_dict_m_asl.keys())
    directory_names_to_unzip = []

    for this_field_name in field_names:
        these_heights_m_asl = field_to_heights_dict_m_asl[this_field_name]

        for this_height_m_asl in these_heights_m_asl:
            directory_names_to_unzip.append(
                myrorss_and_mrms_io.get_relative_dir_for_raw_files(
                    field_name=this_field_name,
                    data_source=radar_utils.MYRORSS_SOURCE_ID,
                    height_m_asl=this_height_m_asl))

    unzipping.unzip_tar(
        tar_file_name,
        target_directory_name=target_directory_name,
        file_and_dir_names_to_unzip=directory_names_to_unzip)

    return target_directory_name
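
# The reordering above pushes possibly-missing azimuthal-shear fields to the
# end of the list, so that a tar error occurs only after all other fields are
# unzipped.  The same idea as a standalone sketch (illustrative only; unlike
# the in-place reorder above, this returns a new list).
def _demo_push_fields_to_end(field_names, possibly_missing_field_names):
    """Returns copy of `field_names` with possibly-missing fields at the end."""

    kept_names = [
        f for f in field_names if f not in possibly_missing_field_names
    ]
    deferred_names = [
        f for f in field_names if f in possibly_missing_field_names
    ]

    return kept_names + deferred_names
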
def read_target_values(netcdf_file_name, target_names=None):
    """Reads target values from NetCDF file.

    E = number of examples (storm objects)
    T = number of target variables

    :param netcdf_file_name: Path to input file.
    :param target_names: 1-D list with names of target variables to read.  If
        None, will read all target variables.
    :return: storm_label_dict: Dictionary with the following keys.
    storm_label_dict['full_id_strings']: length-E list of full storm IDs.
    storm_label_dict['valid_times_unix_sec']: length-E numpy array of valid
        times.
    storm_label_dict['target_names']: length-T list with names of target
        variables.
    storm_label_dict['target_matrix']: E-by-T numpy array of target values
        (integer class labels).
    """

    netcdf_dataset = netcdf_io.open_netcdf(
        netcdf_file_name=netcdf_file_name, raise_error_if_fails=True)

    try:
        full_id_strings = netCDF4.chartostring(
            netcdf_dataset.variables[FULL_IDS_KEY][:]
        )
    except KeyError:
        full_id_strings = netCDF4.chartostring(
            netcdf_dataset.variables['storm_ids'][:]
        )

    valid_times_unix_sec = numpy.array(
        netcdf_dataset.variables[VALID_TIMES_KEY][:], dtype=int
    )

    if target_names is None:
        target_names = list(netcdf_dataset.variables.keys())
        target_names.remove(FULL_IDS_KEY)
        target_names.remove(VALID_TIMES_KEY)

    error_checking.assert_is_string_list(target_names)
    error_checking.assert_is_numpy_array(
        numpy.array(target_names), num_dimensions=1
    )

    num_storm_objects = len(full_id_strings)
    target_matrix = None

    for this_target_name in target_names:
        these_target_values = numpy.array(
            netcdf_dataset.variables[this_target_name][:], dtype=int
        )

        these_target_values = numpy.reshape(
            these_target_values, (num_storm_objects, 1)
        )

        if target_matrix is None:
            target_matrix = these_target_values + 0
        else:
            target_matrix = numpy.concatenate(
                (target_matrix, these_target_values), axis=1
            )

    netcdf_dataset.close()

    return {
        FULL_IDS_KEY: [str(f) for f in full_id_strings],
        VALID_TIMES_KEY: valid_times_unix_sec,
        TARGET_NAMES_KEY: target_names,
        TARGET_MATRIX_KEY: target_matrix
    }
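
# The reader above grows `target_matrix` one column at a time with
# numpy.concatenate.  The equivalent pattern with numpy.column_stack, as a
# standalone sketch with toy values (illustrative only):
def _demo_stack_target_columns():
    """Stacks per-variable target columns into an E-by-T matrix."""

    first_target_values = numpy.array([0, 1, 1, 0], dtype=int)
    second_target_values = numpy.array([1, 0, 1, 1], dtype=int)

    return numpy.column_stack((first_target_values, second_target_values))
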
def _check_input_args(
        list_of_training_matrices, training_target_values,
        list_of_validation_matrices, validation_target_values,
        predictor_names_by_matrix):
    """Error-checks input arguments for sequential selection.

    N = number of input matrices
    T = number of training examples
    V = number of validation examples
    C_q = number of channels (predictors) in the [q]th matrix

    :param list_of_training_matrices: length-N list of matrices (numpy arrays).
        The first axis of each matrix should have length T.
    :param training_target_values: length-T numpy array of target values
        (integer class labels).
    :param list_of_validation_matrices: length-N list of numpy arrays.  The
        first axis of each matrix should have length V; otherwise,
        list_of_validation_matrices[q] should have the same dimensions as
        list_of_training_matrices[q].
    :param validation_target_values: length-V numpy array of target values
        (integer class labels).
    :param predictor_names_by_matrix: length-N list of lists.  The [q]th list
        should be a list of predictor variables in the [q]th matrix, with length
        C_q.
    :raises: ValueError: if length of `list_of_training_matrices` != length of
        `list_of_validation_matrices`.
    :raises: ValueError: if length of `list_of_training_matrices` != length of
        `predictor_names_by_matrix`.
    :raises: ValueError: if any input matrix has < 3 dimensions.
    """

    error_checking.assert_is_integer_numpy_array(training_target_values)
    error_checking.assert_is_geq_numpy_array(training_target_values, 0)
    error_checking.assert_is_integer_numpy_array(validation_target_values)
    error_checking.assert_is_geq_numpy_array(validation_target_values, 0)

    num_input_matrices = len(list_of_training_matrices)

    if len(list_of_validation_matrices) != num_input_matrices:
        error_string = (
            'Number of training matrices ({0:d}) should equal number of '
            'validation matrices ({1:d}).'
        ).format(num_input_matrices, len(list_of_validation_matrices))

        raise ValueError(error_string)

    if len(predictor_names_by_matrix) != num_input_matrices:
        error_string = (
            'Number of training matrices ({0:d}) should equal number of '
            'predictor-name lists ({1:d}).'
        ).format(num_input_matrices, len(predictor_names_by_matrix))

        raise ValueError(error_string)

    num_training_examples = len(training_target_values)
    num_validation_examples = len(validation_target_values)

    for q in range(num_input_matrices):
        error_checking.assert_is_numpy_array_without_nan(
            list_of_training_matrices[q])
        error_checking.assert_is_numpy_array_without_nan(
            list_of_validation_matrices[q])

        this_num_dimensions = len(list_of_training_matrices[q].shape)
        if this_num_dimensions < 3:
            error_string = (
                '{0:d}th training matrix has {1:d} dimensions.  Should have at '
                'least 3.'
            ).format(q + 1, this_num_dimensions)

            raise ValueError(error_string)

        this_num_dimensions = len(list_of_validation_matrices[q].shape)
        if this_num_dimensions < 3:
            error_string = (
                '{0:d}th validation matrix has {1:d} dimensions.  Should have '
                'at least 3.'
            ).format(q + 1, this_num_dimensions)

            raise ValueError(error_string)

        error_checking.assert_is_string_list(predictor_names_by_matrix[q])
        this_num_predictors = len(predictor_names_by_matrix[q])

        these_expected_dimensions = (
            (num_training_examples,) +
            list_of_training_matrices[q].shape[1:-1] +
            (this_num_predictors,)
        )
        these_expected_dimensions = numpy.array(
            these_expected_dimensions, dtype=int)

        error_checking.assert_is_numpy_array(
            list_of_training_matrices[q],
            exact_dimensions=these_expected_dimensions)

        these_expected_dimensions = (
            (num_validation_examples,) +
            list_of_validation_matrices[q].shape[1:-1] +
            (this_num_predictors,)
        )
        these_expected_dimensions = numpy.array(
            these_expected_dimensions, dtype=int)

        error_checking.assert_is_numpy_array(
            list_of_validation_matrices[q],
            exact_dimensions=these_expected_dimensions)
def normalize_radar_images(radar_image_matrix,
                           field_names,
                           normalization_type_string,
                           normalization_param_file_name,
                           test_mode=False,
                           min_normalized_value=0.,
                           max_normalized_value=1.,
                           normalization_table=None):
    """Normalizes radar images.

    If normalization_type_string = "z", z-score normalization is done for each
    field independently.  Means and standard deviations are read from the
    normalization file.

    If normalization_type_string = "minmax", min-max normalization is done for
    each field independently, using the following equations.  Climatological
    minima and maxima are read from the normalization file.

    x_unscaled(i, j) = [x(i, j) - x_min] / [x_max - x_min]

    x_scaled(i, j) = x_unscaled(i, j) * [
        max_normalized_value - min_normalized_value
    ] + min_normalized_value

    x(i, j) = original value at pixel (i, j)
    x_min = climatological minimum for field x
    x_max = climatological max for field x
    x_unscaled(i, j) = normalized but unscaled value at pixel (i, j)
    min_normalized_value: from input args
    max_normalized_value: from input args
    x_scaled(i, j) = normalized and scaled value at pixel (i, j)

    :param radar_image_matrix: numpy array of radar images.  Dimensions may be
        E x M x N x C or E x M x N x H_r x F_r.
    :param field_names: 1-D list with names of radar fields, in the order that
        they appear in radar_image_matrix.  If radar_image_matrix is
        4-dimensional, field_names must have length C.  If radar_image_matrix is
        5-dimensional, field_names must have length F_r.  Each field name must
        be accepted by `radar_utils.check_field_name`.
    :param normalization_type_string: Normalization type (must be accepted by
        `_check_normalization_type`).
    :param normalization_param_file_name: Path to file with normalization
        params.  Will be read by `read_normalization_params_from_file`.
    :param test_mode: For testing only.  Leave this alone.
    :param min_normalized_value:
        [used only if normalization_type_string = "minmax"]
        Minimum normalized value.
    :param max_normalized_value:
        [used only if normalization_type_string = "minmax"]
        Max normalized value.
    :param normalization_table: For testing only.  Leave this alone.
    :return: radar_image_matrix: Normalized version of input, with the same
        dimensions.
    """

    error_checking.assert_is_boolean(test_mode)
    if not test_mode:
        normalization_table = read_normalization_params_from_file(
            normalization_param_file_name)[0]

    check_radar_images(radar_image_matrix=radar_image_matrix,
                       min_num_dimensions=4,
                       max_num_dimensions=5)
    num_fields = radar_image_matrix.shape[-1]

    error_checking.assert_is_string_list(field_names)
    error_checking.assert_is_numpy_array(numpy.array(field_names),
                                         exact_dimensions=numpy.array(
                                             [num_fields]))

    _check_normalization_type(normalization_type_string)
    if normalization_type_string == MINMAX_NORMALIZATION_TYPE_STRING:
        error_checking.assert_is_greater(max_normalized_value,
                                         min_normalized_value)

    for j in range(num_fields):
        if normalization_type_string == MINMAX_NORMALIZATION_TYPE_STRING:
            this_min_value = normalization_table[MIN_VALUE_COLUMN].loc[
                field_names[j]]
            this_max_value = normalization_table[MAX_VALUE_COLUMN].loc[
                field_names[j]]

            radar_image_matrix[..., j] = (
                (radar_image_matrix[..., j] - this_min_value) /
                (this_max_value - this_min_value))

            radar_image_matrix[..., j] = min_normalized_value + (
                radar_image_matrix[..., j] *
                (max_normalized_value - min_normalized_value))
        else:
            this_mean = normalization_table[MEAN_VALUE_COLUMN].loc[
                field_names[j]]
            this_standard_deviation = normalization_table[
                STANDARD_DEVIATION_COLUMN].loc[field_names[j]]

            radar_image_matrix[..., j] = (
                (radar_image_matrix[..., j] - this_mean) /
                this_standard_deviation
            )

    return radar_image_matrix
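
# A standalone sketch of the two normalization types used above, applied to a
# toy field (illustrative only; the climatological parameters here are made up
# rather than read from a normalization file).
def _demo_normalize_field(field_values):
    """Applies min-max and z-score normalization to one toy field."""

    climo_min_value = 0.
    climo_max_value = 70.
    climo_mean = 25.
    climo_standard_deviation = 10.

    minmax_normalized_values = (
        (field_values - climo_min_value) /
        (climo_max_value - climo_min_value)
    )
    z_score_normalized_values = (
        (field_values - climo_mean) / climo_standard_deviation
    )

    return minmax_normalized_values, z_score_normalized_values
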
def find_examples(all_id_strings, desired_id_strings, allow_missing=False):
    """Finds examples with desired IDs.

    E = number of desired examples

    :param all_id_strings: 1-D list with all example IDs.
    :param desired_id_strings: length-E list of desired IDs.
    :param allow_missing: Boolean flag.  If True, will allow some desired IDs to
        be missing.  If False, will raise error if any desired ID is missing.
    :return: desired_indices: length-E numpy array with indices of desired
        examples.  Missing IDs are denoted by an index of -1.
    :raises: ValueError: if either list of IDs has non-unique entries.
    :raises: ValueError: if `allow_missing == False` and any desired ID is
        missing.
    """

    error_checking.assert_is_string_list(all_id_strings)
    error_checking.assert_is_string_list(desired_id_strings)
    error_checking.assert_is_boolean(allow_missing)

    all_id_strings_numpy = numpy.array(all_id_strings)
    desired_id_strings_numpy = numpy.array(desired_id_strings)

    these_unique_strings, these_counts = numpy.unique(all_id_strings_numpy,
                                                      return_counts=True)
    if numpy.any(these_counts > 1):
        these_indices = numpy.where(these_counts > 1)[0]

        error_string = (
            '\nall_id_strings contains {0:d} repeated entries, listed below:'
            '\n{1:s}').format(len(these_indices),
                              str(these_unique_strings[these_indices]))
        raise ValueError(error_string)

    these_unique_strings, these_counts = numpy.unique(desired_id_strings_numpy,
                                                      return_counts=True)
    if numpy.any(these_counts > 1):
        these_indices = numpy.where(these_counts > 1)[0]

        error_string = (
            '\ndesired_id_strings contains {0:d} repeated entries, listed '
            'below:\n{1:s}').format(len(these_indices),
                                    str(these_unique_strings[these_indices]))
        raise ValueError(error_string)

    sort_indices = numpy.argsort(all_id_strings_numpy)
    desired_indices = numpy.searchsorted(all_id_strings_numpy[sort_indices],
                                         desired_id_strings_numpy,
                                         side='left').astype(int)

    # Clamp searchsorted output before using it to index `sort_indices`;
    # otherwise a desired ID greater than every entry would cause an
    # out-of-range index.
    desired_indices = numpy.maximum(desired_indices, 0)
    desired_indices = numpy.minimum(desired_indices, len(all_id_strings) - 1)
    desired_indices = sort_indices[desired_indices]

    if allow_missing:
        bad_indices = numpy.where(
            all_id_strings_numpy[desired_indices] != desired_id_strings_numpy
        )[0]
        desired_indices[bad_indices] = -1
        return desired_indices

    if numpy.array_equal(all_id_strings_numpy[desired_indices],
                         desired_id_strings_numpy):
        return desired_indices

    missing_flags = (all_id_strings_numpy[desired_indices] !=
                     desired_id_strings_numpy)

    error_string = (
        '{0:d} of {1:d} desired IDs (listed below) are missing:\n{2:s}'
    ).format(numpy.sum(missing_flags), len(desired_id_strings),
             str(desired_id_strings_numpy[missing_flags]))

    raise ValueError(error_string)
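
# The lookup above (argsort + searchsorted, then verify) matches one list of
# IDs against another in O(N log N) time.  Below is a standalone sketch with
# toy IDs (illustrative only; assumes the module-level `numpy` import).
def _demo_find_examples():
    """Matches desired IDs against a master list with numpy.searchsorted."""

    all_id_strings = numpy.array(['storm_c', 'storm_a', 'storm_b'])
    desired_id_strings = numpy.array(['storm_b', 'storm_a'])

    sort_indices = numpy.argsort(all_id_strings)
    desired_indices = numpy.searchsorted(
        all_id_strings[sort_indices], desired_id_strings, side='left'
    ).astype(int)

    desired_indices = numpy.minimum(desired_indices, len(all_id_strings) - 1)
    desired_indices = sort_indices[desired_indices]

    # Every desired ID should be found in this toy example.
    assert numpy.array_equal(
        all_id_strings[desired_indices], desired_id_strings)

    return desired_indices
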
def normalize_soundings(sounding_matrix,
                        field_names,
                        normalization_type_string,
                        normalization_param_file_name,
                        test_mode=False,
                        min_normalized_value=0.,
                        max_normalized_value=1.,
                        normalization_table=None):
    """Normalizes soundings.

    This method uses the same equations as `normalize_radar_images`.

    :param sounding_matrix: numpy array (E x H_s x F_s) of soundings.
    :param field_names: list (length F_s) of field names, in the order that they
        appear in `sounding_matrix`.
    :param normalization_type_string: Normalization type (must be accepted by
        `_check_normalization_type`).
    :param normalization_param_file_name: Path to file with normalization
        params.  Will be read by `read_normalization_params_from_file`.
    :param test_mode: For testing only.  Leave this alone.
    :param min_normalized_value:
        [used only if normalization_type_string = "minmax"]
        Minimum normalized value.
    :param max_normalized_value:
        [used only if normalization_type_string = "minmax"]
        Max normalized value.
    :param normalization_table: For testing only.  Leave this alone.
    :return: sounding_matrix: Normalized version of input, with the same
        dimensions.
    """

    error_checking.assert_is_boolean(test_mode)
    if not test_mode:
        normalization_table = read_normalization_params_from_file(
            normalization_param_file_name)[2]

    error_checking.assert_is_string_list(field_names)
    error_checking.assert_is_numpy_array(numpy.array(field_names),
                                         num_dimensions=1)

    num_fields = len(field_names)
    check_soundings(sounding_matrix=sounding_matrix, num_fields=num_fields)
    _check_normalization_type(normalization_type_string)

    if normalization_type_string == MINMAX_NORMALIZATION_TYPE_STRING:
        error_checking.assert_is_greater(max_normalized_value,
                                         min_normalized_value)

    for j in range(num_fields):
        if normalization_type_string == MINMAX_NORMALIZATION_TYPE_STRING:
            this_min_value = normalization_table[MIN_VALUE_COLUMN].loc[
                field_names[j]]
            this_max_value = normalization_table[MAX_VALUE_COLUMN].loc[
                field_names[j]]

            sounding_matrix[..., j] = (
                (sounding_matrix[..., j] - this_min_value) /
                (this_max_value - this_min_value)
            )
            sounding_matrix[..., j] = min_normalized_value + (
                sounding_matrix[..., j] *
                (max_normalized_value - min_normalized_value))
        else:
            this_mean = normalization_table[MEAN_VALUE_COLUMN].loc[
                field_names[j]]
            this_standard_deviation = normalization_table[
                STANDARD_DEVIATION_COLUMN].loc[field_names[j]]

            sounding_matrix[..., j] = ((sounding_matrix[..., j] - this_mean) /
                                       this_standard_deviation)

    return sounding_matrix