Example #1
def get_connected_input_layers(model_object, target_layer_name):
    """Gets input layers connected to target layer.

    :param model_object: Instance of `keras.models.Model` or
        `keras.models.Sequential`.
    :param target_layer_name: Name of target layer.
    :return: input_layer_objects: 1-D list of input layers (instances of
        `keras.layers.Input`) connected to target layer.
    """

    error_checking.assert_is_string(target_layer_name)

    prev_layer_objects = [model_object.get_layer(name=target_layer_name)]
    input_layer_objects = []

    while len(prev_layer_objects) > 0:
        new_prev_layer_objects = []

        for l in prev_layer_objects:
            these_prev_node_objects = l._inbound_nodes
            these_prev_layer_objects = []

            for n in these_prev_node_objects:
                these_prev_layer_objects += n.inbound_layers

            if len(these_prev_layer_objects) == 0:
                input_layer_objects.append(l)
            else:
                new_prev_layer_objects += these_prev_layer_objects

        prev_layer_objects = [l for l in new_prev_layer_objects]

    return input_layer_objects
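
The loop above is a breadth-first walk backward from the target layer: any layer with no inbound nodes is treated as a model input.  (`_inbound_nodes` and `inbound_layers` are private Keras attributes whose exact form varies across Keras versions.)  A minimal, Keras-free sketch of the same traversal on a hypothetical adjacency dict:

def find_source_nodes(inbound_graph, target_name):
    """Finds all nodes with no inbound edges that feed `target_name`."""
    frontier = [target_name]
    source_nodes = []

    while frontier:
        new_frontier = []

        for this_name in frontier:
            these_inbound_names = inbound_graph[this_name]

            if len(these_inbound_names) == 0:
                source_nodes.append(this_name)
            else:
                new_frontier += these_inbound_names

        frontier = new_frontier

    return source_nodes


INBOUND_GRAPH = {
    'dense_output': ['concatenate'],
    'concatenate': ['conv_branch', 'scalar_branch'],
    'conv_branch': ['image_input'],
    'scalar_branch': ['scalar_input'],
    'image_input': [],
    'scalar_input': [],
}

print(find_source_nodes(INBOUND_GRAPH, 'dense_output'))
# ['image_input', 'scalar_input']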
Example #2
def check_metadata(component_type_string,
                   target_class=None,
                   layer_name=None,
                   ideal_activation=None,
                   neuron_indices=None,
                   channel_index=None):
    """Error-checks metadata for saliency calculations.

    :param component_type_string: Component type (must be accepted by
        `model_interpretation.check_component_type`).
    :param target_class: See doc for `get_saliency_maps_for_class_activation`.
    :param layer_name: See doc for `get_saliency_maps_for_neuron_activation` or
        `get_saliency_maps_for_channel_activation`.
    :param ideal_activation: Same.
    :param neuron_indices: See doc for
        `get_saliency_maps_for_neuron_activation`.
    :param channel_index: See doc for
        `get_saliency_maps_for_channel_activation`.

    :return: metadata_dict: Dictionary with the following keys.
    metadata_dict['component_type_string']: See input doc.
    metadata_dict['target_class']: Same.
    metadata_dict['layer_name']: Same.
    metadata_dict['ideal_activation']: Same.
    metadata_dict['neuron_indices']: Same.
    metadata_dict['channel_index']: Same.
    """

    model_interpretation.check_component_type(component_type_string)
    if (component_type_string ==
            model_interpretation.CLASS_COMPONENT_TYPE_STRING):
        error_checking.assert_is_integer(target_class)
        error_checking.assert_is_geq(target_class, 0)

    if component_type_string in [
            model_interpretation.NEURON_COMPONENT_TYPE_STRING,
            model_interpretation.CHANNEL_COMPONENT_TYPE_STRING
    ]:
        error_checking.assert_is_string(layer_name)
        if ideal_activation is not None:
            error_checking.assert_is_greater(ideal_activation, 0.)

    if (component_type_string ==
            model_interpretation.NEURON_COMPONENT_TYPE_STRING):
        error_checking.assert_is_integer_numpy_array(neuron_indices)
        error_checking.assert_is_geq_numpy_array(neuron_indices, 0)
        error_checking.assert_is_numpy_array(neuron_indices, num_dimensions=1)

    if (component_type_string ==
            model_interpretation.CHANNEL_COMPONENT_TYPE_STRING):
        error_checking.assert_is_integer(channel_index)
        error_checking.assert_is_geq(channel_index, 0)

    return {
        COMPONENT_TYPE_KEY: component_type_string,
        TARGET_CLASS_KEY: target_class,
        LAYER_NAME_KEY: layer_name,
        IDEAL_ACTIVATION_KEY: ideal_activation,
        NEURON_INDICES_KEY: neuron_indices,
        CHANNEL_INDEX_KEY: channel_index
    }
Example #3
def find_match_file(top_directory_name, valid_time_unix_sec,
                    raise_error_if_missing=False):
    """Finds match file.

    A "match file" matches storm objects in one dataset (e.g., MYRORSS or
    GridRad) to those in another dataset, at one time step.

    :param top_directory_name: Name of top-level directory.
    :param valid_time_unix_sec: Valid time.
    :param raise_error_if_missing: See doc for `find_file`.
    :return: match_file_name: Path to match file.  If file is missing and
        `raise_error_if_missing = False`, this will be the *expected* path.
    :raises: ValueError: if file is missing and `raise_error_if_missing = True`.
    """

    error_checking.assert_is_string(top_directory_name)
    error_checking.assert_is_boolean(raise_error_if_missing)

    spc_date_string = time_conversion.time_to_spc_date_string(
        valid_time_unix_sec)

    match_file_name = '{0:s}/{1:s}/{2:s}/storm-matches_{3:s}.p'.format(
        top_directory_name, spc_date_string[:4], spc_date_string,
        time_conversion.unix_sec_to_string(
            valid_time_unix_sec, FILE_NAME_TIME_FORMAT)
    )

    if raise_error_if_missing and not os.path.isfile(match_file_name):
        error_string = 'Cannot find file.  Expected at: "{0:s}"'.format(
            match_file_name)
        raise ValueError(error_string)

    return match_file_name
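
For illustration, the sketch below reproduces the path layout with the standard library only.  It assumes that `FILE_NAME_TIME_FORMAT` is `'%Y-%m-%d-%H%M%S'` and that the SPC date is the convective day running from 1200 UTC to 1159 UTC the next day (both assumptions; the real definitions live in `time_conversion`):

import time

FILE_NAME_TIME_FORMAT = '%Y-%m-%d-%H%M%S'   # assumed value

def find_match_file_sketch(top_directory_name, valid_time_unix_sec):
    # Shifting back 12 hours maps every time in the convective day
    # (1200-1159 UTC) to that day's date label (assumption).
    spc_date_string = time.strftime(
        '%Y%m%d', time.gmtime(valid_time_unix_sec - 43200))
    valid_time_string = time.strftime(
        FILE_NAME_TIME_FORMAT, time.gmtime(valid_time_unix_sec))

    return '{0:s}/{1:s}/{2:s}/storm-matches_{3:s}.p'.format(
        top_directory_name, spc_date_string[:4], spc_date_string,
        valid_time_string)

print(find_match_file_sketch('/data/matches', 981342390))
# /data/matches/2001/20010204/storm-matches_2001-02-05-030630.p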
Example #4
def field_name_orig_to_new(field_name_orig, data_source):
    """Converts field name from original to new format.

    "Original format" = in original data source (examples: MYRORSS, MRMS,
    GridRad).

    "New format" = GewitterGefahr format, which is Pythonic and includes units
    at the end.

    :param field_name_orig: Name of radar field in original format.
    :param data_source: Data source (string).
    :return: field_name: Name of radar field in new format.
    """

    error_checking.assert_is_string(field_name_orig)
    check_data_source(data_source)

    if data_source == MYRORSS_SOURCE_ID:
        all_orig_field_names = RADAR_FIELD_NAMES_MYRORSS_PADDED
    elif data_source == MRMS_SOURCE_ID:
        all_orig_field_names = RADAR_FIELD_NAMES_MRMS_PADDED
    elif data_source == GRIDRAD_SOURCE_ID:
        all_orig_field_names = RADAR_FIELD_NAMES_GRIDRAD_PADDED

    found_flags = [s == field_name_orig for s in all_orig_field_names]
    return RADAR_FIELD_NAMES[numpy.where(found_flags)[0][0]]
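
The parallel-list lookup above (boolean flags plus `numpy.where`) is equivalent to a dict lookup, which also fails more loudly on unknown names.  A sketch with hypothetical field names (the real tables live in the module constants):

RADAR_FIELD_NAMES_MYRORSS_PADDED = ['MESH', 'VIL']    # hypothetical subset
RADAR_FIELD_NAMES = ['mesh_mm', 'vil_mm']             # hypothetical subset

ORIG_TO_NEW_NAME_DICT = dict(zip(
    RADAR_FIELD_NAMES_MYRORSS_PADDED, RADAR_FIELD_NAMES))

print(ORIG_TO_NEW_NAME_DICT['MESH'])   # mesh_mm
# ORIG_TO_NEW_NAME_DICT['FOO'] would raise a KeyError, not an IndexError.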
Example #5
def find_metafile(model_file_name, raise_error_if_missing=True):
    """Finds metafile for CNN.

    :param model_file_name: Path to model itself (see doc for `read_model`).
    :param raise_error_if_missing: Boolean flag.  If file is missing and
        `raise_error_if_missing = True`, this method will error out.
    :return: metafile_name: Path to metafile.  If file is missing and
        `raise_error_if_missing = False`, this will be the expected path.
    :raises: ValueError: if file is missing and `raise_error_if_missing = True`.
    """

    error_checking.assert_is_string(model_file_name)
    error_checking.assert_is_boolean(raise_error_if_missing)

    metafile_name = '{0:s}/model_metadata.p'.format(
        os.path.split(model_file_name)[0]
    )

    if not os.path.isfile(metafile_name):
        metafile_name = metafile_name.replace(
            '/glade/work/ryanlage', '/condo/swatwork/ralager'
        )

    if not os.path.isfile(metafile_name) and raise_error_if_missing:
        error_string = 'Cannot find file.  Expected at: "{0:s}"'.format(
            metafile_name)
        raise ValueError(error_string)

    return metafile_name
Example #6
def unzip_tar(tar_file_name, target_directory_name=None,
              file_and_dir_names_to_unzip=None):
    """Unzips tar file.

    :param tar_file_name: Path to input file.
    :param target_directory_name: Path to output directory.
    :param file_and_dir_names_to_unzip: List of files and directories to extract
        from the tar file.  Each list element should be a relative path inside
        the tar file.  After unzipping, the same relative path will exist inside
        `target_directory_name`.
    :raises: ValueError: if the Unix command fails.
    """

    error_checking.assert_is_string(tar_file_name)
    error_checking.assert_is_string_list(file_and_dir_names_to_unzip)
    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=target_directory_name)

    unix_command_string = 'tar -C "{0:s}" -xvf "{1:s}"'.format(
        target_directory_name, tar_file_name)
    for this_relative_path in file_and_dir_names_to_unzip:
        unix_command_string += ' "' + this_relative_path + '"'

    exit_code = os.system(unix_command_string)
    if exit_code != 0:
        raise ValueError('\nUnix command failed (log messages shown above '
                         'should explain why).')
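
Shelling out to `tar` requires careful quoting and is platform-dependent.  A sketch of the same extraction with the standard-library `tarfile` module (one possible alternative, not the module's actual implementation):

import tarfile

def extract_from_tar(tar_file_name, target_directory_name,
                     file_and_dir_names_to_unzip):
    """Extracts the given members of a tar archive to a target directory."""
    with tarfile.open(tar_file_name) as tar_file_handle:
        these_members = [
            tar_file_handle.getmember(p)
            for p in file_and_dir_names_to_unzip
        ]
        tar_file_handle.extractall(
            path=target_directory_name, members=these_members)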
Example #7
def find_local_raw_5minute_file(station_id=None,
                                month_unix_sec=None,
                                top_directory_name=None,
                                raise_error_if_missing=True):
    """Finds raw 5-minute file on local machine.

    This file should contain 5-minute METARs for one station-month.

    :param station_id: String ID for station.
    :param month_unix_sec: Month in Unix format.
    :param top_directory_name: Top-level directory for raw 5-minute files.
    :param raise_error_if_missing: Boolean flag.  If True and file is missing,
        this method will raise an error.
    :return: raw_5minute_file_name: File path.  If raise_error_if_missing =
        False and file is missing, this will be the *expected* path.
    :raises: ValueError: if raise_error_if_missing = True and file is missing.
    """

    error_checking.assert_is_string(station_id)
    error_checking.assert_is_string(top_directory_name)
    error_checking.assert_is_boolean(raise_error_if_missing)

    pathless_file_name = _get_pathless_raw_5minute_file_name(
        station_id, month_unix_sec)
    raw_5minute_file_name = '{0:s}/{1:s}/{2:s}'.format(top_directory_name,
                                                       station_id,
                                                       pathless_file_name)

    if raise_error_if_missing and not os.path.isfile(raw_5minute_file_name):
        raise ValueError(
            'Cannot find raw 5-minute file.  Expected at location: ' +
            raw_5minute_file_name)

    return raw_5minute_file_name
Example #8
def find_file(unix_time_sec, top_directory_name, raise_error_if_missing=True):
    """Finds GridRad file on local machine.

    Each GridRad file contains all fields at all heights for one valid time.

    :param unix_time_sec: Valid time.
    :param top_directory_name: Name of top-level directory with GridRad.
    :param raise_error_if_missing: Boolean flag.  If file is missing and
        raise_error_if_missing = True, will raise error.  If file is missing and
        raise_error_if_missing = False, will return *expected* path to file.
    :return: gridrad_file_name: Path to GridRad file.
    :raises: ValueError: if raise_error_if_missing = True and file is missing.
    """

    error_checking.assert_is_string(top_directory_name)

    spc_date_string = time_conversion.time_to_spc_date_string(unix_time_sec)
    gridrad_file_name = '{0:s}/{1:s}/{2:s}/{3:s}'.format(
        top_directory_name, spc_date_string[:4], spc_date_string,
        _get_pathless_file_name(unix_time_sec))

    if raise_error_if_missing and not os.path.isfile(gridrad_file_name):
        error_string = ('Cannot find GridRad file.  Expected at: "{0:s}"'
                        ).format(gridrad_file_name)

        raise ValueError(error_string)

    return gridrad_file_name
Example #9
def find_grid_metafile(prediction_dir_name, raise_error_if_missing=True):
    """Finds file with metadata for grid.

    This file is needed only if prediction files are split by space (one per
    grid cell).

    :param prediction_dir_name: Name of directory with prediction files.  The
        metafile is expected here.
    :param raise_error_if_missing: Boolean flag.  If file is missing and
        `raise_error_if_missing == True`, will throw error.  If file is missing
        and `raise_error_if_missing == False`, will return *expected* file path.
    :return: grid_metafile_name: File path.
    :raises: ValueError: if file is missing
        and `raise_error_if_missing == True`.
    """

    error_checking.assert_is_string(prediction_dir_name)
    grid_metafile_name = '{0:s}/grid_metadata.nc'.format(prediction_dir_name)

    if raise_error_if_missing and not os.path.isfile(grid_metafile_name):
        error_string = 'Cannot find file.  Expected at: "{0:s}"'.format(
            grid_metafile_name)
        raise ValueError(error_string)

    return grid_metafile_name
Example #10
def run_guided_gradcam(orig_model_object,
                       list_of_input_matrices,
                       target_layer_name,
                       class_activation_matrix,
                       new_model_object=None):
    """Runs guided Grad-CAM.

    M = number of rows in grid
    N = number of columns in grid
    C = number of channels

    :param orig_model_object: Original model (trained instance of
        `keras.models.Model` or `keras.models.Sequential`).
    :param list_of_input_matrices: See doc for `run_gradcam`.
    :param target_layer_name: Same.
    :param class_activation_matrix: Same.
    :param new_model_object: New model (created by `_change_backprop_function`),
        to be used for guided backprop.
    :return: ggradcam_output_matrix: M-by-N-by-C numpy array of output values.
    :return: new_model_object: See input doc.
    """

    # Check input args.
    error_checking.assert_is_string(target_layer_name)
    error_checking.assert_is_list(list_of_input_matrices)
    error_checking.assert_is_numpy_array_without_nan(class_activation_matrix)

    for q in range(len(list_of_input_matrices)):
        error_checking.assert_is_numpy_array(list_of_input_matrices[q])

        if list_of_input_matrices[q].shape[0] != 1:
            list_of_input_matrices[q] = numpy.expand_dims(
                list_of_input_matrices[q], axis=0)

    # Do the dirty work.
    if new_model_object is None:
        _register_guided_backprop()
        new_model_object = _change_backprop_function(
            model_object=orig_model_object)

    input_index = _find_relevant_input_matrix(
        list_of_input_matrices=list_of_input_matrices,
        num_spatial_dim=len(class_activation_matrix.shape))

    saliency_function = _make_saliency_function(model_object=new_model_object,
                                                layer_name=target_layer_name,
                                                input_index=input_index)

    # The appended 0 is presumably the Keras learning-phase flag (0 = test mode).
    saliency_matrix = saliency_function(list_of_input_matrices + [0])[0]
    print('Minimum saliency = {0:.4e} ... max saliency = {1:.4e}'.format(
        numpy.min(saliency_matrix), numpy.max(saliency_matrix)
    ))

    ggradcam_output_matrix = saliency_matrix * class_activation_matrix[
        ..., numpy.newaxis]
    ggradcam_output_matrix = ggradcam_output_matrix[0, ...]

    # ggradcam_output_matrix = _normalize_guided_gradcam_output(
    #     ggradcam_output_matrix[0, ...])

    return ggradcam_output_matrix, new_model_object
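
The core of guided Grad-CAM is the last multiplication: guided-backprop saliency is masked by the class-activation map, broadcast over channels.  A toy numpy sketch of that broadcast:

import numpy

saliency_matrix = numpy.random.uniform(size=(1, 4, 4, 3))    # 1 example, M = N = 4, C = 3
class_activation_matrix = numpy.random.uniform(size=(4, 4))  # M x N

# (1, 4, 4, 3) * (4, 4, 1) broadcasts to (1, 4, 4, 3); then drop the
# example dimension.
ggradcam_output_matrix = (
    saliency_matrix * class_activation_matrix[..., numpy.newaxis]
)[0, ...]

print(ggradcam_output_matrix.shape)   # (4, 4, 3)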
Example #11
def check_metadata(activation_layer_name, vector_output_layer_name,
                   output_neuron_indices, ideal_activation):
    """Checks metadata for errors.

    :param activation_layer_name: Name of activation layer.
    :param vector_output_layer_name: Name of layer that outputs predictions for
        vector target variables.
    :param output_neuron_indices: length-2 numpy array with indices of output
        neuron (height index, channel index).  Class activation will be computed
        with respect to the output of this neuron.
    :param ideal_activation: Ideal neuron activation, used to define loss
        function.  The loss function will be
        (output_neuron_activation - ideal_activation)**2.
    """

    error_checking.assert_is_string(activation_layer_name)
    error_checking.assert_is_string(vector_output_layer_name)

    error_checking.assert_is_integer_numpy_array(output_neuron_indices)
    error_checking.assert_is_geq_numpy_array(output_neuron_indices, 0)
    error_checking.assert_is_numpy_array(output_neuron_indices,
                                         exact_dimensions=numpy.array(
                                             [2], dtype=int))

    error_checking.assert_is_not_nan(ideal_activation)
Example #12
def write_file(
        pickle_file_name, activation_matrix, storm_ids, storm_times_unix_sec,
        model_file_name, component_type_string, target_class=None,
        layer_name=None, neuron_index_matrix=None, channel_indices=None):
    """Writes activations to Pickle file.

    E = number of examples (storm objects)
    C = number of model components (classes, neurons, or channels) for which
        activations were computed

    :param pickle_file_name: Path to output file.
    :param activation_matrix: E-by-C numpy array of activations, where
        activation_matrix[i, j] = activation of the [j]th model component for
        the [i]th example.
    :param storm_ids: length-E list of storm IDs.
    :param storm_times_unix_sec: length-E numpy array of storm times.
    :param model_file_name: Path to file with trained model.
    :param component_type_string: See doc for `check_metadata`.
    :param target_class: Same.
    :param layer_name: Same.
    :param neuron_index_matrix: Same.
    :param channel_indices: Same.
    """

    num_components = check_metadata(
        component_type_string=component_type_string, target_class=target_class,
        layer_name=layer_name, neuron_index_matrix=neuron_index_matrix,
        channel_indices=channel_indices)
    error_checking.assert_is_string(model_file_name)

    error_checking.assert_is_string_list(storm_ids)
    error_checking.assert_is_numpy_array(
        numpy.array(storm_ids), num_dimensions=1)
    num_examples = len(storm_ids)

    error_checking.assert_is_integer_numpy_array(storm_times_unix_sec)
    error_checking.assert_is_numpy_array(
        storm_times_unix_sec, exact_dimensions=numpy.array([num_examples]))

    error_checking.assert_is_numpy_array_without_nan(activation_matrix)
    error_checking.assert_is_numpy_array(
        activation_matrix,
        exact_dimensions=numpy.array([num_examples, num_components]))

    metadata_dict = {
        STORM_IDS_KEY: storm_ids,
        STORM_TIMES_KEY: storm_times_unix_sec,
        MODEL_FILE_NAME_KEY: model_file_name,
        COMPONENT_TYPE_KEY: component_type_string,
        TARGET_CLASS_KEY: target_class,
        LAYER_NAME_KEY: layer_name,
        NEURON_INDICES_KEY: neuron_index_matrix,
        CHANNEL_INDICES_KEY: channel_indices,
    }

    file_system_utils.mkdir_recursive_if_necessary(file_name=pickle_file_name)
    pickle_file_handle = open(pickle_file_name, 'wb')
    pickle.dump(activation_matrix, pickle_file_handle)
    pickle.dump(metadata_dict, pickle_file_handle)
    pickle_file_handle.close()
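
Because the matrix and metadata are written with two sequential `pickle.dump` calls, they must be read back with two sequential `pickle.load` calls in the same order.  A sketch (hypothetical file name):

import pickle

with open('storm_activations.p', 'rb') as pickle_file_handle:
    activation_matrix = pickle.load(pickle_file_handle)
    metadata_dict = pickle.load(pickle_file_handle)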
Example #13
def find_prediction_file(top_directory_name, spc_date_string,
                         raise_error_if_missing=False):
    """Finds file with upconvnet predictions (reconstructed radar images).

    :param top_directory_name: Name of top-level directory with upconvnet
        predictions.
    :param spc_date_string: SPC date (format "yyyymmdd").
    :param raise_error_if_missing: Boolean flag.  If file is missing and
        `raise_error_if_missing = True`, this method will error out.
    :return: prediction_file_name: Path to prediction file.  If file is missing
        and `raise_error_if_missing = False`, this will be the expected path.
    :raises: ValueError: if file is missing and `raise_error_if_missing = True`.
    """

    error_checking.assert_is_string(top_directory_name)
    error_checking.assert_is_boolean(raise_error_if_missing)
    # The call below just validates the format of `spc_date_string`.
    time_conversion.spc_date_string_to_unix_sec(spc_date_string)

    prediction_file_name = (
        '{0:s}/{1:s}/{2:s}_{3:s}.p'
    ).format(
        top_directory_name, spc_date_string[:4], PATHLESS_FILE_NAME_PREFIX,
        spc_date_string
    )

    if raise_error_if_missing and not os.path.isfile(prediction_file_name):
        error_string = 'Cannot find file.  Expected at: "{0:s}"'.format(
            prediction_file_name)
        raise ValueError(error_string)

    return prediction_file_name
Example #14
def find_local_raw_file(year,
                        directory_name=None,
                        raise_error_if_missing=True):
    """Finds raw file on local machine.

    This file should contain all storm reports for one year.

    :param year: [integer] Will look for file from this year.
    :param directory_name: Name of directory with Storm Events files.
    :param raise_error_if_missing: Boolean flag.  If True and file is missing,
        this method will raise an error.
    :return: raw_file_name: File path.  If raise_error_if_missing = False and
        file is missing, this will be the *expected* path.
    :raises: ValueError: if raise_error_if_missing = True and file is missing.
    """

    error_checking.assert_is_integer(year)
    error_checking.assert_is_string(directory_name)
    error_checking.assert_is_boolean(raise_error_if_missing)

    raw_file_name = '{0:s}/{1:s}{2:s}{3:s}'.format(
        directory_name, PATHLESS_RAW_FILE_PREFIX, _year_number_to_string(year),
        RAW_FILE_EXTENSION)

    if raise_error_if_missing and not os.path.isfile(raw_file_name):
        raise ValueError('Cannot find raw file.  Expected at location: ' +
                         raw_file_name)

    return raw_file_name
Example #15
def find_model_file(base_model_file_name, raise_error_if_missing=True):
    """Finds file containing isotonic-regression model(s).

    This file should be written by `write_model_for_each_class`.

    :param base_model_file_name: Path to file containing base model (e.g., CNN).
    :param raise_error_if_missing: Boolean flag.  If isotonic-regression file is
        missing and `raise_error_if_missing = True`, this method will error out.
    :return: isotonic_file_name: Path to isotonic-regression file.  If the file
        is missing and `raise_error_if_missing = False`, this will be the
        *expected* path.
    :raises: ValueError: if isotonic-regression file is missing and
        `raise_error_if_missing = True`.
    """

    error_checking.assert_is_string(base_model_file_name)
    error_checking.assert_is_boolean(raise_error_if_missing)

    isotonic_file_name = '{0:s}/isotonic_regression_models.p'.format(
        os.path.split(base_model_file_name)[0])
    if not os.path.isfile(isotonic_file_name) and raise_error_if_missing:
        error_string = 'Cannot find file.  Expected at: "{0:s}"'.format(
            isotonic_file_name)
        raise ValueError(error_string)

    return isotonic_file_name
Example #16
def _check_convolution_options(
        num_kernel_rows, num_rows_per_stride, padding_type_string,
        num_filters, num_kernel_dimensions, num_kernel_columns=None,
        num_columns_per_stride=None, num_kernel_heights=None,
        num_heights_per_stride=None):
    """Checks input args for 1-D, 2-D, or 3-D convolution layer.

    :param num_kernel_rows: Number of rows in kernel.
    :param num_rows_per_stride: Number of rows per stride (number of rows moved
        by the kernel at once).
    :param padding_type_string: Padding type (must be in
        `VALID_PADDING_TYPE_STRINGS`).
    :param num_filters: Number of output filters (channels).
    :param num_kernel_dimensions: Number of dimensions in kernel.
    :param num_kernel_columns: [used only if num_kernel_dimensions > 1]
        Number of columns in kernel.
    :param num_columns_per_stride: [used only if num_kernel_dimensions > 1]
        Number of columns per stride.
    :param num_kernel_heights: [used only if num_kernel_dimensions = 3]
        Number of heights in kernel.
    :param num_heights_per_stride: [used only if num_kernel_dimensions = 3]
        Number of heights per stride.
    :raises: ValueError: if
        `padding_type_string not in VALID_PADDING_TYPE_STRINGS`.
    """

    error_checking.assert_is_integer(num_kernel_rows)
    error_checking.assert_is_geq(num_kernel_rows, 3)
    error_checking.assert_is_integer(num_rows_per_stride)
    error_checking.assert_is_geq(num_rows_per_stride, 1)
    error_checking.assert_is_leq(num_rows_per_stride, num_kernel_rows)

    error_checking.assert_is_string(padding_type_string)
    if padding_type_string not in VALID_PADDING_TYPE_STRINGS:
        error_string = (
            '\n{0:s}\nValid padding types (listed above) do not include '
            '"{1:s}".'
        ).format(str(VALID_PADDING_TYPE_STRINGS), padding_type_string)

        raise ValueError(error_string)

    error_checking.assert_is_integer(num_filters)
    error_checking.assert_is_geq(num_filters, 1)
    error_checking.assert_is_integer(num_kernel_dimensions)
    error_checking.assert_is_geq(num_kernel_dimensions, 1)
    error_checking.assert_is_leq(num_kernel_dimensions, 3)

    if num_kernel_dimensions >= 2:
        error_checking.assert_is_integer(num_kernel_columns)
        error_checking.assert_is_geq(num_kernel_columns, 3)
        error_checking.assert_is_integer(num_columns_per_stride)
        error_checking.assert_is_geq(num_columns_per_stride, 1)
        error_checking.assert_is_leq(num_columns_per_stride, num_kernel_columns)

    if num_kernel_dimensions == 3:
        error_checking.assert_is_integer(num_kernel_heights)
        error_checking.assert_is_geq(num_kernel_heights, 3)
        error_checking.assert_is_integer(num_heights_per_stride)
        error_checking.assert_is_geq(num_heights_per_stride, 1)
        error_checking.assert_is_leq(num_heights_per_stride, num_kernel_heights)
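
Assuming `VALID_PADDING_TYPE_STRINGS` contains the usual "valid" and "same" options, the padding type determines the output grid size.  A sketch of the standard formulas for one dimension:

import math

def num_output_rows(num_input_rows, num_kernel_rows, num_rows_per_stride,
                    padding_type_string):
    """Computes number of output rows, by the usual convolution conventions."""
    if padding_type_string == 'same':
        return int(math.ceil(float(num_input_rows) / num_rows_per_stride))

    # "valid" padding: the kernel must fit entirely inside the grid.
    return int(math.ceil(
        float(num_input_rows - num_kernel_rows + 1) / num_rows_per_stride))

print(num_output_rows(32, 3, 1, 'valid'))   # 30
print(num_output_rows(32, 3, 1, 'same'))    # 32
print(num_output_rows(32, 3, 2, 'valid'))   # 15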
Example #17
def find_file(directory_name, year, raise_error_if_missing=True):
    """Finds NetCDF file with RRTM data.

    :param directory_name: Name of directory where file is expected.
    :param year: Year (integer).
    :param raise_error_if_missing: Boolean flag.  If file is missing and
        `raise_error_if_missing == True`, will throw error.  If file is missing
        and `raise_error_if_missing == False`, will return *expected* file path.
    :return: rrtm_file_name: File path.
    :raises: ValueError: if file is missing
        and `raise_error_if_missing == True`.
    """

    error_checking.assert_is_string(directory_name)
    error_checking.assert_is_integer(year)
    error_checking.assert_is_boolean(raise_error_if_missing)

    rrtm_file_name = '{0:s}/rrtm_output_{1:04d}.nc'.format(
        directory_name, year)

    if raise_error_if_missing and not os.path.isfile(rrtm_file_name):
        error_string = 'Cannot find file.  Expected at: "{0:s}"'.format(
            rrtm_file_name)
        raise ValueError(error_string)

    return rrtm_file_name
Example #18
def read_target_values(netcdf_file_name, target_name):
    """Reads target values from NetCDF file.

    N = number of storm objects

    :param netcdf_file_name: Path to input file.
    :param target_name: Name of target variable.
    :return: storm_label_dict: Dictionary with the following keys.
    storm_label_dict['storm_ids']: length-N list of storm IDs.
    storm_label_dict['valid_times_unix_sec']: length-N numpy array of valid
        times.
    storm_label_dict['target_values']: length-N numpy array with values of
        `target_name`.
    """

    error_checking.assert_is_string(target_name)
    netcdf_dataset = netcdf_io.open_netcdf(netcdf_file_name=netcdf_file_name,
                                           raise_error_if_fails=True)

    storm_ids = netCDF4.chartostring(
        netcdf_dataset.variables[STORM_IDS_KEY][:])
    valid_times_unix_sec = numpy.array(
        netcdf_dataset.variables[VALID_TIMES_KEY][:], dtype=int)
    target_values = numpy.array(netcdf_dataset.variables[target_name][:],
                                dtype=int)

    netcdf_dataset.close()

    return {
        STORM_IDS_KEY: [str(s) for s in storm_ids],
        VALID_TIMES_KEY: valid_times_unix_sec,
        TARGET_VALUES_KEY: target_values
    }
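
Storm IDs are stored as a character matrix (one row per storm, one column per character), which `netCDF4.chartostring` collapses back to strings.  A self-contained round-trip sketch with hypothetical names and a throwaway file:

import numpy
import netCDF4

dataset_object = netCDF4.Dataset('storm_labels_demo.nc', 'w')
dataset_object.createDimension('storm_object', 2)
dataset_object.createDimension('storm_id_character', 10)

storm_ids_variable = dataset_object.createVariable(
    'storm_ids', 'S1', ('storm_object', 'storm_id_character'))
storm_ids_variable[:] = netCDF4.stringtochar(
    numpy.array(['storm_0001', 'storm_0002'], dtype='S10'))
dataset_object.close()

dataset_object = netCDF4.Dataset('storm_labels_demo.nc')
storm_ids = [
    str(s) for s in netCDF4.chartostring(
        dataset_object.variables['storm_ids'][:])
]
dataset_object.close()

print(storm_ids)   # ['storm_0001', 'storm_0002']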
Example #19
def label_axes(axes_object,
               label_string,
               font_size=DEFAULT_LABEL_FONT_SIZE,
               font_colour=DEFAULT_LABEL_FONT_COLOUR,
               x_coord_normalized=DEFAULT_LABEL_X_NORMALIZED,
               y_coord_normalized=DEFAULT_LABEL_Y_NORMALIZED):
    """Adds text label to axes.

    :param axes_object: Axes (instance of
        `matplotlib.axes._subplots.AxesSubplot`).
    :param label_string: Label.
    :param font_size: Font size.
    :param font_colour: Font colour.
    :param x_coord_normalized: Normalized x-coordinate (from 0...1, where 1 is
        the right side).
    :param y_coord_normalized: Normalized y-coordinate (from 0...1, where 1 is
        the top).
    """

    error_checking.assert_is_string(label_string)
    # error_checking.assert_is_geq(x_coord_normalized, 0.)
    # error_checking.assert_is_leq(x_coord_normalized, 1.)
    # error_checking.assert_is_geq(y_coord_normalized, 0.)
    # error_checking.assert_is_leq(y_coord_normalized, 1.)

    axes_object.text(x_coord_normalized,
                     y_coord_normalized,
                     label_string,
                     fontsize=font_size,
                     color=colour_from_numpy_to_tuple(font_colour),
                     horizontalalignment='right',
                     verticalalignment='bottom',
                     transform=axes_object.transAxes)
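
In axes ("normalized") coordinates, (0, 0) is the bottom-left corner and (1, 1) the top-right, independent of the data limits; `transform=transAxes` is what selects that coordinate system.  A standalone matplotlib sketch with illustrative values:

import matplotlib.pyplot as plt

_, axes_object = plt.subplots()

axes_object.text(
    0.98, 0.02, '(a)', fontsize=30, color='black',
    horizontalalignment='right', verticalalignment='bottom',
    transform=axes_object.transAxes)

plt.show()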
Example #20
def download_raw_file(unix_time_sec,
                      secondary_source,
                      top_local_directory_name,
                      protocol,
                      user_name=None,
                      password=None,
                      raise_error_if_fails=True):
    """Downloads raw file from either FTP or HTTP server.

    :param unix_time_sec: Valid time.
    :param secondary_source: String ID for secondary data source.
    :param top_local_directory_name: Name of top-level directory with raw MADIS
        files on local machine.
    :param protocol: Protocol (either "http" or "ftp").
    :param user_name: User name on FTP or HTTP server.  To login anonymously,
        leave this as None.
    :param password: Password on FTP or HTTP server.  To login anonymously,
        leave this as None.
    :param raise_error_if_fails: Boolean flag.  If True and download fails, this
        method will raise an error.
    :return: local_gzip_file_name: Local path to file that was just downloaded.
        If download failed but raise_error_if_fails = False, this will be None.
    :raises: ValueError: if protocol is neither "ftp" nor "http".
    """

    error_checking.assert_is_string(protocol)
    if protocol not in ['ftp', 'http']:
        error_string = (
            'Protocol should be either "ftp" or "http", not "{0:s}"'.format(
                protocol))
        raise ValueError(error_string)

    raw_wind_io.check_data_sources(raw_wind_io.MADIS_DATA_SOURCE,
                                   secondary_source)
    online_file_name = _get_online_file_name(unix_time_sec=unix_time_sec,
                                             secondary_source=secondary_source,
                                             protocol=protocol)

    local_gzip_file_name = find_local_raw_file(
        unix_time_sec=unix_time_sec,
        secondary_source=secondary_source,
        top_directory_name=top_local_directory_name,
        raise_error_if_missing=False)

    if protocol == 'ftp':
        return downloads.download_file_via_ftp(
            server_name=FTP_SERVER_NAME,
            user_name=user_name,
            password=password,
            ftp_file_name=online_file_name,
            local_file_name=local_gzip_file_name,
            raise_error_if_fails=raise_error_if_fails)

    return downloads.download_files_via_http(
        online_file_names=[online_file_name],
        local_file_names=[local_gzip_file_name],
        user_name=user_name,
        password=password,
        host_name=HTTP_HOST_NAME,
        raise_error_if_fails=raise_error_if_fails)[0]
Example #21
def merge_winds_and_station_metadata(wind_table, station_metadata_table,
                                     station_id):
    """Merges wind data with metadata for observing stations.

    :param wind_table: pandas DataFrame created by
        read_1minute_winds_from_raw_file, read_5minute_winds_from_raw_file, or
        `raw_wind_io.sustained_and_gust_to_uv_max`.
    :param station_metadata_table: pandas DataFrame created by
        read_station_metadata_from_raw_file.
    :param station_id: String ID for station in wind_table.
    :return: wind_table: Same as input, but with the following extra columns.
    wind_table.station_id: String ID for station.
    wind_table.station_name: Verbose name for station.
    wind_table.latitude_deg: Latitude (deg N).
    wind_table.longitude_deg: Longitude (deg E).
    wind_table.elevation_m_asl: Elevation (metres above sea level).
    """

    error_checking.assert_is_string(station_id)

    station_id_list = [station_id] * len(wind_table.index)
    argument_dict = {raw_wind_io.STATION_ID_COLUMN: station_id_list}
    wind_table = wind_table.assign(**argument_dict)

    return wind_table.merge(station_metadata_table[METADATA_COLUMNS_TO_MERGE],
                            on=raw_wind_io.STATION_ID_COLUMN,
                            how='inner')
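
The pattern is: tag every wind row with a single station ID, then inner-join against the station table on that column.  A sketch with toy frames and assumed column names:

import pandas

wind_table = pandas.DataFrame({
    'wind_speed_m_s01': [5.1, 7.3],
    'unix_time_sec': [1000000000, 1000000300],
})
station_metadata_table = pandas.DataFrame({
    'station_id': ['KBOU', 'KDEN'],
    'latitude_deg': [40.0, 39.8],
    'longitude_deg': [254.7, 255.3],
})

wind_table = wind_table.assign(
    station_id=['KBOU'] * len(wind_table.index))
wind_table = wind_table.merge(
    station_metadata_table, on='station_id', how='inner')

print(wind_table)   # two rows, each carrying KBOU's metadata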
Example #22
def mkdir_recursive_if_necessary(directory_name=None, file_name=None):
    """Creates directory if necessary (i.e., doesn't already exist).

    This method checks for the argument `directory_name` first.  If
    `directory_name` is None, this method checks for `file_name` and extracts
    the directory.

    :param directory_name: Path to local directory.
    :param file_name: Path to local file.
    """

    if directory_name is None:
        error_checking.assert_is_string(file_name)
        directory_name = os.path.dirname(file_name)
    else:
        error_checking.assert_is_string(directory_name)

    if directory_name == '':
        return

    try:
        os.makedirs(directory_name)
    except OSError as this_error:
        if this_error.errno == errno.EEXIST and os.path.isdir(directory_name):
            pass
        else:
            raise
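
The try/except-errno pattern above is the Python 2 idiom.  In Python 3 the same behaviour is one call with `exist_ok=True`; a sketch:

import os

def mkdir_recursive_if_necessary_py3(directory_name=None, file_name=None):
    """Python 3 equivalent: `exist_ok=True` replaces the errno check."""
    if directory_name is None:
        directory_name = os.path.dirname(file_name)

    if directory_name != '':
        os.makedirs(directory_name, exist_ok=True)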
Example #23
def find_file(valid_time_unix_sec,
              top_directory_name,
              raise_error_if_missing=True):
    """Finds file (text file in WPC format) on local machine.

    This file should contain positions of cyclones, anticyclones, fronts, etc.
    for a single valid time.

    :param valid_time_unix_sec: Valid time.
    :param top_directory_name: Name of top-level directory with WPC bulletins.
    :param raise_error_if_missing: Boolean flag.  If file is missing and
        raise_error_if_missing = True, this method will error out.  If file is
        missing and raise_error_if_missing = False, this method will return the
        *expected* path to the file.
    :return: bulletin_file_name: Path to file.  If file is missing and
        raise_error_if_missing = False, this is the *expected* path.
    :raises: ValueError: if file is missing and raise_error_if_missing = True.
    """

    error_checking.assert_is_string(top_directory_name)
    error_checking.assert_is_boolean(raise_error_if_missing)
    valid_time_string = time_conversion.unix_sec_to_string(
        valid_time_unix_sec, TIME_FORMAT_IN_FILE_NAME)

    bulletin_file_name = '{0:s}/{1:s}/{2:s}_{3:s}'.format(
        top_directory_name, valid_time_string[:4], PATHLESS_FILE_NAME_PREFIX,
        valid_time_string)

    if raise_error_if_missing and not os.path.isfile(bulletin_file_name):
        error_string = ('Cannot find file.  Expected at location: "{0:s}"'.
                        format(bulletin_file_name))
        raise ValueError(error_string)

    return bulletin_file_name
Example #24
def check_metadata(layer_name, neuron_indices, ideal_activation,
                   num_iterations, learning_rate, l2_weight):
    """Checks metadata for errors.

    :param layer_name: Name of layer with relevant neuron.
    :param neuron_indices: 1-D numpy array with indices of relevant neuron.
        Must have length D - 1, where D = number of dimensions in layer output.
        The first dimension is the batch dimension, which always has length
        `None` in Keras.
    :param ideal_activation: Ideal neuron activation, used to define loss
        function.  The loss function will be
        (neuron_activation - ideal_activation)**2.
    :param num_iterations: Number of iterations for gradient descent.
    :param learning_rate: Learning rate for gradient descent.
    :param l2_weight: L2 weight (penalty for difference between initial and
        final predictor matrix) in loss function.
    """

    error_checking.assert_is_string(layer_name)

    error_checking.assert_is_integer_numpy_array(neuron_indices)
    error_checking.assert_is_geq_numpy_array(neuron_indices, 0)
    error_checking.assert_is_numpy_array(neuron_indices, num_dimensions=1)

    error_checking.assert_is_not_nan(ideal_activation)

    error_checking.assert_is_integer(num_iterations)
    error_checking.assert_is_greater(num_iterations, 0)

    error_checking.assert_is_greater(learning_rate, 0.)
    error_checking.assert_is_less_than(learning_rate, 1.)

    error_checking.assert_is_geq(l2_weight, 0.)
Example #25
def check_component_metadata(
        component_type_string, target_class=None, layer_name=None,
        neuron_indices=None, channel_index=None):
    """Checks metadata for model component.

    :param component_type_string: Component type (must be accepted by
        `check_component_type`).
    :param target_class: [used only if component_type_string = "class"]
        Target class.  Integer from 0...(K - 1), where K = number of classes.
    :param layer_name:
        [used only if component_type_string = "neuron" or "channel"]
        Name of layer containing neuron or channel.
    :param neuron_indices: [used only if component_type_string = "neuron"]
        1-D numpy array with indices of neuron.
    :param channel_index: [used only if component_type_string = "channel"]
        Index of channel.
    """

    check_component_type(component_type_string)
    if component_type_string == CLASS_COMPONENT_TYPE_STRING:
        error_checking.assert_is_integer(target_class)
        error_checking.assert_is_geq(target_class, 0)

    if component_type_string in [NEURON_COMPONENT_TYPE_STRING,
                                 CHANNEL_COMPONENT_TYPE_STRING]:
        error_checking.assert_is_string(layer_name)

    if component_type_string == NEURON_COMPONENT_TYPE_STRING:
        error_checking.assert_is_integer_numpy_array(neuron_indices)
        error_checking.assert_is_geq_numpy_array(neuron_indices, 0)
        error_checking.assert_is_numpy_array(neuron_indices, num_dimensions=1)

    if component_type_string == CHANNEL_COMPONENT_TYPE_STRING:
        error_checking.assert_is_integer(channel_index)
        error_checking.assert_is_geq(channel_index, 0)
Example #26
def _check_basemap_args(min_latitude_deg, max_latitude_deg, min_longitude_deg,
                        max_longitude_deg, resolution_string):
    """Error-checks input args for creating basemap.

    Latitudes must be in deg N, and longitudes must be in deg E.

    Both output values are in deg E, with positive values (180-360) in the
    western hemisphere.  The inputs may be positive or negative in the western
    hemisphere.

    :param min_latitude_deg: Minimum latitude in map (bottom-left corner).
    :param max_latitude_deg: Max latitude in map (top-right corner).
    :param min_longitude_deg: Minimum longitude in map (bottom-left corner).
    :param max_longitude_deg: Max longitude in map (top-right corner).
    :param resolution_string: Resolution of boundaries (political borders,
        lakes, rivers, etc.) in basemap.  Options are "c" for crude, "l" for
        low, "i" for intermediate, "h" for high, and "f" for full.
    :return: min_longitude_deg: Minimum longitude (deg E, positive in western
        hemisphere).
    :return: max_longitude_deg: Max longitude (deg E, positive in western
        hemisphere).
    """

    error_checking.assert_is_valid_latitude(min_latitude_deg)
    error_checking.assert_is_valid_latitude(max_latitude_deg)
    error_checking.assert_is_greater(max_latitude_deg, min_latitude_deg)

    min_longitude_deg = lng_conversion.convert_lng_positive_in_west(
        min_longitude_deg)
    max_longitude_deg = lng_conversion.convert_lng_positive_in_west(
        max_longitude_deg)

    error_checking.assert_is_greater(max_longitude_deg, min_longitude_deg)
    error_checking.assert_is_string(resolution_string)

    return min_longitude_deg, max_longitude_deg
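
A sketch of what the positive-in-west conversion presumably amounts to (mapping longitudes into [0, 360), so that -105 deg E becomes 255 deg E):

import numpy

def convert_lng_positive_in_west_sketch(longitudes_deg):
    """Maps longitudes into the range [0, 360)."""
    return numpy.mod(longitudes_deg, 360.)

print(convert_lng_positive_in_west_sketch(
    numpy.array([-105., 255., -80.])))
# [255. 255. 280.]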
Example #27
def find_file(year, directory_name, raise_error_if_missing=True):
    """Finds Storm Events file.

    This file should contain all storm reports for one year.

    :param year: Year (integer).
    :param directory_name: Name of directory with Storm Events files.
    :param raise_error_if_missing: Boolean flag.  If file is missing and
        raise_error_if_missing = True, this method will error out.
    :return: storm_event_file_name: Path to Storm Events file.  If file is
        missing and raise_error_if_missing = False, this will be the *expected*
        path.
    :raises: ValueError: if file is missing and raise_error_if_missing = True.
    """

    error_checking.assert_is_integer(year)
    error_checking.assert_is_string(directory_name)
    error_checking.assert_is_boolean(raise_error_if_missing)

    storm_event_file_name = '{0:s}/{1:s}{2:s}{3:s}'.format(
        directory_name, PATHLESS_FILE_PREFIX, _year_number_to_string(year),
        FILE_EXTENSION)

    if raise_error_if_missing and not os.path.isfile(storm_event_file_name):
        error_string = ('Cannot find Storm Events file.  Expected at: {0:s}'.
                        format(storm_event_file_name))
        raise ValueError(error_string)

    return storm_event_file_name
Example #28
def find_processed_file(directory_name, year, raise_error_if_missing=True):
    """Finds processed file with tornado reports.

    See `write_processed_file` for the definition of a "processed file".

    :param directory_name: Name of directory.
    :param year: Year (integer).
    :param raise_error_if_missing: Boolean flag.  If file is missing and
        raise_error_if_missing = True, this method will error out.
    :return: processed_file_name: Path to file.  If file is missing and
        raise_error_if_missing = False, this will be the *expected* path.
    :raises: ValueError: if file is missing and raise_error_if_missing = True.
    """

    error_checking.assert_is_string(directory_name)
    error_checking.assert_is_integer(year)
    error_checking.assert_is_boolean(raise_error_if_missing)

    processed_file_name = '{0:s}/tornado_reports_{1:04d}.csv'.format(
        directory_name, year)

    if raise_error_if_missing and not os.path.isfile(processed_file_name):
        error_string = (
            'Cannot find processed file with tornado reports.  Expected at: '
            '{0:s}').format(processed_file_name)
        raise ValueError(error_string)

    return processed_file_name
Example #29
def check_field_name(field_name, require_standard=False):
    """Ensures that name of model field is recognized.

    :param field_name: Field name in GewitterGefahr format (not the original
        NetCDF format).
    :param require_standard: Boolean flag.  If True, `field_name` must be in
        `STANDARD_FIELD_NAMES`.  If False, `field_name` must be in
        `FIELD_NAMES`.
    :raises: ValueError: if field name is unrecognized.
    """

    error_checking.assert_is_string(field_name)
    error_checking.assert_is_boolean(require_standard)

    if require_standard:
        valid_field_names = STANDARD_FIELD_NAMES
    else:
        valid_field_names = FIELD_NAMES

    if field_name not in valid_field_names:
        error_string = (
            '\n\n' + str(valid_field_names) +
            '\n\nValid field names (listed above) do not include "' +
            field_name + '".')
        raise ValueError(error_string)
Example #30
def _check_input_data_for_learning(
        input_table, feature_names, target_name=None):
    """Checks input data (to machine-learning model) for errors.

    :param input_table: pandas DataFrame, where each row is one example (data
        point).
    :param feature_names: 1-D list with names of features (predictor variables).
        Each feature must be a column of input_table.
    :param target_name: Name of target variable (predictand).  Must be a column
        of input_table.  All values must be 0 or 1.
    """

    error_checking.assert_is_string_list(feature_names)
    error_checking.assert_is_numpy_array(
        numpy.array(feature_names), num_dimensions=1)

    if target_name is None:
        error_checking.assert_columns_in_dataframe(input_table, feature_names)
        return

    error_checking.assert_is_string(target_name)
    error_checking.assert_columns_in_dataframe(
        input_table, feature_names + [target_name])

    target_values = input_table[target_name].values
    error_checking.assert_is_integer_numpy_array(target_values)
    error_checking.assert_is_geq_numpy_array(target_values, 0)
    error_checking.assert_is_leq_numpy_array(target_values, 1)
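
A usage sketch with a toy table and plain asserts standing in for the `error_checking` calls (column names are illustrative only):

import numpy
import pandas

input_table = pandas.DataFrame({
    'cape_j_kg01': [500., 1200., 2500.],
    'shear_m_s01': [10., 25., 5.],
    'tornado_label': [0, 1, 0],
})

feature_names = ['cape_j_kg01', 'shear_m_s01']
target_name = 'tornado_label'

assert all(
    c in input_table.columns for c in feature_names + [target_name])

target_values = input_table[target_name].values
assert numpy.issubdtype(target_values.dtype, numpy.integer)
assert numpy.all((target_values >= 0) & (target_values <= 1))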