Example #1
def fit_mvn_for_each_class(feature_table,
                           class_labels,
                           num_classes,
                           assume_diagonal_covar_matrix=False):
    """For each class, fits data to a multivariate normal distribution.

    N = number of examples
    M = number of features (input variables)
    K = number of classes

    :param feature_table: pandas DataFrame with N rows and M columns.  Column
        names are feature names.
    :param class_labels: length-N numpy array of class labels.  Should be
        integers ranging from 0...[num_classes - 1].
    :param num_classes: Number of classes.
    :param assume_diagonal_covar_matrix: See documentation for
        fit_multivariate_normal.
    :return: list_of_mvn_dictionaries: length-K list of dictionaries, each with
        the following keys.
    list_of_mvn_dictionaries[k]['prior_class_probability']: Prior probability of
        [k]th class.  This is the frequency of label k in `class_labels`.
    list_of_mvn_dictionaries[k]['orig_feature_table']: Original feature table
        (before transforming marginals to normal distribution) for [k]th class.
    list_of_mvn_dictionaries[k]['feature_names']: length-M list of feature names
        (same for each class).
    list_of_mvn_dictionaries[k]['feature_means']: length-M numpy array with mean
        value of each feature, given the [k]th class.
    list_of_mvn_dictionaries[k]['covariance_matrix']: M-by-M numpy array.
        Covariance matrix, given the [k]th class.
    list_of_mvn_dictionaries[k]['covar_matrix_inverse']: Inverse of covariance
        matrix for [k]th class.
    list_of_mvn_dictionaries[k]['covar_matrix_determinant']: Determinant of
        covariance matrix for [k]th class.
    :raises: ValueError: if any class is not represented in `class_labels`.
    """

    num_examples = len(feature_table.index)

    error_checking.assert_is_integer(num_classes)
    error_checking.assert_is_geq(num_classes, 2)
    error_checking.assert_is_integer_numpy_array(class_labels)
    error_checking.assert_is_numpy_array(class_labels,
                                         exact_dimensions=numpy.array(
                                             [num_examples]))
    error_checking.assert_is_geq_numpy_array(class_labels, 0)
    error_checking.assert_is_less_than_numpy_array(class_labels, num_classes)

    list_of_mvn_dictionaries = []
    for k in range(num_classes):
        these_flags = class_labels == k
        if not numpy.any(these_flags):
            error_string = ('Class {0:d} (label {1:d}) does not exist in the '
                            'input data.').format(k + 1, k)
            raise ValueError(error_string)

        these_indices = numpy.where(these_flags)[0]
        this_dict = fit_multivariate_normal(
            feature_table.iloc[these_indices],
            assume_diagonal_covar_matrix=assume_diagonal_covar_matrix)

        this_dict.update({
            PRIOR_CLASS_PROBABILITY_KEY:
            float(len(these_indices)) / num_examples
        })
        this_dict.update(
            {ORIG_FEATURE_TABLE_KEY: feature_table.iloc[these_indices]})

        list_of_mvn_dictionaries.append(this_dict)

    return list_of_mvn_dictionaries
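
A minimal standalone sketch of the per-class fitting loop, using plain numpy and pandas in place of `fit_multivariate_normal` (whose internals are not shown above); the feature names and data are invented for illustration:

import numpy
import pandas

feature_table = pandas.DataFrame(
    numpy.random.rand(100, 3),
    columns=['u_wind_m_s01', 'v_wind_m_s01', 'temperature_kelvins'])
class_labels = numpy.random.randint(0, 2, size=100)

for k in range(2):
    these_indices = numpy.where(class_labels == k)[0]
    this_table = feature_table.iloc[these_indices]

    # Core quantities that `fit_multivariate_normal` presumably computes.
    feature_means = this_table.mean(axis=0).values
    covariance_matrix = numpy.cov(this_table.values, rowvar=False)
    prior_class_probability = float(len(these_indices)) / len(class_labels)

    print(k, prior_class_probability, numpy.linalg.det(covariance_matrix))
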
Example #2
def get_contingency_table_extremes(storm_activations, storm_target_values,
                                   num_hits, num_misses, num_false_alarms,
                                   num_correct_nulls):
    """Returns "contingency-table extremes".

    Specifically, this method returns the following:

    - best hits (positive examples with the highest activations)
    - worst misses (positive examples with the lowest activations)
    - worst false alarms (negative examples with the highest activations)
    - best correct nulls (negative examples with the lowest activations)

    DEFINITIONS

    One "example" is one storm object.
    A "negative example" is a storm object with target = 0.
    A "positive example" is a storm object with target = 1.
    The target variable must be binary.

    E = number of examples

    :param storm_activations: length-E numpy array of model activations.
    :param storm_target_values: length-E numpy array of target values.  Each
        must be 0 or 1.
    :param num_hits: Number of best hits.
    :param num_misses: Number of worst misses.
    :param num_false_alarms: Number of worst false alarms.
    :param num_correct_nulls: Number of best correct nulls.
    :return: ct_extreme_dict: Dictionary with the following keys.
    ct_extreme_dict['hit_indices']: 1-D numpy array with indices of best hits.
    ct_extreme_dict['miss_indices']: 1-D numpy array with indices of worst
        misses.
    ct_extreme_dict['false_alarm_indices']: 1-D numpy array with indices of
        worst false alarms.
    ct_extreme_dict['correct_null_indices']: 1-D numpy array with indices of
        best correct nulls.
    """

    error_checking.assert_is_numpy_array(storm_activations, num_dimensions=1)
    error_checking.assert_is_integer_numpy_array(storm_target_values)
    error_checking.assert_is_geq_numpy_array(storm_target_values, 0)
    error_checking.assert_is_leq_numpy_array(storm_target_values, 1)

    num_storm_objects = len(storm_activations)
    error_checking.assert_is_numpy_array(storm_target_values,
                                         exact_dimensions=numpy.array(
                                             [num_storm_objects]))

    error_checking.assert_is_integer(num_hits)
    error_checking.assert_is_geq(num_hits, 0)
    error_checking.assert_is_integer(num_misses)
    error_checking.assert_is_geq(num_misses, 0)
    error_checking.assert_is_integer(num_false_alarms)
    error_checking.assert_is_geq(num_false_alarms, 0)
    error_checking.assert_is_integer(num_correct_nulls)
    error_checking.assert_is_geq(num_correct_nulls, 0)
    error_checking.assert_is_greater(
        num_hits + num_misses + num_false_alarms + num_correct_nulls, 0)

    positive_indices = numpy.where(storm_target_values == 1)[0]
    num_hits = min([num_hits, len(positive_indices)])
    num_misses = min([num_misses, len(positive_indices)])

    if num_hits > 0:
        these_indices = numpy.argsort(
            storm_activations[positive_indices])[::-1]
        hit_indices = positive_indices[these_indices][:num_hits]
    else:
        hit_indices = numpy.array([], dtype=int)

    if num_misses > 0:
        these_indices = numpy.argsort(storm_activations[positive_indices])
        miss_indices = positive_indices[these_indices][:num_misses]
    else:
        miss_indices = numpy.array([], dtype=int)

    negative_indices = numpy.where(storm_target_values == 0)[0]
    num_false_alarms = min([num_false_alarms, len(negative_indices)])
    num_correct_nulls = min([num_correct_nulls, len(negative_indices)])

    if num_false_alarms > 0:
        these_indices = numpy.argsort(
            storm_activations[negative_indices])[::-1]
        false_alarm_indices = negative_indices[
            these_indices][:num_false_alarms]
    else:
        false_alarm_indices = numpy.array([], dtype=int)

    if num_correct_nulls > 0:
        these_indices = numpy.argsort(storm_activations[negative_indices])
        correct_null_indices = negative_indices[
            these_indices][:num_correct_nulls]
    else:
        correct_null_indices = numpy.array([], dtype=int)

    return {
        HIT_INDICES_KEY: hit_indices,
        MISS_INDICES_KEY: miss_indices,
        FALSE_ALARM_INDICES_KEY: false_alarm_indices,
        CORRECT_NULL_INDICES_KEY: correct_null_indices
    }
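
To see the selection logic in isolation, here is a toy run (values invented) of how "best hits" are found: positive examples sorted by descending activation.

import numpy

storm_activations = numpy.array([0.9, 0.1, 0.8, 0.2, 0.7, 0.3])
storm_target_values = numpy.array([1, 1, 0, 0, 1, 0])

positive_indices = numpy.where(storm_target_values == 1)[0]
descending_order = numpy.argsort(storm_activations[positive_indices])[::-1]
hit_indices = positive_indices[descending_order][:2]
print(hit_indices)  # [0 4]: the two positive examples with highest activation
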
Example #3
def _run(input_file_name, predictor_colour_map_name,
         min_colour_prctile_for_predictors, max_colour_prctile_for_predictors,
         saliency_colour_map_name, max_colour_prctile_for_saliency,
         saliency_contour_line_width, num_saliency_contours, output_dir_name):
    """Plots saliency maps.

    This is effectively the main method.

    :param input_file_name: See documentation at top of file.
    :param predictor_colour_map_name: Same.
    :param min_colour_prctile_for_predictors: Same.
    :param max_colour_prctile_for_predictors: Same.
    :param saliency_colour_map_name: Same.
    :param max_colour_prctile_for_saliency: Same.
    :param saliency_contour_line_width: Same.
    :param num_saliency_contours: Same.
    :param output_dir_name: Same.
    """

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    error_checking.assert_is_geq(min_colour_prctile_for_predictors, 0.)
    error_checking.assert_is_leq(max_colour_prctile_for_predictors, 100.)
    error_checking.assert_is_greater(max_colour_prctile_for_predictors,
                                     min_colour_prctile_for_predictors)

    error_checking.assert_is_geq(max_colour_prctile_for_saliency, 0.)
    error_checking.assert_is_leq(max_colour_prctile_for_saliency, 100.)

    error_checking.assert_is_geq(num_saliency_contours, 2)
    num_saliency_contours = 1 + int(
        number_rounding.floor_to_nearest(num_saliency_contours, 2))
    half_num_saliency_contours = (num_saliency_contours - 1) / 2

    predictor_colour_map_object = pyplot.cm.get_cmap(predictor_colour_map_name)
    saliency_colour_map_object = pyplot.cm.get_cmap(saliency_colour_map_name)

    print('Reading data from: "{0:s}"...'.format(input_file_name))
    predictor_matrix, saliency_matrix, saliency_metadata_dict = (
        saliency_maps.read_file(input_file_name))

    model_metafile_name = traditional_cnn.find_metafile(
        model_file_name=saliency_metadata_dict[
            saliency_maps.MODEL_FILE_NAME_KEY])

    print('Reading metadata from: "{0:s}"...'.format(model_metafile_name))
    model_metadata_dict = traditional_cnn.read_model_metadata(
        model_metafile_name)

    narr_predictor_names = model_metadata_dict[
        traditional_cnn.NARR_PREDICTOR_NAMES_KEY]
    num_predictors = len(narr_predictor_names)
    num_examples = predictor_matrix.shape[0]

    for i in range(num_examples):
        this_min_cval_by_predictor = numpy.full(num_predictors, numpy.nan)
        this_max_cval_by_predictor = this_min_cval_by_predictor + 0.  # copy, not a view

        for k in range(num_predictors):
            this_min_cval_by_predictor[k] = numpy.percentile(
                predictor_matrix[i, ..., k], min_colour_prctile_for_predictors)
            this_max_cval_by_predictor[k] = numpy.percentile(
                predictor_matrix[i, ..., k], max_colour_prctile_for_predictors)

        _, these_axes_objects = example_plotting.plot_many_predictors_sans_barbs(
            predictor_matrix=predictor_matrix[i, ...],
            predictor_names=narr_predictor_names,
            cmap_object_by_predictor=[predictor_colour_map_object] *
            num_predictors,
            min_colour_value_by_predictor=this_min_cval_by_predictor,
            max_colour_value_by_predictor=this_max_cval_by_predictor)

        this_max_abs_contour_level = numpy.percentile(
            numpy.absolute(saliency_matrix[i, ...]),
            max_colour_prctile_for_saliency)

        this_contour_interval = (this_max_abs_contour_level /
                                 half_num_saliency_contours)

        saliency_plotting.plot_many_2d_grids(
            saliency_matrix_3d=saliency_matrix[i, ...],
            axes_objects_2d_list=these_axes_objects,
            colour_map_object=saliency_colour_map_object,
            max_absolute_contour_level=this_max_abs_contour_level,
            contour_interval=this_contour_interval,
            line_width=saliency_contour_line_width)

        this_figure_file_name = '{0:s}/example{1:06d}_saliency.jpg'.format(
            output_dir_name, i)

        print('Saving figure to: "{0:s}"...'.format(this_figure_file_name))
        pyplot.savefig(this_figure_file_name, dpi=FIGURE_RESOLUTION_DPI)
        pyplot.close()
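
The contour bookkeeping can be checked standalone. A sketch (values invented) of how the contour interval follows from the percentile-based maximum and the rounded-to-odd contour count:

import numpy

saliency_values = numpy.random.randn(32, 32)
max_colour_prctile_for_saliency = 99.

# Flooring to the nearest even number and adding 1 forces an odd contour
# count, so the contours are symmetric about zero.
num_saliency_contours = 1 + 2 * (12 // 2)  # 13
half_num_saliency_contours = (num_saliency_contours - 1) // 2

max_abs_contour_level = numpy.percentile(
    numpy.absolute(saliency_values), max_colour_prctile_for_saliency)
contour_interval = max_abs_contour_level / half_num_saliency_contours
print(max_abs_contour_level, contour_interval)
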
Example #4
def create_2d_net(
        num_input_features, first_spatial_dimensions, upsampling_factors,
        num_output_channels,
        l1_weight=DEFAULT_L1_WEIGHT, l2_weight=DEFAULT_L2_WEIGHT,
        use_transposed_conv=True, activation_function_name=None,
        alpha_for_elu=DEFAULT_ALPHA_FOR_ELU,
        alpha_for_relu=DEFAULT_ALPHA_FOR_RELU,
        use_activn_for_last_layer=False,
        use_batch_norm=True, use_batch_norm_for_last_layer=True):
    """Creates (but does not train) upconvnet with 2 spatial dimensions.

    L = number of main (transposed-conv or upsampling) layers

    :param num_input_features: Length of input feature vector.
    :param first_spatial_dimensions: length-2 numpy array of dimensions in first
        main layer.  The order should be (num_rows, num_columns).  Before it is
        passed to the first main layer, the feature vector will be reshaped into
        a grid with these dimensions.
    :param upsampling_factors: length-L numpy array of upsampling factors.
    :param num_output_channels: See doc for `create_3d_net`.
    :param l1_weight: Same.
    :param l2_weight: Same.
    :param use_transposed_conv: Same.
    :param activation_function_name: Same.
    :param alpha_for_elu: Same.
    :param alpha_for_relu: Same.
    :param use_activn_for_last_layer: Same.
    :param use_batch_norm: Same.
    :param use_batch_norm_for_last_layer: Same.
    :return: model_object: Same.
    """

    # TODO(thunderhoser): This method assumes that the original CNN does
    # edge-padding.

    # Check input args.
    error_checking.assert_is_integer(num_input_features)
    error_checking.assert_is_greater(num_input_features, 0)
    error_checking.assert_is_integer(num_output_channels)
    error_checking.assert_is_greater(num_output_channels, 0)
    error_checking.assert_is_geq(l1_weight, 0.)
    error_checking.assert_is_geq(l2_weight, 0.)

    error_checking.assert_is_boolean(use_transposed_conv)
    error_checking.assert_is_boolean(use_activn_for_last_layer)
    error_checking.assert_is_boolean(use_batch_norm)
    error_checking.assert_is_boolean(use_batch_norm_for_last_layer)

    error_checking.assert_is_numpy_array(
        first_spatial_dimensions, exact_dimensions=numpy.array([2], dtype=int)
    )
    error_checking.assert_is_integer_numpy_array(first_spatial_dimensions)
    error_checking.assert_is_greater_numpy_array(first_spatial_dimensions, 0)

    error_checking.assert_is_numpy_array(upsampling_factors, num_dimensions=1)
    error_checking.assert_is_integer_numpy_array(upsampling_factors)
    error_checking.assert_is_geq_numpy_array(upsampling_factors, 1)

    # Set up CNN architecture.
    regularizer_object = keras.regularizers.l1_l2(l1=l1_weight, l2=l2_weight)
    input_layer_object = keras.layers.Input(shape=(num_input_features,))

    current_num_filters = int(numpy.round(
        num_input_features / numpy.prod(first_spatial_dimensions)
    ))
    first_dimensions = numpy.concatenate((
        first_spatial_dimensions, numpy.array([current_num_filters], dtype=int)
    ))
    layer_object = keras.layers.Reshape(
        target_shape=first_dimensions
    )(input_layer_object)

    num_main_layers = len(upsampling_factors)
    kernel_size_tuple = (CONV_FILTER_SIZE, CONV_FILTER_SIZE)

    for i in range(num_main_layers):
        if i == num_main_layers - 1:
            current_num_filters = num_output_channels + 0

            # layer_object = keras.layers.ZeroPadding2D(
            #     padding=((1, 0), (1, 0)), data_format='channels_last'
            # )(layer_object)

        elif upsampling_factors[i] == 1:
            current_num_filters = int(numpy.round(current_num_filters / 2))

        this_stride_tuple = (upsampling_factors[i], upsampling_factors[i])

        if use_transposed_conv:
            layer_object = keras.layers.Conv2DTranspose(
                filters=current_num_filters, kernel_size=kernel_size_tuple,
                strides=this_stride_tuple, padding='same',
                data_format='channels_last', dilation_rate=(1, 1),
                activation=None, use_bias=True,
                kernel_initializer='glorot_uniform', bias_initializer='zeros',
                kernel_regularizer=regularizer_object
            )(layer_object)
        else:
            if upsampling_factors[i] > 1:
                try:
                    layer_object = keras.layers.UpSampling2D(
                        size=this_stride_tuple, data_format='channels_last',
                        interpolation='bilinear'
                    )(layer_object)
                except TypeError:
                    # Older Keras versions lack the `interpolation` argument.
                    layer_object = keras.layers.UpSampling2D(
                        size=this_stride_tuple, data_format='channels_last'
                    )(layer_object)

            layer_object = keras.layers.Conv2D(
                filters=current_num_filters, kernel_size=kernel_size_tuple,
                strides=(1, 1), padding='same', data_format='channels_last',
                dilation_rate=(1, 1), activation=None, use_bias=True,
                kernel_initializer='glorot_uniform', bias_initializer='zeros',
                kernel_regularizer=regularizer_object
            )(layer_object)

        use_activation_here = (
            activation_function_name is not None and
            (i < num_main_layers - 1 or use_activn_for_last_layer)
        )

        if use_activation_here:
            layer_object = architecture_utils.get_activation_layer(
                activation_function_string=activation_function_name,
                alpha_for_elu=alpha_for_elu, alpha_for_relu=alpha_for_relu
            )(layer_object)

        use_batch_norm_here = (
            use_batch_norm and
            (i < num_main_layers - 1 or use_batch_norm_for_last_layer)
        )

        if use_batch_norm_here:
            layer_object = (
                architecture_utils.get_batch_norm_layer()(layer_object)
            )

    # Compile CNN.
    model_object = keras.models.Model(
        inputs=input_layer_object, outputs=layer_object)
    model_object.compile(
        loss=keras.losses.mean_squared_error, optimizer=keras.optimizers.Adam()
    )

    model_object.summary()
    return model_object
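
A self-contained sketch of the same reshape-then-upsample pattern with concrete (invented) dimensions, assuming a standard Keras 2.x installation; it mirrors only the transposed-convolution branch:

import numpy
import keras

num_input_features = 256
first_spatial_dimensions = numpy.array([4, 4], dtype=int)
upsampling_factors = numpy.array([2, 2, 1], dtype=int)

# 256 input features are reshaped into a 4-by-4 grid with 16 channels.
num_filters = int(num_input_features // numpy.prod(first_spatial_dimensions))
input_layer_object = keras.layers.Input(shape=(num_input_features,))
layer_object = keras.layers.Reshape(
    target_shape=(4, 4, num_filters))(input_layer_object)

for this_factor in upsampling_factors:
    layer_object = keras.layers.Conv2DTranspose(
        filters=8, kernel_size=(3, 3), strides=(this_factor, this_factor),
        padding='same')(layer_object)

model_object = keras.models.Model(
    inputs=input_layer_object, outputs=layer_object)
model_object.compile(loss='mse', optimizer='adam')
model_object.summary()  # final grid is 16 x 16 (4 upsampled by 2 * 2 * 1)
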
Example #5
def create_model(option_dict,
                 loss_function,
                 up_flux_channel_index=None,
                 down_flux_channel_index=None):
    """Creates CNN (convolutional neural net).

    This method sets up the architecture, loss function, and optimizer -- and
    compiles the model -- but does not train it.

    C = number of convolutional layers
    D = number of dense layers

    If you do not want dense layers, make `dense_layer_neuron_nums` and
    `dense_layer_dropout_rates` be None.

    :param option_dict: Dictionary with the following keys.
    option_dict['num_heights']: Number of height levels.
    option_dict['num_input_channels']: Number of input channels.
    option_dict['conv_layer_channel_nums']: length-C numpy array with number of
        channels (filters) produced by each conv layer.  The last value in the
        array, conv_layer_channel_nums[-1], is the number of output channels
        (profiles to be predicted).
    option_dict['conv_layer_dropout_rates']: length-C numpy array with dropout
        rate for each conv layer.  Use NaN if you do not want dropout for a
        particular layer.
    option_dict['conv_layer_filter_sizes']: length-C numpy array with filter
        size (number of heights) for each conv layer.
    option_dict['dense_layer_neuron_nums']: length-D numpy array with number of
        neurons (features) produced by each dense layer.  The last value in the
        array, dense_layer_neuron_nums[-1], is the number of output scalars (to
        be predicted).
    option_dict['dense_layer_dropout_rates']: length-D numpy array with dropout
        rate for each dense layer.  Use NaN if you do not want dropout for a
        particular layer.
    option_dict['inner_activ_function_name']: Name of activation function for
        all inner (non-output) layers.  Must be accepted by
        `architecture_utils.check_activation_function`.
    option_dict['inner_activ_function_alpha']: Alpha (slope parameter) for
        activation function for all inner layers.  Applies only to ReLU and eLU.
    option_dict['output_activ_function_name']: Same as
        `inner_activ_function_name` but for output layers (profiles and
        scalars).
    option_dict['output_activ_function_alpha']: Same as
        `inner_activ_function_alpha` but for output layers (profiles and
        scalars).
    option_dict['l1_weight']: Weight for L_1 regularization.
    option_dict['l2_weight']: Weight for L_2 regularization.
    option_dict['use_batch_normalization']: Boolean flag.  If True, will use
        batch normalization after each inner (non-output) layer.
    option_dict['zero_out_top_heating_rate']: Boolean flag.  If True, will
        always predict 0 K day^-1 for top heating rate.
    option_dict['heating_rate_channel_index']: Channel index for heating rate.
        Used only if `zero_out_top_heating_rate = True`.

    :param loss_function: Function handle.
    :param up_flux_channel_index:
        [used only if loss function is constrained MSE]
        Channel index for upwelling flux.
    :param down_flux_channel_index:
        [used only if loss function is constrained MSE]
        Channel index for downwelling flux.
    :return: model_object: Untrained instance of `keras.models.Model`.
    """

    option_dict = _check_architecture_args(option_dict)

    num_heights = option_dict[NUM_HEIGHTS_KEY]
    num_input_channels = option_dict[NUM_INPUT_CHANNELS_KEY]
    conv_layer_channel_nums = option_dict[CONV_LAYER_CHANNEL_NUMS_KEY]
    conv_layer_dropout_rates = option_dict[CONV_LAYER_DROPOUT_RATES_KEY]
    conv_layer_filter_sizes = option_dict[CONV_LAYER_FILTER_SIZES_KEY]
    dense_layer_neuron_nums = option_dict[DENSE_LAYER_NEURON_NUMS_KEY]
    dense_layer_dropout_rates = option_dict[DENSE_LAYER_DROPOUT_RATES_KEY]
    inner_activ_function_name = option_dict[INNER_ACTIV_FUNCTION_KEY]
    inner_activ_function_alpha = option_dict[INNER_ACTIV_FUNCTION_ALPHA_KEY]
    output_activ_function_name = option_dict[OUTPUT_ACTIV_FUNCTION_KEY]
    output_activ_function_alpha = option_dict[OUTPUT_ACTIV_FUNCTION_ALPHA_KEY]
    l1_weight = option_dict[L1_WEIGHT_KEY]
    l2_weight = option_dict[L2_WEIGHT_KEY]
    use_batch_normalization = option_dict[USE_BATCH_NORM_KEY]
    zero_out_top_heating_rate = option_dict[ZERO_OUT_TOP_HR_KEY]
    heating_rate_channel_index = option_dict[HEATING_RATE_INDEX_KEY]

    any_dense_layers = dense_layer_neuron_nums is not None
    is_loss_constrained_mse = (
        neural_net.determine_if_loss_constrained_mse(loss_function))

    if not any_dense_layers:
        assert not is_loss_constrained_mse

    if is_loss_constrained_mse:
        error_checking.assert_is_integer(up_flux_channel_index)
        error_checking.assert_is_geq(up_flux_channel_index, 0)
        error_checking.assert_is_less_than(up_flux_channel_index,
                                           conv_layer_channel_nums[-1])

        error_checking.assert_is_integer(down_flux_channel_index)
        error_checking.assert_is_geq(down_flux_channel_index, 0)
        error_checking.assert_is_less_than(down_flux_channel_index,
                                           conv_layer_channel_nums[-1])

    input_layer_object = keras.layers.Input(shape=(num_heights,
                                                   num_input_channels))
    regularizer_object = architecture_utils.get_weight_regularizer(
        l1_weight=l1_weight, l2_weight=l2_weight)

    num_conv_layers = len(conv_layer_channel_nums)
    conv_output_layer_object = None
    dense_input_layer_object = None

    for i in range(num_conv_layers):
        if conv_output_layer_object is None:
            this_input_layer_object = input_layer_object
        else:
            this_input_layer_object = conv_output_layer_object

        if i == num_conv_layers - 1:
            dense_input_layer_object = conv_output_layer_object

        conv_output_layer_object = architecture_utils.get_1d_conv_layer(
            num_kernel_rows=conv_layer_filter_sizes[i],
            num_rows_per_stride=1,
            num_filters=conv_layer_channel_nums[i],
            padding_type_string=architecture_utils.YES_PADDING_STRING,
            weight_regularizer=regularizer_object)(this_input_layer_object)

        if i == num_conv_layers - 1:
            conv_output_layer_object = architecture_utils.get_activation_layer(
                activation_function_string=output_activ_function_name,
                alpha_for_relu=output_activ_function_alpha,
                alpha_for_elu=output_activ_function_alpha,
                layer_name='conv_output')(conv_output_layer_object)
        else:
            conv_output_layer_object = architecture_utils.get_activation_layer(
                activation_function_string=inner_activ_function_name,
                alpha_for_relu=inner_activ_function_alpha,
                alpha_for_elu=inner_activ_function_alpha)(
                    conv_output_layer_object)

        if conv_layer_dropout_rates[i] > 0:
            conv_output_layer_object = architecture_utils.get_dropout_layer(
                dropout_fraction=conv_layer_dropout_rates[i])(
                    conv_output_layer_object)

        if use_batch_normalization and i != num_conv_layers - 1:
            conv_output_layer_object = (
                architecture_utils.get_batch_norm_layer()(
                    conv_output_layer_object))

    if any_dense_layers:
        num_dense_layers = len(dense_layer_neuron_nums)
        dense_output_layer_object = architecture_utils.get_flattening_layer()(
            dense_input_layer_object)

        for i in range(num_dense_layers):
            dense_output_layer_object = architecture_utils.get_dense_layer(
                num_output_units=dense_layer_neuron_nums[i])(
                    dense_output_layer_object)

            if i == num_dense_layers - 1:
                dense_output_layer_object = (
                    architecture_utils.get_activation_layer(
                        activation_function_string=output_activ_function_name,
                        alpha_for_relu=output_activ_function_alpha,
                        alpha_for_elu=output_activ_function_alpha,
                        layer_name=None if is_loss_constrained_mse else
                        'dense_output')(dense_output_layer_object))
            else:
                dense_output_layer_object = (
                    architecture_utils.get_activation_layer(
                        activation_function_string=inner_activ_function_name,
                        alpha_for_relu=inner_activ_function_alpha,
                        alpha_for_elu=inner_activ_function_alpha)(
                            dense_output_layer_object))

            if dense_layer_dropout_rates[i] > 0:
                dense_output_layer_object = (
                    architecture_utils.get_dropout_layer(
                        dropout_fraction=dense_layer_dropout_rates[i])(
                            dense_output_layer_object))

            if use_batch_normalization and i != num_dense_layers - 1:
                dense_output_layer_object = (
                    architecture_utils.get_batch_norm_layer()(
                        dense_output_layer_object))
    else:
        dense_output_layer_object = None

    if is_loss_constrained_mse:
        k = up_flux_channel_index + 0

        highest_up_flux_layer_object = keras.layers.Lambda(
            lambda x: x[:, -1, k:(k + 1)])(conv_output_layer_object)

        k = down_flux_channel_index + 0

        # Height index 0 is the lowest level.
        lowest_down_flux_layer_object = keras.layers.Lambda(
            lambda x: x[:, 0, k:(k + 1)])(conv_output_layer_object)

        this_list = [
            highest_up_flux_layer_object, lowest_down_flux_layer_object,
            dense_output_layer_object
        ]

        dense_output_layer_object = keras.layers.Concatenate(
            axis=-1, name='dense_output')(this_list)

    if zero_out_top_heating_rate:
        this_function = _zero_top_heating_rate_function(
            heating_rate_channel_index)
        conv_output_layer_object = keras.layers.Lambda(this_function)(
            conv_output_layer_object)

    if any_dense_layers:
        model_object = keras.models.Model(
            inputs=input_layer_object,
            outputs=[conv_output_layer_object, dense_output_layer_object])
    else:
        model_object = keras.models.Model(inputs=input_layer_object,
                                          outputs=conv_output_layer_object)

    if is_loss_constrained_mse:
        loss_dict = {
            'conv_output': keras.losses.mse,
            'dense_output': loss_function
        }

        model_object.compile(loss=loss_dict,
                             optimizer=keras.optimizers.Adam(),
                             metrics=neural_net.METRIC_FUNCTION_LIST)
    else:
        model_object.compile(loss=loss_function,
                             optimizer=keras.optimizers.Adam(),
                             metrics=neural_net.METRIC_FUNCTION_LIST)

    model_object.summary()
    return model_object
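
The Lambda slicing used for the constrained-MSE case reduces to a few lines. A sketch with invented shapes (73 height levels, 4 channels), assuming standard Keras:

import keras

up_flux_channel_index = 1
down_flux_channel_index = 2

conv_output_layer_object = keras.layers.Input(shape=(73, 4))

# Upwelling flux at the top height level (index -1)...
k = up_flux_channel_index
highest_up_flux_layer_object = keras.layers.Lambda(
    lambda x: x[:, -1, k:(k + 1)])(conv_output_layer_object)

# ...and downwelling flux at the bottom height level (index 0).
j = down_flux_channel_index
lowest_down_flux_layer_object = keras.layers.Lambda(
    lambda x: x[:, 0, j:(j + 1)])(conv_output_layer_object)

merged_layer_object = keras.layers.Concatenate(axis=-1)(
    [highest_up_flux_layer_object, lowest_down_flux_layer_object])
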
Example #6
def _get_error_matrix(cost_matrix, is_cost_auc, confidence_level,
                      backwards_flag, multipass_flag):
    """Creates error matrix (used to plot error bars).

    S = number of steps in permutation test
    B = number of bootstrap replicates

    :param cost_matrix: S-by-B numpy array of costs.
    :param is_cost_auc: Boolean flag.  If True, cost function is AUC (area under
        receiver-operating-characteristic curve).
    :param confidence_level: Confidence level (in range 0...1).
    :param backwards_flag: Boolean flag, indicating whether the test is forward
        or backwards.
    :param multipass_flag: Boolean flag, indicating whether the test is
        single-pass or multi-pass.
    :return: error_matrix: 2-by-S numpy array, where the first row contains
        negative errors and second row contains positive errors.
    :return: significant_flags: length-S numpy array of Boolean flags.  If
        significant_flags[i] = True, the [i]th step has a significantly
        different cost than the [i + 1]th step.
    """

    error_checking.assert_is_geq(confidence_level, 0.9)
    error_checking.assert_is_less_than(confidence_level, 1.)

    num_steps = cost_matrix.shape[0]
    significant_flags = numpy.full(num_steps, False, dtype=bool)

    for i in range(num_steps - 1):
        if backwards_flag:
            these_diffs = cost_matrix[i + 1, :] - cost_matrix[i, :]
        else:
            these_diffs = cost_matrix[i, :] - cost_matrix[i + 1, :]

        if not is_cost_auc:
            these_diffs *= -1

        this_percentile = percentileofscore(a=these_diffs,
                                            score=0.,
                                            kind='mean')
        this_flag = this_percentile <= 100 * (1. - confidence_level)

        if multipass_flag:
            significant_flags[i] = this_flag
        else:
            significant_flags[i + 1] = this_flag

    mean_costs = numpy.mean(cost_matrix, axis=-1)
    min_costs = numpy.percentile(cost_matrix,
                                 50 * (1. - confidence_level),
                                 axis=-1)
    max_costs = numpy.percentile(cost_matrix,
                                 50 * (1. + confidence_level),
                                 axis=-1)

    negative_errors = mean_costs - min_costs
    positive_errors = max_costs - mean_costs

    negative_errors = numpy.reshape(negative_errors, (1, negative_errors.size))
    positive_errors = numpy.reshape(positive_errors, (1, positive_errors.size))
    error_matrix = numpy.vstack((negative_errors, positive_errors))

    return error_matrix, significant_flags
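
Stripped of the significance testing, the error-bar computation is two percentile calls. A standalone sketch with random (invented) costs:

import numpy

cost_matrix = numpy.random.rand(5, 1000)  # S = 5 steps, B = 1000 replicates
confidence_level = 0.95

mean_costs = numpy.mean(cost_matrix, axis=-1)
min_costs = numpy.percentile(
    cost_matrix, 50 * (1. - confidence_level), axis=-1)
max_costs = numpy.percentile(
    cost_matrix, 50 * (1. + confidence_level), axis=-1)

# Row 0 = negative errors, row 1 = positive errors, the layout expected by
# `pyplot.errorbar(..., yerr=error_matrix)`.
error_matrix = numpy.vstack((mean_costs - min_costs, max_costs - mean_costs))
print(error_matrix.shape)  # (2, 5)
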
Example #7
def _run(main_activation_file_name, first_example_index, last_example_index,
         aux_activation_file_name, tornado_dir_name, top_tracking_dir_name,
         top_myrorss_dir_name, radar_field_name, radar_height_m_asl,
         latitude_buffer_deg, longitude_buffer_deg, top_output_dir_name):
    """Plots examples (storm objects) with surrounding context.

    This is effectively the main method.

    :param main_activation_file_name: See documentation at top of file.
    :param first_example_index: Same.
    :param last_example_index: Same.
    :param aux_activation_file_name: Same.
    :param tornado_dir_name: Same.
    :param top_tracking_dir_name: Same.
    :param top_myrorss_dir_name: Same.
    :param radar_field_name: Same.
    :param radar_height_m_asl: Same.
    :param latitude_buffer_deg: Same.
    :param longitude_buffer_deg: Same.
    :param top_output_dir_name: Same.
    :raises: ValueError: if activation file contains activations of some
        intermediate model component, rather than final predictions.
    :raises: ValueError: if target variable is not related to tornadogenesis.
    """

    if aux_activation_file_name in ['', 'None']:
        aux_activation_file_name = None

    if first_example_index == -1 or last_example_index == -1:
        first_example_index = None
        last_example_index = None

    print('Reading data from: "{0:s}"...'.format(main_activation_file_name))
    activation_matrix, activation_dict = model_activation.read_file(
        main_activation_file_name
    )

    component_type_string = activation_dict[model_activation.COMPONENT_TYPE_KEY]

    if (component_type_string !=
            model_interpretation.CLASS_COMPONENT_TYPE_STRING):
        error_string = (
            'Activation file should contain final predictions (component type '
            '"{0:s}").  Instead, component type is "{1:s}".'
        ).format(
            model_interpretation.CLASS_COMPONENT_TYPE_STRING,
            component_type_string
        )

        raise ValueError(error_string)

    forecast_probabilities = numpy.squeeze(activation_matrix)
    full_storm_id_strings = activation_dict[model_activation.FULL_IDS_KEY]
    storm_times_unix_sec = activation_dict[model_activation.STORM_TIMES_KEY]

    if first_example_index is not None:
        error_checking.assert_is_geq(last_example_index, first_example_index)

        example_indices = numpy.linspace(
            first_example_index, last_example_index,
            num=last_example_index - first_example_index + 1, dtype=int
        )

        forecast_probabilities = forecast_probabilities[example_indices]
        full_storm_id_strings = [
            full_storm_id_strings[k] for k in example_indices
        ]
        storm_times_unix_sec = storm_times_unix_sec[example_indices]

    num_storm_objects = len(forecast_probabilities)

    model_file_name = activation_dict[model_activation.MODEL_FILE_NAME_KEY]
    model_metafile_name = '{0:s}/model_metadata.p'.format(
        os.path.split(model_file_name)[0]
    )

    print('Reading model metadata from: "{0:s}"...'.format(model_metafile_name))
    model_metadata_dict = cnn.read_model_metadata(model_metafile_name)

    training_option_dict = model_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY]
    target_name = training_option_dict[trainval_io.TARGET_NAME_KEY]
    target_param_dict = target_val_utils.target_name_to_params(target_name)
    event_type_string = target_param_dict[target_val_utils.EVENT_TYPE_KEY]

    if event_type_string != linkage.TORNADO_EVENT_STRING:
        error_string = (
            'Target variable should be related to tornadogenesis.  Instead, got'
            ' "{0:s}".'
        ).format(target_name)

        raise ValueError(error_string)

    if aux_activation_file_name is None:
        aux_forecast_probabilities = None
        aux_activation_dict = None
    else:
        print('Reading data from: "{0:s}"...'.format(aux_activation_file_name))
        this_matrix, aux_activation_dict = model_activation.read_file(
            aux_activation_file_name
        )

        aux_forecast_probabilities = numpy.squeeze(this_matrix)

    print(SEPARATOR_STRING)

    for i in range(num_storm_objects):
        _plot_one_example(
            full_id_string=full_storm_id_strings[i],
            storm_time_unix_sec=storm_times_unix_sec[i],
            target_name=target_name,
            forecast_probability=forecast_probabilities[i],
            tornado_dir_name=tornado_dir_name,
            top_tracking_dir_name=top_tracking_dir_name,
            top_myrorss_dir_name=top_myrorss_dir_name,
            radar_field_name=radar_field_name,
            radar_height_m_asl=radar_height_m_asl,
            latitude_buffer_deg=latitude_buffer_deg,
            longitude_buffer_deg=longitude_buffer_deg,
            top_output_dir_name=top_output_dir_name,
            aux_forecast_probabilities=aux_forecast_probabilities,
            aux_activation_dict=aux_activation_dict
        )

        if i != num_storm_objects - 1:
            print(SEPARATOR_STRING)
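
The index-subsetting step is easy to verify in isolation (bounds invented); `numpy.arange` would work equally well here:

import numpy

first_example_index, last_example_index = 10, 14
example_indices = numpy.linspace(
    first_example_index, last_example_index,
    num=last_example_index - first_example_index + 1, dtype=int)
print(example_indices)  # [10 11 12 13 14]
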
Example #8
def quick_train_3d(model_object,
                   output_file_name,
                   num_examples_per_batch,
                   num_epochs,
                   num_training_batches_per_epoch,
                   training_start_time_unix_sec,
                   training_end_time_unix_sec,
                   top_training_dir_name,
                   narr_predictor_names,
                   num_classes,
                   num_rows_in_half_grid,
                   num_columns_in_half_grid,
                   num_validation_batches_per_epoch=None,
                   validation_start_time_unix_sec=None,
                   validation_end_time_unix_sec=None,
                   top_validation_dir_name=None):
    """Trains CNN with 3-D examples stored in processed files.

    These "processed files" are created by
    `training_validation_io.write_downsized_3d_examples`.

    :param model_object: See doc for `train_with_3d_examples`.
    :param output_file_name: Same.
    :param num_examples_per_batch: Same.
    :param num_epochs: Same.
    :param num_training_batches_per_epoch: Same.
    :param training_start_time_unix_sec: See doc for
        `training_validation_io.quick_downsized_3d_example_gen`.
    :param training_end_time_unix_sec: Same.
    :param top_training_dir_name: Same.
    :param narr_predictor_names: Same.
    :param num_classes: Same.
    :param num_rows_in_half_grid: Same.
    :param num_columns_in_half_grid: Same.
    :param num_validation_batches_per_epoch: Same.
    :param validation_start_time_unix_sec: See doc for
        `training_validation_io.quick_downsized_3d_example_gen`.
    :param validation_end_time_unix_sec: Same.
    :param top_validation_dir_name: Same.
    """

    error_checking.assert_is_integer(num_epochs)
    error_checking.assert_is_geq(num_epochs, 1)
    error_checking.assert_is_integer(num_training_batches_per_epoch)
    error_checking.assert_is_geq(num_training_batches_per_epoch, 1)
    file_system_utils.mkdir_recursive_if_necessary(file_name=output_file_name)

    if num_validation_batches_per_epoch is None:
        checkpoint_object = ModelCheckpoint(output_file_name,
                                            monitor='loss',
                                            verbose=1,
                                            save_best_only=False,
                                            save_weights_only=False,
                                            mode='min',
                                            period=1)

        model_object.fit_generator(
            generator=trainval_io.quick_downsized_3d_example_gen(
                num_examples_per_batch=num_examples_per_batch,
                first_target_time_unix_sec=training_start_time_unix_sec,
                last_target_time_unix_sec=training_end_time_unix_sec,
                top_input_dir_name=top_training_dir_name,
                narr_predictor_names=narr_predictor_names,
                num_classes=num_classes,
                num_rows_in_half_grid=num_rows_in_half_grid,
                num_columns_in_half_grid=num_columns_in_half_grid),
            steps_per_epoch=num_training_batches_per_epoch,
            epochs=num_epochs,
            verbose=1,
            class_weight=None,
            callbacks=[checkpoint_object])

    else:
        error_checking.assert_is_integer(num_validation_batches_per_epoch)
        error_checking.assert_is_geq(num_validation_batches_per_epoch, 1)

        checkpoint_object = ModelCheckpoint(output_file_name,
                                            monitor='val_loss',
                                            verbose=1,
                                            save_best_only=True,
                                            save_weights_only=False,
                                            mode='min',
                                            period=1)

        model_object.fit_generator(
            generator=trainval_io.quick_downsized_3d_example_gen(
                num_examples_per_batch=num_examples_per_batch,
                first_target_time_unix_sec=training_start_time_unix_sec,
                last_target_time_unix_sec=training_end_time_unix_sec,
                top_input_dir_name=top_training_dir_name,
                narr_predictor_names=narr_predictor_names,
                num_classes=num_classes,
                num_rows_in_half_grid=num_rows_in_half_grid,
                num_columns_in_half_grid=num_columns_in_half_grid),
            steps_per_epoch=num_training_batches_per_epoch,
            epochs=num_epochs,
            verbose=1,
            class_weight=None,
            callbacks=[checkpoint_object],
            validation_data=trainval_io.quick_downsized_3d_example_gen(
                num_examples_per_batch=num_examples_per_batch,
                first_target_time_unix_sec=validation_start_time_unix_sec,
                last_target_time_unix_sec=validation_end_time_unix_sec,
                top_input_dir_name=top_validation_dir_name,
                narr_predictor_names=narr_predictor_names,
                num_classes=num_classes,
                num_rows_in_half_grid=num_rows_in_half_grid,
                num_columns_in_half_grid=num_columns_in_half_grid),
            validation_steps=num_validation_batches_per_epoch)
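
A minimal sketch of the same checkpointing setup, with a toy generator standing in for `trainval_io.quick_downsized_3d_example_gen` and a hypothetical compiled model `model_object` (not built here); written against the Keras 2.x API used above:

import numpy
from keras.callbacks import ModelCheckpoint

def toy_example_generator(num_examples_per_batch=32):
    """Yields random (predictor, target) batches forever."""
    while True:
        predictor_matrix = numpy.random.rand(
            num_examples_per_batch, 33, 33, 4).astype('float32')
        target_matrix = numpy.random.randint(
            0, 2, size=(num_examples_per_batch, 3))
        yield predictor_matrix, target_matrix

checkpoint_object = ModelCheckpoint(
    'model.h5', monitor='loss', verbose=1, save_best_only=False,
    save_weights_only=False, mode='min', period=1)

# model_object.fit_generator(
#     generator=toy_example_generator(), steps_per_epoch=32, epochs=5,
#     verbose=1, callbacks=[checkpoint_object])
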
Example #9
def train_with_4d_examples(model_object,
                           output_file_name,
                           num_examples_per_batch,
                           num_epochs,
                           num_training_batches_per_epoch,
                           num_examples_per_target_time,
                           predictor_time_step_offsets,
                           num_lead_time_steps,
                           training_start_time_unix_sec,
                           training_end_time_unix_sec,
                           top_narr_directory_name,
                           top_frontal_grid_dir_name,
                           narr_predictor_names,
                           pressure_level_mb,
                           dilation_distance_metres,
                           class_fractions,
                           num_rows_in_half_grid,
                           num_columns_in_half_grid,
                           weight_loss_function=True,
                           num_validation_batches_per_epoch=None,
                           validation_start_time_unix_sec=None,
                           validation_end_time_unix_sec=None,
                           narr_mask_matrix=None):
    """Trains CNN, using 4-D examples created on the fly.

    :param model_object: See doc for `train_with_3d_examples`.
    :param output_file_name: Same.
    :param num_examples_per_batch: Same.
    :param num_epochs: Same.
    :param num_training_batches_per_epoch: Same.
    :param num_examples_per_target_time: Same.
    :param predictor_time_step_offsets: length-T numpy array of offsets between
        predictor times and (target time - lead time).
    :param num_lead_time_steps: Number of time steps separating latest predictor
        time from target time.
    :param training_start_time_unix_sec: See doc for `train_with_3d_examples`.
    :param training_end_time_unix_sec: Same.
    :param top_narr_directory_name: Same.
    :param top_frontal_grid_dir_name: Same.
    :param narr_predictor_names: Same.
    :param pressure_level_mb: Same.
    :param dilation_distance_metres: Same.
    :param class_fractions: Same.
    :param num_rows_in_half_grid: Same.
    :param num_columns_in_half_grid: Same.
    :param weight_loss_function: Boolean flag.  If True, classes will be
        weighted differently in the loss function (class weights inversely
        proportional to `class_fractions`).
    :param num_validation_batches_per_epoch: See doc for
        `train_with_3d_examples`.
    :param validation_start_time_unix_sec: Same.
    :param validation_end_time_unix_sec: Same.
    :param narr_mask_matrix: Same.
    """

    error_checking.assert_is_integer(num_epochs)
    error_checking.assert_is_geq(num_epochs, 1)
    error_checking.assert_is_integer(num_training_batches_per_epoch)
    error_checking.assert_is_geq(num_training_batches_per_epoch, 1)
    error_checking.assert_is_boolean(weight_loss_function)
    file_system_utils.mkdir_recursive_if_necessary(file_name=output_file_name)

    if weight_loss_function:
        class_weight_dict = ml_utils.get_class_weight_dict(class_fractions)
    else:
        class_weight_dict = None

    if num_validation_batches_per_epoch is None:
        checkpoint_object = ModelCheckpoint(output_file_name,
                                            monitor='loss',
                                            verbose=1,
                                            save_best_only=False,
                                            save_weights_only=False,
                                            mode='min',
                                            period=1)

        model_object.fit_generator(
            generator=trainval_io.downsized_4d_example_generator(
                num_examples_per_batch=num_examples_per_batch,
                num_examples_per_target_time=num_examples_per_target_time,
                first_target_time_unix_sec=training_start_time_unix_sec,
                last_target_time_unix_sec=training_end_time_unix_sec,
                predictor_time_step_offsets=predictor_time_step_offsets,
                num_lead_time_steps=num_lead_time_steps,
                top_narr_directory_name=top_narr_directory_name,
                top_frontal_grid_dir_name=top_frontal_grid_dir_name,
                narr_predictor_names=narr_predictor_names,
                pressure_level_mb=pressure_level_mb,
                dilation_distance_metres=dilation_distance_metres,
                class_fractions=class_fractions,
                num_rows_in_half_grid=num_rows_in_half_grid,
                num_columns_in_half_grid=num_columns_in_half_grid,
                narr_mask_matrix=narr_mask_matrix),
            steps_per_epoch=num_training_batches_per_epoch,
            epochs=num_epochs,
            verbose=1,
            class_weight=class_weight_dict,
            callbacks=[checkpoint_object])

    else:
        error_checking.assert_is_integer(num_validation_batches_per_epoch)
        error_checking.assert_is_geq(num_validation_batches_per_epoch, 1)

        checkpoint_object = ModelCheckpoint(output_file_name,
                                            monitor='val_loss',
                                            verbose=1,
                                            save_best_only=True,
                                            save_weights_only=False,
                                            mode='min',
                                            period=1)

        model_object.fit_generator(
            generator=trainval_io.downsized_4d_example_generator(
                num_examples_per_batch=num_examples_per_batch,
                num_examples_per_target_time=num_examples_per_target_time,
                first_target_time_unix_sec=training_start_time_unix_sec,
                last_target_time_unix_sec=training_end_time_unix_sec,
                predictor_time_step_offsets=predictor_time_step_offsets,
                num_lead_time_steps=num_lead_time_steps,
                top_narr_directory_name=top_narr_directory_name,
                top_frontal_grid_dir_name=top_frontal_grid_dir_name,
                narr_predictor_names=narr_predictor_names,
                pressure_level_mb=pressure_level_mb,
                dilation_distance_metres=dilation_distance_metres,
                class_fractions=class_fractions,
                num_rows_in_half_grid=num_rows_in_half_grid,
                num_columns_in_half_grid=num_columns_in_half_grid,
                narr_mask_matrix=narr_mask_matrix),
            steps_per_epoch=num_training_batches_per_epoch,
            epochs=num_epochs,
            verbose=1,
            class_weight=class_weight_dict,
            callbacks=[checkpoint_object],
            validation_data=trainval_io.downsized_4d_example_generator(
                num_examples_per_batch=num_examples_per_batch,
                num_examples_per_target_time=num_examples_per_target_time,
                first_target_time_unix_sec=validation_start_time_unix_sec,
                last_target_time_unix_sec=validation_end_time_unix_sec,
                predictor_time_step_offsets=predictor_time_step_offsets,
                num_lead_time_steps=num_lead_time_steps,
                top_narr_directory_name=top_narr_directory_name,
                top_frontal_grid_dir_name=top_frontal_grid_dir_name,
                narr_predictor_names=narr_predictor_names,
                pressure_level_mb=pressure_level_mb,
                dilation_distance_metres=dilation_distance_metres,
                class_fractions=class_fractions,
                num_rows_in_half_grid=num_rows_in_half_grid,
                num_columns_in_half_grid=num_columns_in_half_grid,
                narr_mask_matrix=narr_mask_matrix),
            validation_steps=num_validation_batches_per_epoch)
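
The class-weighting step presumably reduces to weights inversely proportional to class frequency. A sketch of one plausible implementation of `ml_utils.get_class_weight_dict` (its real internals are not shown above):

import numpy

class_fractions = numpy.array([0.9, 0.05, 0.05])

inverse_fractions = 1. / class_fractions
class_weights = inverse_fractions / numpy.sum(inverse_fractions)
class_weight_dict = dict(enumerate(class_weights))
print(class_weight_dict)  # rare classes get the largest weights
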
Example #10
def plot_many_soundings(list_of_metpy_dictionaries,
                        title_strings,
                        num_panel_rows,
                        output_file_name,
                        temp_directory_name=None,
                        option_dict=None):
    """Creates paneled figure with many soundings.

    N = number of soundings to plot

    :param list_of_metpy_dictionaries: length-N list of dictionaries.  Each
        dictionary must satisfy the input format for `sounding_dict_for_metpy`
        in `plot_sounding`.
    :param title_strings: length-N list of titles.
    :param num_panel_rows: Number of rows in paneled figure.
    :param output_file_name: Path to output (image) file.
    :param temp_directory_name: Name of temporary directory.  Each panel will be
        stored here, then deleted after the panels have been concatenated into
        the final image.  If `temp_directory_name is None`, will use the default
        temp directory on the local machine.
    :param option_dict: See doc for `plot_sounding`.
    """

    error_checking.assert_is_numpy_array(numpy.array(title_strings),
                                         num_dimensions=1)
    num_soundings = len(title_strings)

    error_checking.assert_is_list(list_of_metpy_dictionaries)
    error_checking.assert_is_geq(len(list_of_metpy_dictionaries),
                                 num_soundings)
    error_checking.assert_is_leq(len(list_of_metpy_dictionaries),
                                 num_soundings)

    error_checking.assert_is_integer(num_panel_rows)
    error_checking.assert_is_geq(num_panel_rows, 1)
    error_checking.assert_is_leq(num_panel_rows, num_soundings)

    file_system_utils.mkdir_recursive_if_necessary(file_name=output_file_name)
    if temp_directory_name is not None:
        file_system_utils.mkdir_recursive_if_necessary(
            directory_name=temp_directory_name)

    temp_file_names = [None] * num_soundings
    num_panel_columns = int(numpy.ceil(float(num_soundings) / num_panel_rows))

    for i in range(num_panel_rows):
        for j in range(num_panel_columns):
            this_sounding_index = i * num_panel_columns + j
            if this_sounding_index >= num_soundings:
                break

            plot_sounding(sounding_dict_for_metpy=list_of_metpy_dictionaries[
                this_sounding_index],
                          title_string=title_strings[this_sounding_index],
                          option_dict=option_dict)

            this_temp_file_object = tempfile.NamedTemporaryFile(
                dir=temp_directory_name, suffix='.jpg', delete=False)
            this_temp_file_object.close()
            temp_file_names[this_sounding_index] = this_temp_file_object.name

            print('Saving sounding to: "{0:s}"...'.format(
                temp_file_names[this_sounding_index]))

            pyplot.savefig(temp_file_names[this_sounding_index],
                           dpi=DOTS_PER_INCH)
            pyplot.close()

            imagemagick_utils.trim_whitespace(
                input_file_name=temp_file_names[this_sounding_index],
                output_file_name=temp_file_names[this_sounding_index],
                border_width_pixels=SINGLE_IMAGE_BORDER_WIDTH_PX)

            imagemagick_utils.resize_image(
                input_file_name=temp_file_names[this_sounding_index],
                output_file_name=temp_file_names[this_sounding_index],
                output_size_pixels=SINGLE_IMAGE_SIZE_PX)

    print('Concatenating panels into one figure: "{0:s}"...'.format(
        output_file_name))

    imagemagick_utils.concatenate_images(
        input_file_names=temp_file_names,
        output_file_name=output_file_name,
        num_panel_rows=num_panel_rows,
        num_panel_columns=num_panel_columns,
        border_width_pixels=PANELED_IMAGE_BORDER_WIDTH_PX)

    for i in range(num_soundings):
        os.remove(temp_file_names[i])
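
The row-major panel indexing can be sanity-checked on its own (counts invented):

import numpy

num_soundings = 7
num_panel_rows = 2
num_panel_columns = int(numpy.ceil(float(num_soundings) / num_panel_rows))

for i in range(num_panel_rows):
    for j in range(num_panel_columns):
        this_sounding_index = i * num_panel_columns + j
        if this_sounding_index >= num_soundings:
            break
        print(i, j, this_sounding_index)  # panel (i, j) gets this sounding
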
Example #11
def train_with_3d_examples(model_object,
                           output_file_name,
                           num_examples_per_batch,
                           num_epochs,
                           num_training_batches_per_epoch,
                           num_examples_per_target_time,
                           training_start_time_unix_sec,
                           training_end_time_unix_sec,
                           top_narr_directory_name,
                           top_frontal_grid_dir_name,
                           narr_predictor_names,
                           pressure_level_mb,
                           dilation_distance_metres,
                           class_fractions,
                           num_rows_in_half_grid,
                           num_columns_in_half_grid,
                           weight_loss_function=True,
                           num_validation_batches_per_epoch=None,
                           validation_start_time_unix_sec=None,
                           validation_end_time_unix_sec=None,
                           narr_mask_matrix=None):
    """Trains CNN, using 3-D examples created on the fly.

    :param model_object: Instance of `keras.models.Sequential`.
    :param output_file_name: Path to output file (HDF5 format).  The model will
        be saved here after every epoch.
    :param num_examples_per_batch: Number of examples per batch.  This argument
        is known as "batch_size" in Keras.
    :param num_epochs: Number of epochs.
    :param num_training_batches_per_epoch: Number of training batches per epoch.
    :param num_examples_per_target_time: See doc for
        `training_validation_io.downsized_3d_example_generator`.
    :param training_start_time_unix_sec: Same.
    :param training_end_time_unix_sec: Same.
    :param top_narr_directory_name: Same.
    :param top_frontal_grid_dir_name: Same.
    :param narr_predictor_names: Same.
    :param pressure_level_mb: Same.
    :param dilation_distance_metres: Same.
    :param class_fractions: Same.
    :param num_rows_in_half_grid: Same.
    :param num_columns_in_half_grid: Same.
    :param weight_loss_function: Boolean flag.  If True, classes will be
        weighted differently in the loss function (class weights inversely
        proportional to `class_fractions`).
    :param num_validation_batches_per_epoch: Number of validation batches per
        epoch.
    :param validation_start_time_unix_sec: See doc for
        `training_validation_io.downsized_3d_example_generator`.
    :param validation_end_time_unix_sec: Same.
    :param narr_mask_matrix: Same.
    """

    error_checking.assert_is_integer(num_epochs)
    error_checking.assert_is_geq(num_epochs, 1)
    error_checking.assert_is_integer(num_training_batches_per_epoch)
    error_checking.assert_is_geq(num_training_batches_per_epoch, 1)
    error_checking.assert_is_boolean(weight_loss_function)
    file_system_utils.mkdir_recursive_if_necessary(file_name=output_file_name)

    if weight_loss_function:
        class_weight_dict = ml_utils.get_class_weight_dict(class_fractions)
    else:
        class_weight_dict = None

    if num_validation_batches_per_epoch is None:
        checkpoint_object = ModelCheckpoint(output_file_name,
                                            monitor='loss',
                                            verbose=1,
                                            save_best_only=False,
                                            save_weights_only=False,
                                            mode='min',
                                            period=1)

        model_object.fit_generator(
            generator=trainval_io.downsized_3d_example_generator(
                num_examples_per_batch=num_examples_per_batch,
                num_examples_per_target_time=num_examples_per_target_time,
                first_target_time_unix_sec=training_start_time_unix_sec,
                last_target_time_unix_sec=training_end_time_unix_sec,
                top_narr_directory_name=top_narr_directory_name,
                top_frontal_grid_dir_name=top_frontal_grid_dir_name,
                narr_predictor_names=narr_predictor_names,
                pressure_level_mb=pressure_level_mb,
                dilation_distance_metres=dilation_distance_metres,
                class_fractions=class_fractions,
                num_rows_in_half_grid=num_rows_in_half_grid,
                num_columns_in_half_grid=num_columns_in_half_grid,
                narr_mask_matrix=narr_mask_matrix),
            steps_per_epoch=num_training_batches_per_epoch,
            epochs=num_epochs,
            verbose=1,
            class_weight=class_weight_dict,
            callbacks=[checkpoint_object])

    else:
        error_checking.assert_is_integer(num_validation_batches_per_epoch)
        error_checking.assert_is_geq(num_validation_batches_per_epoch, 1)

        checkpoint_object = ModelCheckpoint(output_file_name,
                                            monitor='val_loss',
                                            verbose=1,
                                            save_best_only=True,
                                            save_weights_only=False,
                                            mode='min',
                                            period=1)

        model_object.fit_generator(
            generator=trainval_io.downsized_3d_example_generator(
                num_examples_per_batch=num_examples_per_batch,
                num_examples_per_target_time=num_examples_per_target_time,
                first_target_time_unix_sec=training_start_time_unix_sec,
                last_target_time_unix_sec=training_end_time_unix_sec,
                top_narr_directory_name=top_narr_directory_name,
                top_frontal_grid_dir_name=top_frontal_grid_dir_name,
                narr_predictor_names=narr_predictor_names,
                pressure_level_mb=pressure_level_mb,
                dilation_distance_metres=dilation_distance_metres,
                class_fractions=class_fractions,
                num_rows_in_half_grid=num_rows_in_half_grid,
                num_columns_in_half_grid=num_columns_in_half_grid,
                narr_mask_matrix=narr_mask_matrix),
            steps_per_epoch=num_training_batches_per_epoch,
            epochs=num_epochs,
            verbose=1,
            class_weight=class_weight_dict,
            callbacks=[checkpoint_object],
            validation_data=trainval_io.downsized_3d_example_generator(
                num_examples_per_batch=num_examples_per_batch,
                num_examples_per_target_time=num_examples_per_target_time,
                first_target_time_unix_sec=validation_start_time_unix_sec,
                last_target_time_unix_sec=validation_end_time_unix_sec,
                top_narr_directory_name=top_narr_directory_name,
                top_frontal_grid_dir_name=top_frontal_grid_dir_name,
                narr_predictor_names=narr_predictor_names,
                pressure_level_mb=pressure_level_mb,
                dilation_distance_metres=dilation_distance_metres,
                class_fractions=class_fractions,
                num_rows_in_half_grid=num_rows_in_half_grid,
                num_columns_in_half_grid=num_columns_in_half_grid,
                narr_mask_matrix=narr_mask_matrix),
            validation_steps=num_validation_batches_per_epoch)
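A minimal sketch of the inverse-frequency class weighting that `ml_utils.get_class_weight_dict` is assumed to implement (the helper itself is not shown here; the class fractions are hypothetical):

import numpy

# Weights inversely proportional to class fractions, normalized to sum to 1.
class_fractions = numpy.array([0.9, 0.05, 0.05])
class_weights = (1. / class_fractions) / numpy.sum(1. / class_fractions)
class_weight_dict = dict(enumerate(class_weights))
print(class_weight_dict)  # {0: 0.027..., 1: 0.486..., 2: 0.486...}
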
Example #12
def find_ungridded_file(directory_name,
                        raise_error_if_missing=True,
                        months_in_subset=None,
                        hours_in_subset=None,
                        grid_row=None,
                        grid_column=None):
    """Finds file with ungridded predictions.

    If file is a temporal subset, `months_in_subset` or `hours_in_subset` must
    be specified.

    If file is a spatial subset, `grid_row` and `grid_column` must be specified.

    :param directory_name: Directory name.
    :param raise_error_if_missing: Boolean flag.  If file is missing and
        `raise_error_if_missing = True`, this method will error out.
    :param months_in_subset: 1-D numpy array of months in subset (range 1...12).
    :param hours_in_subset: 1-D numpy array of hours in subset (range 0...23).
    :param grid_row: Grid row in subset (integer).
    :param grid_column: Grid column in subset (integer).
    :return: prediction_file_name: Path to prediction file.  If file is missing
        and `raise_error_if_missing = False`, this will be the expected path.
    :raises: ValueError: if file is missing and `raise_error_if_missing = True`.
    """

    is_temporal_subset = False

    if months_in_subset is not None:
        is_temporal_subset = True
        hours_in_subset = None
        grid_row = None
        grid_column = None

        error_checking.assert_is_integer_numpy_array(months_in_subset)
        error_checking.assert_is_numpy_array(months_in_subset,
                                             num_dimensions=1)
        error_checking.assert_is_geq_numpy_array(months_in_subset, 1)
        error_checking.assert_is_leq_numpy_array(months_in_subset, 12)

    if hours_in_subset is not None:
        is_temporal_subset = True
        grid_row = None
        grid_column = None

        error_checking.assert_is_integer_numpy_array(hours_in_subset)
        error_checking.assert_is_numpy_array(hours_in_subset, num_dimensions=1)
        error_checking.assert_is_geq_numpy_array(hours_in_subset, 0)
        error_checking.assert_is_leq_numpy_array(hours_in_subset, 23)

    is_spatial_subset = (not is_temporal_subset and grid_row is not None
                         and grid_column is not None)

    if is_spatial_subset:
        error_checking.assert_is_integer(grid_row)
        error_checking.assert_is_geq(grid_row, 0)
        error_checking.assert_is_integer(grid_column)
        error_checking.assert_is_geq(grid_column, 0)

    prediction_file_name = '{0:s}/{1:s}'.format(directory_name,
                                                UNGRIDDED_FILE_NAME_PREFIX)

    if months_in_subset is not None:
        month_array_string = '-'.join(
            ['{0:02d}'.format(m) for m in months_in_subset])
        prediction_file_name += '_months={0:s}'.format(month_array_string)

    if hours_in_subset is not None:
        hour_array_string = '-'.join(
            ['{0:02d}'.format(h) for h in hours_in_subset])
        prediction_file_name += '_hours={0:s}'.format(hour_array_string)

    if is_spatial_subset:
        prediction_file_name += '_grid-row={0:04d}_grid-column={1:04d}'.format(
            grid_row, grid_column)

    prediction_file_name += '.nc'

    if raise_error_if_missing and not os.path.isfile(prediction_file_name):
        error_string = 'Cannot find file.  Expected at: "{0:s}"'.format(
            prediction_file_name)
        raise ValueError(error_string)

    return prediction_file_name
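A standalone sketch of the file-naming logic above.  The directory and the prefix ("ungridded_predictions") are assumptions, since `UNGRIDDED_FILE_NAME_PREFIX` is defined elsewhere in the module; only the suffix logic comes from the code above:

import numpy

months_in_subset = numpy.array([12, 1, 2], dtype=int)
month_array_string = '-'.join(['{0:02d}'.format(m) for m in months_in_subset])

prediction_file_name = (
    '/some_dir/ungridded_predictions_months={0:s}.nc'.format(
        month_array_string))
print(prediction_file_name)
# /some_dir/ungridded_predictions_months=12-01-02.nc
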
Example #13
def _run(input_file_name, plot_significance, diff_colour_map_name,
         max_colour_percentile, top_output_dir_name):
    """Plots results of backwards optimization.

    This is effectively the main method.

    :param input_file_name: See documentation at top of file.
    :param plot_significance: Same.
    :param diff_colour_map_name: Same.
    :param max_colour_percentile: Same.
    :param top_output_dir_name: Same.
    """

    before_optimization_dir_name = '{0:s}/before_optimization'.format(
        top_output_dir_name)
    after_optimization_dir_name = '{0:s}/after_optimization'.format(
        top_output_dir_name)
    difference_dir_name = '{0:s}/difference'.format(top_output_dir_name)

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=before_optimization_dir_name)
    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=after_optimization_dir_name)
    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=difference_dir_name)

    error_checking.assert_is_geq(max_colour_percentile, 0.)
    error_checking.assert_is_leq(max_colour_percentile, 100.)
    diff_colour_map_object = pyplot.cm.get_cmap(diff_colour_map_name)

    print('Reading data from: "{0:s}"...'.format(input_file_name))

    try:
        backwards_opt_dict = backwards_opt.read_standard_file(input_file_name)
        list_of_optimized_matrices = backwards_opt_dict[
            backwards_opt.OPTIMIZED_MATRICES_KEY]
        list_of_input_matrices = backwards_opt_dict[
            backwards_opt.INIT_FUNCTION_KEY]

        full_storm_id_strings = backwards_opt_dict[backwards_opt.FULL_IDS_KEY]
        storm_times_unix_sec = backwards_opt_dict[
            backwards_opt.STORM_TIMES_KEY]

        storm_time_strings = [
            time_conversion.unix_sec_to_string(t,
                                               plot_input_examples.TIME_FORMAT)
            for t in storm_times_unix_sec
        ]

    except ValueError:
        backwards_opt_dict = backwards_opt.read_pmm_file(input_file_name)
        list_of_input_matrices = backwards_opt_dict[
            backwards_opt.MEAN_INPUT_MATRICES_KEY]
        list_of_optimized_matrices = backwards_opt_dict[
            backwards_opt.MEAN_OPTIMIZED_MATRICES_KEY]

        for i in range(len(list_of_input_matrices)):
            list_of_input_matrices[i] = numpy.expand_dims(
                list_of_input_matrices[i], axis=0)
            list_of_optimized_matrices[i] = numpy.expand_dims(
                list_of_optimized_matrices[i], axis=0)

        full_storm_id_strings = [None]
        storm_times_unix_sec = [None]
        storm_time_strings = [None]

    pmm_flag = (full_storm_id_strings[0] is None
                and storm_time_strings[0] is None)

    model_file_name = backwards_opt_dict[backwards_opt.MODEL_FILE_KEY]
    model_metafile_name = '{0:s}/model_metadata.p'.format(
        os.path.split(model_file_name)[0])

    print('Reading metadata from: "{0:s}"...'.format(model_metafile_name))
    model_metadata_dict = cnn.read_model_metadata(model_metafile_name)
    print(SEPARATOR_STRING)

    training_option_dict = model_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY]
    include_soundings = (training_option_dict[trainval_io.SOUNDING_FIELDS_KEY]
                         is not None)

    if include_soundings:
        _plot_bwo_for_soundings(
            input_sounding_matrix=list_of_input_matrices[-1],
            optimized_sounding_matrix=list_of_optimized_matrices[-1],
            training_option_dict=training_option_dict,
            pmm_flag=pmm_flag,
            backwards_opt_dict=backwards_opt_dict,
            top_output_dir_name=top_output_dir_name)

        print(SEPARATOR_STRING)

    # TODO(thunderhoser): Make sure to not plot soundings here.
    plot_input_examples.plot_examples(
        list_of_predictor_matrices=list_of_input_matrices,
        model_metadata_dict=model_metadata_dict,
        output_dir_name=before_optimization_dir_name,
        allow_whitespace=True,
        pmm_flag=pmm_flag,
        full_storm_id_strings=full_storm_id_strings,
        storm_times_unix_sec=storm_times_unix_sec)
    print(SEPARATOR_STRING)

    plot_input_examples.plot_examples(
        list_of_predictor_matrices=list_of_optimized_matrices,
        model_metadata_dict=model_metadata_dict,
        output_dir_name=after_optimization_dir_name,
        allow_whitespace=True,
        pmm_flag=pmm_flag,
        full_storm_id_strings=full_storm_id_strings,
        storm_times_unix_sec=storm_times_unix_sec)
    print(SEPARATOR_STRING)

    monte_carlo_dict = (
        backwards_opt_dict[backwards_opt.MONTE_CARLO_DICT_KEY]
        if plot_significance
        and backwards_opt.MONTE_CARLO_DICT_KEY in backwards_opt_dict else None)

    num_examples = list_of_optimized_matrices[0].shape[0]
    num_radar_matrices = (len(list_of_optimized_matrices) -
                          int(include_soundings))

    for i in range(num_examples):
        # TODO(thunderhoser): Make BWO file always store initial matrices, even
        # if they are created by a function.

        for j in range(num_radar_matrices):
            if monte_carlo_dict is None:
                this_significance_matrix = None
            else:
                this_significance_matrix = numpy.logical_or(
                    monte_carlo_dict[monte_carlo.TRIAL_PMM_MATRICES_KEY][j][
                        i, ...] <
                    monte_carlo_dict[monte_carlo.MIN_MATRICES_KEY][j][i, ...],
                    monte_carlo_dict[monte_carlo.TRIAL_PMM_MATRICES_KEY][j][
                        i, ...] >
                    monte_carlo_dict[monte_carlo.MAX_MATRICES_KEY][j][i, ...])

            this_difference_matrix = (list_of_optimized_matrices[j][i, ...] -
                                      list_of_input_matrices[j][i, ...])

            this_num_spatial_dim = len(list_of_input_matrices[j].shape) - 2

            if this_num_spatial_dim == 3:
                _plot_3d_radar_difference(
                    difference_matrix=this_difference_matrix,
                    colour_map_object=diff_colour_map_object,
                    max_colour_percentile=max_colour_percentile,
                    model_metadata_dict=model_metadata_dict,
                    backwards_opt_dict=backwards_opt_dict,
                    output_dir_name=difference_dir_name,
                    example_index=i,
                    significance_matrix=this_significance_matrix)
            else:
                _plot_2d_radar_difference(
                    difference_matrix=this_difference_matrix,
                    colour_map_object=diff_colour_map_object,
                    max_colour_percentile=max_colour_percentile,
                    model_metadata_dict=model_metadata_dict,
                    backwards_opt_dict=backwards_opt_dict,
                    output_dir_name=difference_dir_name,
                    example_index=i,
                    significance_matrix=this_significance_matrix)
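The Monte Carlo test above flags a grid point as significant when the trial value falls outside the [min, max] envelope.  A toy reproduction of that mask, with made-up values:

import numpy

trial_matrix = numpy.array([[0.5, 2.0], [-3.0, 0.1]])
min_matrix = numpy.full(trial_matrix.shape, -1.)
max_matrix = numpy.full(trial_matrix.shape, 1.)

# Significant where trial value is below the min or above the max.
significance_matrix = numpy.logical_or(
    trial_matrix < min_matrix, trial_matrix > max_matrix)
print(significance_matrix)
# [[False  True]
#  [ True False]]
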
Example #14
def plot_attributes_diagram(figure_object,
                            axes_object,
                            mean_predictions,
                            mean_observations,
                            mean_value_in_training,
                            min_value_to_plot,
                            max_value_to_plot,
                            line_colour=RELIABILITY_LINE_COLOUR,
                            line_style='solid',
                            line_width=RELIABILITY_LINE_WIDTH,
                            example_counts=None,
                            inv_mean_observations=None,
                            inv_example_counts=None):
    """Plots attributes diagram.

    If `example_counts is None`, will not plot histogram of predicted values.

    If `inv_mean_observations is None` and `inv_example_counts is None`, will
    not plot histogram of observed values.

    B = number of bins

    :param figure_object: Will plot on this figure (instance of
        `matplotlib.figure.Figure`).
    :param axes_object: Will plot on these axes (instance of
        `matplotlib.axes._subplots.AxesSubplot`).
    :param mean_predictions: length-B numpy array of mean predicted values.
    :param mean_observations: length-B numpy array of mean observed values.
    :param mean_value_in_training: Mean of target variable in training data.
    :param min_value_to_plot: Minimum value in plot (for both x- and y-axes).
    :param max_value_to_plot: Max value in plot (for both x- and y-axes).  If
        equal to `min_value_to_plot`, will be increased automatically.
    :param line_colour: See doc for `_plot_reliability_curve`.
    :param line_style: Same.
    :param line_width: Same.
    :param example_counts: length-B numpy array with number of examples in each
        bin.
    :param inv_mean_observations: length-B numpy array of mean observed values
        for inverted reliability curve.
    :param inv_example_counts: length-B numpy array of example counts for
        inverted reliability curve.
    :return: main_line_handle: See doc for `_plot_reliability_curve`.
    """

    # Check input args.
    error_checking.assert_is_numpy_array(mean_predictions, num_dimensions=1)

    num_bins = len(mean_predictions)
    expected_dim = numpy.array([num_bins], dtype=int)
    error_checking.assert_is_numpy_array(mean_observations,
                                         exact_dimensions=expected_dim)

    plot_prediction_histogram = example_counts is not None

    if plot_prediction_histogram:
        error_checking.assert_is_integer_numpy_array(example_counts)
        error_checking.assert_is_geq_numpy_array(example_counts, 0)
        error_checking.assert_is_numpy_array(example_counts,
                                             exact_dimensions=expected_dim)

    error_checking.assert_is_not_nan(mean_value_in_training)
    error_checking.assert_is_geq(max_value_to_plot, min_value_to_plot)
    if max_value_to_plot == min_value_to_plot:
        max_value_to_plot = min_value_to_plot + 1.

    plot_obs_histogram = not (inv_mean_observations is None
                              and inv_example_counts is None)

    if plot_obs_histogram:
        error_checking.assert_is_numpy_array(inv_mean_observations,
                                             exact_dimensions=expected_dim)

        error_checking.assert_is_integer_numpy_array(inv_example_counts)
        error_checking.assert_is_geq_numpy_array(inv_example_counts, 0)
        error_checking.assert_is_numpy_array(inv_example_counts,
                                             exact_dimensions=expected_dim)

    _plot_attr_diagram_background(
        axes_object=axes_object,
        mean_value_in_training=mean_value_in_training,
        min_value_in_plot=min_value_to_plot,
        max_value_in_plot=max_value_to_plot)

    if plot_prediction_histogram:
        plot_inset_histogram(figure_object=figure_object,
                             bin_centers=mean_predictions,
                             bin_counts=example_counts,
                             has_predictions=True,
                             bar_colour=line_colour)

    if plot_obs_histogram:
        plot_inset_histogram(figure_object=figure_object,
                             bin_centers=inv_mean_observations,
                             bin_counts=inv_example_counts,
                             has_predictions=False,
                             bar_colour=line_colour)

    return _plot_reliability_curve(axes_object=axes_object,
                                   mean_predictions=mean_predictions,
                                   mean_observations=mean_observations,
                                   min_value_to_plot=min_value_to_plot,
                                   max_value_to_plot=max_value_to_plot,
                                   line_colour=line_colour,
                                   line_style=line_style,
                                   line_width=line_width)
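A hypothetical call with synthetic five-bin statistics (all numbers made up; the default line colour and width come from module constants defined elsewhere):

import numpy
import matplotlib.pyplot as pyplot

mean_predictions = numpy.array([0.1, 0.3, 0.5, 0.7, 0.9])
mean_observations = numpy.array([0.15, 0.28, 0.47, 0.65, 0.8])
example_counts = numpy.array([500, 300, 150, 40, 10], dtype=int)

figure_object, axes_object = pyplot.subplots(1, 1, figsize=(10, 10))
plot_attributes_diagram(
    figure_object=figure_object, axes_object=axes_object,
    mean_predictions=mean_predictions, mean_observations=mean_observations,
    mean_value_in_training=0.2, min_value_to_plot=0., max_value_to_plot=1.,
    example_counts=example_counts)
pyplot.show()
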
Example #15
def train_logistic_regression(
        training_table, feature_names, target_name, replace_missing,
        standardize, transform_via_svd, fit_intercept,
        replacement_method=feature_trans.MEAN_VALUE_REPLACEMENT_METHOD,
        fraction_of_explained_variance_for_svd=
        DEFAULT_EXP_VARIANCE_FRACTION_FOR_SVD,
        convergence_tolerance=DEFAULT_CONVERGENCE_TOL_FOR_LOGISTIC,
        penalty_multiplier=DEFAULT_PENALTY_MULT_FOR_LOGISTIC,
        l1_weight=DEFAULT_L1_WEIGHT,
        max_num_epochs=DEFAULT_MAX_NUM_EPOCHS_FOR_LOGISTIC):
    """Trains logistic-regression model for binary classification.

    :param training_table: See documentation for _check_training_data.
    :param feature_names: See doc for _check_training_data.
    :param target_name: See doc for _check_training_data.
    :param replace_missing: See documentation for _preprocess_data_for_learning.
    :param standardize: See doc for _preprocess_data_for_learning.
    :param transform_via_svd: See doc for _preprocess_data_for_learning.
    :param fit_intercept: Boolean flag.  If True, will fit the intercept (bias)
        coefficient.  If False, will assume intercept = 0.
    :param replacement_method: See doc for _preprocess_data_for_learning.
    :param fraction_of_explained_variance_for_svd: See doc for
        _preprocess_data_for_learning.
    :param convergence_tolerance: Stopping criterion.  Training will stop when
        `loss > previous_loss - convergence_tolerance`.
    :param penalty_multiplier: Coefficient used to multiply L1 and L2 penalties
        in loss function.
    :param l1_weight: Weight for L1 regularization penalty.  L2 weight will be
        `1 - l1_weight`.
    :param max_num_epochs: Max number of training epochs (passes over training
        data).
    :return: model_object: Trained model (instance of
        `sklearn.linear_model.SGDClassifier`).
    :return: replacement_dict: See doc for _preprocess_data_for_learning.
    :return: standardization_dict: See doc for _preprocess_data_for_learning.
    :return: svd_dictionary: See doc for _preprocess_data_for_learning.
    """

    _check_input_data_for_learning(
        input_table=training_table, feature_names=feature_names,
        target_name=target_name)

    (preprocessed_training_table, preprocessed_feature_names, replacement_dict,
     standardization_dict, svd_dictionary) = _preprocess_data_for_learning(
         input_table=training_table, feature_names=feature_names,
         learning_phase=TRAINING_PHASE, replace_missing=replace_missing,
         standardize=standardize, transform_via_svd=transform_via_svd,
         replacement_method=replacement_method,
         fraction_of_explained_variance_for_svd=
         fraction_of_explained_variance_for_svd)

    error_checking.assert_is_boolean(fit_intercept)
    error_checking.assert_is_greater(convergence_tolerance, 0.)
    error_checking.assert_is_greater(penalty_multiplier, 0.)
    error_checking.assert_is_geq(l1_weight, 0.)
    error_checking.assert_is_leq(l1_weight, 1.)
    error_checking.assert_is_integer(max_num_epochs)
    error_checking.assert_is_greater(max_num_epochs, 0)

    # `n_iter` was removed in scikit-learn 0.21, and `loss='log'` was renamed
    # to 'log_loss' in scikit-learn 1.1, so the modern arguments are used here.
    # This version also honours `convergence_tolerance`, as the docstring
    # promises.
    model_object = sklearn.linear_model.SGDClassifier(
        loss='log_loss', penalty='elasticnet', alpha=penalty_multiplier,
        l1_ratio=l1_weight, fit_intercept=fit_intercept,
        max_iter=max_num_epochs, tol=convergence_tolerance, verbose=1)

    # For scikit-learn < 0.21, use the original call instead:
    # model_object = sklearn.linear_model.SGDClassifier(
    #     loss='log', penalty='elasticnet', alpha=penalty_multiplier,
    #     l1_ratio=l1_weight, fit_intercept=fit_intercept, verbose=1,
    #     n_iter=max_num_epochs)

    # `DataFrame.as_matrix` was removed in pandas 1.0; selecting the feature
    # columns and taking `.values` is equivalent.
    model_object.fit(
        preprocessed_training_table[preprocessed_feature_names].values,
        preprocessed_training_table[target_name].values)
    return model_object, replacement_dict, standardization_dict, svd_dictionary
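For a standalone sanity check outside this pipeline, the same estimator can be fit on synthetic data (argument values are illustrative only; use loss='log' on scikit-learn < 1.1):

import numpy
import sklearn.linear_model

numpy.random.seed(0)
feature_matrix = numpy.random.normal(size=(100, 3))
target_values = (feature_matrix[:, 0] > 0).astype(int)

model_object = sklearn.linear_model.SGDClassifier(
    loss='log_loss', penalty='elasticnet', alpha=1e-4, l1_ratio=0.5,
    fit_intercept=True, max_iter=100, tol=1e-4)
model_object.fit(feature_matrix, target_values)
print(model_object.predict(feature_matrix[:5, :]))
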
Example #16
def get_latlng_grid_points_in_radius(test_latitude_deg,
                                     test_longitude_deg,
                                     effective_radius_metres,
                                     grid_point_latitudes_deg=None,
                                     grid_point_longitudes_deg=None,
                                     grid_point_dict=None):
    """Finds lat-long grid points within radius of test point.

    One of the following sets of input args must be specified:

    - grid_point_latitudes_deg and grid_point_longitudes_deg
    - grid_point_dict

    M = number of rows (unique grid-point latitudes)
    N = number of columns (unique grid-point longitudes)
    K = number of grid points within radius of test point

    :param test_latitude_deg: Latitude (deg N) of test point.
    :param test_longitude_deg: Longitude (deg E) of test point.
    :param effective_radius_metres: Effective radius (will find all grid points
        within this radius of test point).
    :param grid_point_latitudes_deg: length-M numpy array with latitudes (deg N)
        of grid points.
    :param grid_point_longitudes_deg: length-N numpy array with longitudes
        (deg E) of grid points.
    :param grid_point_dict: Dictionary created by a previous run of this method
        (see output documentation).
    :return: rows_in_radius: length-K numpy array with row indices of grid
        points near test point.
    :return: columns_in_radius: Same but for columns.
    :return: grid_point_dict: Dictionary with the following keys.
    grid_point_dict['grid_point_x_matrix_metres']: M-by-N numpy array with
        x-coordinates of grid points.
    grid_point_dict['grid_point_y_matrix_metres']: M-by-N numpy array with
        y-coordinates of grid points.
    grid_point_dict['projection_object']: Instance of `pyproj.Proj`, which can
        be used to convert future test points from lat-long to x-y coordinates.
    """

    if grid_point_dict is None:
        (grid_point_lat_matrix_deg,
         grid_point_lng_matrix_deg) = latlng_vectors_to_matrices(
             unique_latitudes_deg=grid_point_latitudes_deg,
             unique_longitudes_deg=grid_point_longitudes_deg)

        projection_object = projections.init_azimuthal_equidistant_projection(
            central_latitude_deg=numpy.mean(grid_point_latitudes_deg),
            central_longitude_deg=numpy.mean(grid_point_longitudes_deg))

        (grid_point_x_matrix_metres,
         grid_point_y_matrix_metres) = projections.project_latlng_to_xy(
             latitudes_deg=grid_point_lat_matrix_deg,
             longitudes_deg=grid_point_lng_matrix_deg,
             projection_object=projection_object)

        grid_point_dict = {
            X_COORD_MATRIX_KEY: grid_point_x_matrix_metres,
            Y_COORD_MATRIX_KEY: grid_point_y_matrix_metres,
            PROJECTION_KEY: projection_object
        }

    error_checking.assert_is_valid_latitude(test_latitude_deg)
    error_checking.assert_is_geq(effective_radius_metres, 0.)
    test_longitude_deg = lng_conversion.convert_lng_positive_in_west(
        longitudes_deg=numpy.array([test_longitude_deg]), allow_nan=False)[0]

    (test_x_coords_metres,
     test_y_coords_metres) = projections.project_latlng_to_xy(
         latitudes_deg=numpy.array([test_latitude_deg]),
         longitudes_deg=numpy.array([test_longitude_deg]),
         projection_object=grid_point_dict[PROJECTION_KEY])
    test_x_coord_metres = test_x_coords_metres[0]
    test_y_coord_metres = test_y_coords_metres[0]

    valid_x_flags = numpy.absolute(
        grid_point_dict[X_COORD_MATRIX_KEY] -
        test_x_coord_metres) <= effective_radius_metres
    valid_y_flags = numpy.absolute(
        grid_point_dict[Y_COORD_MATRIX_KEY] -
        test_y_coord_metres) <= effective_radius_metres
    rows_to_try, columns_to_try = numpy.where(
        numpy.logical_and(valid_x_flags, valid_y_flags))

    distances_to_try_metres = numpy.sqrt(
        (grid_point_dict[X_COORD_MATRIX_KEY][rows_to_try, columns_to_try] -
         test_x_coord_metres)**2 +
        (grid_point_dict[Y_COORD_MATRIX_KEY][rows_to_try, columns_to_try] -
         test_y_coord_metres)**2)
    valid_indices = numpy.where(
        distances_to_try_metres <= effective_radius_metres)[0]

    return (rows_to_try[valid_indices], columns_to_try[valid_indices],
            grid_point_dict)
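The two-stage filter above (a cheap bounding-box check, then the exact Euclidean distance) can be reproduced on a toy x-y grid; all coordinates here are made up:

import numpy

x_matrix_metres = numpy.array([[0., 100.], [0., 100.]])
y_matrix_metres = numpy.array([[0., 0.], [100., 100.]])
test_x_metres, test_y_metres, radius_metres = 0., 0., 120.

# Stage 1: keep grid points inside the bounding box of the radius.
bounding_box_flags = numpy.logical_and(
    numpy.absolute(x_matrix_metres - test_x_metres) <= radius_metres,
    numpy.absolute(y_matrix_metres - test_y_metres) <= radius_metres)
rows_to_try, columns_to_try = numpy.where(bounding_box_flags)

# Stage 2: compute exact distances for the survivors only.
distances_metres = numpy.sqrt(
    (x_matrix_metres[rows_to_try, columns_to_try] - test_x_metres) ** 2 +
    (y_matrix_metres[rows_to_try, columns_to_try] - test_y_metres) ** 2)
valid_indices = numpy.where(distances_metres <= radius_metres)[0]

print(rows_to_try[valid_indices], columns_to_try[valid_indices])
# [0 0 1] [0 1 0]
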
Example #17
def train_neural_net(
        training_table, feature_names, target_name, replace_missing,
        standardize, transform_via_svd,
        replacement_method=feature_trans.MEAN_VALUE_REPLACEMENT_METHOD,
        fraction_of_explained_variance_for_svd=
        DEFAULT_EXP_VARIANCE_FRACTION_FOR_SVD,
        hidden_layer_sizes=DEFAULT_HIDDEN_LAYER_SIZES_FOR_NN,
        hidden_layer_activation_function=DEFAULT_ACTIVATION_FUNCTION_FOR_NN,
        solver=DEFAULT_SOLVER_FOR_NN, l2_weight=DEFAULT_L2_WEIGHT_FOR_NN,
        num_examples_per_batch=DEFAULT_BATCH_SIZE_FOR_NN,
        learning_rate=DEFAULT_LEARNING_RATE_FOR_NN,
        max_num_epochs=DEFAULT_MAX_NUM_EPOCHS_FOR_NN,
        convergence_tolerance=DEFAULT_CONVERGENCE_TOLERANCE_FOR_NN,
        allow_early_stopping=True,
        early_stopping_fraction=DEFAULT_EARLY_STOPPING_FRACTION_FOR_NN):
    """Trains a neural net for binary classification.

    H = number of hidden layers

    :param training_table: See documentation for _check_training_data.
    :param feature_names: See doc for _check_training_data.
    :param target_name: See doc for _check_training_data.
    :param replace_missing: See documentation for _preprocess_data_for_learning.
    :param standardize: See doc for _preprocess_data_for_learning.
    :param transform_via_svd: See doc for _preprocess_data_for_learning.
    :param replacement_method: See doc for _preprocess_data_for_learning.
    :param fraction_of_explained_variance_for_svd: See doc for
        _preprocess_data_for_learning.
    :param hidden_layer_sizes: length-H numpy array, where the [i]th element is
        the number of nodes in the [i]th hidden layer.
    :param hidden_layer_activation_function: Activation function for hidden
        layers.  See `sklearn.neural_network.MLPClassifier` documentation for
        valid options.
    :param solver:  Solver.  Valid options are "sgd" and "adam".
    :param l2_weight: Weight for L2 penalty.
    :param num_examples_per_batch: Number of examples per training batch.
    :param learning_rate: Learning rate.
    :param max_num_epochs: Max number of training epochs (passes over training
        data).
    :param convergence_tolerance: Stopping criterion.  Training will stop when
        loss has improved by < `convergence_tolerance` for each of two
        consecutive epochs.
    :param allow_early_stopping: Boolean flag.  If True, some training data will
        be set aside as "validation data" to check for early stopping.  In this
        case, training will stop when the validation score has improved by <
        `convergence_tolerance` for each of two consecutive epochs.
    :param early_stopping_fraction: Fraction of training examples to use when
        checking early-stopping criterion.
    :return: model_object: Trained model (instance of
        `sklearn.neural_network.MLPClassifier`).
    :return: replacement_dict: See doc for _preprocess_data_for_learning.
    :return: standardization_dict: See doc for _preprocess_data_for_learning.
    :return: svd_dictionary: See doc for _preprocess_data_for_learning.
    :raises: ValueError: if `solver not in VALID_SOLVERS_FOR_NN`.
    """

    _check_input_data_for_learning(
        input_table=training_table, feature_names=feature_names,
        target_name=target_name)

    (preprocessed_training_table, preprocessed_feature_names, replacement_dict,
     standardization_dict, svd_dictionary) = _preprocess_data_for_learning(
         input_table=training_table, feature_names=feature_names,
         learning_phase=TRAINING_PHASE, replace_missing=replace_missing,
         standardize=standardize, transform_via_svd=transform_via_svd,
         replacement_method=replacement_method,
         fraction_of_explained_variance_for_svd=
         fraction_of_explained_variance_for_svd)

    error_checking.assert_is_integer_numpy_array(hidden_layer_sizes)
    error_checking.assert_is_numpy_array(hidden_layer_sizes, num_dimensions=1)
    error_checking.assert_is_greater_numpy_array(hidden_layer_sizes, 0)

    error_checking.assert_is_string(solver)
    if solver not in VALID_SOLVERS_FOR_NN:
        error_string = (
            '\n\n{0:s}\n\nValid solvers (listed above) do not include "{1:s}".'
        ).format(str(VALID_SOLVERS_FOR_NN), solver)

        raise ValueError(error_string)

    error_checking.assert_is_integer(num_examples_per_batch)
    error_checking.assert_is_geq(num_examples_per_batch, 2)
    error_checking.assert_is_greater(learning_rate, 0.)
    error_checking.assert_is_leq(learning_rate, 1.)
    error_checking.assert_is_integer(max_num_epochs)
    error_checking.assert_is_greater(max_num_epochs, 0)
    error_checking.assert_is_greater(convergence_tolerance, 0.)
    error_checking.assert_is_boolean(allow_early_stopping)

    if allow_early_stopping:
        error_checking.assert_is_greater(early_stopping_fraction, 0.)
        error_checking.assert_is_less_than(early_stopping_fraction, 0.5)

    model_object = sklearn.neural_network.MLPClassifier(
        hidden_layer_sizes=hidden_layer_sizes,
        activation=hidden_layer_activation_function, solver=solver,
        alpha=l2_weight, batch_size=num_examples_per_batch,
        learning_rate_init=learning_rate, max_iter=max_num_epochs,
        tol=convergence_tolerance, verbose=3,
        early_stopping=allow_early_stopping,
        validation_fraction=early_stopping_fraction)

    # `DataFrame.as_matrix` was removed in pandas 1.0; selecting the feature
    # columns and taking `.values` is equivalent.
    model_object.fit(
        preprocessed_training_table[preprocessed_feature_names].values,
        preprocessed_training_table[target_name].values)
    return model_object, replacement_dict, standardization_dict, svd_dictionary
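As with the logistic-regression trainer, a standalone sketch on synthetic data (hyperparameter values are illustrative, not the module defaults):

import numpy
import sklearn.neural_network

numpy.random.seed(0)
feature_matrix = numpy.random.normal(size=(200, 4))
target_values = (feature_matrix[:, 0] + feature_matrix[:, 1] > 0).astype(int)

model_object = sklearn.neural_network.MLPClassifier(
    hidden_layer_sizes=(20, 10), activation='relu', solver='adam',
    alpha=1e-3, batch_size=32, learning_rate_init=0.005, max_iter=50,
    tol=1e-4, early_stopping=True, validation_fraction=0.2)
model_object.fit(feature_matrix, target_values)
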
Example #18
def find_events_in_grid_cell(event_x_coords_metres, event_y_coords_metres,
                             grid_edge_x_coords_metres,
                             grid_edge_y_coords_metres, row_index,
                             column_index, verbose):
    """Finds events in a certain grid cell.

    E = number of events
    M = number of rows in grid
    N = number of columns in grid

    :param event_x_coords_metres: length-E numpy array of x-coordinates.
    :param event_y_coords_metres: length-E numpy array of y-coordinates.
    :param grid_edge_x_coords_metres: length-(N + 1) numpy array with
        x-coordinates at edges of grid cells.
    :param grid_edge_y_coords_metres: length-(M + 1) numpy array with
        y-coordinates at edges of grid cells.
    :param row_index: Will find events in [i]th row of grid, where
        i = `row_index`.
    :param column_index: Will find events in [j]th column of grid, where
        j = `column_index`.
    :param verbose: Boolean flag.  If True, messages will be printed to command
        window.
    :return: desired_indices: 1-D numpy array with indices of events in desired
        grid cell.
    """

    error_checking.assert_is_numpy_array_without_nan(event_x_coords_metres)
    error_checking.assert_is_numpy_array(event_x_coords_metres,
                                         num_dimensions=1)

    num_events = len(event_x_coords_metres)
    these_expected_dim = numpy.array([num_events], dtype=int)

    error_checking.assert_is_numpy_array_without_nan(event_y_coords_metres)
    error_checking.assert_is_numpy_array(event_y_coords_metres,
                                         exact_dimensions=these_expected_dim)

    error_checking.assert_is_numpy_array(grid_edge_x_coords_metres,
                                         num_dimensions=1)
    error_checking.assert_is_greater_numpy_array(
        numpy.diff(grid_edge_x_coords_metres), 0)

    error_checking.assert_is_numpy_array(grid_edge_y_coords_metres,
                                         num_dimensions=1)
    error_checking.assert_is_greater_numpy_array(
        numpy.diff(grid_edge_y_coords_metres), 0)

    error_checking.assert_is_integer(row_index)
    error_checking.assert_is_geq(row_index, 0)
    error_checking.assert_is_integer(column_index)
    error_checking.assert_is_geq(column_index, 0)
    error_checking.assert_is_boolean(verbose)

    x_min_metres = grid_edge_x_coords_metres[column_index]
    x_max_metres = grid_edge_x_coords_metres[column_index + 1]
    y_min_metres = grid_edge_y_coords_metres[row_index]
    y_max_metres = grid_edge_y_coords_metres[row_index + 1]

    if row_index == len(grid_edge_y_coords_metres) - 2:
        y_max_metres += TOLERANCE
    if column_index == len(grid_edge_x_coords_metres) - 2:
        x_max_metres += TOLERANCE

    # TODO(thunderhoser): If need be, I could speed this up by computing
    # `row_flags` only once per row and `column_flags` only once per column.
    row_flags = numpy.logical_and(event_y_coords_metres >= y_min_metres,
                                  event_y_coords_metres < y_max_metres)

    if not numpy.any(row_flags):
        if verbose:
            print('0 of {0:d} events are in grid cell ({1:d}, {2:d})!'.format(
                num_events, row_index, column_index))

        return numpy.array([], dtype=int)

    column_flags = numpy.logical_and(event_x_coords_metres >= x_min_metres,
                                     event_x_coords_metres < x_max_metres)

    if not numpy.any(column_flags):
        if verbose:
            print('0 of {0:d} events are in grid cell ({1:d}, {2:d})!'.format(
                num_events, row_index, column_index))

        return numpy.array([], dtype=int)

    desired_indices = numpy.where(numpy.logical_and(row_flags,
                                                    column_flags))[0]

    if verbose:
        print('{0:d} of {1:d} events are in grid cell ({2:d}, {3:d})!'.format(
            len(desired_indices), num_events, row_index, column_index))

    return desired_indices
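A toy call: three events along one row of a 1-by-3 grid, asking for column 1 (x in [10, 20)).  Coordinates are made up:

import numpy

event_x_coords_metres = numpy.array([5., 15., 25.])
event_y_coords_metres = numpy.array([5., 5., 5.])
grid_edge_x_coords_metres = numpy.array([0., 10., 20., 30.])
grid_edge_y_coords_metres = numpy.array([0., 10.])

desired_indices = find_events_in_grid_cell(
    event_x_coords_metres=event_x_coords_metres,
    event_y_coords_metres=event_y_coords_metres,
    grid_edge_x_coords_metres=grid_edge_x_coords_metres,
    grid_edge_y_coords_metres=grid_edge_y_coords_metres,
    row_index=0, column_index=1, verbose=True)
print(desired_indices)  # [1]
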
Example #19
def permute_one_predictor(
        predictor_matrices, separate_heights, matrix_index, predictor_index,
        permuted_values=None):
    """Permutes values of one predictor.

    Specifically, will permute values of the [j]th predictor in the [i]th
    matrix, where i = `matrix_index` and j = `predictor_index`.

    T = number of input tensors to the model
    E = number of examples

    :param predictor_matrices: length-T list of numpy arrays, where the first
        axis of each has length E.
    :param separate_heights: Boolean flag.  If True, for arrays with 3 spatial
        dimensions, each predictor/height pair will be shuffled independently.
        If False, for arrays with 3 spatial dimensions, each predictor will be
        shuffled independently.
    :param matrix_index: See discussion above.
    :param predictor_index: See discussion above.
    :param permuted_values: numpy array of permuted values with which to replace
        clean values.  If None, permuted values will be created randomly on the
        fly.
    :return: predictor_matrices: Same as input but after permutation.
    :return: permuted_values: numpy array of permuted values with which clean
        values were replaced.
    """

    # Check input args.
    error_checking.assert_is_list(predictor_matrices)
    for this_matrix in predictor_matrices:
        error_checking.assert_is_numpy_array_without_nan(this_matrix)

    error_checking.assert_is_boolean(separate_heights)
    error_checking.assert_is_integer(matrix_index)
    error_checking.assert_is_geq(matrix_index, 0)
    error_checking.assert_is_integer(predictor_index)
    error_checking.assert_is_geq(predictor_index, 0)

    if permuted_values is not None:
        error_checking.assert_is_numpy_array_without_nan(permuted_values)

    # Do dirty work.
    i = matrix_index
    j = predictor_index
    num_spatial_dim = len(predictor_matrices[i].shape) - 2

    if num_spatial_dim == 3 and separate_heights:
        predictor_matrices[i], original_shape = flatten_last_two_dim(
            predictor_matrices[i]
        )
    else:
        original_shape = None

    if permuted_values is None:
        random_indices = numpy.random.permutation(
            predictor_matrices[i].shape[0]
        )
        predictor_matrices[i][..., j] = (
            predictor_matrices[i][random_indices, ..., j]
        )

        # predictor_matrices[i][..., j] = numpy.take(
        #     predictor_matrices[i][..., j],
        #     indices=numpy.random.permutation(predictor_matrices[i].shape[0]),
        #     axis=0
        # )
    else:
        predictor_matrices[i][..., j] = permuted_values

    permuted_values = predictor_matrices[i][..., j]

    if original_shape is not None:
        predictor_matrices[i] = numpy.reshape(
            predictor_matrices[i], original_shape, order='F'
        )

    return predictor_matrices, permuted_values
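A toy demonstration: shuffling channel 1 of a single 4-example matrix leaves channel 0 untouched (array values are arbitrary):

import numpy

numpy.random.seed(6)

# 4 examples, 1 x 1 spatial grid, 2 channels.
matrix = numpy.arange(8, dtype=float).reshape(4, 1, 1, 2)
new_matrices, permuted_values = permute_one_predictor(
    predictor_matrices=[matrix], separate_heights=False,
    matrix_index=0, predictor_index=1)

print(new_matrices[0][..., 0].ravel())  # [0. 2. 4. 6.] -- unchanged
print(permuted_values.ravel())  # the values 1, 3, 5, 7 in shuffled order
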
Example #20
def _run(gradcam_file_names, composite_names, colour_map_name,
         max_colour_value, num_contours, smoothing_radius_grid_cells,
         output_dir_name):
    """Makes figure with class-activation maps for MYRORSS model.

    This is effectively the main method.

    :param gradcam_file_names: See documentation at top of file.
    :param composite_names: Same.
    :param colour_map_name: Same.
    :param max_colour_value: Same.
    :param num_contours: Same.
    :param smoothing_radius_grid_cells: Same.
    :param output_dir_name: Same.
    """

    # Process input args.
    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    if smoothing_radius_grid_cells <= 0:
        smoothing_radius_grid_cells = None

    colour_map_object = pyplot.cm.get_cmap(colour_map_name)
    error_checking.assert_is_geq(num_contours, 10)
    error_checking.assert_is_greater(max_colour_value,
                                     10**MIN_COLOUR_VALUE_LOG10)

    num_composites = len(gradcam_file_names)
    expected_dim = numpy.array([num_composites], dtype=int)
    error_checking.assert_is_numpy_array(numpy.array(composite_names),
                                         exact_dimensions=expected_dim)

    composite_names_abbrev = [
        n.replace('_', '-').lower() for n in composite_names
    ]
    composite_names_verbose = [
        '({0:s}) {1:s}'.format(chr(ord('a') + i),
                               composite_names[i].replace('_', ' '))
        for i in range(num_composites)
    ]

    panel_file_names = [None] * num_composites

    for i in range(num_composites):
        panel_file_names[i] = _plot_one_composite(
            gradcam_file_name=gradcam_file_names[i],
            composite_name_abbrev=composite_names_abbrev[i],
            composite_name_verbose=composite_names_verbose[i],
            colour_map_object=colour_map_object,
            max_colour_value=max_colour_value,
            num_contours=num_contours,
            smoothing_radius_grid_cells=smoothing_radius_grid_cells,
            output_dir_name=output_dir_name)

        print('\n')

    figure_file_name = '{0:s}/gradcam_concat.jpg'.format(output_dir_name)
    print('Concatenating panels to: "{0:s}"...'.format(figure_file_name))

    num_panel_rows = int(numpy.floor(numpy.sqrt(num_composites)))
    num_panel_columns = int(numpy.ceil(float(num_composites) / num_panel_rows))

    imagemagick_utils.concatenate_images(input_file_names=panel_file_names,
                                         output_file_name=figure_file_name,
                                         border_width_pixels=100,
                                         num_panel_rows=num_panel_rows,
                                         num_panel_columns=num_panel_columns)
    imagemagick_utils.trim_whitespace(input_file_name=figure_file_name,
                                      output_file_name=figure_file_name,
                                      border_width_pixels=10)

    _add_colour_bar(figure_file_name=figure_file_name,
                    colour_map_object=colour_map_object,
                    max_colour_value=max_colour_value,
                    temporary_dir_name=output_dir_name)
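The panel layout above aims for a roughly square grid; a quick check of the arithmetic with a hypothetical count of seven composites:

import numpy

num_composites = 7
num_panel_rows = int(numpy.floor(numpy.sqrt(num_composites)))
num_panel_columns = int(numpy.ceil(float(num_composites) / num_panel_rows))
print(num_panel_rows, num_panel_columns)  # 2 4
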
Example #21
def create_3d_net(
        num_input_features, first_spatial_dimensions, rowcol_upsampling_factors,
        height_upsampling_factors, num_output_channels,
        l1_weight=DEFAULT_L1_WEIGHT, l2_weight=DEFAULT_L2_WEIGHT,
        use_transposed_conv=True, activation_function_name=None,
        alpha_for_elu=DEFAULT_ALPHA_FOR_ELU,
        alpha_for_relu=DEFAULT_ALPHA_FOR_RELU,
        use_activn_for_last_layer=False,
        use_batch_norm=True, use_batch_norm_for_last_layer=True):
    """Creates (but does not train) upconvnet with 3 spatial dimensions.

    L = number of main (transposed-conv or upsampling) layers

    :param num_input_features: Length of input feature vector.
    :param first_spatial_dimensions: length-3 numpy array of dimensions in first
        main layer.  The order should be (num_rows, num_columns, num_heights).
        Before it is passed to the first main layer, the feature vector will be
        reshaped into a grid with these dimensions.
    :param rowcol_upsampling_factors: length-L numpy array of upsampling factors
        for horizontal dimensions.
    :param height_upsampling_factors: length-L numpy array of upsampling factors
        for vertical dimension.
    :param num_output_channels: Number of channels in output image.
    :param l1_weight: Weight of L1 regularization for conv and transposed-conv
        layers.
    :param l2_weight: Same but for L2 regularization.
    :param use_transposed_conv: Boolean flag.  If True, each upsampling will be
        done with a transposed-conv layer.  If False, each upsampling will be
        done with an upsampling layer followed by a normal conv layer.
    :param activation_function_name: Activation function.  If you do not want
        activation, make this None.  Otherwise, must be accepted by
        `architecture_utils.check_activation_function`.
    :param alpha_for_elu: See doc for
        `architecture_utils.check_activation_function`.
    :param alpha_for_relu: Same.
    :param use_activn_for_last_layer: Boolean flag.  If True, will apply
        activation function to output image.
    :param use_batch_norm: Boolean flag.  If True, will apply batch
        normalization to conv and transposed-conv layers.
    :param use_batch_norm_for_last_layer: Boolean flag.  If True, will apply
        batch normalization to output image.
    :return: model_object: Untrained model (instance of `keras.models.Model`).
    """

    # TODO(thunderhoser): This method assumes that the original CNN does
    # edge-padding.

    # Check input args.
    error_checking.assert_is_integer(num_input_features)
    error_checking.assert_is_greater(num_input_features, 0)
    error_checking.assert_is_integer(num_output_channels)
    error_checking.assert_is_greater(num_output_channels, 0)
    error_checking.assert_is_geq(l1_weight, 0.)
    error_checking.assert_is_geq(l2_weight, 0.)

    error_checking.assert_is_boolean(use_transposed_conv)
    error_checking.assert_is_boolean(use_activn_for_last_layer)
    error_checking.assert_is_boolean(use_batch_norm)
    error_checking.assert_is_boolean(use_batch_norm_for_last_layer)

    error_checking.assert_is_numpy_array(
        first_spatial_dimensions, exact_dimensions=numpy.array([3], dtype=int)
    )
    error_checking.assert_is_integer_numpy_array(first_spatial_dimensions)
    error_checking.assert_is_greater_numpy_array(first_spatial_dimensions, 0)

    error_checking.assert_is_numpy_array(
        rowcol_upsampling_factors, num_dimensions=1
    )
    error_checking.assert_is_integer_numpy_array(rowcol_upsampling_factors)
    error_checking.assert_is_geq_numpy_array(rowcol_upsampling_factors, 1)

    num_main_layers = len(rowcol_upsampling_factors)
    these_expected_dim = numpy.array([num_main_layers], dtype=int)

    error_checking.assert_is_numpy_array(
        height_upsampling_factors, exact_dimensions=these_expected_dim
    )
    error_checking.assert_is_integer_numpy_array(height_upsampling_factors)
    error_checking.assert_is_geq_numpy_array(height_upsampling_factors, 1)

    # Set up CNN architecture.
    regularizer_object = keras.regularizers.l1_l2(l1=l1_weight, l2=l2_weight)
    input_layer_object = keras.layers.Input(shape=(num_input_features,))

    current_num_filters = int(numpy.round(
        num_input_features / numpy.prod(first_spatial_dimensions)
    ))
    first_dimensions = numpy.concatenate((
        first_spatial_dimensions, numpy.array([current_num_filters], dtype=int)
    ))
    layer_object = keras.layers.Reshape(
        target_shape=first_dimensions
    )(input_layer_object)

    kernel_size_tuple = (CONV_FILTER_SIZE, CONV_FILTER_SIZE, CONV_FILTER_SIZE)

    for i in range(num_main_layers):
        if i == num_main_layers - 1:
            current_num_filters = num_output_channels + 0
        elif rowcol_upsampling_factors[i] == 1:
            current_num_filters = int(numpy.round(current_num_filters / 2))

        this_stride_tuple = (
            rowcol_upsampling_factors[i], rowcol_upsampling_factors[i],
            height_upsampling_factors[i]
        )

        if use_transposed_conv:
            layer_object = keras.layers.Conv3DTranspose(
                filters=current_num_filters, kernel_size=kernel_size_tuple,
                strides=this_stride_tuple, padding='same',
                data_format='channels_last', dilation_rate=(1, 1, 1),
                activation=None, use_bias=True,
                kernel_initializer='glorot_uniform', bias_initializer='zeros',
                kernel_regularizer=regularizer_object
            )(layer_object)
        else:
            if rowcol_upsampling_factors[i] > 1:
                try:
                    layer_object = keras.layers.UpSampling3D(
                        size=this_stride_tuple, data_format='channels_last',
                        interpolation='bilinear'
                    )(layer_object)
                except TypeError:
                    # `UpSampling3D` does not accept the `interpolation`
                    # argument in most Keras versions, so fall back to
                    # nearest-neighbour upsampling.
                    layer_object = keras.layers.UpSampling3D(
                        size=this_stride_tuple, data_format='channels_last'
                    )(layer_object)

            layer_object = keras.layers.Conv3D(
                filters=current_num_filters, kernel_size=kernel_size_tuple,
                strides=(1, 1, 1), padding='same', data_format='channels_last',
                dilation_rate=(1, 1, 1), activation=None, use_bias=True,
                kernel_initializer='glorot_uniform', bias_initializer='zeros',
                kernel_regularizer=regularizer_object
            )(layer_object)

        use_activation_here = (
            activation_function_name is not None and
            (i < num_main_layers - 1 or use_activn_for_last_layer)
        )

        if use_activation_here:
            layer_object = architecture_utils.get_activation_layer(
                activation_function_string=activation_function_name,
                alpha_for_elu=alpha_for_elu, alpha_for_relu=alpha_for_relu
            )(layer_object)

        use_batch_norm_here = (
            use_batch_norm and
            (i < num_main_layers - 1 or use_batch_norm_for_last_layer)
        )

        if use_batch_norm_here:
            layer_object = (
                architecture_utils.get_batch_norm_layer()(layer_object)
            )

    # Compile CNN.
    model_object = keras.models.Model(
        inputs=input_layer_object, outputs=layer_object)
    model_object.compile(
        loss=keras.losses.mean_squared_error, optimizer=keras.optimizers.Adam()
    )

    model_object.summary()
    return model_object
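Because every main layer multiplies the grid dimensions by its upsampling factors, the output size follows directly from the inputs.  A quick sketch with hypothetical values:

import numpy

num_input_features = 512
first_spatial_dimensions = numpy.array([4, 4, 2], dtype=int)
rowcol_upsampling_factors = numpy.array([2, 2], dtype=int)
height_upsampling_factors = numpy.array([2, 1], dtype=int)

# Number of channels in the first main layer, as computed above.
first_num_channels = int(numpy.round(
    num_input_features / numpy.prod(first_spatial_dimensions)))
print(first_num_channels)  # 16

num_output_rows = (first_spatial_dimensions[0] *
                   numpy.prod(rowcol_upsampling_factors))
num_output_heights = (first_spatial_dimensions[2] *
                      numpy.prod(height_upsampling_factors))
print(num_output_rows, num_output_heights)  # 16 4
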
Example #22
def rename_storms(top_input_dir_name, first_date_unix_sec, last_date_unix_sec,
                  first_id_number, max_dropout_time_seconds,
                  top_output_dir_name):
    """Renames storms.  This ensures that all storm IDs are unique.

    :param top_input_dir_name: Name of top-level directory with input files
        (processed probSevere files, readable by `storm_tracking_io.read_file`).
    :param first_date_unix_sec: First date in time period.  This method will fix
        IDs for all dates from `first_date_unix_sec`...`last_date_unix_sec`.
    :param last_date_unix_sec: See above.
    :param first_id_number: Will start with this ID.
    :param max_dropout_time_seconds: Max dropout time.  For each storm ID "s"
        found in the original data, this method will find all periods where "s"
        appears in consecutive time steps with no dropout longer than
        `max_dropout_time_seconds`.  Each such period will get a new, unique
        storm ID.
    :param top_output_dir_name: Name of top-level directory for output files
        (files with new IDs, to be written by `storm_tracking_io.write_file`).
    """

    error_checking.assert_is_integer(first_id_number)
    error_checking.assert_is_geq(first_id_number, 0)
    error_checking.assert_is_integer(max_dropout_time_seconds)
    error_checking.assert_is_greater(max_dropout_time_seconds, 0)

    (input_file_names_by_date, output_file_names_by_date,
     valid_times_by_date_unix_sec) = _find_io_files_for_renaming(
         top_input_dir_name=top_input_dir_name,
         first_date_unix_sec=first_date_unix_sec,
         last_date_unix_sec=last_date_unix_sec,
         top_output_dir_name=top_output_dir_name)

    num_dates = len(input_file_names_by_date)
    storm_object_table_by_date = [None] * num_dates
    next_id_number = first_id_number + 0

    for i in range(num_dates):
        date_needed_indices = _get_dates_needed_for_renaming_storms(
            working_date_index=i, num_dates_in_period=num_dates)

        storm_object_table_by_date = _shuffle_io_for_renaming(
            input_file_names_by_date=input_file_names_by_date,
            output_file_names_by_date=output_file_names_by_date,
            valid_times_by_date_unix_sec=valid_times_by_date_unix_sec,
            storm_object_table_by_date=storm_object_table_by_date,
            working_date_index=i)

        concat_storm_object_table = pandas.concat(
            [storm_object_table_by_date[j] for j in date_needed_indices],
            axis=0,
            ignore_index=True)

        concat_storm_object_table, next_id_number = _rename_storms_one_table(
            storm_object_table=concat_storm_object_table,
            next_id_number=next_id_number,
            max_dropout_time_seconds=max_dropout_time_seconds,
            working_date_index=i)

        for j in date_needed_indices:
            storm_object_table_by_date[j] = concat_storm_object_table.loc[
                concat_storm_object_table[DATE_INDEX_KEY] == j]

    _shuffle_io_for_renaming(
        input_file_names_by_date=input_file_names_by_date,
        output_file_names_by_date=output_file_names_by_date,
        valid_times_by_date_unix_sec=valid_times_by_date_unix_sec,
        storm_object_table_by_date=storm_object_table_by_date,
        working_date_index=None)
Example #23
def write_predictions(
        pickle_file_name, denorm_recon_radar_matrices, full_storm_id_strings,
        storm_times_unix_sec, mse_by_example, upconvnet_file_name):
    """Writes predictions (reconstructed radar images) to Pickle file.

    E = number of examples

    :param pickle_file_name: Path to output file.
    :param denorm_recon_radar_matrices: 1-D list of denormalized, reconstructed
        radar images.  Each item must be a 4-D or 5-D numpy array where the
        first axis has length E.
    :param full_storm_id_strings: length-E list of storm IDs.
    :param storm_times_unix_sec: length-E numpy array of valid times.
    :param mse_by_example: length-E numpy array of mean squared errors (in
        normalized, not physical, units).
    :param upconvnet_file_name: Path to upconvnet that generated the
        reconstructed images (readable by `cnn.read_model`).
    """

    error_checking.assert_is_string_list(full_storm_id_strings)
    error_checking.assert_is_numpy_array(
        numpy.array(full_storm_id_strings), num_dimensions=1
    )
    num_examples = len(full_storm_id_strings)

    error_checking.assert_is_list(denorm_recon_radar_matrices)

    for this_matrix in denorm_recon_radar_matrices:
        error_checking.assert_is_numpy_array_without_nan(this_matrix)
        this_num_dimensions = len(this_matrix.shape)
        error_checking.assert_is_geq(this_num_dimensions, 4)
        error_checking.assert_is_leq(this_num_dimensions, 5)

        these_expected_dim = numpy.array(
            (num_examples,) + this_matrix.shape[1:], dtype=int
        )
        error_checking.assert_is_numpy_array(
            this_matrix, exact_dimensions=these_expected_dim)

    these_expected_dim = numpy.array([num_examples], dtype=int)

    error_checking.assert_is_integer_numpy_array(storm_times_unix_sec)
    error_checking.assert_is_numpy_array(
        storm_times_unix_sec, exact_dimensions=these_expected_dim)

    error_checking.assert_is_geq_numpy_array(mse_by_example, 0.)
    error_checking.assert_is_numpy_array(
        mse_by_example, exact_dimensions=these_expected_dim)

    error_checking.assert_is_string(upconvnet_file_name)

    prediction_dict = {
        RECON_IMAGE_MATRICES_KEY: denorm_recon_radar_matrices,
        MEAN_SQUARED_ERRORS_KEY: mse_by_example,
        STORM_TIMES_KEY: storm_times_unix_sec,
        FULL_STORM_IDS_KEY: full_storm_id_strings,
        UPCONVNET_FILE_KEY: upconvnet_file_name
    }

    file_system_utils.mkdir_recursive_if_necessary(file_name=pickle_file_name)
    pickle_file_handle = open(pickle_file_name, 'wb')
    pickle.dump(prediction_dict, pickle_file_handle)
    pickle_file_handle.close()
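
# A hedged read-back sketch for the Pickle file written above.  The path is
# illustrative; the dictionary keys (RECON_IMAGE_MATRICES_KEY and friends) are
# constants defined elsewhere in this module.
import pickle

with open('upconvnet_predictions.p', 'rb') as pickle_file_handle:
    prediction_dict = pickle.load(pickle_file_handle)

print(list(prediction_dict.keys()))
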
Example #24
def _run(top_input_dir_name, main_colour_map_name, max_colour_percentile,
         output_dir_name):
    """Plots results of hyperparameter experiment with 3-D GridRad data.

    This is effectively the main method.

    :param top_input_dir_name: See documentation at top of file.
    :param main_colour_map_name: Same.
    :param max_colour_percentile: Same.
    :param output_dir_name: Same.
    """

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    main_colour_map_object = pyplot.get_cmap(main_colour_map_name)
    error_checking.assert_is_geq(max_colour_percentile, 90.)
    error_checking.assert_is_leq(max_colour_percentile, 100.)

    num_dropout_rates = len(DROPOUT_RATES)
    num_l2_weights = len(L2_WEIGHTS)
    num_dense_layer_counts = len(DENSE_LAYER_COUNTS)
    num_data_aug_flags = len(DATA_AUGMENTATION_FLAGS)

    dimensions = (num_dropout_rates, num_l2_weights, num_dense_layer_counts,
                  num_data_aug_flags)

    auc_matrix = numpy.full(dimensions, numpy.nan)
    csi_matrix = numpy.full(dimensions, numpy.nan)
    pod_matrix = numpy.full(dimensions, numpy.nan)
    far_matrix = numpy.full(dimensions, numpy.nan)
    frequency_bias_matrix = numpy.full(dimensions, numpy.nan)

    for i in range(num_dropout_rates):
        for j in range(num_l2_weights):
            for k in range(num_dense_layer_counts):
                for m in range(num_data_aug_flags):
                    this_eval_file_name = (
                        '{0:s}/dropout={1:.3f}_l2={2:.6f}_'
                        'num-dense-layers={3:d}_data-aug={4:d}/validation/'
                        'model_evaluation.p').format(
                            top_input_dir_name, DROPOUT_RATES[i],
                            L2_WEIGHTS[j], DENSE_LAYER_COUNTS[k],
                            int(DATA_AUGMENTATION_FLAGS[m]))

                    if not os.path.isfile(this_eval_file_name):
                        warning_string = (
                            'Cannot find file (this may or may not be a '
                            'PROBLEM).  Expected at: "{0:s}"'
                        ).format(this_eval_file_name)

                        warnings.warn(warning_string)
                        continue

                    print('Reading data from: "{0:s}"...'.format(
                        this_eval_file_name))

                    this_evaluation_table = model_eval.read_evaluation(
                        this_eval_file_name)[model_eval.EVALUATION_TABLE_KEY]

                    auc_matrix[i, j, k, m] = numpy.nanmean(
                        this_evaluation_table[model_eval.AUC_KEY].values)
                    csi_matrix[i, j, k, m] = numpy.nanmean(
                        this_evaluation_table[model_eval.CSI_KEY].values)
                    pod_matrix[i, j, k, m] = numpy.nanmean(
                        this_evaluation_table[model_eval.POD_KEY].values)
                    far_matrix[i, j, k, m] = 1. - numpy.nanmean(
                        this_evaluation_table[
                            model_eval.SUCCESS_RATIO_KEY].values)
                    frequency_bias_matrix[i, j, k, m] = numpy.nanmean(
                        this_evaluation_table[
                            model_eval.FREQUENCY_BIAS_KEY].values)

    print(SEPARATOR_STRING)
    best_model_index = numpy.nanargmax(numpy.ravel(auc_matrix))
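    # Note that this is a flat index into the ravelled 4-D array;
    # numpy.unravel_index(best_model_index, auc_matrix.shape) recovers
    # (i, j, k, m).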

    auc_file_name = '{0:s}/auc.jpg'.format(output_dir_name)
    csi_file_name = '{0:s}/csi.jpg'.format(output_dir_name)
    pod_file_name = '{0:s}/pod.jpg'.format(output_dir_name)
    far_file_name = '{0:s}/far.jpg'.format(output_dir_name)
    bias_file_name = '{0:s}/frequency_bias.jpg'.format(output_dir_name)

    _plot_one_score(
        score_matrix=auc_matrix,
        colour_map_object=main_colour_map_object,
        max_colour_value=numpy.nanpercentile(auc_matrix,
                                             max_colour_percentile),
        min_colour_value=numpy.nanpercentile(auc_matrix,
                                             100. - max_colour_percentile),
        best_model_index=best_model_index,
        is_score_bias=False,
        colour_bar_label='AUC (area under ROC curve)',
        output_file_name=auc_file_name)

    _plot_one_score(
        score_matrix=csi_matrix,
        colour_map_object=main_colour_map_object,
        max_colour_value=numpy.nanpercentile(csi_matrix,
                                             max_colour_percentile),
        min_colour_value=numpy.nanpercentile(csi_matrix,
                                             100. - max_colour_percentile),
        best_model_index=best_model_index,
        is_score_bias=False,
        colour_bar_label='CSI (critical success index)',
        output_file_name=csi_file_name)

    _plot_one_score(
        score_matrix=pod_matrix,
        colour_map_object=main_colour_map_object,
        max_colour_value=numpy.nanpercentile(pod_matrix,
                                             max_colour_percentile),
        min_colour_value=numpy.nanpercentile(pod_matrix,
                                             100. - max_colour_percentile),
        best_model_index=best_model_index,
        is_score_bias=False,
        colour_bar_label='POD (probability of detection)',
        output_file_name=pod_file_name)

    _plot_one_score(
        score_matrix=far_matrix,
        colour_map_object=main_colour_map_object,
        max_colour_value=numpy.nanpercentile(far_matrix,
                                             max_colour_percentile),
        min_colour_value=numpy.nanpercentile(far_matrix,
                                             100. - max_colour_percentile),
        best_model_index=best_model_index,
        is_score_bias=False,
        colour_bar_label='FAR (false-alarm ratio)',
        output_file_name=far_file_name)

    this_offset = numpy.nanpercentile(
        numpy.absolute(frequency_bias_matrix - 1.), max_colour_percentile)
    max_colour_value = 1. + this_offset

    _plot_one_score(score_matrix=frequency_bias_matrix,
                    colour_map_object=main_colour_map_object,
                    min_colour_value=0.,
                    max_colour_value=max_colour_value,
                    best_model_index=best_model_index,
                    is_score_bias=True,
                    colour_bar_label='Frequency bias',
                    output_file_name=bias_file_name)

    main_file_name = '{0:s}/auc_csi.jpg'.format(output_dir_name)
    print('Concatenating figures into: "{0:s}"...'.format(main_file_name))

    imagemagick_utils.concatenate_images(
        input_file_names=[csi_file_name, auc_file_name],
        output_file_name=main_file_name,
        num_panel_rows=1,
        num_panel_columns=2)

    imagemagick_utils.resize_image(input_file_name=main_file_name,
                                   output_file_name=main_file_name,
                                   output_size_pixels=CONCAT_FIGURE_SIZE_PX)

    appendix_file_name = '{0:s}/pod_far_bias.jpg'.format(output_dir_name)
    print('Concatenating figures into: "{0:s}"...'.format(appendix_file_name))

    imagemagick_utils.concatenate_images(
        input_file_names=[pod_file_name, far_file_name, bias_file_name],
        output_file_name=appendix_file_name,
        num_panel_rows=1,
        num_panel_columns=3)

    imagemagick_utils.resize_image(input_file_name=appendix_file_name,
                                   output_file_name=appendix_file_name,
                                   output_size_pixels=CONCAT_FIGURE_SIZE_PX)
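
# A self-contained sketch of the percentile-based colour limits used above,
# with random stand-in data.  Most scores get symmetric percentile limits;
# frequency bias gets limits centred on its perfect value of 1.
import numpy

score_matrix = numpy.random.rand(3, 4)
max_colour_percentile = 95.

min_colour_value = numpy.nanpercentile(
    score_matrix, 100. - max_colour_percentile)
max_colour_value = numpy.nanpercentile(score_matrix, max_colour_percentile)

bias_matrix = 0.5 + numpy.random.rand(3, 4)
this_offset = numpy.nanpercentile(
    numpy.absolute(bias_matrix - 1.), max_colour_percentile)
bias_colour_limits = (0., 1. + this_offset)
print(min_colour_value, max_colour_value, bias_colour_limits)
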
def _check_architecture_args(option_dict):
    """Error-checks input args for CNN architecture.

    :param option_dict: See doc for `create_model`.
    :return: option_dict: Same as input, except defaults may have been added.
    """

    orig_option_dict = option_dict.copy()
    option_dict = DEFAULT_ARCHITECTURE_OPTION_DICT.copy()
    option_dict.update(orig_option_dict)
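    # At this point, any missing options have been filled with defaults, while
    # user-specified values take precedence.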

    error_checking.assert_is_integer(option_dict[NUM_HEIGHTS_KEY])
    error_checking.assert_is_geq(option_dict[NUM_HEIGHTS_KEY], 10)
    error_checking.assert_is_integer(option_dict[NUM_INPUT_CHANNELS_KEY])
    error_checking.assert_is_geq(option_dict[NUM_INPUT_CHANNELS_KEY], 1)

    conv_layer_channel_nums = option_dict[CONV_LAYER_CHANNEL_NUMS_KEY]
    error_checking.assert_is_integer_numpy_array(conv_layer_channel_nums)
    error_checking.assert_is_numpy_array(conv_layer_channel_nums,
                                         num_dimensions=1)
    error_checking.assert_is_geq_numpy_array(conv_layer_channel_nums, 1)

    num_conv_layers = len(conv_layer_channel_nums)
    these_dimensions = numpy.array([num_conv_layers], dtype=int)

    conv_layer_dropout_rates = option_dict[CONV_LAYER_DROPOUT_RATES_KEY]
    error_checking.assert_is_numpy_array(conv_layer_dropout_rates,
                                         exact_dimensions=these_dimensions)
    error_checking.assert_is_leq_numpy_array(conv_layer_dropout_rates,
                                             1.,
                                             allow_nan=True)

    conv_layer_filter_sizes = option_dict[CONV_LAYER_FILTER_SIZES_KEY]
    error_checking.assert_is_integer_numpy_array(conv_layer_filter_sizes)
    error_checking.assert_is_numpy_array(conv_layer_filter_sizes,
                                         exact_dimensions=these_dimensions)
    error_checking.assert_is_geq_numpy_array(conv_layer_filter_sizes, 3)

    # Make sure filter sizes are odd.
    these_filter_sizes = (
        2 * numpy.floor(conv_layer_filter_sizes.astype(float) / 2) +
        1).astype(int)
    assert numpy.array_equal(these_filter_sizes, conv_layer_filter_sizes)

    dense_layer_neuron_nums = option_dict[DENSE_LAYER_NEURON_NUMS_KEY]
    dense_layer_dropout_rates = option_dict[DENSE_LAYER_DROPOUT_RATES_KEY]
    has_dense_layers = not (dense_layer_neuron_nums is None
                            and dense_layer_dropout_rates is None)

    if has_dense_layers:
        error_checking.assert_is_integer_numpy_array(dense_layer_neuron_nums)
        error_checking.assert_is_numpy_array(dense_layer_neuron_nums,
                                             num_dimensions=1)
        error_checking.assert_is_geq_numpy_array(dense_layer_neuron_nums, 1)

        num_dense_layers = len(dense_layer_neuron_nums)
        these_dimensions = numpy.array([num_dense_layers], dtype=int)

        error_checking.assert_is_numpy_array(dense_layer_dropout_rates,
                                             exact_dimensions=these_dimensions)
        error_checking.assert_is_leq_numpy_array(dense_layer_dropout_rates,
                                                 1.,
                                                 allow_nan=True)

    error_checking.assert_is_geq(option_dict[L1_WEIGHT_KEY], 0.)
    error_checking.assert_is_geq(option_dict[L2_WEIGHT_KEY], 0.)
    error_checking.assert_is_boolean(option_dict[USE_BATCH_NORM_KEY])
    error_checking.assert_is_boolean(option_dict[ZERO_OUT_TOP_HR_KEY])

    if option_dict[ZERO_OUT_TOP_HR_KEY]:
        error_checking.assert_is_integer(option_dict[HEATING_RATE_INDEX_KEY])
        error_checking.assert_is_geq(option_dict[HEATING_RATE_INDEX_KEY], 0)

    return option_dict
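
# A self-contained demo of the odd-filter-size check above: rounding each size
# down to the nearest even number and adding 1 reproduces the input only if
# every size is odd (an even size would fail, e.g. 4 -> 5).
import numpy

conv_layer_filter_sizes = numpy.array([3, 5, 7], dtype=int)
these_filter_sizes = (
    2 * numpy.floor(conv_layer_filter_sizes.astype(float) / 2) + 1
).astype(int)
assert numpy.array_equal(these_filter_sizes, conv_layer_filter_sizes)
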
Example #26
def _run(top_storm_image_dir_name, radar_source, num_radar_dimensions,
         radar_field_names, radar_heights_m_agl, first_spc_date_string,
         last_spc_date_string, top_target_dir_name, target_names,
         top_sounding_dir_name, sounding_lag_time_sec, num_examples_per_in_file,
         top_output_dir_name, target_name_for_downsampling,
         downsampling_classes, downsampling_fractions):
    """Creates input examples and writes them to unshuffled files.

    This is effectively the main method.

    :param top_storm_image_dir_name: See documentation at top of file.
    :param radar_source: Same.
    :param num_radar_dimensions: Same.
    :param radar_field_names: Same.
    :param radar_heights_m_agl: Same.
    :param first_spc_date_string: Same.
    :param last_spc_date_string: Same.
    :param top_target_dir_name: Same.
    :param target_names: Same.
    :param top_sounding_dir_name: Same.
    :param sounding_lag_time_sec: Same.
    :param num_examples_per_in_file: Same.
    :param top_output_dir_name: Same.
    :param target_name_for_downsampling: Same.
    :param downsampling_classes: Same.
    :param downsampling_fractions: Same.
    """

    downsample = target_name_for_downsampling not in ['', 'None']

    if downsample:
        downsampling_dict = dict(zip(
            downsampling_classes, downsampling_fractions
        ))
    else:
        downsampling_dict = None

    include_soundings = top_sounding_dir_name != ''
    radar_file_name_matrix = None
    az_shear_file_name_matrix = None
    reflectivity_file_name_matrix = None

    if num_radar_dimensions < 0:
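        # A negative number of radar dimensions is apparently a sentinel for
        # the hybrid MYRORSS case (2-D azimuthal shear + 3-D reflectivity).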
        az_shear_file_name_matrix, reflectivity_file_name_matrix = (
            input_examples.find_storm_images_2d3d_myrorss(
                top_directory_name=top_storm_image_dir_name,
                first_spc_date_string=first_spc_date_string,
                last_spc_date_string=last_spc_date_string,
                reflectivity_heights_m_agl=radar_heights_m_agl)
        )

        main_file_name_matrix = copy.deepcopy(reflectivity_file_name_matrix)
    else:
        error_checking.assert_is_geq(num_radar_dimensions, 2)
        error_checking.assert_is_leq(num_radar_dimensions, 3)

        if num_radar_dimensions == 2:
            radar_file_name_matrix = input_examples.find_storm_images_2d(
                top_directory_name=top_storm_image_dir_name,
                radar_source=radar_source, radar_field_names=radar_field_names,
                reflectivity_heights_m_agl=radar_heights_m_agl,
                first_spc_date_string=first_spc_date_string,
                last_spc_date_string=last_spc_date_string)
        else:
            radar_file_name_matrix = input_examples.find_storm_images_3d(
                top_directory_name=top_storm_image_dir_name,
                radar_source=radar_source, radar_field_names=radar_field_names,
                radar_heights_m_agl=radar_heights_m_agl,
                first_spc_date_string=first_spc_date_string,
                last_spc_date_string=last_spc_date_string)

        main_file_name_matrix = copy.deepcopy(radar_file_name_matrix)

    target_file_names = input_examples.find_target_files(
        top_target_dir_name=top_target_dir_name,
        radar_file_name_matrix=main_file_name_matrix, target_names=target_names)

    if include_soundings:
        sounding_file_names = input_examples.find_sounding_files(
            top_sounding_dir_name=top_sounding_dir_name,
            radar_file_name_matrix=main_file_name_matrix,
            target_names=target_names,
            lag_time_for_convective_contamination_sec=sounding_lag_time_sec)
    else:
        sounding_file_names = None

    input_examples.create_examples(
        target_file_names=target_file_names, target_names=target_names,
        num_examples_per_in_file=num_examples_per_in_file,
        top_output_dir_name=top_output_dir_name,
        radar_file_name_matrix=radar_file_name_matrix,
        reflectivity_file_name_matrix=reflectivity_file_name_matrix,
        az_shear_file_name_matrix=az_shear_file_name_matrix,
        downsampling_dict=downsampling_dict,
        target_name_for_downsampling=target_name_for_downsampling,
        sounding_file_names=sounding_file_names)
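
# The downsampling spec built above is just a class-to-fraction mapping.  A
# minimal sketch with illustrative values (exact semantics are defined by
# input_examples.create_examples):
downsampling_classes = [0, 1]
downsampling_fractions = [0.9, 0.1]
downsampling_dict = dict(zip(downsampling_classes, downsampling_fractions))
print(downsampling_dict)  # {0: 0.9, 1: 0.1}
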
def _read_examples(top_example_dir_name, first_time_string, last_time_string,
                   num_times, num_examples_per_time, model_metadata_dict):
    """Reads learning examples.

    These and the trained model are the main inputs to the permutation test.

    :param top_example_dir_name: See documentation at top of file.
    :param first_time_string: Same.
    :param last_time_string: Same.
    :param num_times: Same.
    :param num_examples_per_time: Same.
    :param model_metadata_dict: Dictionary with metadata for trained model
        (created by `traditional_cnn.read_model_metadata`).
    :return: predictor_matrix: E-by-M-by-N-by-C numpy array of predictor values
        (images).
    :return: target_values: length-E numpy array of target values (integer
        class labels).
    """

    error_checking.assert_is_greater(num_times, 0)
    error_checking.assert_is_geq(num_examples_per_time, 10)

    first_time_unix_sec = time_conversion.string_to_unix_sec(
        first_time_string, INPUT_TIME_FORMAT)
    last_time_unix_sec = time_conversion.string_to_unix_sec(
        last_time_string, INPUT_TIME_FORMAT)

    example_file_names = trainval_io.find_downsized_3d_example_files(
        top_directory_name=top_example_dir_name,
        shuffled=False,
        first_target_time_unix_sec=first_time_unix_sec,
        last_target_time_unix_sec=last_time_unix_sec)

    num_times = min([num_times, len(example_file_names)])
    random.shuffle(example_file_names)
    example_file_names = example_file_names[:num_times]

    predictor_matrix = None
    target_matrix = None

    for i in range(num_times):
        print('Reading data from: "{0:s}"...'.format(example_file_names[i]))

        this_example_dict = trainval_io.read_downsized_3d_examples(
            netcdf_file_name=example_file_names[i],
            predictor_names_to_keep=model_metadata_dict[
                traditional_cnn.NARR_PREDICTOR_NAMES_KEY],
            num_half_rows_to_keep=model_metadata_dict[
                traditional_cnn.NUM_ROWS_IN_HALF_GRID_KEY],
            num_half_columns_to_keep=model_metadata_dict[
                traditional_cnn.NUM_COLUMNS_IN_HALF_GRID_KEY],
            first_time_to_keep_unix_sec=first_time_unix_sec,
            last_time_to_keep_unix_sec=last_time_unix_sec)

        this_num_examples_total = this_example_dict[
            trainval_io.PREDICTOR_MATRIX_KEY].shape[0]
        this_num_examples_to_keep = min(
            [num_examples_per_time, this_num_examples_total])

        these_example_indices = numpy.arange(this_num_examples_total)
        these_example_indices = numpy.random.choice(
            these_example_indices,
            size=this_num_examples_to_keep,
            replace=False)

        this_predictor_matrix = this_example_dict[
            trainval_io.PREDICTOR_MATRIX_KEY][these_example_indices, ...]
        this_target_matrix = this_example_dict[trainval_io.TARGET_MATRIX_KEY][
            these_example_indices, ...]

        if predictor_matrix is None:
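            # Adding 0 forces a copy, so the accumulated arrays do not alias
            # the per-file example dictionaries.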
            predictor_matrix = this_predictor_matrix + 0.
            target_matrix = this_target_matrix + 0
        else:
            predictor_matrix = numpy.concatenate(
                (predictor_matrix, this_predictor_matrix), axis=0)
            target_matrix = numpy.concatenate(
                (target_matrix, this_target_matrix), axis=0)

        num_examples_by_class = numpy.sum(target_matrix, axis=0)
        print('Number of examples in each class: {0:s}\n'.format(
            str(num_examples_by_class)))

    return predictor_matrix, numpy.argmax(target_matrix, axis=1)
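
# A self-contained demo of the one-hot bookkeeping above: summing a one-hot
# target matrix over axis 0 counts examples per class, while argmax over
# axis 1 recovers integer class labels.
import numpy

target_matrix = numpy.array([[1, 0, 0],
                             [0, 1, 0],
                             [0, 1, 0],
                             [0, 0, 1]])
print(numpy.sum(target_matrix, axis=0))     # [1 2 1]
print(numpy.argmax(target_matrix, axis=1))  # [0 1 1 2]
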
def plot_one_storm_cell_to_winds(
        storm_to_winds_table, storm_id, basemap_object=None, axes_object=None,
        storm_colour=storm_plotting.DEFAULT_TRACK_COLOUR,
        storm_line_width=storm_plotting.DEFAULT_TRACK_WIDTH,
        wind_barb_length=wind_plotting.DEFAULT_BARB_LENGTH,
        empty_wind_barb_radius=wind_plotting.DEFAULT_EMPTY_BARB_RADIUS,
        fill_empty_wind_barb=wind_plotting.FILL_EMPTY_BARB_DEFAULT,
        wind_colour_map=wind_plotting.DEFAULT_COLOUR_MAP,
        colour_minimum_kt=wind_plotting.DEFAULT_COLOUR_MINIMUM_KT,
        colour_maximum_kt=wind_plotting.DEFAULT_COLOUR_MAXIMUM_KT):
    """Plots wind observations linked to one storm cell.

    :param storm_to_winds_table: pandas DataFrame with columns documented in
        `link_storms_to_winds.write_storm_to_winds_table`.
    :param storm_id: String ID for storm cell.  Only this storm cell and wind
        observations linked thereto will be plotted.
    :param basemap_object: Instance of `mpl_toolkits.basemap.Basemap`.
    :param axes_object: Instance of `matplotlib.axes._subplots.AxesSubplot`.
    :param storm_colour: Colour for storm track, first storm object, and last
        storm object (in any format accepted by `matplotlib.colors`).
    :param storm_line_width: Line width for storm track, first storm object, and
        last storm object (real positive number).
    :param wind_barb_length: Length of each wind barb.
    :param empty_wind_barb_radius: Radius of circle for 0-metre-per-second wind
        barb.
    :param fill_empty_wind_barb: Boolean flag.  If True, the
        0-metre-per-second wind barb will be a filled circle; otherwise, an
        empty circle.
    :param wind_colour_map: Instance of `matplotlib.pyplot.cm`.
    :param colour_minimum_kt: Minimum speed for colour map (knots, i.e.,
        nautical miles per hour).
    :param colour_maximum_kt: Maximum speed for colour map (knots, i.e.,
        nautical miles per hour).
    """

    error_checking.assert_is_string(storm_id)
    error_checking.assert_is_geq(colour_minimum_kt, 0.)
    error_checking.assert_is_greater(colour_maximum_kt, colour_minimum_kt)

    storm_cell_flags = [this_id == storm_id for this_id in storm_to_winds_table[
        tracking_io.STORM_ID_COLUMN].values]
    storm_cell_rows = numpy.where(numpy.array(storm_cell_flags))[0]

    centroid_latitudes_deg = storm_to_winds_table[
        tracking_io.CENTROID_LAT_COLUMN].values[storm_cell_rows]
    centroid_longitudes_deg = storm_to_winds_table[
        tracking_io.CENTROID_LNG_COLUMN].values[storm_cell_rows]

    storm_plotting.plot_storm_track(
        basemap_object=basemap_object, axes_object=axes_object,
        latitudes_deg=centroid_latitudes_deg,
        longitudes_deg=centroid_longitudes_deg, line_colour=storm_colour,
        line_width=storm_line_width)

    storm_times_unix_sec = storm_to_winds_table[tracking_io.TIME_COLUMN].values[
        storm_cell_rows]
    first_storm_object_row = storm_cell_rows[numpy.argmin(storm_times_unix_sec)]
    last_storm_object_row = storm_cell_rows[numpy.argmax(storm_times_unix_sec)]

    first_vertex_dict = polygons.polygon_object_to_vertex_arrays(
        storm_to_winds_table[tracking_io.POLYGON_OBJECT_LATLNG_COLUMN].values[
            first_storm_object_row])
    first_vertex_latitudes_deg = first_vertex_dict[polygons.EXTERIOR_Y_COLUMN]
    first_vertex_longitudes_deg = first_vertex_dict[polygons.EXTERIOR_X_COLUMN]

    storm_plotting.plot_unfilled_polygon(
        basemap_object=basemap_object, axes_object=axes_object,
        vertex_latitudes_deg=first_vertex_latitudes_deg,
        vertex_longitudes_deg=first_vertex_longitudes_deg,
        exterior_colour=storm_colour, exterior_line_width=storm_line_width)

    last_vertex_dict = polygons.polygon_object_to_vertex_arrays(
        storm_to_winds_table[tracking_io.POLYGON_OBJECT_LATLNG_COLUMN].values[
            last_storm_object_row])
    last_vertex_latitudes_deg = last_vertex_dict[polygons.EXTERIOR_Y_COLUMN]
    last_vertex_longitudes_deg = last_vertex_dict[polygons.EXTERIOR_X_COLUMN]

    storm_plotting.plot_unfilled_polygon(
        basemap_object=basemap_object, axes_object=axes_object,
        vertex_latitudes_deg=last_vertex_latitudes_deg,
        vertex_longitudes_deg=last_vertex_longitudes_deg,
        exterior_colour=storm_colour, exterior_line_width=storm_line_width)

    wind_latitudes_deg = numpy.array([])
    wind_longitudes_deg = numpy.array([])
    u_winds_m_s01 = numpy.array([])
    v_winds_m_s01 = numpy.array([])

    for this_row in storm_cell_rows:
        wind_latitudes_deg = numpy.concatenate((
            wind_latitudes_deg, storm_to_winds_table[
                storms_to_winds.WIND_LATITUDES_COLUMN].values[this_row]))
        wind_longitudes_deg = numpy.concatenate((
            wind_longitudes_deg, storm_to_winds_table[
                storms_to_winds.WIND_LONGITUDES_COLUMN].values[this_row]))
        u_winds_m_s01 = numpy.concatenate((
            u_winds_m_s01, storm_to_winds_table[
                storms_to_winds.U_WINDS_COLUMN].values[this_row]))
        v_winds_m_s01 = numpy.concatenate((
            v_winds_m_s01, storm_to_winds_table[
                storms_to_winds.V_WINDS_COLUMN].values[this_row]))

    wind_plotting.plot_wind_barbs(
        basemap_object=basemap_object, axes_object=axes_object,
        latitudes_deg=wind_latitudes_deg, longitudes_deg=wind_longitudes_deg,
        u_winds_m_s01=u_winds_m_s01, v_winds_m_s01=v_winds_m_s01,
        barb_length=wind_barb_length, empty_barb_radius=empty_wind_barb_radius,
        fill_empty_barb=fill_empty_wind_barb, colour_map=wind_colour_map,
        colour_minimum_kt=colour_minimum_kt,
        colour_maximum_kt=colour_maximum_kt)
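
# A toy sketch of the accumulation pattern above: per-row arrays of linked
# wind observations are concatenated into flat arrays before plotting.
import numpy

per_row_latitudes = [numpy.array([40.1, 40.2]), numpy.array([40.3])]
wind_latitudes_deg = numpy.array([])
for this_array in per_row_latitudes:
    wind_latitudes_deg = numpy.concatenate((wind_latitudes_deg, this_array))
print(wind_latitudes_deg)  # [40.1 40.2 40.3]
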
Example #29
def _run(input_file_name, num_zenith_angle_bins, num_albedo_bins,
         output_dir_name):
    """Splits predictions by time of day and time of year.

    This is effectively the main method.

    :param input_file_name: See documentation at top of file.
    :param num_zenith_angle_bins: Same.
    :param num_albedo_bins: Same.
    :param output_dir_name: Same.
    """

    # Process input args.
    error_checking.assert_is_geq(num_zenith_angle_bins, 3)
    error_checking.assert_is_geq(num_albedo_bins, 3)

    edge_zenith_angles_rad = numpy.linspace(MIN_ZENITH_ANGLE_RAD,
                                            MAX_ZENITH_ANGLE_RAD,
                                            num=num_zenith_angle_bins + 1,
                                            dtype=float)
    min_zenith_angles_rad = edge_zenith_angles_rad[:-1]
    max_zenith_angles_rad = edge_zenith_angles_rad[1:]

    edge_albedos = numpy.linspace(0, 1, num=num_albedo_bins + 1, dtype=float)
    min_albedos = edge_albedos[:-1]
    max_albedos = edge_albedos[1:]

    # Read data.
    print('Reading data from: "{0:s}"...\n'.format(input_file_name))
    prediction_dict = prediction_io.read_file(input_file_name)

    # Split by solar zenith angle.
    for k in range(num_zenith_angle_bins):
        this_prediction_dict = prediction_io.subset_by_zenith_angle(
            prediction_dict=copy.deepcopy(prediction_dict),
            min_zenith_angle_rad=min_zenith_angles_rad[k],
            max_zenith_angle_rad=max_zenith_angles_rad[k])

        this_output_file_name = prediction_io.find_file(
            directory_name=output_dir_name,
            zenith_angle_bin=k,
            raise_error_if_missing=False)
        print((
            'Writing {0:d} examples (with zenith angles {1:.4f}...{2:.4f} rad) '
            'to: "{3:s}"...').format(
                len(this_prediction_dict[prediction_io.EXAMPLE_IDS_KEY]),
                min_zenith_angles_rad[k], max_zenith_angles_rad[k],
                this_output_file_name))

        prediction_io.write_file(
            netcdf_file_name=this_output_file_name,
            scalar_target_matrix=this_prediction_dict[
                prediction_io.SCALAR_TARGETS_KEY],
            vector_target_matrix=this_prediction_dict[
                prediction_io.VECTOR_TARGETS_KEY],
            scalar_prediction_matrix=this_prediction_dict[
                prediction_io.SCALAR_PREDICTIONS_KEY],
            vector_prediction_matrix=this_prediction_dict[
                prediction_io.VECTOR_PREDICTIONS_KEY],
            heights_m_agl=this_prediction_dict[prediction_io.HEIGHTS_KEY],
            example_id_strings=this_prediction_dict[
                prediction_io.EXAMPLE_IDS_KEY],
            model_file_name=this_prediction_dict[prediction_io.MODEL_FILE_KEY])

    print('\n')

    # Split by albedo.
    for k in range(num_albedo_bins):
        this_prediction_dict = prediction_io.subset_by_albedo(
            prediction_dict=copy.deepcopy(prediction_dict),
            min_albedo=min_albedos[k],
            max_albedo=max_albedos[k])

        this_output_file_name = prediction_io.find_file(
            directory_name=output_dir_name,
            albedo_bin=k,
            raise_error_if_missing=False)
        print(('Writing {0:d} examples (with albedos {1:.4f}...{2:.4f}) '
               'to: "{3:s}"...').format(
                   len(this_prediction_dict[prediction_io.EXAMPLE_IDS_KEY]),
                   min_albedos[k], max_albedos[k], this_output_file_name))

        prediction_io.write_file(
            netcdf_file_name=this_output_file_name,
            scalar_target_matrix=this_prediction_dict[
                prediction_io.SCALAR_TARGETS_KEY],
            vector_target_matrix=this_prediction_dict[
                prediction_io.VECTOR_TARGETS_KEY],
            scalar_prediction_matrix=this_prediction_dict[
                prediction_io.SCALAR_PREDICTIONS_KEY],
            vector_prediction_matrix=this_prediction_dict[
                prediction_io.VECTOR_PREDICTIONS_KEY],
            heights_m_agl=this_prediction_dict[prediction_io.HEIGHTS_KEY],
            example_id_strings=this_prediction_dict[
                prediction_io.EXAMPLE_IDS_KEY],
            model_file_name=this_prediction_dict[prediction_io.MODEL_FILE_KEY])

    print('\n')

    # Split by month.
    for k in range(1, 13):
        this_prediction_dict = prediction_io.subset_by_month(
            prediction_dict=copy.deepcopy(prediction_dict), desired_month=k)

        this_output_file_name = prediction_io.find_file(
            directory_name=output_dir_name,
            month=k,
            raise_error_if_missing=False)
        print('Writing {0:d} examples to: "{1:s}"...'.format(
            len(this_prediction_dict[prediction_io.EXAMPLE_IDS_KEY]),
            this_output_file_name))

        prediction_io.write_file(
            netcdf_file_name=this_output_file_name,
            scalar_target_matrix=this_prediction_dict[
                prediction_io.SCALAR_TARGETS_KEY],
            vector_target_matrix=this_prediction_dict[
                prediction_io.VECTOR_TARGETS_KEY],
            scalar_prediction_matrix=this_prediction_dict[
                prediction_io.SCALAR_PREDICTIONS_KEY],
            vector_prediction_matrix=this_prediction_dict[
                prediction_io.VECTOR_PREDICTIONS_KEY],
            heights_m_agl=this_prediction_dict[prediction_io.HEIGHTS_KEY],
            example_id_strings=this_prediction_dict[
                prediction_io.EXAMPLE_IDS_KEY],
            model_file_name=this_prediction_dict[prediction_io.MODEL_FILE_KEY])
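
# A self-contained demo of the binning scheme above: numpy.linspace yields
# num_bins + 1 edges, and consecutive edge pairs are the per-bin limits.
import numpy

num_albedo_bins = 4
edge_albedos = numpy.linspace(0, 1, num=num_albedo_bins + 1, dtype=float)
min_albedos = edge_albedos[:-1]  # [0.   0.25 0.5  0.75]
max_albedos = edge_albedos[1:]   # [0.25 0.5  0.75 1.  ]
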
Example #30
def train_model(
        model_object, training_feature_matrix, training_target_values,
        num_iters_for_early_stopping=None, validation_feature_matrix=None,
        validation_target_values=None):
    """Trains GBT model for classification.

    T = number of training examples
    V = number of validation examples
    Z = number of features (input variables)

    :param model_object: Instance of `xgboost.XGBClassifier`.  The easiest way
        to create one is to use `create_model`.
    :param training_feature_matrix: T-by-Z numpy array of features for training.
    :param training_target_values: length-T integer numpy array of target values
        for training.  If target_values[i] = k, the [i]th example (storm object)
        belongs to the [k]th class.
    :param num_iters_for_early_stopping: Number of iterations for early
        stopping.  If validation error has not improved with the last
        `num_iters_for_early_stopping` trees added, training will be stopped.
        If you don't want on-the-fly validation, leave this argument as None.
    :param validation_feature_matrix:
        [used only if `num_iters_for_early_stopping is not None`]
        V-by-Z numpy array of features for validation.
    :param validation_target_values:
        [used only if `num_iters_for_early_stopping is not None`]
        Same as `training_target_values`, but length is V rather than T.
    """

    error_checking.assert_is_numpy_array(
        training_feature_matrix, num_dimensions=2)
    num_training_examples = training_feature_matrix.shape[0]
    num_features = training_feature_matrix.shape[1]

    error_checking.assert_is_integer_numpy_array(training_target_values)
    error_checking.assert_is_numpy_array(
        training_target_values,
        exact_dimensions=numpy.array([num_training_examples]))
    error_checking.assert_is_geq_numpy_array(training_target_values, 0)

    if num_iters_for_early_stopping is None:
        model_object.fit(
            training_feature_matrix, training_target_values,
            eval_metric='logloss', verbose=True)
    else:
        error_checking.assert_is_integer(num_iters_for_early_stopping)
        error_checking.assert_is_geq(num_iters_for_early_stopping, 1)

        error_checking.assert_is_numpy_array(
            validation_feature_matrix, num_dimensions=2)
        num_validation_examples = validation_feature_matrix.shape[0]
        error_checking.assert_is_numpy_array(
            validation_feature_matrix,
            exact_dimensions=numpy.array(
                [num_validation_examples, num_features]))

        error_checking.assert_is_integer_numpy_array(validation_target_values)
        error_checking.assert_is_numpy_array(
            validation_target_values,
            exact_dimensions=numpy.array([num_validation_examples]))
        error_checking.assert_is_geq_numpy_array(validation_target_values, 0)

        model_object.fit(
            training_feature_matrix, training_target_values,
            eval_metric='logloss', verbose=True,
            early_stopping_rounds=num_iters_for_early_stopping,
            eval_set=[(validation_feature_matrix, validation_target_values)])
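
# A hedged usage sketch for the early-stopping path above, with synthetic
# data.  It assumes an xgboost version whose fit() still accepts eval_metric
# and early_stopping_rounds, as the call above does (newer versions move
# these to the XGBClassifier constructor).
import numpy
import xgboost

numpy.random.seed(0)
training_feature_matrix = numpy.random.rand(200, 6)
training_target_values = numpy.random.randint(0, 2, size=200)
validation_feature_matrix = numpy.random.rand(50, 6)
validation_target_values = numpy.random.randint(0, 2, size=50)

model_object = xgboost.XGBClassifier(n_estimators=500)
model_object.fit(
    training_feature_matrix, training_target_values,
    eval_metric='logloss', verbose=True,
    early_stopping_rounds=10,
    eval_set=[(validation_feature_matrix, validation_target_values)])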