Esempio n. 1
0
def _convert_one_file(input_file_name, output_file_name,
                      num_examples_per_batch):
    """Converts all examples in one file from MYRORSS to GridRad format.

    Only metadata (storm IDs and times) are read here.  The examples themselves
    are converted batch by batch in `_convert_one_file_selected_examples`; the
    first batch creates the output file and every later batch appends to it.

    :param input_file_name: Path to input file (with MYRORSS examples).  Will be
        read by `input_examples.read_example_file`.
    :param output_file_name: Path to output file (with the same examples but in
        GridRad format).  Will be written by
        `input_examples.write_example_file`.
    :param num_examples_per_batch: See documentation at top of file.
    """

    print('Reading metadata from: "{0:s}"...'.format(input_file_name))
    metadata_dict = input_examples.read_example_file(
        netcdf_file_name=input_file_name, read_all_target_vars=True,
        metadata_only=True
    )

    all_id_strings = metadata_dict[input_examples.FULL_IDS_KEY]
    all_times_unix_sec = metadata_dict[input_examples.STORM_TIMES_KEY]
    num_examples = len(all_id_strings)

    for batch_start in range(0, num_examples, num_examples_per_batch):
        # Half-open range [batch_start, batch_end); the last batch may be
        # shorter than `num_examples_per_batch`.
        batch_end = min(batch_start + num_examples_per_batch, num_examples)
        is_first_batch = batch_start == 0

        _convert_one_file_selected_examples(
            input_file_name=input_file_name,
            output_file_name=output_file_name,
            full_storm_id_strings=all_id_strings[batch_start:batch_end],
            storm_times_unix_sec=all_times_unix_sec[batch_start:batch_end],
            append_to_file=not is_first_batch
        )
def _find_input_files(
        top_input_dir_name, first_spc_date_string, last_spc_date_string):
    """Finds input files (containing unshuffled examples).

    Each file found is opened in metadata-only mode, so that the total number
    of examples can be counted without reading the predictor data.

    :param top_input_dir_name: See documentation at top of file.
    :param first_spc_date_string: Same.
    :param last_spc_date_string: Same.
    :return: input_example_file_names: 1-D list of paths to input files.
    :return: num_input_examples: Total number of examples in these files.
    """

    input_example_file_names = input_examples.find_many_example_files(
        top_directory_name=top_input_dir_name, shuffled=False,
        first_spc_date_string=first_spc_date_string,
        last_spc_date_string=last_spc_date_string,
        raise_error_if_any_missing=False)

    num_input_examples = 0

    for this_file_name in input_example_file_names:
        # Call form of `print` (single argument) behaves the same on Python 2
        # and Python 3; the statement form is a syntax error on Python 3.
        print('Reading data from: "{0:s}"...'.format(this_file_name))

        this_example_dict = input_examples.read_example_file(
            netcdf_file_name=this_file_name, metadata_only=True)

        num_input_examples += len(
            this_example_dict[input_examples.STORM_IDS_KEY])

    return input_example_file_names, num_input_examples
Esempio n. 3
0
def _shuffle_one_input_file(input_example_file_name, radar_field_names,
                            num_examples_per_out_chunk,
                            output_example_file_names):
    """Shuffles examples from one input file to many output files.

    The examples are first put in random order.  They are then cut into
    consecutive chunks, and each chunk is written to an output file picked at
    random from `output_example_file_names` (appending if the file already
    exists on disk).

    :param input_example_file_name: Path to input file.
    :param radar_field_names: See documentation at top of file.
    :param num_examples_per_out_chunk: Same.
    :param output_example_file_names: 1-D list of paths to output files.
    """

    print('Reading data from: "{0:s}"...'.format(input_example_file_name))
    full_example_dict = input_examples.read_example_file(
        netcdf_file_name=input_example_file_name, read_all_target_vars=True,
        radar_field_names_to_keep=radar_field_names
    )

    num_examples = len(full_example_dict[input_examples.FULL_IDS_KEY])

    # Random permutation of 0...(num_examples - 1).
    random_order = numpy.arange(num_examples, dtype=int)
    numpy.random.shuffle(random_order)

    full_example_dict = input_examples.subset_examples(
        example_dict=full_example_dict, indices_to_keep=random_order
    )

    for chunk_start in range(0, num_examples, num_examples_per_out_chunk):
        # Half-open range [chunk_start, chunk_end); last chunk may be short.
        chunk_end = min(chunk_start + num_examples_per_out_chunk, num_examples)
        chunk_indices = numpy.arange(chunk_start, chunk_end, dtype=int)

        chunk_example_dict = input_examples.subset_examples(
            example_dict=full_example_dict, indices_to_keep=chunk_indices,
            create_new_dict=True
        )

        chosen_file_name = random.choice(output_example_file_names)
        print('Writing shuffled examples to: "{0:s}"...'.format(
            chosen_file_name
        ))

        # Append only if an earlier chunk (or earlier run) created the file.
        file_already_exists = os.path.isfile(chosen_file_name)

        input_examples.write_example_file(
            netcdf_file_name=chosen_file_name,
            example_dict=chunk_example_dict,
            append_to_file=file_already_exists
        )
Esempio n. 4
0
def _check_training_args(model_file_name, history_file_name,
                         tensorboard_dir_name, num_epochs,
                         num_training_batches_per_epoch,
                         num_validation_batches_per_epoch,
                         training_option_dict, weight_loss_function):
    """Error-checks input arguments for training.

    Besides validating the arguments, this method creates the output
    directories (if necessary) and, when requested, turns the sampling
    fractions in the training options into class weights for the loss function.

    :param model_file_name: Path to output file (HDF5 format).  The model will
        be saved here after each epoch.
    :param history_file_name: Path to output file (CSV format).  Training
        history (performance metrics) will be saved here after each epoch.
    :param tensorboard_dir_name: Path to output directory for TensorBoard log
        files.
    :param num_epochs: Number of epochs.
    :param num_training_batches_per_epoch: Number of training batches in each
        epoch.
    :param num_validation_batches_per_epoch: Number of validation batches in
        each epoch.
    :param training_option_dict: See doc for
        `training_validation_io.example_generator_2d_or_3d`.
    :param weight_loss_function: Boolean flag.  If False, classes will be
        weighted equally in the loss function.  If True, classes will be
        weighted differently (inversely proportional to their sampling
        fractions).
    :return: class_to_weight_dict: Dictionary, where each key is the integer ID
        for a target class (-2 for "dead storm") and each value is the weight
        for the loss function.  If None, classes will be equally weighted in the
        loss function.
    """

    # Fill in defaults for any option the caller did not specify.  The caller's
    # dictionary is not modified.
    user_option_dict = training_option_dict.copy()
    full_option_dict = trainval_io.DEFAULT_OPTION_DICT.copy()
    full_option_dict.update(user_option_dict)

    for this_file_name in (model_file_name, history_file_name):
        file_system_utils.mkdir_recursive_if_necessary(
            file_name=this_file_name
        )

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=tensorboard_dir_name
    )

    # Each count must be an integer no smaller than the paired minimum.
    counts_and_minima = (
        (num_epochs, 1),
        (num_training_batches_per_epoch, 1),
        (num_validation_batches_per_epoch, 0)
    )

    for this_count, this_minimum in counts_and_minima:
        error_checking.assert_is_integer(this_count)
        error_checking.assert_is_geq(this_count, this_minimum)

    error_checking.assert_is_boolean(weight_loss_function)

    if weight_loss_function:
        sampling_fraction_dict = full_option_dict[
            trainval_io.SAMPLING_FRACTIONS_KEY
        ]
    else:
        sampling_fraction_dict = None

    # No weighting requested, or no sampling fractions to derive weights from.
    if sampling_fraction_dict is None:
        return None

    # Target name comes from the metadata of the first training file.
    first_file_name = full_option_dict[trainval_io.EXAMPLE_FILES_KEY][0]
    first_example_dict = input_examples.read_example_file(
        netcdf_file_name=first_file_name, metadata_only=True
    )

    this_target_name = first_example_dict[input_examples.TARGET_NAME_KEY]
    binarize_target = full_option_dict[trainval_io.BINARIZE_TARGET_KEY]

    return dl_utils.class_fractions_to_weights(
        sampling_fraction_by_class_dict=sampling_fraction_dict,
        target_name=this_target_name,
        binarize_target=binarize_target
    )
def _run(input_model_file_name, radar_field_names, sounding_field_names,
         normalization_type_string, normalization_param_file_name,
         min_normalized_value, max_normalized_value, downsampling_keys,
         downsampling_fractions, monitor_string, weight_loss_function,
         refl_masking_threshold_dbz, x_translations_pixels,
         y_translations_pixels, ccw_rotation_angles_deg,
         noise_standard_deviation, num_noisings, flip_in_x, flip_in_y,
         top_training_dir_name, first_training_time_string,
         last_training_time_string, top_validation_dir_name,
         first_validation_time_string, last_validation_time_string,
         num_examples_per_batch, num_epochs, num_training_batches_per_epoch,
         num_validation_batches_per_epoch, output_dir_name):
    """Trains CNN with native (3-D) GridRad images.

    This is effectively the main method.  Steps, in order: convert the input
    args (times to Unix seconds, "empty" options to None), find training and
    validation files, read the architecture from an existing model and
    re-compile a fresh clone of it, write model metadata, then train.

    :param input_model_file_name: See documentation at top of file.
    :param radar_field_names: Same.
    :param sounding_field_names: Same.
    :param normalization_type_string: Same.
    :param normalization_param_file_name: Same.
    :param min_normalized_value: Same.
    :param max_normalized_value: Same.
    :param downsampling_keys: Same.
    :param downsampling_fractions: Same.
    :param monitor_string: Same.
    :param weight_loss_function: Same.
    :param refl_masking_threshold_dbz: Same.
    :param x_translations_pixels: Same.
    :param y_translations_pixels: Same.
    :param ccw_rotation_angles_deg: Same.
    :param noise_standard_deviation: Same.
    :param num_noisings: Same.
    :param flip_in_x: Same.
    :param flip_in_y: Same.
    :param top_training_dir_name: Same.
    :param first_training_time_string: Same.
    :param last_training_time_string: Same.
    :param top_validation_dir_name: Same.
    :param first_validation_time_string: Same.
    :param last_validation_time_string: Same.
    :param num_examples_per_batch: Same.
    :param num_epochs: Same.
    :param num_training_batches_per_epoch: Same.
    :param num_validation_batches_per_epoch: Same.
    :param output_dir_name: Same.
    """

    # Process input args.
    first_training_time_unix_sec = time_conversion.string_to_unix_sec(
        first_training_time_string, TIME_FORMAT)
    last_training_time_unix_sec = time_conversion.string_to_unix_sec(
        last_training_time_string, TIME_FORMAT)

    first_validation_time_unix_sec = time_conversion.string_to_unix_sec(
        first_validation_time_string, TIME_FORMAT)
    last_validation_time_unix_sec = time_conversion.string_to_unix_sec(
        last_validation_time_string, TIME_FORMAT)

    # An empty string or the string "None" means "no soundings".
    if sounding_field_names[0] in ['', 'None']:
        sounding_field_names = None

    # Downsampling needs at least two classes; otherwise it is turned off.
    if len(downsampling_keys) > 1:
        class_to_sampling_fraction_dict = dict(
            zip(downsampling_keys, downsampling_fractions))
    else:
        class_to_sampling_fraction_dict = None

    # Translation augmentation is turned off only when the single (x, y) pair
    # is (0, 0).  Testing the *sum* of the two offsets would also turn it off
    # for real translations such as (3, -3); and if the inputs are plain lists,
    # `+` concatenates them, so the comparison with 0 could never be true.
    if (len(x_translations_pixels) == 1
            and len(y_translations_pixels) == 1
            and x_translations_pixels[0] == 0
            and y_translations_pixels[0] == 0):
        x_translations_pixels = None
        y_translations_pixels = None

    # A single rotation angle of zero means "no rotation augmentation".
    if len(ccw_rotation_angles_deg) == 1 and ccw_rotation_angles_deg[0] == 0:
        ccw_rotation_angles_deg = None

    # Non-positive number of noisings means "no noise augmentation".
    if num_noisings <= 0:
        num_noisings = 0
        noise_standard_deviation = None

    # Set output locations.
    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    output_model_file_name = '{0:s}/model.h5'.format(output_dir_name)
    history_file_name = '{0:s}/model_history.csv'.format(output_dir_name)
    tensorboard_dir_name = '{0:s}/tensorboard'.format(output_dir_name)
    model_metafile_name = '{0:s}/model_metadata.p'.format(output_dir_name)

    # Find training and validation files.
    training_file_names = input_examples.find_many_example_files(
        top_directory_name=top_training_dir_name,
        shuffled=True,
        first_batch_number=FIRST_BATCH_NUMBER,
        last_batch_number=LAST_BATCH_NUMBER,
        raise_error_if_any_missing=False)

    validation_file_names = input_examples.find_many_example_files(
        top_directory_name=top_validation_dir_name,
        shuffled=True,
        first_batch_number=FIRST_BATCH_NUMBER,
        last_batch_number=LAST_BATCH_NUMBER,
        raise_error_if_any_missing=False)

    # Read architecture.  `clone_model` keeps the architecture of the model
    # that was read; the clone is then compiled from scratch below.
    print('Reading architecture from: "{0:s}"...'.format(
        input_model_file_name))
    model_object = cnn.read_model(input_model_file_name)
    model_object = keras.models.clone_model(model_object)

    # TODO(thunderhoser): This is a HACK.
    model_object.compile(loss=keras.losses.binary_crossentropy,
                         optimizer=keras.optimizers.Adam(),
                         metrics=cnn_setup.DEFAULT_METRIC_FUNCTION_LIST)

    print(SEPARATOR_STRING)
    model_object.summary()
    print(SEPARATOR_STRING)

    # Write metadata.  The target name is taken from the first training file.
    this_example_dict = input_examples.read_example_file(
        netcdf_file_name=training_file_names[0], metadata_only=True)
    target_name = this_example_dict[input_examples.TARGET_NAME_KEY]

    metadata_dict = {
        cnn.TARGET_NAME_KEY: target_name,
        cnn.NUM_EPOCHS_KEY: num_epochs,
        cnn.NUM_TRAINING_BATCHES_KEY: num_training_batches_per_epoch,
        cnn.NUM_VALIDATION_BATCHES_KEY: num_validation_batches_per_epoch,
        cnn.MONITOR_STRING_KEY: monitor_string,
        cnn.WEIGHT_LOSS_FUNCTION_KEY: weight_loss_function,
        cnn.USE_2D3D_CONVOLUTION_KEY: False,
        cnn.VALIDATION_FILES_KEY: validation_file_names,
        cnn.FIRST_VALIDN_TIME_KEY: first_validation_time_unix_sec,
        cnn.LAST_VALIDN_TIME_KEY: last_validation_time_unix_sec
    }

    # Grid dimensions are read from the model's (first) input tensor, using
    # axes 1 and 2 of its shape.
    input_tensor = model_object.input
    if isinstance(input_tensor, list):
        input_tensor = input_tensor[0]

    num_grid_rows = input_tensor.get_shape().as_list()[1]
    num_grid_columns = input_tensor.get_shape().as_list()[2]

    training_option_dict = {
        trainval_io.EXAMPLE_FILES_KEY: training_file_names,
        trainval_io.FIRST_STORM_TIME_KEY: first_training_time_unix_sec,
        trainval_io.LAST_STORM_TIME_KEY: last_training_time_unix_sec,
        trainval_io.NUM_EXAMPLES_PER_BATCH_KEY: num_examples_per_batch,
        trainval_io.RADAR_FIELDS_KEY: radar_field_names,
        trainval_io.RADAR_HEIGHTS_KEY: RADAR_HEIGHTS_M_AGL,
        trainval_io.SOUNDING_FIELDS_KEY: sounding_field_names,
        trainval_io.SOUNDING_HEIGHTS_KEY: SOUNDING_HEIGHTS_M_AGL,
        trainval_io.NUM_ROWS_KEY: num_grid_rows,
        trainval_io.NUM_COLUMNS_KEY: num_grid_columns,
        trainval_io.NORMALIZATION_TYPE_KEY: normalization_type_string,
        trainval_io.NORMALIZATION_FILE_KEY: normalization_param_file_name,
        trainval_io.MIN_NORMALIZED_VALUE_KEY: min_normalized_value,
        trainval_io.MAX_NORMALIZED_VALUE_KEY: max_normalized_value,
        trainval_io.BINARIZE_TARGET_KEY: False,
        trainval_io.SAMPLING_FRACTIONS_KEY: class_to_sampling_fraction_dict,
        trainval_io.LOOP_ONCE_KEY: False,
        trainval_io.REFLECTIVITY_MASK_KEY: refl_masking_threshold_dbz,
        trainval_io.X_TRANSLATIONS_KEY: x_translations_pixels,
        trainval_io.Y_TRANSLATIONS_KEY: y_translations_pixels,
        trainval_io.ROTATION_ANGLES_KEY: ccw_rotation_angles_deg,
        trainval_io.NOISE_STDEV_KEY: noise_standard_deviation,
        trainval_io.NUM_NOISINGS_KEY: num_noisings,
        trainval_io.FLIP_X_KEY: flip_in_x,
        trainval_io.FLIP_Y_KEY: flip_in_y
    }

    print('Writing metadata to: "{0:s}"...'.format(model_metafile_name))
    cnn.write_model_metadata(pickle_file_name=model_metafile_name,
                             metadata_dict=metadata_dict,
                             training_option_dict=training_option_dict)

    cnn.train_cnn_2d_or_3d(
        model_object=model_object,
        model_file_name=output_model_file_name,
        history_file_name=history_file_name,
        tensorboard_dir_name=tensorboard_dir_name,
        num_epochs=num_epochs,
        num_training_batches_per_epoch=num_training_batches_per_epoch,
        training_option_dict=training_option_dict,
        monitor_string=monitor_string,
        weight_loss_function=weight_loss_function,
        num_validation_batches_per_epoch=num_validation_batches_per_epoch,
        validation_file_names=validation_file_names,
        first_validn_time_unix_sec=first_validation_time_unix_sec,
        last_validn_time_unix_sec=last_validation_time_unix_sec)
def _convert_one_file(input_file_name, resolution_factor, output_file_name):
    """Converts examples in one file from GridRad to MYRORSS format.

    The output examples hold two radar matrices instead of one: upsampled
    reflectivity (all original heights) and upsampled azimuthal shear (one
    low-level and one mid-level layer, both derived from vorticity).

    :param input_file_name: Path to input file (with GridRad examples).  Will be
        read by `input_examples.read_example_file`.
    :param resolution_factor: See documentation at top of file.
    :param output_file_name: Path to output file (with the same examples but in
        MYRORSS format).  Will be written by
        `input_examples.write_example_file`.
    """

    print('Reading GridRad examples from: "{0:s}"...'.format(input_file_name))
    example_dict = input_examples.read_example_file(
        netcdf_file_name=input_file_name, read_all_target_vars=True
    )

    # Both of these must be taken before the 3-D examples are reduced to 2-D
    # below.  NOTE(review): "+ 0" presumably makes a copy of a numpy array, so
    # that the saved heights do not alias the dictionary entry -- confirm.
    orig_heights_m_agl = example_dict[input_examples.RADAR_HEIGHTS_KEY] + 0
    refl_channel = example_dict[input_examples.RADAR_FIELDS_KEY].index(
        radar_utils.REFL_NAME
    )

    orig_refl_matrix_dbz = example_dict[
        input_examples.RADAR_IMAGE_MATRIX_KEY
    ][..., refl_channel]

    # Upsample, then add a trailing axis of length 1.
    new_refl_matrix_dbz = numpy.expand_dims(
        trainval_io.upsample_reflectivity(
            reflectivity_matrix_dbz=orig_refl_matrix_dbz,
            upsampling_factor=resolution_factor
        ),
        axis=-1
    )

    shear_operation_dicts = [LL_SHEAR_OPERATION_DICT, ML_SHEAR_OPERATION_DICT]
    example_dict = input_examples.reduce_examples_3d_to_2d(
        example_dict=example_dict,
        list_of_operation_dicts=shear_operation_dicts
    )

    layer_field_names = numpy.array(
        example_dict[input_examples.RADAR_FIELDS_KEY]
    )
    layer_min_heights_m_asl = example_dict[
        input_examples.MIN_RADAR_HEIGHTS_KEY
    ]

    # One upsampled matrix per shear layer (low-level first, then mid-level).
    # Shear is upsampled by twice the factor used for reflectivity.
    shear_matrices_s01 = []

    for this_operation_dict in shear_operation_dicts:
        these_layer_flags = numpy.logical_and(
            layer_field_names == radar_utils.VORTICITY_NAME,
            layer_min_heights_m_asl ==
            this_operation_dict[input_examples.MIN_HEIGHT_KEY]
        )
        these_channels = numpy.where(these_layer_flags)[0]

        this_shear_matrix_s01 = trainval_io.upsample_reflectivity(
            reflectivity_matrix_dbz=example_dict[
                input_examples.RADAR_IMAGE_MATRIX_KEY
            ][..., these_channels],
            upsampling_factor=resolution_factor * 2
        )
        shear_matrices_s01.append(this_shear_matrix_s01)

    azimuthal_shear_matrix_s01 = VORTICITY_TO_AZ_SHEAR * numpy.concatenate(
        shear_matrices_s01, axis=-1
    )

    # Store the new matrices and make the metadata describe them.
    example_dict.update({
        input_examples.REFL_IMAGE_MATRIX_KEY: new_refl_matrix_dbz,
        input_examples.AZ_SHEAR_IMAGE_MATRIX_KEY: azimuthal_shear_matrix_s01,
        input_examples.RADAR_HEIGHTS_KEY: orig_heights_m_agl,
        input_examples.RADAR_FIELDS_KEY: [
            radar_utils.LOW_LEVEL_SHEAR_NAME, radar_utils.MID_LEVEL_SHEAR_NAME
        ]
    })
    example_dict[input_examples.ROTATED_GRID_SPACING_KEY] /= resolution_factor

    # Remove keys that apply only to the single-matrix layout.
    obsolete_keys = (
        input_examples.RADAR_IMAGE_MATRIX_KEY,
        input_examples.MIN_RADAR_HEIGHTS_KEY,
        input_examples.MAX_RADAR_HEIGHTS_KEY,
        input_examples.RADAR_LAYER_OPERATION_NAMES_KEY
    )

    for this_key in obsolete_keys:
        example_dict.pop(this_key, None)

    print('Writing examples in MYRORSS format to: "{0:s}"...'.format(
        output_file_name
    ))

    input_examples.write_example_file(
        netcdf_file_name=output_file_name, example_dict=example_dict,
        append_to_file=False
    )
def _run(top_example_dir_name, first_spc_date_string, last_spc_date_string,
         min_percentile_level, max_percentile_level, num_radar_rows,
         num_radar_columns, output_file_name):
    """Finds normalization parameters for GridRad data.

    This is effectively the main method.

    Example files are read one at a time.  For each file, running parameter
    dictionaries are updated (via `_update_z_score_params` and
    `_update_frequency_dict`) for

    - each radar field (all heights together),
    - each radar field/height pair,
    - each sounding field (all heights together),
    - each sounding field/height pair.

    At the end the dictionaries are converted to tables by
    `_convert_normalization_params` and written to `output_file_name` by
    `dl_utils.write_normalization_params`.

    :param top_example_dir_name: See documentation at top of file.
    :param first_spc_date_string: Same.
    :param last_spc_date_string: Same.
    :param min_percentile_level: Same.
    :param max_percentile_level: Same.
    :param num_radar_rows: Same.
    :param num_radar_columns: Same.
    :param output_file_name: Same.
    """

    # Non-positive grid dimensions are replaced by None before being passed to
    # `read_example_file` as `num_rows_to_keep` / `num_columns_to_keep`.
    if num_radar_rows <= 0:
        num_radar_rows = None
    if num_radar_columns <= 0:
        num_radar_columns = None

    first_time_unix_sec = time_conversion.get_start_of_spc_date(
        first_spc_date_string)
    last_time_unix_sec = time_conversion.get_end_of_spc_date(
        last_spc_date_string)

    # example_file_names = input_examples.find_many_example_files(
    #     top_directory_name=top_example_dir_name, shuffled=True,
    #     first_batch_number=0, last_batch_number=LARGE_INTEGER,
    #     raise_error_if_any_missing=False)

    example_file_names = input_examples.find_many_example_files(
        top_directory_name=top_example_dir_name,
        shuffled=False,
        first_spc_date_string=first_spc_date_string,
        last_spc_date_string=last_spc_date_string,
        raise_error_if_any_missing=False)

    # The first file is read in full, only to learn the field names, heights
    # and layout of the radar data.  Its statistics are accumulated later, when
    # it is read again in the main loop.
    this_example_dict = input_examples.read_example_file(
        netcdf_file_name=example_file_names[0], read_all_target_vars=True)

    sounding_field_names = this_example_dict[
        input_examples.SOUNDING_FIELDS_KEY]
    sounding_heights_m_agl = this_example_dict[
        input_examples.SOUNDING_HEIGHTS_KEY]

    # -1 is a sentinel for files that store reflectivity and azimuthal shear in
    # two separate matrices (handled by the `else` branches below).  Otherwise
    # the value is the rank of the single radar matrix minus 2.
    # NOTE(review): the "minus 2" presumably discounts the example and channel
    # axes -- confirm against `input_examples`.
    if input_examples.REFL_IMAGE_MATRIX_KEY in this_example_dict:
        num_radar_dimensions = -1
    else:
        num_radar_dimensions = (len(
            this_example_dict[input_examples.RADAR_IMAGE_MATRIX_KEY].shape) -
                                2)

    # TODO(thunderhoser): Put this in separate method.
    if num_radar_dimensions == 3:
        radar_field_names = this_example_dict[input_examples.RADAR_FIELDS_KEY]
        radar_heights_m_agl = this_example_dict[
            input_examples.RADAR_HEIGHTS_KEY]

        radar_field_name_by_pair = []
        radar_height_by_pair_m_agl = numpy.array([], dtype=int)

        # Pair every field with every height (field-major order).
        for this_field_name in radar_field_names:
            radar_field_name_by_pair += ([this_field_name] *
                                         len(radar_heights_m_agl))
            radar_height_by_pair_m_agl = numpy.concatenate(
                (radar_height_by_pair_m_agl, radar_heights_m_agl))

    elif num_radar_dimensions == 2:
        # Here the file's field and height lists are used directly as the
        # field/height pairs.
        radar_field_name_by_pair = this_example_dict[
            input_examples.RADAR_FIELDS_KEY]
        radar_height_by_pair_m_agl = this_example_dict[
            input_examples.RADAR_HEIGHTS_KEY]

        # Unique field names, sorted.
        radar_field_names = list(set(radar_field_name_by_pair))
        radar_field_names.sort()

    else:
        # Separate-matrix layout: the listed fields are treated as the
        # azimuthal-shear fields and the listed heights as reflectivity
        # heights.
        az_shear_field_names = this_example_dict[
            input_examples.RADAR_FIELDS_KEY]
        radar_field_names = [radar_utils.REFL_NAME] + az_shear_field_names

        refl_heights_m_agl = this_example_dict[
            input_examples.RADAR_HEIGHTS_KEY]
        radar_field_name_by_pair = (
            [radar_utils.REFL_NAME] * len(refl_heights_m_agl) +
            az_shear_field_names)

        # Every shear field gets the same nominal height.
        az_shear_heights_m_agl = numpy.full(len(az_shear_field_names),
                                            radar_utils.SHEAR_HEIGHT_M_ASL)
        radar_height_by_pair_m_agl = numpy.concatenate(
            (refl_heights_m_agl, az_shear_heights_m_agl)).astype(int)

    # Initialize parameters.
    orig_parameter_dict = {
        NUM_VALUES_KEY: 0,
        MEAN_VALUE_KEY: 0.,
        MEAN_OF_SQUARES_KEY: 0.
    }

    # "no_height" dictionaries are keyed by field name; "with_height"
    # dictionaries are keyed by (field name, height) tuples.
    radar_z_score_dict_no_height = {}
    radar_z_score_dict_with_height = {}
    radar_freq_dict_no_height = {}
    num_radar_fields = len(radar_field_names)
    num_radar_field_height_pairs = len(radar_field_name_by_pair)

    for j in range(num_radar_fields):
        radar_z_score_dict_no_height[radar_field_names[j]] = copy.deepcopy(
            orig_parameter_dict)
        radar_freq_dict_no_height[radar_field_names[j]] = {}

    for k in range(num_radar_field_height_pairs):
        radar_z_score_dict_with_height[
            radar_field_name_by_pair[k],
            radar_height_by_pair_m_agl[k]] = copy.deepcopy(orig_parameter_dict)

    sounding_z_score_dict_no_height = {}
    sounding_z_score_dict_with_height = {}
    sounding_freq_dict_no_height = {}
    num_sounding_fields = len(sounding_field_names)
    num_sounding_heights = len(sounding_heights_m_agl)

    for j in range(num_sounding_fields):
        sounding_z_score_dict_no_height[sounding_field_names[j]] = (
            copy.deepcopy(orig_parameter_dict))
        sounding_freq_dict_no_height[sounding_field_names[j]] = {}

        for k in range(num_sounding_heights):
            sounding_z_score_dict_with_height[
                sounding_field_names[j],
                sounding_heights_m_agl[k]] = copy.deepcopy(orig_parameter_dict)

    # Main loop: update all parameter dictionaries with the data in each file.
    for this_example_file_name in example_file_names:
        print('Reading data from: "{0:s}"...'.format(this_example_file_name))
        this_example_dict = input_examples.read_example_file(
            netcdf_file_name=this_example_file_name,
            read_all_target_vars=True,
            num_rows_to_keep=num_radar_rows,
            num_columns_to_keep=num_radar_columns,
            first_time_to_keep_unix_sec=first_time_unix_sec,
            last_time_to_keep_unix_sec=last_time_unix_sec)

        # Nothing to accumulate if the file has no examples in the time window.
        this_num_examples = len(this_example_dict[input_examples.FULL_IDS_KEY])
        if this_num_examples == 0:
            continue

        # Radar fields, all heights together.
        for j in range(num_radar_fields):
            print('Updating normalization params for "{0:s}"...'.format(
                radar_field_names[j]))

            if num_radar_dimensions == 3:
                this_field_index = this_example_dict[
                    input_examples.RADAR_FIELDS_KEY].index(
                        radar_field_names[j])

                this_radar_matrix = this_example_dict[
                    input_examples.RADAR_IMAGE_MATRIX_KEY][...,
                                                           this_field_index]

            elif num_radar_dimensions == 2:
                all_field_names = numpy.array(
                    this_example_dict[input_examples.RADAR_FIELDS_KEY])

                # All channels with this field name (there may be several).
                these_field_indices = numpy.where(
                    all_field_names == radar_field_names[j])[0]

                this_radar_matrix = this_example_dict[
                    input_examples.RADAR_IMAGE_MATRIX_KEY][...,
                                                           these_field_indices]

            else:
                if radar_field_names[j] == radar_utils.REFL_NAME:
                    this_radar_matrix = this_example_dict[
                        input_examples.REFL_IMAGE_MATRIX_KEY][..., 0]
                else:
                    this_field_index = this_example_dict[
                        input_examples.RADAR_FIELDS_KEY].index(
                            radar_field_names[j])

                    this_radar_matrix = this_example_dict[
                        input_examples.AZ_SHEAR_IMAGE_MATRIX_KEY][
                            ..., this_field_index]

            radar_z_score_dict_no_height[radar_field_names[j]] = (
                _update_z_score_params(
                    z_score_param_dict=radar_z_score_dict_no_height[
                        radar_field_names[j]],
                    new_data_matrix=this_radar_matrix))

            radar_freq_dict_no_height[radar_field_names[j]] = (
                _update_frequency_dict(
                    frequency_dict=radar_freq_dict_no_height[
                        radar_field_names[j]],
                    new_data_matrix=this_radar_matrix,
                    rounding_base=RADAR_INTERVAL_DICT[radar_field_names[j]]))

        # Radar field/height pairs (z-score parameters only; no frequency
        # dictionary is kept for these).
        for k in range(num_radar_field_height_pairs):
            print(('Updating normalization params for "{0:s}" at {1:d} metres '
                   'AGL...').format(radar_field_name_by_pair[k],
                                    radar_height_by_pair_m_agl[k]))

            if num_radar_dimensions == 3:
                this_field_index = this_example_dict[
                    input_examples.RADAR_FIELDS_KEY].index(
                        radar_field_name_by_pair[k])

                this_height_index = numpy.where(
                    this_example_dict[input_examples.RADAR_HEIGHTS_KEY] ==
                    radar_height_by_pair_m_agl[k])[0][0]

                this_radar_matrix = this_example_dict[
                    input_examples.RADAR_IMAGE_MATRIX_KEY][...,
                                                           this_height_index,
                                                           this_field_index]

            elif num_radar_dimensions == 2:
                all_field_names = numpy.array(
                    this_example_dict[input_examples.RADAR_FIELDS_KEY])
                all_heights_m_agl = this_example_dict[
                    input_examples.RADAR_HEIGHTS_KEY]

                # First channel matching both the field name and the height.
                this_index = numpy.where(
                    numpy.logical_and(
                        all_field_names == radar_field_name_by_pair[k],
                        all_heights_m_agl ==
                        radar_height_by_pair_m_agl[k]))[0][0]

                this_radar_matrix = this_example_dict[
                    input_examples.RADAR_IMAGE_MATRIX_KEY][..., this_index]

            else:
                if radar_field_name_by_pair[k] == radar_utils.REFL_NAME:
                    this_height_index = numpy.where(
                        this_example_dict[input_examples.RADAR_HEIGHTS_KEY] ==
                        radar_height_by_pair_m_agl[k])[0][0]

                    this_radar_matrix = this_example_dict[
                        input_examples.REFL_IMAGE_MATRIX_KEY][
                            ..., this_height_index, 0]
                else:
                    this_field_index = this_example_dict[
                        input_examples.RADAR_FIELDS_KEY].index(
                            radar_field_name_by_pair[k])

                    this_radar_matrix = this_example_dict[
                        input_examples.AZ_SHEAR_IMAGE_MATRIX_KEY][
                            ..., this_field_index]

            radar_z_score_dict_with_height[
                radar_field_name_by_pair[k],
                radar_height_by_pair_m_agl[k]] = _update_z_score_params(
                    z_score_param_dict=radar_z_score_dict_with_height[
                        radar_field_name_by_pair[k],
                        radar_height_by_pair_m_agl[k]],
                    new_data_matrix=this_radar_matrix)

        # Sounding fields: first all heights together, then each height.
        for j in range(num_sounding_fields):
            print('Updating normalization params for "{0:s}"...'.format(
                sounding_field_names[j]))

            this_field_index = this_example_dict[
                input_examples.SOUNDING_FIELDS_KEY].index(
                    sounding_field_names[j])

            this_sounding_matrix = this_example_dict[
                input_examples.SOUNDING_MATRIX_KEY][..., this_field_index]

            sounding_z_score_dict_no_height[sounding_field_names[j]] = (
                _update_z_score_params(
                    z_score_param_dict=sounding_z_score_dict_no_height[
                        sounding_field_names[j]],
                    new_data_matrix=this_sounding_matrix))

            sounding_freq_dict_no_height[sounding_field_names[j]] = (
                _update_frequency_dict(
                    frequency_dict=sounding_freq_dict_no_height[
                        sounding_field_names[j]],
                    new_data_matrix=this_sounding_matrix,
                    rounding_base=SOUNDING_INTERVAL_DICT[
                        sounding_field_names[j]]))

            for k in range(num_sounding_heights):
                this_height_index = numpy.where(
                    this_example_dict[input_examples.SOUNDING_HEIGHTS_KEY] ==
                    sounding_heights_m_agl[k])[0][0]

                this_sounding_matrix = this_example_dict[
                    input_examples.SOUNDING_MATRIX_KEY][..., this_height_index,
                                                        this_field_index]

                print(('Updating normalization params for "{0:s}" at {1:d} m '
                       'AGL...').format(sounding_field_names[j],
                                        sounding_heights_m_agl[k]))

                sounding_z_score_dict_with_height[
                    sounding_field_names[j],
                    sounding_heights_m_agl[k]] = _update_z_score_params(
                        z_score_param_dict=sounding_z_score_dict_with_height[
                            sounding_field_names[j],
                            sounding_heights_m_agl[k]],
                        new_data_matrix=this_sounding_matrix)

        print(SEPARATOR_STRING)

    # Convert dictionaries to pandas DataFrames.  The percentile levels (and
    # frequency dictionaries) are passed only for the "no_height" tables.
    radar_table_no_height = _convert_normalization_params(
        z_score_dict_dict=radar_z_score_dict_no_height,
        frequency_dict_dict=radar_freq_dict_no_height,
        min_percentile_level=min_percentile_level,
        max_percentile_level=max_percentile_level)

    print('Normalization params for each radar field:\n{0:s}\n\n'.format(
        str(radar_table_no_height)))

    radar_table_with_height = _convert_normalization_params(
        z_score_dict_dict=radar_z_score_dict_with_height)

    print(('Normalization params for each radar field/height pair:\n{0:s}\n\n'
           ).format(str(radar_table_with_height)))

    sounding_table_no_height = _convert_normalization_params(
        z_score_dict_dict=sounding_z_score_dict_no_height,
        frequency_dict_dict=sounding_freq_dict_no_height,
        min_percentile_level=min_percentile_level,
        max_percentile_level=max_percentile_level)

    print('Normalization params for each sounding field:\n{0:s}\n\n'.format(
        str(sounding_table_no_height)))

    sounding_table_with_height = _convert_normalization_params(
        z_score_dict_dict=sounding_z_score_dict_with_height)

    print(
        ('Normalization params for each sounding field/height pair:\n{0:s}\n\n'
         ).format(str(sounding_table_with_height)))

    print('Writing normalization params to file: "{0:s}"...'.format(
        output_file_name))

    dl_utils.write_normalization_params(
        pickle_file_name=output_file_name,
        radar_table_no_height=radar_table_no_height,
        radar_table_with_height=radar_table_with_height,
        sounding_table_no_height=sounding_table_no_height,
        sounding_table_with_height=sounding_table_with_height)
Esempio n. 8
0
def _find_examples_to_read(option_dict, num_examples_total):
    """Determines which examples to read.

    Target values are read from every example file, examples whose target
    equals `target_val_utils.INVALID_STORM_INTEGER` are dropped, and the
    remaining examples are subsampled.  Without class-sampling fractions, a
    random subset (without replacement) of at most `num_examples_total`
    examples is kept; otherwise the selection is delegated to
    `dl_utils.sample_by_class`.

    E = number of examples to read

    :param option_dict: See doc for any generator in this file.
    :param num_examples_total: Number of examples to generate (must be a
        positive integer).
    :return: storm_ids: length-E list of storm IDs (strings).
    :return: storm_times_unix_sec: length-E numpy array of storm times.
    """

    error_checking.assert_is_integer(num_examples_total)
    error_checking.assert_is_greater(num_examples_total, 0)

    example_file_names = option_dict[trainval_io.EXAMPLE_FILES_KEY]

    radar_field_names = option_dict[trainval_io.RADAR_FIELDS_KEY]
    radar_heights_m_agl = option_dict[trainval_io.RADAR_HEIGHTS_KEY]
    first_storm_time_unix_sec = option_dict[trainval_io.FIRST_STORM_TIME_KEY]
    last_storm_time_unix_sec = option_dict[trainval_io.LAST_STORM_TIME_KEY]
    num_grid_rows = option_dict[trainval_io.NUM_ROWS_KEY]
    num_grid_columns = option_dict[trainval_io.NUM_COLUMNS_KEY]

    class_to_sampling_fraction_dict = option_dict[
        trainval_io.SAMPLING_FRACTIONS_KEY]

    storm_ids = []
    storm_times_unix_sec = numpy.array([], dtype=int)
    target_values = numpy.array([], dtype=int)
    target_name = None

    for this_file_name in example_file_names:
        print('Reading target values from: "{0:s}"...'.format(this_file_name))

        # Only IDs, times and target values are used below, so just the first
        # radar field and first height are requested (presumably to limit the
        # amount of data read -- confirm against `read_example_file`).
        this_example_dict = input_examples.read_example_file(
            netcdf_file_name=this_file_name, include_soundings=False,
            radar_field_names_to_keep=[radar_field_names[0]],
            radar_heights_to_keep_m_agl=radar_heights_m_agl[[0]],
            first_time_to_keep_unix_sec=first_storm_time_unix_sec,
            last_time_to_keep_unix_sec=last_storm_time_unix_sec,
            num_rows_to_keep=num_grid_rows,
            num_columns_to_keep=num_grid_columns)

        # The generators in this file skip files for which the reader returns
        # None; do the same here rather than failing on `None[...]`.
        if this_example_dict is None:
            continue

        target_name = this_example_dict[input_examples.TARGET_NAME_KEY]

        storm_ids += this_example_dict[input_examples.STORM_IDS_KEY]
        storm_times_unix_sec = numpy.concatenate((
            storm_times_unix_sec,
            this_example_dict[input_examples.STORM_TIMES_KEY]
        ))
        target_values = numpy.concatenate((
            target_values, this_example_dict[input_examples.TARGET_VALUES_KEY]
        ))

    # Remove examples with an invalid target value.
    indices_to_keep = numpy.where(
        target_values != target_val_utils.INVALID_STORM_INTEGER
    )[0]

    storm_ids = [storm_ids[k] for k in indices_to_keep]
    storm_times_unix_sec = storm_times_unix_sec[indices_to_keep]
    target_values = target_values[indices_to_keep]
    num_examples_found = len(storm_ids)

    if class_to_sampling_fraction_dict is None:
        indices_to_keep = numpy.arange(num_examples_found, dtype=int)

        # Downsample only if more examples were found than requested.
        if num_examples_found > num_examples_total:
            indices_to_keep = numpy.random.choice(
                indices_to_keep, size=num_examples_total, replace=False)
    else:
        indices_to_keep = dl_utils.sample_by_class(
            sampling_fraction_by_class_dict=class_to_sampling_fraction_dict,
            target_name=target_name, target_values=target_values,
            num_examples_total=num_examples_total)

    storm_ids = [storm_ids[k] for k in indices_to_keep]
    storm_times_unix_sec = storm_times_unix_sec[indices_to_keep]

    return storm_ids, storm_times_unix_sec
Esempio n. 9
0
def gridrad_generator_2d_reduced(option_dict, list_of_operation_dicts,
                                 num_examples_total):
    """Generates examples with 2-D GridRad images.

    These 2-D images are produced by applying layer operations to the native 3-D
    images.  The layer operations are specified by `list_of_operation_dicts`.

    Each example (storm object) consists of the following:

    - Storm-centered radar images (one 2-D image for each layer operation)
    - Storm-centered sounding (optional)
    - Target value (class)

    One dictionary is yielded for each input file that contains at least one
    of the selected examples.  Files for which the reader returns None, or
    which contain none of the selected examples, are skipped.  The generator
    ends after the last file.

    :param option_dict: Dictionary with the following keys.
    option_dict['example_file_names']: See doc for
        `training_validation_io.gridrad_generator_2d_reduced`.
    option_dict['binarize_target']: Same.
    option_dict['sounding_field_names']: Same.
    option_dict['sounding_heights_m_agl']: Same.
    option_dict['first_storm_time_unix_sec']: Same.
    option_dict['last_storm_time_unix_sec']: Same.
    option_dict['num_grid_rows']: Same.
    option_dict['num_grid_columns']: Same.
    option_dict['normalization_type_string']: Same.
    option_dict['normalization_param_file_name']: Same.
    option_dict['min_normalized_value']: Same.
    option_dict['max_normalized_value']: Same.
    option_dict['class_to_sampling_fraction_dict']: Same.

    NOTE: The entries under `trainval_io.RADAR_FIELDS_KEY` and
    `trainval_io.RADAR_HEIGHTS_KEY` are overwritten in the caller's dictionary
    with the field/height pairs needed by the layer operations.

    :param list_of_operation_dicts: See doc for
        `input_examples.reduce_examples_3d_to_2d`.
    :param num_examples_total: Number of examples to generate.

    :return: storm_object_dict: Dictionary with the following keys.
    storm_object_dict['list_of_input_matrices']: length-T list of numpy arrays,
        where T = number of input tensors to model.  The first axis of each
        array has length E.
    storm_object_dict['storm_ids']: length-E list of storm IDs.
    storm_object_dict['storm_times_unix_sec']: length-E numpy array of storm
        times.
    storm_object_dict['target_array']: See output doc for
        `training_validation_io.gridrad_generator_2d_reduced`.
    storm_object_dict['sounding_pressure_matrix_pascals']: numpy array (E x H_s)
        of pressures.  If soundings were not read, this is None.
    storm_object_dict['radar_field_names']: length-C list of field names, where
        the [j]th item corresponds to the [j]th channel of the 2-D radar images
        returned in "list_of_input_matrices".
    storm_object_dict['min_radar_heights_m_agl']: length-C numpy array with
        minimum height for each layer operation (used to reduce 3-D radar images
        to 2-D).
    storm_object_dict['max_radar_heights_m_agl']: Same but with max heights.
    storm_object_dict['radar_layer_operation_names']: length-C list with names
        of layer operations.  Each name must be accepted by
        `input_examples._check_layer_operation`.
    """

    unique_radar_field_names, unique_radar_heights_m_agl = (
        trainval_io.layer_ops_to_field_height_pairs(list_of_operation_dicts)
    )

    option_dict[trainval_io.RADAR_FIELDS_KEY] = unique_radar_field_names
    option_dict[trainval_io.RADAR_HEIGHTS_KEY] = unique_radar_heights_m_agl

    storm_ids, storm_times_unix_sec = _find_examples_to_read(
        option_dict=option_dict, num_examples_total=num_examples_total)
    print('\n')

    example_file_names = option_dict[trainval_io.EXAMPLE_FILES_KEY]

    first_storm_time_unix_sec = option_dict[trainval_io.FIRST_STORM_TIME_KEY]
    last_storm_time_unix_sec = option_dict[trainval_io.LAST_STORM_TIME_KEY]
    num_grid_rows = option_dict[trainval_io.NUM_ROWS_KEY]
    num_grid_columns = option_dict[trainval_io.NUM_COLUMNS_KEY]
    sounding_field_names = option_dict[trainval_io.SOUNDING_FIELDS_KEY]
    sounding_heights_m_agl = option_dict[trainval_io.SOUNDING_HEIGHTS_KEY]

    normalization_type_string = option_dict[trainval_io.NORMALIZATION_TYPE_KEY]
    normalization_param_file_name = option_dict[
        trainval_io.NORMALIZATION_FILE_KEY]
    min_normalized_value = option_dict[trainval_io.MIN_NORMALIZED_VALUE_KEY]
    max_normalized_value = option_dict[trainval_io.MAX_NORMALIZED_VALUE_KEY]

    binarize_target = option_dict[trainval_io.BINARIZE_TARGET_KEY]

    this_example_dict = input_examples.read_example_file(
        netcdf_file_name=example_file_names[0], metadata_only=True)
    target_name = this_example_dict[input_examples.TARGET_NAME_KEY]

    num_classes = target_val_utils.target_name_to_num_classes(
        target_name=target_name, include_dead_storms=False)

    # Pressure is always read along with the soundings (it is returned
    # separately below).  If the caller did not ask for it as a predictor, it
    # is appended as the last field to read.
    if sounding_field_names is None:
        sounding_field_names_to_read = None
    elif soundings.PRESSURE_NAME in sounding_field_names:
        sounding_field_names_to_read = sounding_field_names + []
    else:
        sounding_field_names_to_read = (
            sounding_field_names + [soundings.PRESSURE_NAME]
        )

    # Iterating over the files lets the generator finish by returning
    # normally.  Raising StopIteration inside a generator is turned into a
    # RuntimeError by Python 3.7+ (PEP 479).
    for this_file_name in example_file_names:
        print('Reading data from: "{0:s}"...'.format(this_file_name))

        this_example_dict = input_examples.read_example_file(
            netcdf_file_name=this_file_name,
            include_soundings=sounding_field_names is not None,
            radar_field_names_to_keep=unique_radar_field_names,
            radar_heights_to_keep_m_agl=unique_radar_heights_m_agl,
            sounding_field_names_to_keep=sounding_field_names_to_read,
            sounding_heights_to_keep_m_agl=sounding_heights_m_agl,
            first_time_to_keep_unix_sec=first_storm_time_unix_sec,
            last_time_to_keep_unix_sec=last_storm_time_unix_sec,
            num_rows_to_keep=num_grid_rows,
            num_columns_to_keep=num_grid_columns)

        if this_example_dict is None:
            continue

        # Keep only the examples selected by `_find_examples_to_read`.
        # Negative indices are dropped (with `allow_missing=True` these
        # presumably flag desired storm objects absent from this file).
        indices_to_keep = tracking_utils.find_storm_objects(
            all_storm_ids=this_example_dict[input_examples.STORM_IDS_KEY],
            all_times_unix_sec=this_example_dict[
                input_examples.STORM_TIMES_KEY],
            storm_ids_to_keep=storm_ids,
            times_to_keep_unix_sec=storm_times_unix_sec, allow_missing=True)

        indices_to_keep = indices_to_keep[indices_to_keep >= 0]
        if len(indices_to_keep) == 0:
            continue

        this_example_dict = input_examples.subset_examples(
            example_dict=this_example_dict, indices_to_keep=indices_to_keep)

        this_example_dict = input_examples.reduce_examples_3d_to_2d(
            example_dict=this_example_dict,
            list_of_operation_dicts=list_of_operation_dicts)

        radar_field_names_2d = this_example_dict[
            input_examples.RADAR_FIELDS_KEY]

        include_soundings = (
            input_examples.SOUNDING_MATRIX_KEY in this_example_dict)

        # Adding zero creates copies, so the arrays held by
        # `this_example_dict` are not the ones passed to normalization.
        radar_image_matrix = (
            this_example_dict[input_examples.RADAR_IMAGE_MATRIX_KEY] + 0.
        )
        target_values = this_example_dict[input_examples.TARGET_VALUES_KEY] + 0

        sounding_matrix = None
        sounding_pressure_matrix_pascals = None

        if include_soundings:
            full_sounding_matrix = this_example_dict[
                input_examples.SOUNDING_MATRIX_KEY]

            pressure_index = this_example_dict[
                input_examples.SOUNDING_FIELDS_KEY
            ].index(soundings.PRESSURE_NAME)

            # Pressures are copied before normalization.
            sounding_pressure_matrix_pascals = (
                full_sounding_matrix[..., pressure_index] + 0.
            )

            # If pressure was read only for the output above, it is the last
            # field; remove it from the predictors.
            if soundings.PRESSURE_NAME not in sounding_field_names:
                full_sounding_matrix = full_sounding_matrix[..., :-1]

            sounding_matrix = full_sounding_matrix + 0.

        if normalization_type_string is not None:
            radar_image_matrix = dl_utils.normalize_radar_images(
                radar_image_matrix=radar_image_matrix,
                field_names=radar_field_names_2d,
                normalization_type_string=normalization_type_string,
                normalization_param_file_name=normalization_param_file_name,
                min_normalized_value=min_normalized_value,
                max_normalized_value=max_normalized_value).astype('float32')

            if include_soundings:
                sounding_matrix = dl_utils.normalize_soundings(
                    sounding_matrix=sounding_matrix,
                    field_names=sounding_field_names,
                    normalization_type_string=normalization_type_string,
                    normalization_param_file_name=normalization_param_file_name,
                    min_normalized_value=min_normalized_value,
                    max_normalized_value=max_normalized_value).astype('float32')

        list_of_predictor_matrices = [radar_image_matrix]
        if include_soundings:
            list_of_predictor_matrices.append(sounding_matrix)

        target_array = _finalize_targets(
            target_values=target_values, binarize_target=binarize_target,
            num_classes=num_classes)

        storm_object_dict = {
            INPUT_MATRICES_KEY: list_of_predictor_matrices,
            TARGET_ARRAY_KEY: target_array,
            STORM_IDS_KEY: this_example_dict[input_examples.STORM_IDS_KEY],
            STORM_TIMES_KEY: this_example_dict[input_examples.STORM_TIMES_KEY],
            SOUNDING_PRESSURES_KEY: sounding_pressure_matrix_pascals
        }

        # Metadata describing the 3-D-to-2-D reduction for this file.
        for this_key in REDUCTION_METADATA_KEYS:
            storm_object_dict[this_key] = this_example_dict[this_key]

        yield storm_object_dict
Esempio n. 10
0
def myrorss_generator_2d3d(option_dict, num_examples_total):
    """Generates examples with both 2-D and 3-D radar images.

    Each example (storm object) consists of the following:

    - Storm-centered azimuthal shear (one 2-D image for each field)
    - Storm-centered reflectivity (one 3-D image)
    - Storm-centered sounding (optional)
    - Target value (class)

    One dictionary is yielded for each input file that contains at least one
    of the selected examples.  Files for which the reader returns None, or
    which contain none of the selected examples, are skipped.  The generator
    ends after the last file.

    :param option_dict: Dictionary with the following keys.
    option_dict['example_file_names']: See doc for
        `training_validation_io.myrorss_generator_2d3d`.
    option_dict['binarize_target']: Same.
    option_dict['radar_field_names']: Same.
    option_dict['radar_heights_m_agl']: Same.
    option_dict['sounding_field_names']: Same.
    option_dict['sounding_heights_m_agl']: Same.
    option_dict['first_storm_time_unix_sec']: Same.
    option_dict['last_storm_time_unix_sec']: Same.
    option_dict['num_grid_rows']: Same.
    option_dict['num_grid_columns']: Same.
    option_dict['normalization_type_string']: See doc for `generator_2d_or_3d`.
    option_dict['normalization_param_file_name']: Same.
    option_dict['min_normalized_value']: Same.
    option_dict['max_normalized_value']: Same.
    option_dict['class_to_sampling_fraction_dict']: Same.

    :param num_examples_total: Total number of examples to generate.

    :return: storm_object_dict: Dictionary with the following keys.
    storm_object_dict['list_of_input_matrices']: length-T list of numpy arrays,
        where T = number of input tensors to model.  The first axis of each
        array has length E.
    storm_object_dict['storm_ids']: length-E list of storm IDs.
    storm_object_dict['storm_times_unix_sec']: length-E numpy array of storm
        times.
    storm_object_dict['target_array']: See output doc for
        `training_validation_io.myrorss_generator_2d3d`.
    storm_object_dict['sounding_pressure_matrix_pascals']: numpy array (E x H_s)
        of pressures.  If soundings were not read, this is None.
    """

    storm_ids, storm_times_unix_sec = _find_examples_to_read(
        option_dict=option_dict, num_examples_total=num_examples_total)
    print('\n')

    example_file_names = option_dict[trainval_io.EXAMPLE_FILES_KEY]

    first_storm_time_unix_sec = option_dict[trainval_io.FIRST_STORM_TIME_KEY]
    last_storm_time_unix_sec = option_dict[trainval_io.LAST_STORM_TIME_KEY]
    num_grid_rows = option_dict[trainval_io.NUM_ROWS_KEY]
    num_grid_columns = option_dict[trainval_io.NUM_COLUMNS_KEY]
    azimuthal_shear_field_names = option_dict[trainval_io.RADAR_FIELDS_KEY]
    reflectivity_heights_m_agl = option_dict[trainval_io.RADAR_HEIGHTS_KEY]
    sounding_field_names = option_dict[trainval_io.SOUNDING_FIELDS_KEY]
    sounding_heights_m_agl = option_dict[trainval_io.SOUNDING_HEIGHTS_KEY]

    normalization_type_string = option_dict[trainval_io.NORMALIZATION_TYPE_KEY]
    normalization_param_file_name = option_dict[
        trainval_io.NORMALIZATION_FILE_KEY]
    min_normalized_value = option_dict[trainval_io.MIN_NORMALIZED_VALUE_KEY]
    max_normalized_value = option_dict[trainval_io.MAX_NORMALIZED_VALUE_KEY]

    binarize_target = option_dict[trainval_io.BINARIZE_TARGET_KEY]

    this_example_dict = input_examples.read_example_file(
        netcdf_file_name=example_file_names[0], metadata_only=True)
    target_name = this_example_dict[input_examples.TARGET_NAME_KEY]

    num_classes = target_val_utils.target_name_to_num_classes(
        target_name=target_name, include_dead_storms=False)

    # Pressure is always read along with the soundings (it is returned
    # separately below).  If the caller did not ask for it as a predictor, it
    # is appended as the last field to read.
    if sounding_field_names is None:
        sounding_field_names_to_read = None
    elif soundings.PRESSURE_NAME in sounding_field_names:
        sounding_field_names_to_read = sounding_field_names + []
    else:
        sounding_field_names_to_read = (
            sounding_field_names + [soundings.PRESSURE_NAME]
        )

    # Iterating over the files lets the generator finish by returning
    # normally.  Raising StopIteration inside a generator is turned into a
    # RuntimeError by Python 3.7+ (PEP 479).
    for this_file_name in example_file_names:
        print('Reading data from: "{0:s}"...'.format(this_file_name))

        this_example_dict = input_examples.read_example_file(
            netcdf_file_name=this_file_name,
            include_soundings=sounding_field_names is not None,
            radar_field_names_to_keep=azimuthal_shear_field_names,
            radar_heights_to_keep_m_agl=reflectivity_heights_m_agl,
            sounding_field_names_to_keep=sounding_field_names_to_read,
            sounding_heights_to_keep_m_agl=sounding_heights_m_agl,
            first_time_to_keep_unix_sec=first_storm_time_unix_sec,
            last_time_to_keep_unix_sec=last_storm_time_unix_sec,
            num_rows_to_keep=num_grid_rows,
            num_columns_to_keep=num_grid_columns)

        if this_example_dict is None:
            continue

        # Keep only the examples selected by `_find_examples_to_read`.
        # Negative indices are dropped (with `allow_missing=True` these
        # presumably flag desired storm objects absent from this file).
        indices_to_keep = tracking_utils.find_storm_objects(
            all_storm_ids=this_example_dict[input_examples.STORM_IDS_KEY],
            all_times_unix_sec=this_example_dict[
                input_examples.STORM_TIMES_KEY],
            storm_ids_to_keep=storm_ids,
            times_to_keep_unix_sec=storm_times_unix_sec, allow_missing=True)

        indices_to_keep = indices_to_keep[indices_to_keep >= 0]
        if len(indices_to_keep) == 0:
            continue

        this_example_dict = input_examples.subset_examples(
            example_dict=this_example_dict, indices_to_keep=indices_to_keep)

        include_soundings = (
            input_examples.SOUNDING_MATRIX_KEY in this_example_dict)

        # Adding zero creates copies, so the arrays held by
        # `this_example_dict` are not the ones passed to normalization.
        reflectivity_image_matrix_dbz = (
            this_example_dict[input_examples.REFL_IMAGE_MATRIX_KEY] + 0.
        )
        az_shear_image_matrix_s01 = (
            this_example_dict[input_examples.AZ_SHEAR_IMAGE_MATRIX_KEY] + 0.
        )
        target_values = this_example_dict[input_examples.TARGET_VALUES_KEY] + 0

        sounding_matrix = None
        sounding_pressure_matrix_pascals = None

        if include_soundings:
            full_sounding_matrix = this_example_dict[
                input_examples.SOUNDING_MATRIX_KEY]

            pressure_index = this_example_dict[
                input_examples.SOUNDING_FIELDS_KEY
            ].index(soundings.PRESSURE_NAME)

            # Pressures are copied before normalization.
            sounding_pressure_matrix_pascals = (
                full_sounding_matrix[..., pressure_index] + 0.
            )

            # If pressure was read only for the output above, it is the last
            # field; drop that channel and keep all the others.  (Indexing
            # with -1 instead of :-1 would keep only the pressure channel.)
            if soundings.PRESSURE_NAME not in sounding_field_names:
                full_sounding_matrix = full_sounding_matrix[..., :-1]

            sounding_matrix = full_sounding_matrix + 0.

        if normalization_type_string is not None:
            reflectivity_image_matrix_dbz = dl_utils.normalize_radar_images(
                radar_image_matrix=reflectivity_image_matrix_dbz,
                field_names=[radar_utils.REFL_NAME],
                normalization_type_string=normalization_type_string,
                normalization_param_file_name=normalization_param_file_name,
                min_normalized_value=min_normalized_value,
                max_normalized_value=max_normalized_value).astype('float32')

            az_shear_image_matrix_s01 = dl_utils.normalize_radar_images(
                radar_image_matrix=az_shear_image_matrix_s01,
                field_names=azimuthal_shear_field_names,
                normalization_type_string=normalization_type_string,
                normalization_param_file_name=normalization_param_file_name,
                min_normalized_value=min_normalized_value,
                max_normalized_value=max_normalized_value).astype('float32')

            if include_soundings:
                sounding_matrix = dl_utils.normalize_soundings(
                    sounding_matrix=sounding_matrix,
                    field_names=sounding_field_names,
                    normalization_type_string=normalization_type_string,
                    normalization_param_file_name=normalization_param_file_name,
                    min_normalized_value=min_normalized_value,
                    max_normalized_value=max_normalized_value).astype('float32')

        list_of_predictor_matrices = [
            reflectivity_image_matrix_dbz, az_shear_image_matrix_s01
        ]
        if include_soundings:
            list_of_predictor_matrices.append(sounding_matrix)

        target_array = _finalize_targets(
            target_values=target_values, binarize_target=binarize_target,
            num_classes=num_classes)

        # The pressure entry is None when soundings were not read, as the
        # docstring promises (unconditionally adding zero would raise
        # TypeError on None).
        yield {
            INPUT_MATRICES_KEY: list_of_predictor_matrices,
            TARGET_ARRAY_KEY: target_array,
            STORM_IDS_KEY: this_example_dict[input_examples.STORM_IDS_KEY],
            STORM_TIMES_KEY: this_example_dict[input_examples.STORM_TIMES_KEY],
            SOUNDING_PRESSURES_KEY: sounding_pressure_matrix_pascals
        }
Esempio n. 11
0
def _run(example_file_name, example_indices, num_radar_rows, num_radar_columns,
         normalization_file_name, output_dir_name):
    """Makes figure to explain one convolution block.

    This is effectively the main method.  Each example is passed through
    2-D convolution, activation, batch normalization and max-pooling, and one
    figure is plotted for each index in `example_indices`.

    :param example_file_name: See documentation at top of file.
    :param example_indices: Same.
    :param num_radar_rows: Same.
    :param num_radar_columns: Same.
    :param normalization_file_name: Same.
    :param output_dir_name: Same.
    """

    # Non-positive grid dimensions are handed to the reader as None.
    num_radar_rows = None if num_radar_rows <= 0 else num_radar_rows
    num_radar_columns = None if num_radar_columns <= 0 else num_radar_columns

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    print('Reading data from: "{0:s}"...'.format(example_file_name))
    heights_to_keep_m_agl = numpy.array([RADAR_HEIGHT_M_AGL], dtype=int)

    example_dict = input_examples.read_example_file(
        netcdf_file_name=example_file_name, read_all_target_vars=False,
        target_name=DUMMY_TARGET_NAME, include_soundings=False,
        num_rows_to_keep=num_radar_rows, num_columns_to_keep=num_radar_columns,
        radar_heights_to_keep_m_agl=heights_to_keep_m_agl)

    if input_examples.REFL_IMAGE_MATRIX_KEY not in example_dict:
        # No separate reflectivity matrix: pick the desired field out of the
        # general radar matrix, keeping the field axis (length 1).
        all_field_names = example_dict[input_examples.RADAR_FIELDS_KEY]
        field_index = all_field_names.index(RADAR_FIELD_NAME)
        radar_matrix = example_dict[input_examples.RADAR_IMAGE_MATRIX_KEY]
        input_feature_matrix = radar_matrix[..., [field_index]]
    else:
        input_feature_matrix = example_dict[
            input_examples.REFL_IMAGE_MATRIX_KEY]

    num_examples = input_feature_matrix.shape[0]
    error_checking.assert_is_geq_numpy_array(example_indices, 0)
    error_checking.assert_is_less_than_numpy_array(
        example_indices, num_examples)

    input_feature_matrix = dl_utils.normalize_radar_images(
        radar_image_matrix=input_feature_matrix, field_names=[RADAR_FIELD_NAME],
        normalization_type_string=NORMALIZATION_TYPE_STRING,
        normalization_param_file_name=normalization_file_name)

    # Take the first slice along the trailing axis (4-D input) or trailing
    # two axes (otherwise), then add a length-1 final axis.
    if len(input_feature_matrix.shape) != 4:
        input_feature_matrix = input_feature_matrix[..., 0, 0]
    else:
        input_feature_matrix = input_feature_matrix[..., 0]

    input_feature_matrix = numpy.expand_dims(input_feature_matrix, axis=-1)

    def _apply_to_each_example(one_example_function):
        """Runs function on each example; stacks results along first axis.

        :param one_example_function: Function that takes an example index and
            returns a numpy array.
        :return: stacked_matrix: numpy array whose first axis has length
            `num_examples` (None if there are no examples).
        """

        stacked_matrix = None

        for k in range(num_examples):
            this_matrix = one_example_function(k)

            # The output shape is known only after the first example has
            # been processed, so allocate at that point.
            if stacked_matrix is None:
                stacked_matrix = numpy.full(
                    (num_examples,) + this_matrix.shape, numpy.nan
                )

            stacked_matrix[k, ...] = this_matrix

        return stacked_matrix

    print('Doing convolution for all {0:d} examples...'.format(num_examples))
    feature_matrix_after_conv = _apply_to_each_example(
        lambda k: standalone_utils.do_2d_convolution(
            feature_matrix=input_feature_matrix[k, ...] + 0,
            kernel_matrix=KERNEL_MATRIX, pad_edges=False, stride_length_px=1
        )[0, ...]
    )

    print('Doing activation for all {0:d} examples...'.format(num_examples))
    feature_matrix_after_activn = standalone_utils.do_activation(
        input_values=feature_matrix_after_conv + 0,
        function_name=architecture_utils.RELU_FUNCTION_STRING, alpha=0.2)

    print('Doing batch norm for all {0:d} examples...'.format(num_examples))
    feature_matrix_after_bn = standalone_utils.do_batch_normalization(
        feature_matrix=feature_matrix_after_activn + 0
    )

    print('Doing max-pooling for all {0:d} examples...\n'.format(num_examples))
    feature_matrix_after_pooling = _apply_to_each_example(
        lambda k: standalone_utils.do_2d_pooling(
            feature_matrix=feature_matrix_after_bn[k, ...], stride_length_px=2,
            pooling_type_string=standalone_utils.MAX_POOLING_TYPE_STRING
        )[0, ...]
    )

    # One figure per requested example.
    for this_index in example_indices:
        _plot_one_example(
            input_feature_matrix=input_feature_matrix[this_index, ...],
            feature_matrix_after_conv=feature_matrix_after_conv[
                this_index, ...],
            feature_matrix_after_activn=feature_matrix_after_activn[
                this_index, ...],
            feature_matrix_after_bn=feature_matrix_after_bn[this_index, ...],
            feature_matrix_after_pooling=feature_matrix_after_pooling[
                this_index, ...],
            output_file_name='{0:s}/convolution_block{1:06d}.jpg'.format(
                output_dir_name, this_index)
        )
def _run(input_example_dir_name, storm_metafile_name, num_examples_in_subset,
         subset_randomly, output_example_file_name):
    """Extracts desired examples and writes them to one file.

    This is effectively the main method.  Desired examples are listed in the
    storm metafile (optionally subset), looked up in the per-SPC-date example
    files, and written to a single output file, one day at a time.

    :param input_example_dir_name: See documentation at top of file.
    :param storm_metafile_name: Same.
    :param num_examples_in_subset: Same.
    :param subset_randomly: Same.
    :param output_example_file_name: Same.
    """

    print(
        'Reading storm metadata from: "{0:s}"...'.format(storm_metafile_name))
    example_id_strings, example_times_unix_sec = (
        tracking_io.read_ids_and_times(storm_metafile_name))

    # Subsetting happens only if the requested size is positive and smaller
    # than the number of examples in the metafile.
    num_examples_available = len(example_id_strings)
    take_subset = 0 < num_examples_in_subset < num_examples_available

    if take_subset and subset_randomly:
        all_indices = numpy.linspace(
            0, num_examples_available - 1, num=num_examples_available,
            dtype=int)
        chosen_indices = numpy.random.choice(
            all_indices, size=num_examples_in_subset, replace=False)

        example_id_strings = [example_id_strings[k] for k in chosen_indices]
        example_times_unix_sec = example_times_unix_sec[chosen_indices]
    elif take_subset:
        # Non-random subset = first N examples.
        example_id_strings = example_id_strings[:num_examples_in_subset]
        example_times_unix_sec = (
            example_times_unix_sec[:num_examples_in_subset])

    example_spc_date_strings = numpy.array(list(map(
        time_conversion.time_to_spc_date_string, example_times_unix_sec
    )))
    spc_date_strings = numpy.unique(example_spc_date_strings)

    # Find all input files up front, so that a missing file raises an error
    # before anything is read or written.
    example_file_name_by_day = []

    for this_date_string in spc_date_strings:
        example_file_name_by_day.append(
            input_examples.find_example_file(
                top_directory_name=input_example_dir_name,
                shuffled=False,
                spc_date_string=this_date_string,
                raise_error_if_missing=True)
        )

    days_and_files = zip(spc_date_strings, example_file_name_by_day)

    for day_index, (this_date_string, this_file_name) in enumerate(
            days_and_files):
        print('Reading data from: "{0:s}"...'.format(this_file_name))
        all_example_dict = input_examples.read_example_file(
            netcdf_file_name=this_file_name, read_all_target_vars=True)

        # Desired examples belonging to this SPC date.
        these_indices = numpy.where(
            example_spc_date_strings == this_date_string
        )[0]
        these_id_strings = [example_id_strings[k] for k in these_indices]
        these_times_unix_sec = example_times_unix_sec[these_indices]

        desired_indices = tracking_utils.find_storm_objects(
            all_id_strings=all_example_dict[input_examples.FULL_IDS_KEY],
            all_times_unix_sec=all_example_dict[
                input_examples.STORM_TIMES_KEY],
            id_strings_to_keep=these_id_strings,
            times_to_keep_unix_sec=these_times_unix_sec,
            allow_missing=False)

        desired_example_dict = input_examples.subset_examples(
            example_dict=all_example_dict, indices_to_keep=desired_indices)

        print('Writing {0:d} desired examples to: "{1:s}"...'.format(
            len(desired_indices), output_example_file_name))

        # The first day creates the output file; later days append to it.
        input_examples.write_example_file(
            netcdf_file_name=output_example_file_name,
            example_dict=desired_example_dict,
            append_to_file=day_index > 0)
def _run(example_file_name, example_indices, num_radar_rows, num_radar_columns,
         normalization_file_name, output_dir_name):
    """Plots original, translated, rotated, and noised versions of examples.

    This is effectively the main method.  It reads one radar field at one
    height from the example file, keeps the selected examples, normalizes them,
    applies data augmentation, denormalizes the result, and calls
    `_plot_one_example` once per selected example.

    :param example_file_name: See documentation at top of file.
    :param example_indices: Same.
    :param num_radar_rows: Same.  Non-positive values are replaced with None
        before the file is read.
    :param num_radar_columns: Same.  Non-positive values are replaced with None
        before the file is read.
    :param normalization_file_name: Same.
    :param output_dir_name: Same.
    """

    # Non-positive grid dimensions are handed to the reader as None.
    num_radar_rows = None if num_radar_rows <= 0 else num_radar_rows
    num_radar_columns = None if num_radar_columns <= 0 else num_radar_columns

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    print('Reading data from: "{0:s}"...'.format(example_file_name))
    example_dict = input_examples.read_example_file(
        netcdf_file_name=example_file_name,
        read_all_target_vars=True,
        include_soundings=False,
        num_rows_to_keep=num_radar_rows,
        num_columns_to_keep=num_radar_columns,
        radar_field_names_to_keep=[RADAR_FIELD_NAME],
        radar_heights_to_keep_m_agl=numpy.array(
            [RADAR_HEIGHT_M_AGL], dtype=int
        )
    )

    # Prefer the reflectivity matrix when the file provides one; otherwise
    # fall back to the generic radar-image matrix.
    matrix_key = (
        input_examples.REFL_IMAGE_MATRIX_KEY
        if input_examples.REFL_IMAGE_MATRIX_KEY in example_dict
        else input_examples.RADAR_IMAGE_MATRIX_KEY
    )
    full_radar_matrix = example_dict[matrix_key]

    # Every requested index must lie in [0, number of examples in file).
    num_examples_total = full_radar_matrix.shape[0]
    error_checking.assert_is_geq_numpy_array(example_indices, 0)
    error_checking.assert_is_less_than_numpy_array(
        example_indices, num_examples_total)

    selected_radar_matrix = full_radar_matrix[example_indices, ...]
    full_storm_id_strings = [
        example_dict[input_examples.FULL_IDS_KEY][j] for j in example_indices
    ]
    storm_times_unix_sec = (
        example_dict[input_examples.STORM_TIMES_KEY][example_indices]
    )

    normalized_radar_matrix = dl_utils.normalize_radar_images(
        radar_image_matrix=selected_radar_matrix,
        field_names=[RADAR_FIELD_NAME],
        normalization_type_string=NORMALIZATION_TYPE_STRING,
        normalization_param_file_name=normalization_file_name)

    num_examples = normalized_radar_matrix.shape[0]

    # The augmentation routine requires a target array; its values are not
    # used here, so all-zero placeholders are supplied.
    dummy_target_values = numpy.zeros(num_examples, dtype=int)

    augmentation_output = trainval_io._augment_radar_images(
        list_of_predictor_matrices=[normalized_radar_matrix],
        target_array=dummy_target_values,
        x_translations_pixels=X_TRANSLATIONS_PX,
        y_translations_pixels=Y_TRANSLATIONS_PX,
        ccw_rotation_angles_deg=CCW_ROTATION_ANGLES_DEG,
        noise_standard_deviation=NOISE_STANDARD_DEVIATION,
        num_noisings=1,
        flip_in_x=False,
        flip_in_y=False)

    # Only the first predictor matrix of the first returned item is needed.
    augmented_radar_matrix = dl_utils.denormalize_radar_images(
        radar_image_matrix=augmentation_output[0][0],
        field_names=[RADAR_FIELD_NAME],
        normalization_type_string=NORMALIZATION_TYPE_STRING,
        normalization_param_file_name=normalization_file_name)

    # Cut the augmented matrix along its first axis into four consecutive
    # groups.  The first three hold `num_examples` entries each and the last
    # takes whatever remains.
    # NOTE(review): presumes the augmentation output is stacked in the order
    # original, translated, rotated, noised -- confirm against
    # `trainval_io._augment_radar_images`.
    first_break = num_examples
    second_break = 2 * num_examples
    third_break = 3 * num_examples

    orig_radar_matrix = augmented_radar_matrix[:first_break, ...]
    translated_radar_matrix = (
        augmented_radar_matrix[first_break:second_break, ...]
    )
    rotated_radar_matrix = (
        augmented_radar_matrix[second_break:third_break, ...]
    )
    noised_radar_matrix = augmented_radar_matrix[third_break:, ...]

    for example_index in range(num_examples):
        _plot_one_example(
            orig_radar_matrix=orig_radar_matrix[example_index, ...],
            translated_radar_matrix=translated_radar_matrix[
                example_index, ...],
            rotated_radar_matrix=rotated_radar_matrix[example_index, ...],
            noised_radar_matrix=noised_radar_matrix[example_index, ...],
            output_dir_name=output_dir_name,
            full_storm_id_string=full_storm_id_strings[example_index],
            storm_time_unix_sec=storm_times_unix_sec[example_index])