def _run(top_input_dir_name, first_spc_date_string, last_spc_date_string,
         resolution_factor, top_output_dir_name):
    """Converts examples from GridRad to MYRORSS format.

    This is effectively the main method.

    :param top_input_dir_name: See documentation at top of file.
    :param first_spc_date_string: Same.
    :param last_spc_date_string: Same.
    :param resolution_factor: Same.
    :param top_output_dir_name: Same.
    """

    spc_date_strings = time_conversion.get_spc_dates_in_range(
        first_spc_date_string=first_spc_date_string,
        last_spc_date_string=last_spc_date_string)

    input_file_names = [
        input_examples.find_example_file(
            top_directory_name=top_input_dir_name, shuffled=False,
            spc_date_string=d, raise_error_if_missing=False
        )
        for d in spc_date_strings
    ]

    output_file_names = [
        input_examples.find_example_file(
            top_directory_name=top_output_dir_name, shuffled=False,
            spc_date_string=d, raise_error_if_missing=False
        )
        for d in spc_date_strings
    ]

    num_spc_dates = len(spc_date_strings)

    for i in range(num_spc_dates):
        if not os.path.isfile(input_file_names[i]):
            continue

        _convert_one_file(
            input_file_name=input_file_names[i],
            resolution_factor=resolution_factor,
            output_file_name=output_file_names[i]
        )

        print('\n')
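
# A hedged usage sketch of the converter above.  All argument values are
# hypothetical placeholders; in the actual script they come from the
# command-line arguments documented at the top of the file.
_run(
    top_input_dir_name='/hypothetical/gridrad_example_dir',
    first_spc_date_string='20110401',
    last_spc_date_string='20110403',
    resolution_factor=2,
    top_output_dir_name='/hypothetical/myrorss_example_dir')
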
    def test_find_example_file_unshuffled(self):
        """Ensures correct output from find_example_file.

        In this case the hypothetical file is *not* temporally shuffled.
        """

        this_file_name = input_examples.find_example_file(
            top_directory_name=TOP_DIRECTORY_NAME, shuffled=False,
            spc_date_string=SPC_DATE_STRING, raise_error_if_missing=False)

        self.assertEqual(this_file_name, EXAMPLE_FILE_NAME_UNSHUFFLED)
    def test_find_example_file_shuffled(self):
        """Ensures correct output from find_example_file.

        In this case the hypothetical file is temporally shuffled.
        """

        this_file_name = input_examples.find_example_file(
            top_directory_name=TOP_DIRECTORY_NAME, shuffled=True,
            batch_number=BATCH_NUMBER, raise_error_if_missing=False)

        self.assertEqual(this_file_name, EXAMPLE_FILE_NAME_SHUFFLED)
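
# A minimal usage sketch of the API exercised by the two tests above, assuming
# the module lives at gewittergefahr.deep_learning.input_examples.  The
# directory name, SPC date, and batch number below are placeholders, not
# values from the test file.
from gewittergefahr.deep_learning import input_examples

unshuffled_file_name = input_examples.find_example_file(
    top_directory_name='/hypothetical/example_dir', shuffled=False,
    spc_date_string='20110404', raise_error_if_missing=False)

shuffled_file_name = input_examples.find_example_file(
    top_directory_name='/hypothetical/example_dir', shuffled=True,
    batch_number=1234, raise_error_if_missing=False)

print(unshuffled_file_name)
print(shuffled_file_name)
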
def _set_output_locations(top_output_dir_name, num_input_examples,
                          num_examples_per_out_file,
                          first_output_batch_number):
    """Sets locations of output files.

    :param top_output_dir_name: See documentation at top of file.
    :param num_input_examples: Total number of examples in input files.
    :param num_examples_per_out_file: See documentation at top of file.
    :param first_output_batch_number: Same.
    :return: output_example_file_names: 1-D list of paths to output files.
    """

    num_output_files = int(
        numpy.ceil(float(num_input_examples) / num_examples_per_out_file))

    print(
        ('Num input examples = {0:d} ... num examples per output file = {1:d} '
         '... num output files = {2:d}').format(num_input_examples,
                                                num_examples_per_out_file,
                                                num_output_files))

    output_example_file_names = [
        input_examples.find_example_file(
            top_directory_name=top_output_dir_name,
            shuffled=True,
            batch_number=first_output_batch_number + i,
            raise_error_if_missing=False) for i in range(num_output_files)
    ]

    for this_file_name in output_example_file_names:
        if not os.path.isfile(this_file_name):
            continue

        print('Deleting output file: "{0:s}"...'.format(this_file_name))
        os.remove(this_file_name)

    return output_example_file_names
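
# A hedged usage sketch of the helper above.  The directory name is a
# placeholder, and the arithmetic is the point: 2500 input examples at 1000
# examples per output file should yield ceil(2500 / 1000) = 3 output paths,
# with batch numbers 0, 1, and 2.
output_example_file_names = _set_output_locations(
    top_output_dir_name='/hypothetical/shuffled_example_dir',
    num_input_examples=2500,
    num_examples_per_out_file=1000,
    first_output_batch_number=0)

assert len(output_example_file_names) == 3
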
def _run(model_file_name, top_example_dir_name, storm_metafile_name,
         output_dir_name):
    """Uses trained CNN to make predictions for specific examples.

    This is effectively the main method.

    :param model_file_name: See documentation at top of file.
    :param top_example_dir_name: Same.
    :param storm_metafile_name: Same.
    :param output_dir_name: Same.
    :raises: ValueError: if the model does multi-class classification.
    """

    print('Reading CNN from: "{0:s}"...'.format(model_file_name))
    model_object = cnn.read_model(model_file_name)

    num_output_neurons = (
        model_object.layers[-1].output.get_shape().as_list()[-1]
    )

    if num_output_neurons > 2:
        error_string = (
            'The model has {0:d} output neurons, which suggests {0:d}-class '
            'classification.  This script handles only binary classification.'
        ).format(num_output_neurons)

        raise ValueError(error_string)

    soundings_only = False

    if isinstance(model_object.input, list):
        list_of_input_tensors = model_object.input
    else:
        list_of_input_tensors = [model_object.input]

    if len(list_of_input_tensors) == 1:
        these_spatial_dim = numpy.array(
            list_of_input_tensors[0].get_shape().as_list()[1:-1], dtype=int
        )
        soundings_only = len(these_spatial_dim) == 1

    cnn_metafile_name = cnn.find_metafile(
        model_file_name=model_file_name, raise_error_if_missing=True
    )
    print('Reading CNN metadata from: "{0:s}"...'.format(cnn_metafile_name))
    cnn_metadata_dict = cnn.read_model_metadata(cnn_metafile_name)

    print('Reading storm metadata from: "{0:s}"...'.format(storm_metafile_name))
    desired_full_id_strings, desired_times_unix_sec = (
        tracking_io.read_ids_and_times(storm_metafile_name)
    )

    unique_spc_date_strings = list(set([
        time_conversion.time_to_spc_date_string(t)
        for t in desired_times_unix_sec
    ]))

    example_file_names = [
        input_examples.find_example_file(
            top_directory_name=top_example_dir_name, shuffled=False,
            spc_date_string=d, raise_error_if_missing=True
        ) for d in unique_spc_date_strings
    ]

    first_spc_date_string = time_conversion.time_to_spc_date_string(
        numpy.min(desired_times_unix_sec)
    )
    last_spc_date_string = time_conversion.time_to_spc_date_string(
        numpy.max(desired_times_unix_sec)
    )

    training_option_dict = cnn_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY]
    training_option_dict[trainval_io.EXAMPLE_FILES_KEY] = example_file_names
    training_option_dict[trainval_io.FIRST_STORM_TIME_KEY] = (
        time_conversion.get_start_of_spc_date(first_spc_date_string)
    )
    training_option_dict[trainval_io.LAST_STORM_TIME_KEY] = (
        time_conversion.get_end_of_spc_date(last_spc_date_string)
    )
    training_option_dict[trainval_io.NUM_EXAMPLES_PER_BATCH_KEY] = (
        NUM_EXAMPLES_PER_BATCH
    )

    if soundings_only:
        generator_object = testing_io.sounding_generator(
            option_dict=training_option_dict,
            desired_full_id_strings=desired_full_id_strings,
            desired_times_unix_sec=desired_times_unix_sec)

    elif cnn_metadata_dict[cnn.LAYER_OPERATIONS_KEY] is not None:
        generator_object = testing_io.gridrad_generator_2d_reduced(
            option_dict=training_option_dict,
            desired_full_id_strings=desired_full_id_strings,
            desired_times_unix_sec=desired_times_unix_sec,
            list_of_operation_dicts=cnn_metadata_dict[
                cnn.LAYER_OPERATIONS_KEY]
        )

    elif cnn_metadata_dict[cnn.CONV_2D3D_KEY]:
        generator_object = testing_io.myrorss_generator_2d3d(
            option_dict=training_option_dict,
            desired_full_id_strings=desired_full_id_strings,
            desired_times_unix_sec=desired_times_unix_sec)
    else:
        generator_object = testing_io.generator_2d_or_3d(
            option_dict=training_option_dict,
            desired_full_id_strings=desired_full_id_strings,
            desired_times_unix_sec=desired_times_unix_sec)

    include_soundings = (
        training_option_dict[trainval_io.SOUNDING_FIELDS_KEY] is not None
    )

    full_storm_id_strings = []
    storm_times_unix_sec = numpy.array([], dtype=int)
    observed_labels = numpy.array([], dtype=int)
    class_probability_matrix = None

    while True:
        try:
            this_storm_object_dict = next(generator_object)
            print(SEPARATOR_STRING)
        except StopIteration:
            break

        full_storm_id_strings += this_storm_object_dict[testing_io.FULL_IDS_KEY]
        storm_times_unix_sec = numpy.concatenate((
            storm_times_unix_sec,
            this_storm_object_dict[testing_io.STORM_TIMES_KEY]
        ))
        observed_labels = numpy.concatenate((
            observed_labels, this_storm_object_dict[testing_io.TARGET_ARRAY_KEY]
        ))

        if soundings_only:
            these_predictor_matrices = [
                this_storm_object_dict[testing_io.SOUNDING_MATRIX_KEY]
            ]
        else:
            these_predictor_matrices = this_storm_object_dict[
                testing_io.INPUT_MATRICES_KEY]

        if include_soundings:
            this_sounding_matrix = these_predictor_matrices[-1]
        else:
            this_sounding_matrix = None

        if soundings_only:
            this_probability_matrix = cnn.apply_cnn_soundings_only(
                model_object=model_object, sounding_matrix=this_sounding_matrix,
                verbose=True)
        elif cnn_metadata_dict[cnn.CONV_2D3D_KEY]:
            if training_option_dict[trainval_io.UPSAMPLE_REFLECTIVITY_KEY]:
                this_probability_matrix = cnn.apply_2d_or_3d_cnn(
                    model_object=model_object,
                    radar_image_matrix=these_predictor_matrices[0],
                    sounding_matrix=this_sounding_matrix, verbose=True)
            else:
                this_probability_matrix = cnn.apply_2d3d_cnn(
                    model_object=model_object,
                    reflectivity_matrix_dbz=these_predictor_matrices[0],
                    azimuthal_shear_matrix_s01=these_predictor_matrices[1],
                    sounding_matrix=this_sounding_matrix, verbose=True)
        else:
            this_probability_matrix = cnn.apply_2d_or_3d_cnn(
                model_object=model_object,
                radar_image_matrix=these_predictor_matrices[0],
                sounding_matrix=this_sounding_matrix, verbose=True)

        print(SEPARATOR_STRING)

        if class_probability_matrix is None:
            class_probability_matrix = this_probability_matrix + 0.
        else:
            class_probability_matrix = numpy.concatenate(
                (class_probability_matrix, this_probability_matrix), axis=0
            )

    output_file_name = prediction_io.find_ungridded_file(
        directory_name=output_dir_name, raise_error_if_missing=False)

    print('Writing results to: "{0:s}"...'.format(output_file_name))

    prediction_io.write_ungridded_predictions(
        netcdf_file_name=output_file_name,
        class_probability_matrix=class_probability_matrix,
        observed_labels=observed_labels, storm_ids=full_storm_id_strings,
        storm_times_unix_sec=storm_times_unix_sec,
        target_name=training_option_dict[trainval_io.TARGET_NAME_KEY],
        model_file_name=model_file_name
    )
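
# The loop above follows a generator-consumption pattern worth isolating: pull
# batches until StopIteration, then stack per-batch outputs along axis 0.  This
# self-contained sketch reproduces that pattern with plain numpy; make_batches
# is a stand-in for the testing_io generators, not a real library function.
import numpy


def make_batches(num_batches, batch_size, num_classes):
    """Yields one fake probability matrix per batch."""
    for _ in range(num_batches):
        this_matrix = numpy.random.uniform(size=(batch_size, num_classes))
        yield this_matrix / this_matrix.sum(axis=1, keepdims=True)


generator_object = make_batches(num_batches=3, batch_size=32, num_classes=2)
class_probability_matrix = None

while True:
    try:
        this_probability_matrix = next(generator_object)
    except StopIteration:
        break

    if class_probability_matrix is None:
        class_probability_matrix = this_probability_matrix + 0.
    else:
        class_probability_matrix = numpy.concatenate(
            (class_probability_matrix, this_probability_matrix), axis=0
        )

assert class_probability_matrix.shape == (96, 2)
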
def read_specific_examples(
        top_example_dir_name, desired_storm_ids, desired_times_unix_sec,
        option_dict, list_of_layer_operation_dicts=None):
    """Reads predictors for specific examples (storm objects).

    E = number of desired examples

    :param top_example_dir_name: Name of top-level directory with pre-processed
        examples.  Files therein will be found by
        `input_examples.find_example_file`.
    :param desired_storm_ids: length-E list of storm IDs (strings).
    :param desired_times_unix_sec: length-E numpy array of storm times.
    :param option_dict: See doc for any generator in this file.
    :param list_of_layer_operation_dicts: See doc for
        `gridrad_generator_2d_reduced`.  If you do not want to reduce radar
        images from 3-D to 2-D, leave this as None.
    :return: list_of_predictor_matrices: length-T list of numpy arrays, where
        T = number of input tensors to the model.  The first axis of each numpy
        array has length E.
    :return: sounding_pressure_matrix_pascals: E-by-H_s numpy array of
        pressures, where H_s = number of sounding heights.  If soundings were
        not read, this is None.
    """

    option_dict[trainval_io.SAMPLING_FRACTIONS_KEY] = None

    desired_spc_date_strings = [
        time_conversion.time_to_spc_date_string(t)
        for t in desired_times_unix_sec
    ]
    unique_spc_date_strings = numpy.unique(
        numpy.array(desired_spc_date_strings)
    ).tolist()

    myrorss_2d3d = None

    storm_ids = []
    storm_times_unix_sec = numpy.array([], dtype=int)
    list_of_predictor_matrices = None
    sounding_pressure_matrix_pascals = None

    for this_spc_date_string in unique_spc_date_strings:
        this_start_time_unix_sec = time_conversion.get_start_of_spc_date(
            this_spc_date_string)
        this_end_time_unix_sec = time_conversion.get_end_of_spc_date(
            this_spc_date_string)

        this_example_file_name = input_examples.find_example_file(
            top_directory_name=top_example_dir_name, shuffled=False,
            spc_date_string=this_spc_date_string)

        option_dict[trainval_io.EXAMPLE_FILES_KEY] = [this_example_file_name]
        option_dict[trainval_io.FIRST_STORM_TIME_KEY] = this_start_time_unix_sec
        option_dict[trainval_io.LAST_STORM_TIME_KEY] = this_end_time_unix_sec

        if myrorss_2d3d is None:
            netcdf_dataset = netCDF4.Dataset(this_example_file_name)
            myrorss_2d3d = (
                input_examples.REFL_IMAGE_MATRIX_KEY in netcdf_dataset.variables
            )
            netcdf_dataset.close()

        if list_of_layer_operation_dicts is not None:
            this_generator = gridrad_generator_2d_reduced(
                option_dict=option_dict,
                list_of_operation_dicts=list_of_layer_operation_dicts,
                num_examples_total=LARGE_INTEGER)
        elif myrorss_2d3d:
            this_generator = myrorss_generator_2d3d(
                option_dict=option_dict, num_examples_total=LARGE_INTEGER)
        else:
            this_generator = generator_2d_or_3d(
                option_dict=option_dict, num_examples_total=LARGE_INTEGER)

        this_storm_object_dict = next(this_generator)

        these_desired_indices = numpy.where(numpy.logical_and(
            desired_times_unix_sec >= this_start_time_unix_sec,
            desired_times_unix_sec <= this_end_time_unix_sec
        ))[0]

        these_indices = tracking_utils.find_storm_objects(
            all_storm_ids=this_storm_object_dict[STORM_IDS_KEY],
            all_times_unix_sec=this_storm_object_dict[STORM_TIMES_KEY],
            storm_ids_to_keep=[
                desired_storm_ids[k] for k in these_desired_indices
            ],
            times_to_keep_unix_sec=desired_times_unix_sec[
                these_desired_indices],
            allow_missing=False
        )

        storm_ids += [
            this_storm_object_dict[STORM_IDS_KEY][k] for k in these_indices
        ]
        storm_times_unix_sec = numpy.concatenate((
            storm_times_unix_sec,
            this_storm_object_dict[STORM_TIMES_KEY][these_indices]
        ))

        this_pressure_matrix_pascals = this_storm_object_dict[
            SOUNDING_PRESSURES_KEY]

        if this_pressure_matrix_pascals is not None:
            this_pressure_matrix_pascals = this_pressure_matrix_pascals[
                these_indices, ...]

            if sounding_pressure_matrix_pascals is None:
                sounding_pressure_matrix_pascals = (
                    this_pressure_matrix_pascals + 0.)
            else:
                sounding_pressure_matrix_pascals = numpy.concatenate(
                    (sounding_pressure_matrix_pascals,
                     this_pressure_matrix_pascals), axis=0)

        if list_of_predictor_matrices is None:
            num_matrices = len(this_storm_object_dict[INPUT_MATRICES_KEY])
            list_of_predictor_matrices = [None] * num_matrices

        for k in range(len(list_of_predictor_matrices)):
            this_new_matrix = this_storm_object_dict[INPUT_MATRICES_KEY][k][
                these_indices, ...]

            if list_of_predictor_matrices[k] is None:
                list_of_predictor_matrices[k] = this_new_matrix + 0.
            else:
                list_of_predictor_matrices[k] = numpy.concatenate(
                    (list_of_predictor_matrices[k], this_new_matrix), axis=0)

    sort_indices = tracking_utils.find_storm_objects(
        all_storm_ids=storm_ids, all_times_unix_sec=storm_times_unix_sec,
        storm_ids_to_keep=desired_storm_ids,
        times_to_keep_unix_sec=desired_times_unix_sec, allow_missing=False)

    for k in range(len(list_of_predictor_matrices)):
        list_of_predictor_matrices[k] = list_of_predictor_matrices[k][
            sort_indices, ...]

    if sounding_pressure_matrix_pascals is not None:
        sounding_pressure_matrix_pascals = sounding_pressure_matrix_pascals[
            sort_indices, ...]

    return list_of_predictor_matrices, sounding_pressure_matrix_pascals
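
# A hedged usage sketch of read_specific_examples.  The storm IDs, times, and
# directory are placeholders; in practice option_dict comes from the trained
# model's metadata (e.g., cnn_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY], as
# in the prediction script above).
desired_storm_ids = ['storm_0001', 'storm_0002']
desired_times_unix_sec = numpy.array([1302220800, 1302224400], dtype=int)

predictor_matrices, pressure_matrix_pascals = read_specific_examples(
    top_example_dir_name='/hypothetical/example_dir',
    desired_storm_ids=desired_storm_ids,
    desired_times_unix_sec=desired_times_unix_sec,
    option_dict=training_option_dict)  # from CNN metadata, defined elsewhere
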
def _run(input_example_dir_name, storm_metafile_name, num_examples_in_subset,
         subset_randomly, output_example_file_name):
    """Extracts desired examples and writes them to one file.

    This is effectively the main method.

    :param input_example_dir_name: See documentation at top of file.
    :param storm_metafile_name: Same.
    :param num_examples_in_subset: Same.
    :param subset_randomly: Same.
    :param output_example_file_name: Same.
    """

    print(
        'Reading storm metadata from: "{0:s}"...'.format(storm_metafile_name))
    example_id_strings, example_times_unix_sec = (
        tracking_io.read_ids_and_times(storm_metafile_name))

    if not 0 < num_examples_in_subset < len(example_id_strings):
        num_examples_in_subset = None

    if num_examples_in_subset is not None:
        if subset_randomly:
            # Equivalent to sampling without replacement from
            # numpy.arange(len(example_id_strings)).  A standalone sketch of
            # this subsetting step follows the function.
            these_indices = numpy.random.choice(
                len(example_id_strings), size=num_examples_in_subset,
                replace=False)

            example_id_strings = [example_id_strings[k] for k in these_indices]
            example_times_unix_sec = example_times_unix_sec[these_indices]
        else:
            example_id_strings = example_id_strings[:num_examples_in_subset]
            example_times_unix_sec = (
                example_times_unix_sec[:num_examples_in_subset])

    example_spc_date_strings = numpy.array([
        time_conversion.time_to_spc_date_string(t)
        for t in example_times_unix_sec
    ])
    spc_date_strings = numpy.unique(example_spc_date_strings)

    example_file_name_by_day = [
        input_examples.find_example_file(
            top_directory_name=input_example_dir_name,
            shuffled=False,
            spc_date_string=d,
            raise_error_if_missing=True) for d in spc_date_strings
    ]

    num_days = len(spc_date_strings)

    for i in range(num_days):
        print('Reading data from: "{0:s}"...'.format(
            example_file_name_by_day[i]))
        all_example_dict = input_examples.read_example_file(
            netcdf_file_name=example_file_name_by_day[i],
            read_all_target_vars=True)

        these_indices = numpy.where(
            example_spc_date_strings == spc_date_strings[i])[0]

        desired_indices = tracking_utils.find_storm_objects(
            all_id_strings=all_example_dict[input_examples.FULL_IDS_KEY],
            all_times_unix_sec=all_example_dict[
                input_examples.STORM_TIMES_KEY],
            id_strings_to_keep=[example_id_strings[k] for k in these_indices],
            times_to_keep_unix_sec=example_times_unix_sec[these_indices],
            allow_missing=False)

        desired_example_dict = input_examples.subset_examples(
            example_dict=all_example_dict, indices_to_keep=desired_indices)

        print('Writing {0:d} desired examples to: "{1:s}"...'.format(
            len(desired_indices), output_example_file_name))
        input_examples.write_example_file(
            netcdf_file_name=output_example_file_name,
            example_dict=desired_example_dict,
            append_to_file=i > 0)
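
# A standalone, runnable sketch of the random-subsetting step above, using
# plain numpy.  The storm IDs and times here are fabricated placeholders.
import numpy

example_id_strings = ['storm_01', 'storm_02', 'storm_03', 'storm_04']
example_times_unix_sec = numpy.array([100, 200, 300, 400], dtype=int)
num_examples_in_subset = 2

these_indices = numpy.random.choice(
    len(example_id_strings), size=num_examples_in_subset, replace=False)

example_id_strings = [example_id_strings[k] for k in these_indices]
example_times_unix_sec = example_times_unix_sec[these_indices]

print(example_id_strings, example_times_unix_sec)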