Example #1
def subset_by_standard_atmo(prediction_dict, standard_atmo_enum):
    """Subsets examples by standard-atmosphere type.

    :param prediction_dict: See doc for `write_file`.
    :param standard_atmo_enum: See doc for
        `example_utils.check_standard_atmo_type`.
    :return: prediction_dict: Same as input but with fewer examples.
    """

    example_utils.check_standard_atmo_type(standard_atmo_enum)

    all_standard_atmo_enums = example_utils.parse_example_ids(
        prediction_dict[EXAMPLE_IDS_KEY])[
            example_utils.STANDARD_ATMO_FLAGS_KEY]

    desired_indices = numpy.where(
        all_standard_atmo_enums == standard_atmo_enum)[0]
    return subset_by_index(prediction_dict=prediction_dict,
                           desired_indices=desired_indices)
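
The core of this function is a single `numpy.where` filter over the enums parsed from the example IDs. A minimal sketch of that step, using invented enum values:

import numpy

# Hypothetical standard-atmosphere enums, as parse_example_ids might return
# them; the values are invented for illustration.
all_standard_atmo_enums = numpy.array([1, 3, 1, 2, 1], dtype=int)
desired_enum = 1

desired_indices = numpy.where(all_standard_atmo_enums == desired_enum)[0]
print(desired_indices)  # [0 2 4]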
Example #2
def subset_by_zenith_angle(prediction_dict,
                           min_zenith_angle_rad,
                           max_zenith_angle_rad,
                           max_inclusive=None):
    """Subsets examples by solar zenith angle.

    :param prediction_dict: See doc for `write_file`.
    :param min_zenith_angle_rad: Minimum zenith angle (radians).
    :param max_zenith_angle_rad: Maximum zenith angle (radians).
    :param max_inclusive: Boolean flag.  If True, examples with zenith angle
        exactly `max_zenith_angle_rad` are included in the subset; if False,
        they are excluded.  If None, defaults to True only when
        `max_zenith_angle_rad` equals `MAX_ZENITH_ANGLE_RADIANS`.
    :return: prediction_dict: Same as input but with fewer examples.
    """

    error_checking.assert_is_geq(min_zenith_angle_rad, 0.)
    error_checking.assert_is_leq(max_zenith_angle_rad,
                                 MAX_ZENITH_ANGLE_RADIANS)
    error_checking.assert_is_greater(max_zenith_angle_rad,
                                     min_zenith_angle_rad)

    if max_inclusive is None:
        max_inclusive = max_zenith_angle_rad == MAX_ZENITH_ANGLE_RADIANS

    error_checking.assert_is_boolean(max_inclusive)

    all_zenith_angles_rad = example_utils.parse_example_ids(
        prediction_dict[EXAMPLE_IDS_KEY])[example_utils.ZENITH_ANGLES_KEY]

    min_flags = all_zenith_angles_rad >= min_zenith_angle_rad

    if max_inclusive:
        max_flags = all_zenith_angles_rad <= max_zenith_angle_rad
    else:
        max_flags = all_zenith_angles_rad < max_zenith_angle_rad

    desired_indices = numpy.where(numpy.logical_and(min_flags, max_flags))[0]
    return subset_by_index(prediction_dict=prediction_dict,
                           desired_indices=desired_indices)
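
The one subtlety here is boundary handling: the minimum is always inclusive, while the maximum is inclusive only when `max_inclusive` is True. A numpy-only sketch with invented angles:

import numpy

all_zenith_angles_rad = numpy.array([0.0, 0.5, 1.0, 1.5])
min_zenith_angle_rad = 0.5
max_zenith_angle_rad = 1.5
max_inclusive = False

min_flags = all_zenith_angles_rad >= min_zenith_angle_rad
if max_inclusive:
    max_flags = all_zenith_angles_rad <= max_zenith_angle_rad
else:
    max_flags = all_zenith_angles_rad < max_zenith_angle_rad

# The exclusive upper bound drops the example at exactly 1.5 rad.
print(numpy.where(numpy.logical_and(min_flags, max_flags))[0])  # [1 2]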
Example #3
    def test_parse_example_ids(self):
        """Ensures correct output from parse_example_ids."""

        metadata_dict = example_utils.parse_example_ids(EXAMPLE_ID_STRINGS)
        these_latitudes_deg_n = metadata_dict[example_utils.LATITUDES_KEY]
        these_longitudes_deg_e = metadata_dict[example_utils.LONGITUDES_KEY]
        these_albedos = metadata_dict[example_utils.ALBEDOS_KEY]
        these_zenith_angles_rad = metadata_dict[
            example_utils.ZENITH_ANGLES_KEY]
        these_times_unix_sec = metadata_dict[example_utils.VALID_TIMES_KEY]
        these_standard_atmo_flags = (
            metadata_dict[example_utils.STANDARD_ATMO_FLAGS_KEY])
        these_10m_temps_kelvins = (
            metadata_dict[example_utils.TEMPERATURES_10M_KEY])

        self.assertTrue(
            numpy.allclose(these_latitudes_deg_n,
                           LATITUDES_FOR_ID_DEG_N,
                           atol=TOLERANCE))
        self.assertTrue(
            numpy.allclose(these_longitudes_deg_e,
                           LONGITUDES_FOR_ID_DEG_E,
                           atol=TOLERANCE))
        self.assertTrue(
            numpy.allclose(these_albedos, ALBEDOS_FOR_ID, atol=TOLERANCE))
        self.assertTrue(
            numpy.allclose(these_zenith_angles_rad,
                           ZENITH_ANGLES_FOR_ID_RAD,
                           atol=TOLERANCE))
        self.assertTrue(
            numpy.array_equal(these_times_unix_sec, TIMES_FOR_ID_UNIX_SEC))
        self.assertTrue(
            numpy.array_equal(these_standard_atmo_flags,
                              STANDARD_ATMO_FLAGS_FOR_ID))
        self.assertTrue(
            numpy.allclose(these_10m_temps_kelvins,
                           TEMPERATURES_FOR_ID_KELVINS,
                           atol=TOLERANCE))
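
The test relies on `parse_example_ids` turning a list of ID strings into parallel numpy arrays of metadata. The real ID format is defined in `example_utils` and not shown in this listing, but the round trip can be sketched with a hypothetical key=value format:

import numpy

# Hypothetical ID format, for illustration only; the real format is defined
# by example_utils.create_example_ids.
id_strings = ['lat=40.00_long=255.00_albedo=0.30',
              'lat=-10.00_long=100.00_albedo=0.60']

def parse_ids_sketch(id_strings):
    fields = [dict(p.split('=') for p in s.split('_')) for s in id_strings]
    return {k: numpy.array([float(f[k]) for f in fields]) for k in fields[0]}

metadata_dict = parse_ids_sketch(id_strings)
print(metadata_dict['lat'])  # [ 40. -10.]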
Example #4
def subset_by_month(prediction_dict, desired_month):
    """Subsets examples by month.

    :param prediction_dict: See doc for `write_file`.
    :param desired_month: Desired month (integer from 1...12).
    :return: prediction_dict: Same as input but with fewer examples.
    """

    error_checking.assert_is_integer(desired_month)
    error_checking.assert_is_geq(desired_month, 1)
    error_checking.assert_is_leq(desired_month, 12)

    all_times_unix_sec = example_utils.parse_example_ids(
        prediction_dict[EXAMPLE_IDS_KEY])[example_utils.VALID_TIMES_KEY]

    all_months = numpy.array([
        int(time_conversion.unix_sec_to_string(t, '%m'))
        for t in all_times_unix_sec
    ],
                             dtype=int)

    desired_indices = numpy.where(all_months == desired_month)[0]
    return subset_by_index(prediction_dict=prediction_dict,
                           desired_indices=desired_indices)
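
The month extraction goes through `time_conversion.unix_sec_to_string`, which is not shown in this listing; assuming the timestamps are UTC, the standard library gives the same result:

import numpy
import time

all_times_unix_sec = numpy.array([0, 16000000, 32000000], dtype=int)

# Stand-in for time_conversion.unix_sec_to_string(t, '%m').
all_months = numpy.array(
    [int(time.strftime('%m', time.gmtime(t))) for t in all_times_unix_sec],
    dtype=int)
print(all_months)  # [1 7 1]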
Example #5
def subset_by_albedo(prediction_dict,
                     min_albedo,
                     max_albedo,
                     max_inclusive=None):
    """Subsets examples by albedo.

    :param prediction_dict: See doc for `write_file`.
    :param min_albedo: Minimum albedo (unitless).
    :param max_albedo: Maximum albedo (unitless).
    :param max_inclusive: Boolean flag.  If True, examples with albedo exactly
        `max_albedo` are included in the subset; if False, they are excluded.
        If None, defaults to True only when `max_albedo` is 1.
    :return: prediction_dict: Same as input but with fewer examples.
    """

    error_checking.assert_is_geq(min_albedo, 0.)
    error_checking.assert_is_leq(max_albedo, 1.)
    error_checking.assert_is_greater(max_albedo, min_albedo)

    if max_inclusive is None:
        max_inclusive = max_albedo == 1.

    error_checking.assert_is_boolean(max_inclusive)

    all_albedos = example_utils.parse_example_ids(
        prediction_dict[EXAMPLE_IDS_KEY])[example_utils.ALBEDOS_KEY]

    min_flags = all_albedos >= min_albedo

    if max_inclusive:
        max_flags = all_albedos <= max_albedo
    else:
        max_flags = all_albedos < max_albedo

    desired_indices = numpy.where(numpy.logical_and(min_flags, max_flags))[0]
    return subset_by_index(prediction_dict=prediction_dict,
                           desired_indices=desired_indices)
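
Note the default rule shared by this function and `subset_by_zenith_angle`: when `max_inclusive` is None, the upper bound is included only if it equals the physical maximum (1 for albedo). That rule in isolation:

def resolve_max_inclusive(max_albedo, max_inclusive=None):
    # Include the upper bound only when it coincides with the physical max.
    if max_inclusive is None:
        max_inclusive = max_albedo == 1.
    return max_inclusive

print(resolve_max_inclusive(0.8))  # False
print(resolve_max_inclusive(1.0))  # True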
Example #6
def _run_experiment_one_example(example_dict, example_index, max_noise_k_day01,
                                high_res_pressures_pa, high_res_heights_m_asl,
                                first_interp_method_name,
                                second_interp_method_name, interp_fluxes,
                                output_dir_name):
    """Runs interpolation experiment for one example (one profile).

    H = number of levels in high-resolution grid

    :param example_dict: Dictionary in format returned by
        `example_io.read_file`.
    :param example_index: Will run experiment for [i]th example, where
        i = `example_index`.
    :param max_noise_k_day01: See documentation at top of file.
    :param high_res_pressures_pa: length-H numpy array of pressures (Pascals) in
        high-resolution grid.
    :param high_res_heights_m_asl: length-H numpy array of heights (metres above
        sea level) in high-resolution grid.
    :param first_interp_method_name: See documentation at top of file.
    :param second_interp_method_name: Same.
    :param interp_fluxes: Same.
    :param output_dir_name: Same.
    :return: max_difference_k_day01: Column-max difference between
        low-resolution and fake low-resolution heating rates.
    """

    example_id_string = (
        example_dict[example_utils.EXAMPLE_IDS_KEY][example_index])
    metadata_dict = example_utils.parse_example_ids([example_id_string])
    surface_height_m_asl = geodetic_utils._get_elevation(
        latitude_deg=metadata_dict[example_utils.LATITUDES_KEY][0],
        longitude_deg=metadata_dict[example_utils.LONGITUDES_KEY][0])[0]

    low_res_heights_m_agl = example_dict[example_utils.HEIGHTS_KEY]
    low_res_heights_m_asl = surface_height_m_asl + low_res_heights_m_agl
    low_res_pressures_pa = standard_atmo.height_to_pressure(
        low_res_heights_m_asl)
    low_res_heating_rates_k_day01 = example_utils.get_field_from_dict(
        example_dict=example_dict,
        field_name=example_utils.SHORTWAVE_HEATING_RATE_NAME)[example_index, :]

    if interp_fluxes:
        low_res_down_fluxes_w_m02 = example_utils.get_field_from_dict(
            example_dict=example_dict,
            field_name=example_utils.SHORTWAVE_DOWN_FLUX_NAME)[
                example_index, :]

        interp_object = interp1d(x=low_res_pressures_pa[::-1],
                                 y=low_res_down_fluxes_w_m02[::-1],
                                 kind=first_interp_method_name,
                                 bounds_error=False,
                                 fill_value='extrapolate',
                                 assume_sorted=True)
        high_res_down_fluxes_w_m02 = interp_object(high_res_pressures_pa)

        low_res_up_fluxes_w_m02 = example_utils.get_field_from_dict(
            example_dict=example_dict,
            field_name=example_utils.SHORTWAVE_UP_FLUX_NAME)[example_index, :]

        interp_object = interp1d(x=low_res_pressures_pa[::-1],
                                 y=low_res_up_fluxes_w_m02[::-1],
                                 kind=first_interp_method_name,
                                 bounds_error=False,
                                 fill_value='extrapolate',
                                 assume_sorted=True)
        high_res_up_fluxes_w_m02 = interp_object(high_res_pressures_pa)

        high_res_heating_rates_k_day01 = _fluxes_to_heating_rate(
            down_fluxes_w_m02=high_res_down_fluxes_w_m02,
            up_fluxes_w_m02=high_res_up_fluxes_w_m02,
            pressures_pa=high_res_pressures_pa)

        interp_object = interp1d(x=high_res_pressures_pa[::-1],
                                 y=high_res_down_fluxes_w_m02[::-1],
                                 kind=second_interp_method_name,
                                 bounds_error=True,
                                 assume_sorted=True)
        fake_low_res_down_fluxes_w_m02 = interp_object(low_res_pressures_pa)

        interp_object = interp1d(x=high_res_pressures_pa[::-1],
                                 y=high_res_up_fluxes_w_m02[::-1],
                                 kind=second_interp_method_name,
                                 bounds_error=True,
                                 assume_sorted=True)
        fake_low_res_up_fluxes_w_m02 = interp_object(low_res_pressures_pa)

        fake_low_res_heating_rates_k_day01 = _fluxes_to_heating_rate(
            down_fluxes_w_m02=fake_low_res_down_fluxes_w_m02,
            up_fluxes_w_m02=fake_low_res_up_fluxes_w_m02,
            pressures_pa=low_res_pressures_pa)
    else:
        interp_object = interp1d(x=low_res_pressures_pa[::-1],
                                 y=low_res_heating_rates_k_day01[::-1],
                                 kind=first_interp_method_name,
                                 bounds_error=False,
                                 fill_value='extrapolate',
                                 assume_sorted=True)
        high_res_heating_rates_k_day01 = interp_object(high_res_pressures_pa)

        noise_values_k_day01 = numpy.random.uniform(
            low=-max_noise_k_day01,
            high=max_noise_k_day01,
            size=high_res_heating_rates_k_day01.shape)
        noise_values_k_day01 *= (
            max_noise_k_day01 /
            numpy.max(numpy.absolute(noise_values_k_day01)))
        high_res_heating_rates_k_day01 += noise_values_k_day01

        interp_object = interp1d(x=high_res_pressures_pa[::-1],
                                 y=high_res_heating_rates_k_day01[::-1],
                                 kind=second_interp_method_name,
                                 bounds_error=True,
                                 assume_sorted=True)
        fake_low_res_heating_rates_k_day01 = interp_object(
            low_res_pressures_pa)

    high_res_heights_m_agl = high_res_heights_m_asl - surface_height_m_asl

    figure_object, axes_object = profile_plotting.plot_one_variable(
        values=low_res_heating_rates_k_day01,
        heights_m_agl=low_res_heights_m_agl,
        use_log_scale=True,
        line_colour=LOW_RES_COLOUR,
        line_width=LOW_RES_LINE_WIDTH)
    profile_plotting.plot_one_variable(values=high_res_heating_rates_k_day01,
                                       heights_m_agl=high_res_heights_m_agl,
                                       use_log_scale=True,
                                       line_colour=HIGH_RES_COLOUR,
                                       line_width=HIGH_RES_LINE_WIDTH,
                                       figure_object=figure_object)
    profile_plotting.plot_one_variable(
        values=fake_low_res_heating_rates_k_day01,
        heights_m_agl=low_res_heights_m_agl,
        use_log_scale=True,
        line_colour=FAKE_LOW_RES_COLOUR,
        line_width=FAKE_LOW_RES_LINE_WIDTH,
        figure_object=figure_object)

    axes_object.set_xlim(left=-0.5)
    y_max = axes_object.get_ylim()[1]
    axes_object.set_ylim(top=y_max * 1.05)

    max_difference_k_day01 = numpy.max(
        numpy.absolute(low_res_heating_rates_k_day01 -
                       fake_low_res_heating_rates_k_day01))
    title_string = (
        'Max diff between low-res and reconstructed low-res = {0:.4f} K day'
    ).format(max_difference_k_day01)

    title_string = title_string + r'$^{-1}$'
    axes_object.set_title(title_string, fontsize=20)

    output_file_name = '{0:s}/{1:s}.jpg'.format(output_dir_name,
                                                example_id_string)
    print('Saving figure to: "{0:s}"...'.format(output_file_name))
    figure_object.savefig(output_file_name,
                          dpi=FIGURE_RESOLUTION_DPI,
                          pad_inches=0,
                          bbox_inches='tight')
    pyplot.close(figure_object)

    return max_difference_k_day01
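
`_fluxes_to_heating_rate` is not shown in this listing. Assuming it follows the usual radiative-transfer relation, the heating rate is proportional to the vertical divergence of net flux in pressure coordinates; a rough sketch under that assumption (constants and sign convention are mine, not the repo's):

import numpy

GRAVITY_M_S02 = 9.80665  # gravitational acceleration (m s^-2)
CP_J_KG01_K01 = 1004.    # specific heat of dry air (J kg^-1 K^-1)
SECONDS_TO_DAYS = 86400.

def fluxes_to_heating_rate_sketch(down_fluxes_w_m02, up_fluxes_w_m02,
                                  pressures_pa):
    """Computes heating rate (K day^-1) from net-flux divergence."""
    net_fluxes_w_m02 = down_fluxes_w_m02 - up_fluxes_w_m02
    dflux_dpressure = numpy.gradient(net_fluxes_w_m02, pressures_pa)
    return GRAVITY_M_S02 / CP_J_KG01_K01 * dflux_dpressure * SECONDS_TO_DAYS

pressures_pa = numpy.array([100000., 90000., 80000.])
down_fluxes_w_m02 = numpy.array([500., 520., 540.])
up_fluxes_w_m02 = numpy.array([100., 105., 110.])
print(fluxes_to_heating_rate_sketch(
    down_fluxes_w_m02, up_fluxes_w_m02, pressures_pa))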
Example #7
def _run(input_file_name, top_output_dir_name):
    """Splits predictions by site (point location).

    This is effectively the main method.

    :param input_file_name: See documentation at top of file.
    :param top_output_dir_name: Same.
    :raises: ValueError: if any example cannot be assigned to a site.
    """

    # Read data.
    print('Reading data from: "{0:s}"...'.format(input_file_name))
    prediction_dict = prediction_io.read_file(input_file_name)
    example_metadata_dict = example_utils.parse_example_ids(
        prediction_dict[prediction_io.EXAMPLE_IDS_KEY])

    example_latitudes_deg_n = number_rounding.round_to_nearest(
        example_metadata_dict[example_utils.LATITUDES_KEY],
        LATLNG_TOLERANCE_DEG)
    example_longitudes_deg_e = number_rounding.round_to_nearest(
        example_metadata_dict[example_utils.LONGITUDES_KEY],
        LATLNG_TOLERANCE_DEG)
    example_longitudes_deg_e = lng_conversion.convert_lng_positive_in_west(
        example_longitudes_deg_e)

    num_examples = len(example_latitudes_deg_n)
    example_written_flags = numpy.full(num_examples, False, dtype=bool)

    site_names = list(SITE_NAME_TO_LATLNG.keys())
    num_sites = len(site_names)

    for j in range(num_sites):
        this_site_latitude_deg_n = SITE_NAME_TO_LATLNG[site_names[j]][0]
        this_site_longitude_deg_e = SITE_NAME_TO_LATLNG[site_names[j]][1]

        these_indices = numpy.where(
            numpy.logical_and(
                numpy.absolute(example_latitudes_deg_n -
                               this_site_latitude_deg_n) <=
                LATLNG_TOLERANCE_DEG,
                numpy.absolute(example_longitudes_deg_e -
                               this_site_longitude_deg_e) <=
                LATLNG_TOLERANCE_DEG))[0]

        this_prediction_dict = prediction_io.subset_by_index(
            prediction_dict=copy.deepcopy(prediction_dict),
            desired_indices=these_indices)

        this_output_file_name = '{0:s}/{1:s}/predictions.nc'.format(
            top_output_dir_name, site_names[j])
        print('Writing {0:d} examples to: "{1:s}"...'.format(
            len(these_indices), this_output_file_name))

        if len(these_indices) == 0:
            continue

        example_written_flags[these_indices] = True

        prediction_io.write_file(
            netcdf_file_name=this_output_file_name,
            scalar_target_matrix=this_prediction_dict[
                prediction_io.SCALAR_TARGETS_KEY],
            vector_target_matrix=this_prediction_dict[
                prediction_io.VECTOR_TARGETS_KEY],
            scalar_prediction_matrix=this_prediction_dict[
                prediction_io.SCALAR_PREDICTIONS_KEY],
            vector_prediction_matrix=this_prediction_dict[
                prediction_io.VECTOR_PREDICTIONS_KEY],
            heights_m_agl=this_prediction_dict[prediction_io.HEIGHTS_KEY],
            example_id_strings=this_prediction_dict[
                prediction_io.EXAMPLE_IDS_KEY],
            model_file_name=this_prediction_dict[prediction_io.MODEL_FILE_KEY])

    if numpy.all(example_written_flags):
        return

    # bad_latitudes_deg_n = (
    #     example_latitudes_deg_n[example_written_flags == False]
    # )
    # bad_longitudes_deg_e = (
    #     example_longitudes_deg_e[example_written_flags == False]
    # )
    # bad_coord_matrix = numpy.transpose(numpy.vstack((
    #     bad_latitudes_deg_n, bad_longitudes_deg_e
    # )))
    # bad_coord_matrix = numpy.unique(bad_coord_matrix, axis=0)
    # print(bad_coord_matrix)

    error_string = (
        '{0:d} of {1:d} examples could not be assigned to a site.  This is a '
        'BIG PROBLEM.').format(numpy.sum(example_written_flags == False),
                               num_examples)

    raise ValueError(error_string)
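
Site assignment above is a plain coordinate match within `LATLNG_TOLERANCE_DEG`. A self-contained sketch of the matching step (site table and tolerance are invented):

import numpy

LATLNG_TOLERANCE_DEG = 1e-4  # assumed value, for illustration
SITE_NAME_TO_LATLNG = {'site_a': (40.0, 255.0)}

example_latitudes_deg_n = numpy.array([40.0, 41.0])
example_longitudes_deg_e = numpy.array([255.0, 255.0])

site_latitude_deg_n, site_longitude_deg_e = SITE_NAME_TO_LATLNG['site_a']
these_indices = numpy.where(numpy.logical_and(
    numpy.absolute(example_latitudes_deg_n - site_latitude_deg_n)
    <= LATLNG_TOLERANCE_DEG,
    numpy.absolute(example_longitudes_deg_e - site_longitude_deg_e)
    <= LATLNG_TOLERANCE_DEG
))[0]
print(these_indices)  # [0]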
Example #8
def read_file(netcdf_file_name,
              exclude_summit_greenland=False,
              id_strings_to_read=None,
              allow_missing_ids=False):
    """Reads learning examples from NetCDF file.

    E = number of examples
    H = number of heights
    P_s = number of scalar predictors
    P_v = number of vector predictors
    T_s = number of scalar targets
    T_v = number of vector targets

    :param netcdf_file_name: Path to input file.
    :param exclude_summit_greenland: Boolean flag.  If True, will not read data
        from Summit, Greenland.
    :param id_strings_to_read: 1-D list of IDs for examples to read.  If None,
        will read all examples.
    :param allow_missing_ids: [used only if `id_strings_to_read is not None`]
        Boolean flag.  If True, will allow missing IDs.  If False, will throw
        error for missing IDs.

    :return: example_dict: Dictionary with the following keys.
    example_dict['scalar_predictor_matrix']: numpy array (E x P_s) with values
        of scalar predictors.
    example_dict['scalar_predictor_names']: list (length P_s) with names of
        scalar predictors.
    example_dict['vector_predictor_matrix']: numpy array (E x H x P_v) with
        values of vector predictors.
    example_dict['vector_predictor_names']: list (length P_v) with names of
        vector predictors.
    example_dict['scalar_target_matrix']: numpy array (E x T_s) with values of
        scalar targets.
    example_dict['scalar_target_names']: list (length T_s) with names of scalar
        targets.
    example_dict['vector_target_matrix']: numpy array (E x H x T_v) with values
        of vector targets.
    example_dict['vector_target_names']: list (length T_v) with names of vector
        targets.
    example_dict['valid_times_unix_sec']: length-E numpy array of valid times
        (Unix seconds).
    example_dict['heights_m_agl']: length-H numpy array of heights (metres above
        ground level).
    example_dict['standard_atmo_flags']: length-E numpy array of flags (each in
        the list `STANDARD_ATMO_ENUMS`).
    example_dict['example_id_strings']: length-E list of example IDs.
    """

    # TODO(thunderhoser): This is a HACK.
    if not os.path.isfile(netcdf_file_name):
        netcdf_file_name = netcdf_file_name.replace('/home/ryan.lagerquist',
                                                    '/home/ralager')

    dataset_object = netCDF4.Dataset(netcdf_file_name)

    example_id_strings = [
        str(id) for id in netCDF4.chartostring(dataset_object.variables[
            example_utils.EXAMPLE_IDS_KEY][:])
    ]

    if id_strings_to_read is None:
        num_examples = dataset_object.dimensions[EXAMPLE_DIMENSION_KEY].size
        indices_to_read = numpy.linspace(0,
                                         num_examples - 1,
                                         num=num_examples,
                                         dtype=int)
    else:
        exclude_summit_greenland = False

        indices_to_read = example_utils.find_examples(
            all_id_strings=example_id_strings,
            desired_id_strings=id_strings_to_read,
            allow_missing=allow_missing_ids)
        indices_to_read = indices_to_read[indices_to_read >= 0]

    error_checking.assert_is_boolean(exclude_summit_greenland)

    # TODO(thunderhoser): This is a HACK to deal with potentially bad data.
    if exclude_summit_greenland:
        metadata_dict = example_utils.parse_example_ids(example_id_strings)
        latitudes_deg_n = metadata_dict[example_utils.LATITUDES_KEY]
        longitudes_deg_e = lng_conversion.convert_lng_positive_in_west(
            metadata_dict[example_utils.LONGITUDES_KEY])

        bad_flags = numpy.logical_and(
            numpy.isclose(latitudes_deg_n, SUMMIT_LATITUDE_DEG_N, atol=1e-4),
            numpy.isclose(longitudes_deg_e, SUMMIT_LONGITUDE_DEG_E, atol=1e-4))
        good_indices = numpy.where(numpy.invert(bad_flags))[0]

        warning_string = (
            'Removing {0:d} of {1:d} examples (profiles), because they are at '
            'Summit GL.').format(
                len(indices_to_read) - len(good_indices), len(indices_to_read))
        warnings.warn(warning_string)

        indices_to_read = indices_to_read[good_indices]

    example_dict = {
        example_utils.EXAMPLE_IDS_KEY:
        [example_id_strings[k] for k in indices_to_read]
    }

    string_keys = [
        example_utils.SCALAR_PREDICTOR_NAMES_KEY,
        example_utils.VECTOR_PREDICTOR_NAMES_KEY,
        example_utils.SCALAR_TARGET_NAMES_KEY,
        example_utils.VECTOR_TARGET_NAMES_KEY
    ]
    main_data_keys = [
        example_utils.SCALAR_PREDICTOR_VALS_KEY,
        example_utils.VECTOR_PREDICTOR_VALS_KEY,
        example_utils.SCALAR_TARGET_VALS_KEY,
        example_utils.VECTOR_TARGET_VALS_KEY
    ]
    integer_keys = [
        example_utils.VALID_TIMES_KEY, example_utils.STANDARD_ATMO_FLAGS_KEY
    ]

    for this_key in string_keys:
        example_dict[this_key] = [
            str(n) for n in netCDF4.chartostring(
                dataset_object.variables[this_key][:])
        ]

    for this_key in main_data_keys:
        example_dict[this_key] = numpy.array(
            dataset_object.variables[this_key][indices_to_read, ...],
            dtype=float)

    for this_key in integer_keys:
        example_dict[this_key] = numpy.array(numpy.round(
            dataset_object.variables[this_key][indices_to_read]),
                                             dtype=int)

    example_dict[example_utils.HEIGHTS_KEY] = numpy.array(
        dataset_object.variables[example_utils.HEIGHTS_KEY][:], dtype=float)

    dataset_object.close()
    return example_dict
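
Example IDs are stored in the NetCDF file as fixed-width character arrays, which is why the reader goes through `netCDF4.chartostring`. A minimal round trip with the same helpers (the ID strings are invented):

import numpy
import netCDF4

# stringtochar pads each string to fixed width; chartostring reverses it.
id_strings = ['example_one', 'example_two_longer']
char_array = netCDF4.stringtochar(numpy.array(id_strings))
recovered_strings = [str(s) for s in netCDF4.chartostring(char_array)]
print(recovered_strings == id_strings)  # True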
Example #9
def _run(input_file_name, min_latitude_deg, max_latitude_deg, min_longitude_deg,
         max_longitude_deg, latitude_spacing_deg, longitude_spacing_deg,
         output_dir_name):
    """Splits predictions by spatial region.

    This is effectively the main method.

    :param input_file_name: See documentation at top of file.
    :param min_latitude_deg: Same.
    :param max_latitude_deg: Same.
    :param min_longitude_deg: Same.
    :param max_longitude_deg: Same.
    :param latitude_spacing_deg: Same.
    :param longitude_spacing_deg: Same.
    :param output_dir_name: Same.
    """

    # Read data.
    print('Reading data from: "{0:s}"...'.format(input_file_name))
    prediction_dict = prediction_io.read_file(input_file_name)
    example_metadata_dict = example_utils.parse_example_ids(
        prediction_dict[prediction_io.EXAMPLE_IDS_KEY]
    )

    example_latitudes_deg = example_metadata_dict[example_utils.LATITUDES_KEY]
    example_longitudes_deg = example_metadata_dict[example_utils.LONGITUDES_KEY]

    these_limits_deg = numpy.array([
        min_latitude_deg, max_latitude_deg, min_longitude_deg, max_longitude_deg
    ])
    if numpy.any(numpy.isnan(these_limits_deg)):
        min_latitude_deg = numpy.min(example_latitudes_deg)
        max_latitude_deg = numpy.max(example_latitudes_deg)
        min_longitude_deg = numpy.min(example_longitudes_deg)
        max_longitude_deg = numpy.max(example_longitudes_deg)

    # Create grid.
    grid_point_latitudes_deg, grid_point_longitudes_deg = (
        misc.create_latlng_grid(
            min_latitude_deg=min_latitude_deg,
            max_latitude_deg=max_latitude_deg,
            latitude_spacing_deg=latitude_spacing_deg,
            min_longitude_deg=min_longitude_deg,
            max_longitude_deg=max_longitude_deg,
            longitude_spacing_deg=longitude_spacing_deg
        )
    )

    num_grid_rows = len(grid_point_latitudes_deg)
    num_grid_columns = len(grid_point_longitudes_deg)

    grid_edge_latitudes_deg, grid_edge_longitudes_deg = (
        grids.get_latlng_grid_cell_edges(
            min_latitude_deg=grid_point_latitudes_deg[0],
            min_longitude_deg=grid_point_longitudes_deg[0],
            lat_spacing_deg=numpy.diff(grid_point_latitudes_deg[:2])[0],
            lng_spacing_deg=numpy.diff(grid_point_longitudes_deg[:2])[0],
            num_rows=num_grid_rows, num_columns=num_grid_columns
        )
    )

    print(SEPARATOR_STRING)

    for i in range(num_grid_rows):
        for j in range(num_grid_columns):
            these_indices = grids.find_events_in_grid_cell(
                event_x_coords_metres=example_longitudes_deg,
                event_y_coords_metres=example_latitudes_deg,
                grid_edge_x_coords_metres=grid_edge_longitudes_deg,
                grid_edge_y_coords_metres=grid_edge_latitudes_deg,
                row_index=i, column_index=j, verbose=False
            )

            this_prediction_dict = prediction_io.subset_by_index(
                prediction_dict=copy.deepcopy(prediction_dict),
                desired_indices=these_indices
            )
            this_num_examples = len(
                this_prediction_dict[prediction_io.EXAMPLE_IDS_KEY]
            )

            if this_num_examples == 0:
                continue

            this_output_file_name = prediction_io.find_file(
                directory_name=output_dir_name, grid_row=i, grid_column=j,
                raise_error_if_missing=False
            )
            print('Writing {0:d} examples to: "{1:s}"...'.format(
                len(this_prediction_dict[prediction_io.EXAMPLE_IDS_KEY]),
                this_output_file_name
            ))

            prediction_io.write_file(
                netcdf_file_name=this_output_file_name,
                scalar_target_matrix=
                this_prediction_dict[prediction_io.SCALAR_TARGETS_KEY],
                vector_target_matrix=
                this_prediction_dict[prediction_io.VECTOR_TARGETS_KEY],
                scalar_prediction_matrix=
                this_prediction_dict[prediction_io.SCALAR_PREDICTIONS_KEY],
                vector_prediction_matrix=
                this_prediction_dict[prediction_io.VECTOR_PREDICTIONS_KEY],
                heights_m_agl=this_prediction_dict[prediction_io.HEIGHTS_KEY],
                example_id_strings=
                this_prediction_dict[prediction_io.EXAMPLE_IDS_KEY],
                model_file_name=
                this_prediction_dict[prediction_io.MODEL_FILE_KEY]
            )

    print(SEPARATOR_STRING)

    grid_metafile_name = prediction_io.find_grid_metafile(
        prediction_dir_name=output_dir_name, raise_error_if_missing=False
    )

    print('Writing grid metadata to: "{0:s}"...'.format(grid_metafile_name))
    prediction_io.write_grid_metafile(
        grid_point_latitudes_deg=grid_point_latitudes_deg,
        grid_point_longitudes_deg=grid_point_longitudes_deg,
        netcdf_file_name=grid_metafile_name
    )
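
`grids.find_events_in_grid_cell` is not shown in this listing; a hypothetical numpy stand-in for one grid cell, assuming the edges define half-open intervals:

import numpy

grid_edge_latitudes_deg = numpy.array([0., 10., 20.])
grid_edge_longitudes_deg = numpy.array([100., 110., 120.])
event_latitudes_deg = numpy.array([5., 15.])
event_longitudes_deg = numpy.array([105., 115.])

row, column = 0, 0
in_cell_flags = numpy.logical_and(
    numpy.logical_and(
        event_latitudes_deg >= grid_edge_latitudes_deg[row],
        event_latitudes_deg < grid_edge_latitudes_deg[row + 1]),
    numpy.logical_and(
        event_longitudes_deg >= grid_edge_longitudes_deg[column],
        event_longitudes_deg < grid_edge_longitudes_deg[column + 1]))
print(numpy.where(in_cell_flags)[0])  # [0]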
Example #10
def write_file(netcdf_file_name,
               scalar_target_matrix,
               vector_target_matrix,
               scalar_prediction_matrix,
               vector_prediction_matrix,
               heights_m_agl,
               example_id_strings,
               model_file_name,
               isotonic_model_file_name=None):
    """Writes predictions to NetCDF file.

    E = number of examples
    H = number of heights
    T_s = number of scalar targets
    T_v = number of vector targets

    :param netcdf_file_name: Path to output file.
    :param scalar_target_matrix: numpy array (E x T_s) with actual values of
        scalar targets.
    :param vector_target_matrix: numpy array (E x H x T_v) with actual values of
        vector targets.
    :param scalar_prediction_matrix: Same as `scalar_target_matrix` but with
        predicted values.
    :param vector_prediction_matrix: Same as `vector_target_matrix` but with
        predicted values.
    :param heights_m_agl: length-H numpy array of heights (metres above ground
        level).
    :param example_id_strings: length-E list of IDs created by
        `example_utils.create_example_ids`.
    :param model_file_name: Path to file with trained model (readable by
        `neural_net.read_model`).
    :param isotonic_model_file_name: Path to file with trained isotonic-
        regression models (readable by `isotonic_regression.read_file`) used to
        make predictions.  If isotonic regression was not used, leave this as
        None.
    """

    # Check input args.
    error_checking.assert_is_numpy_array_without_nan(scalar_target_matrix)
    error_checking.assert_is_numpy_array(scalar_target_matrix,
                                         num_dimensions=2)

    error_checking.assert_is_numpy_array_without_nan(scalar_prediction_matrix)
    error_checking.assert_is_numpy_array(scalar_prediction_matrix,
                                         exact_dimensions=numpy.array(
                                             scalar_target_matrix.shape,
                                             dtype=int))

    error_checking.assert_is_numpy_array_without_nan(vector_target_matrix)
    error_checking.assert_is_numpy_array(vector_target_matrix,
                                         num_dimensions=3)

    num_examples = scalar_target_matrix.shape[0]
    expected_dim = numpy.array(
        (num_examples, ) + vector_target_matrix.shape[1:], dtype=int)
    error_checking.assert_is_numpy_array(vector_target_matrix,
                                         exact_dimensions=expected_dim)

    error_checking.assert_is_numpy_array_without_nan(vector_prediction_matrix)
    error_checking.assert_is_numpy_array(vector_prediction_matrix,
                                         exact_dimensions=numpy.array(
                                             vector_target_matrix.shape,
                                             dtype=int))

    num_heights = vector_target_matrix.shape[1]
    error_checking.assert_is_greater_numpy_array(heights_m_agl, 0.)
    error_checking.assert_is_numpy_array(heights_m_agl,
                                         exact_dimensions=numpy.array(
                                             [num_heights], dtype=int))

    error_checking.assert_is_numpy_array(numpy.array(example_id_strings),
                                         exact_dimensions=numpy.array(
                                             [num_examples], dtype=int))
    example_utils.parse_example_ids(example_id_strings)

    error_checking.assert_is_string(model_file_name)
    if isotonic_model_file_name is None:
        isotonic_model_file_name = ''
    error_checking.assert_is_string(isotonic_model_file_name)

    # Write to NetCDF file.
    file_system_utils.mkdir_recursive_if_necessary(file_name=netcdf_file_name)
    dataset_object = netCDF4.Dataset(netcdf_file_name,
                                     'w',
                                     format='NETCDF3_64BIT_OFFSET')

    dataset_object.setncattr(MODEL_FILE_KEY, model_file_name)
    dataset_object.setncattr(ISOTONIC_MODEL_FILE_KEY, isotonic_model_file_name)

    num_examples = vector_target_matrix.shape[0]
    dataset_object.createDimension(EXAMPLE_DIMENSION_KEY, num_examples)
    dataset_object.createDimension(HEIGHT_DIMENSION_KEY,
                                   vector_target_matrix.shape[1])
    dataset_object.createDimension(VECTOR_TARGET_DIMENSION_KEY,
                                   vector_target_matrix.shape[2])

    num_scalar_targets = scalar_target_matrix.shape[1]
    if num_scalar_targets > 0:
        dataset_object.createDimension(SCALAR_TARGET_DIMENSION_KEY,
                                       scalar_target_matrix.shape[1])

    if num_examples == 0:
        num_id_characters = 1
    else:
        num_id_characters = numpy.max(
            numpy.array([len(id) for id in example_id_strings]))

    dataset_object.createDimension(EXAMPLE_ID_CHAR_DIM_KEY, num_id_characters)

    this_string_format = 'S{0:d}'.format(num_id_characters)
    example_ids_char_array = netCDF4.stringtochar(
        numpy.array(example_id_strings, dtype=this_string_format))

    dataset_object.createVariable(EXAMPLE_IDS_KEY,
                                  datatype='S1',
                                  dimensions=(EXAMPLE_DIMENSION_KEY,
                                              EXAMPLE_ID_CHAR_DIM_KEY))
    dataset_object.variables[EXAMPLE_IDS_KEY][:] = numpy.array(
        example_ids_char_array)

    dataset_object.createVariable(HEIGHTS_KEY,
                                  datatype=numpy.float32,
                                  dimensions=HEIGHT_DIMENSION_KEY)
    dataset_object.variables[HEIGHTS_KEY][:] = heights_m_agl

    if num_scalar_targets > 0:
        dataset_object.createVariable(SCALAR_TARGETS_KEY,
                                      datatype=numpy.float32,
                                      dimensions=(EXAMPLE_DIMENSION_KEY,
                                                  SCALAR_TARGET_DIMENSION_KEY))
        dataset_object.variables[SCALAR_TARGETS_KEY][:] = scalar_target_matrix

        dataset_object.createVariable(SCALAR_PREDICTIONS_KEY,
                                      datatype=numpy.float32,
                                      dimensions=(EXAMPLE_DIMENSION_KEY,
                                                  SCALAR_TARGET_DIMENSION_KEY))
        dataset_object.variables[SCALAR_PREDICTIONS_KEY][:] = (
            scalar_prediction_matrix)

    these_dimensions = (EXAMPLE_DIMENSION_KEY, HEIGHT_DIMENSION_KEY,
                        VECTOR_TARGET_DIMENSION_KEY)

    dataset_object.createVariable(VECTOR_TARGETS_KEY,
                                  datatype=numpy.float32,
                                  dimensions=these_dimensions)
    dataset_object.variables[VECTOR_TARGETS_KEY][:] = vector_target_matrix

    dataset_object.createVariable(VECTOR_PREDICTIONS_KEY,
                                  datatype=numpy.float32,
                                  dimensions=these_dimensions)
    dataset_object.variables[VECTOR_PREDICTIONS_KEY][:] = (
        vector_prediction_matrix)

    dataset_object.close()
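
The NetCDF pattern above is always the same: create dimensions, create variables keyed on those dimensions, then assign the data. A minimal sketch with the real netCDF4 API (file name and variable name are placeholders):

import numpy
import netCDF4

dataset_object = netCDF4.Dataset(
    'tiny_predictions.nc', 'w', format='NETCDF3_64BIT_OFFSET')
dataset_object.createDimension('example', 2)
dataset_object.createDimension('height', 3)

this_variable = dataset_object.createVariable(
    'vector_target_matrix', datatype=numpy.float32,
    dimensions=('example', 'height'))
this_variable[:] = numpy.arange(6, dtype=numpy.float32).reshape(2, 3)
dataset_object.close()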
Example #11
def get_raw_examples(example_file_name, num_examples, example_dir_name,
                     example_id_file_name):
    """Returns raw examples.

    The difference between `get_raw_examples` and `get_examples_for_inference`
    is that `get_raw_examples` returns examples in their raw form, *not*
    pre-processed to be fed through a model for inference.

    :param example_file_name: See doc for `get_examples_for_inference`.
    :param num_examples: Same.
    :param example_dir_name: Same.
    :param example_id_file_name: Same.
    :return: example_dict: See doc for `example_io.read_file`.
    """

    error_checking.assert_is_string(example_file_name)
    use_specific_ids = example_file_name == ''

    if use_specific_ids:
        error_checking.assert_is_string(example_id_file_name)

        print('Reading desired example IDs from: "{0:s}"...'.format(
            example_id_file_name))
        example_id_strings = read_example_ids_from_netcdf(example_id_file_name)

        valid_times_unix_sec = example_utils.parse_example_ids(
            example_id_strings)[example_utils.VALID_TIMES_KEY]

        example_file_names = example_io.find_many_files(
            directory_name=example_dir_name,
            first_time_unix_sec=numpy.min(valid_times_unix_sec),
            last_time_unix_sec=numpy.max(valid_times_unix_sec))

        num_files = len(example_file_names)
        example_dicts = [dict()] * num_files

        for i in range(num_files):
            print('Reading data from: "{0:s}"...'.format(
                example_file_names[i]))
            example_dicts[i] = example_io.read_file(example_file_names[i])

        example_dict = example_utils.concat_examples(example_dicts)

        good_indices = example_utils.find_examples(
            all_id_strings=example_dict[example_utils.EXAMPLE_IDS_KEY],
            desired_id_strings=example_id_strings,
            allow_missing=False)

        example_dict = example_utils.subset_by_index(
            example_dict=example_dict, desired_indices=good_indices)
    else:
        error_checking.assert_is_string(example_dir_name)
        error_checking.assert_is_integer(num_examples)
        error_checking.assert_is_greater(num_examples, 0)

        print('Reading data from: "{0:s}"...'.format(example_file_name))
        example_dict = example_io.read_file(example_file_name)

        num_examples_total = len(example_dict[example_utils.VALID_TIMES_KEY])
        desired_indices = numpy.linspace(0,
                                         num_examples_total - 1,
                                         num=num_examples_total,
                                         dtype=int)

        if num_examples < num_examples_total:
            desired_indices = numpy.random.choice(desired_indices,
                                                  size=num_examples,
                                                  replace=False)

        example_dict = example_utils.subset_by_index(
            example_dict=example_dict, desired_indices=desired_indices)

    return example_dict
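
When the file holds more examples than requested, the function keeps a uniform random subset without replacement. That step in isolation:

import numpy

num_examples_total = 10
num_examples = 4

desired_indices = numpy.linspace(
    0, num_examples_total - 1, num=num_examples_total, dtype=int)
if num_examples < num_examples_total:
    desired_indices = numpy.random.choice(
        desired_indices, size=num_examples, replace=False)
print(desired_indices)  # e.g., [7 0 3 9]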
Example #12
def _run(tropical_example_dir_name, non_tropical_example_dir_name, year,
         assorted1_example_dir_name, assorted2_example_dir_name):
    """Splits examples into Assorted1 and Assorted2 sites.

    This is effectively the main method.

    :param tropical_example_dir_name: See documentation at top of file.
    :param non_tropical_example_dir_name: Same.
    :param year: Same.
    :param assorted1_example_dir_name: Same.
    :param assorted2_example_dir_name: Same.
    """

    tropical_example_file_name = example_io.find_file(
        directory_name=tropical_example_dir_name, year=year,
        raise_error_if_missing=True
    )
    non_tropical_example_file_name = example_io.find_file(
        directory_name=non_tropical_example_dir_name, year=year,
        raise_error_if_missing=True
    )

    print('Reading data from: "{0:s}"...'.format(tropical_example_file_name))
    tropical_example_dict = example_io.read_file(tropical_example_file_name)

    print('Reading data from: "{0:s}"...'.format(
        non_tropical_example_file_name
    ))
    non_tropical_example_dict = example_io.read_file(
        non_tropical_example_file_name
    )

    example_dict = example_utils.concat_examples([
        tropical_example_dict, non_tropical_example_dict
    ])
    del tropical_example_dict, non_tropical_example_dict

    example_metadata_dict = example_utils.parse_example_ids(
        example_dict[example_utils.EXAMPLE_IDS_KEY]
    )
    example_latitudes_deg_n = example_metadata_dict[example_utils.LATITUDES_KEY]
    example_longitudes_deg_e = lng_conversion.convert_lng_positive_in_west(
        example_metadata_dict[example_utils.LONGITUDES_KEY]
    )

    example_coord_matrix = numpy.transpose(numpy.vstack((
        example_latitudes_deg_n, example_longitudes_deg_e
    )))
    assorted2_coord_matrix = numpy.transpose(numpy.vstack((
        ASSORTED2_LATITUDES_DEG_N, ASSORTED2_LONGITUDES_DEG_E
    )))
    distance_matrix_deg2 = euclidean_distances(
        X=example_coord_matrix, Y=assorted2_coord_matrix, squared=True
    )

    assorted2_flags = numpy.any(distance_matrix_deg2 <= TOLERANCE_DEG2, axis=1)
    assorted2_example_dict = example_utils.subset_by_index(
        example_dict=copy.deepcopy(example_dict),
        desired_indices=numpy.where(assorted2_flags)[0]
    )
    assorted2_example_file_name = example_io.find_file(
        directory_name=assorted2_example_dir_name, year=year,
        raise_error_if_missing=False
    )

    print('Writing {0:d} examples in set Assorted2 to: "{1:s}"...'.format(
        len(assorted2_example_dict[example_utils.VALID_TIMES_KEY]),
        assorted2_example_file_name
    ))
    example_io.write_file(
        example_dict=assorted2_example_dict,
        netcdf_file_name=assorted2_example_file_name
    )

    assorted1_example_dict = example_utils.subset_by_index(
        example_dict=example_dict,
        desired_indices=numpy.where(numpy.invert(assorted2_flags))[0]
    )
    assorted1_example_file_name = example_io.find_file(
        directory_name=assorted1_example_dir_name, year=year,
        raise_error_if_missing=False
    )

    print('Writing {0:d} examples in set Assorted1 to: "{1:s}"...'.format(
        len(assorted1_example_dict[example_utils.VALID_TIMES_KEY]),
        assorted1_example_file_name
    ))
    example_io.write_file(
        example_dict=assorted1_example_dict,
        netcdf_file_name=assorted1_example_file_name
    )
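
The Assorted2 membership test reduces to squared lat/lng distances between every example and every Assorted2 site, followed by a per-example any(). A sketch with invented coordinates and tolerance:

import numpy
from sklearn.metrics.pairwise import euclidean_distances

TOLERANCE_DEG2 = 1e-6  # assumed value, for illustration

example_coord_matrix = numpy.array([[40.0, 255.0], [0.0, 100.0]])
assorted2_coord_matrix = numpy.array([[40.0, 255.0]])

distance_matrix_deg2 = euclidean_distances(
    X=example_coord_matrix, Y=assorted2_coord_matrix, squared=True)
assorted2_flags = numpy.any(distance_matrix_deg2 <= TOLERANCE_DEG2, axis=1)
print(assorted2_flags)  # [ True False]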