Ejemplo n.º 1
0
    def test_concat_examples_bad_heights(self):
        """Ensures that concat_examples rejects mismatched heights.

        A deep copy of the second example dictionary has 1 added to the value
        stored under `example_utils.HEIGHTS_KEY`, so concatenating it with the
        first example dictionary is expected to raise a ValueError.
        """

        altered_example_dict = copy.deepcopy(SECOND_EXAMPLE_DICT)
        altered_example_dict[example_utils.HEIGHTS_KEY] += 1
        dicts_to_concat = [FIRST_EXAMPLE_DICT, altered_example_dict]

        with self.assertRaises(ValueError):
            example_utils.concat_examples(dicts_to_concat)
Ejemplo n.º 2
0
    def test_concat_examples_bad_fields(self):
        """Ensures that concat_examples rejects mismatched fields.

        A deep copy of the second example dictionary gets
        `example_utils.ALBEDO_NAME` appended to its list of scalar-predictor
        names, so concatenating it with the first example dictionary is
        expected to raise a ValueError.
        """

        altered_example_dict = copy.deepcopy(SECOND_EXAMPLE_DICT)

        # The list is modified in place, so the copied dictionary sees the
        # extra name.
        scalar_predictor_names = altered_example_dict[
            example_utils.SCALAR_PREDICTOR_NAMES_KEY]
        scalar_predictor_names.append(example_utils.ALBEDO_NAME)

        dicts_to_concat = [FIRST_EXAMPLE_DICT, altered_example_dict]

        with self.assertRaises(ValueError):
            example_utils.concat_examples(dicts_to_concat)
Ejemplo n.º 3
0
    def test_concat_examples_good(self):
        """Ensures correct output from concat_examples for compatible inputs.

        The two unmodified example dictionaries are concatenated and the
        result is compared with the expected dictionary.  No error is expected.
        """

        actual_example_dict = example_utils.concat_examples(
            [FIRST_EXAMPLE_DICT, SECOND_EXAMPLE_DICT])
        dicts_match = _compare_example_dicts(
            actual_example_dict, CONCAT_EXAMPLE_DICT)

        self.assertTrue(dicts_match)
Ejemplo n.º 4
0
def _process_files_one_year(rrtm_directory_name, input_example_dir_name, year):
    """Reads all RRTM files for one year and merges them into one dictionary.

    If `input_example_dir_name` is not None and the example file found there
    for the same year exists on disk, the examples in that file are appended
    to the list before concatenation.

    :param rrtm_directory_name: See documentation at top of file.
    :param input_example_dir_name: Same.
    :param year: Year (integer).
    :return: example_dict: Dictionary with all examples for the given year.  For
        a list of keys, see `example_io.read_file`.
    """

    rrtm_file_names = _find_rrtm_files(
        rrtm_directory_name=rrtm_directory_name, year=year)

    example_dicts = []

    for this_file_name in rrtm_file_names:
        print('Reading data from: "{0:s}"...'.format(this_file_name))
        example_dicts.append(rrtm_io.read_file(
            netcdf_file_name=this_file_name, allow_bad_values=True))

    if input_example_dir_name is not None:
        # NOTE(review): the pre-existing example file is located and read with
        # `rrtm_io`, while the return doc points at `example_io.read_file` --
        # confirm which module is intended.
        example_file_name = rrtm_io.find_file(
            directory_name=input_example_dir_name,
            year=year,
            raise_error_if_missing=False)

        # A missing file is tolerated; it is simply left out.
        if os.path.isfile(example_file_name):
            print('Reading data from: "{0:s}"...'.format(example_file_name))
            example_dicts.append(rrtm_io.read_file(example_file_name))

    return example_utils.concat_examples(example_dicts)
def _run(tropical_example_dir_name, non_tropical_example_dir_name,
         num_histogram_bins, output_dir_name):
    """Plots histogram of each target variable and joins the panels.

    This is effectively the main method.

    Examples are read from both directories, one histogram panel is plotted
    per entry in TARGET_NAMES, and the panels are concatenated into a single
    image in the output directory.

    :param tropical_example_dir_name: See documentation at top of file.
    :param non_tropical_example_dir_name: Same.
    :param num_histogram_bins: Same.
    :param output_dir_name: Same.
    """

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    # Both directories are searched with the same settings: from the first
    # time in FIRST_YEAR to the last time in LAST_YEAR, with no missing files
    # allowed.
    search_kwargs = {
        'first_time_unix_sec':
            time_conversion.first_and_last_times_in_year(FIRST_YEAR)[0],
        'last_time_unix_sec':
            time_conversion.first_and_last_times_in_year(LAST_YEAR)[-1],
        'raise_error_if_all_missing': True,
        'raise_error_if_any_missing': True
    }

    example_file_names = example_io.find_many_files(
        directory_name=tropical_example_dir_name, **search_kwargs)
    example_file_names += example_io.find_many_files(
        directory_name=non_tropical_example_dir_name, **search_kwargs)

    example_dicts = []

    for this_file_name in example_file_names:
        print('Reading data from: "{0:s}"...'.format(this_file_name))

        # Only the fields listed in TARGET_NAMES_IN_FILE are kept from each
        # file.
        example_dicts.append(example_utils.subset_by_field(
            example_dict=example_io.read_file(this_file_name),
            field_names=TARGET_NAMES_IN_FILE))

    example_dict = example_utils.concat_examples(example_dicts)
    del example_dicts

    panel_file_names = []

    for panel_index, this_target_name in enumerate(TARGET_NAMES):
        if this_target_name in TARGET_NAMES_IN_FILE:
            target_matrix = example_utils.get_field_from_dict(
                example_dict=example_dict, field_name=this_target_name)
        else:
            # Any target not stored in the files is computed as surface
            # downwelling flux minus top-of-atmosphere upwelling flux.
            surface_down_fluxes_w_m02 = example_utils.get_field_from_dict(
                example_dict=example_dict,
                field_name=example_utils.SHORTWAVE_SURFACE_DOWN_FLUX_NAME)
            toa_up_fluxes_w_m02 = example_utils.get_field_from_dict(
                example_dict=example_dict,
                field_name=example_utils.SHORTWAVE_TOA_UP_FLUX_NAME)
            target_matrix = surface_down_fluxes_w_m02 - toa_up_fluxes_w_m02

        # Panels are labelled 'a', 'b', 'c', ... in plotting order.
        panel_file_names.append(_plot_histogram_one_target(
            target_values=numpy.ravel(target_matrix),
            target_name=this_target_name,
            num_bins=num_histogram_bins,
            letter_label=chr(ord('a') + panel_index),
            output_dir_name=output_dir_name))

    concat_file_name = '{0:s}/target_distributions.jpg'.format(output_dir_name)
    print('Concatenating panels to: "{0:s}"...'.format(concat_file_name))

    imagemagick_utils.concatenate_images(
        input_file_names=panel_file_names,
        output_file_name=concat_file_name,
        num_panel_rows=2, num_panel_columns=2, border_width_pixels=25)

    # The concatenated image is trimmed in place (same input and output path).
    imagemagick_utils.trim_whitespace(
        input_file_name=concat_file_name, output_file_name=concat_file_name)
Ejemplo n.º 6
0
def get_raw_examples(example_file_name, num_examples, example_dir_name,
                     example_id_file_name):
    """Returns examples in raw form.

    Unlike `get_examples_for_inference`, this method returns examples in their
    raw form, *not* pre-processed to be fed through a model for inference.

    If `example_file_name` is an empty string, the desired example IDs are read
    from `example_id_file_name` and exactly those examples are pulled from the
    files found in `example_dir_name`.  Otherwise, examples are read from
    `example_file_name`; if that file holds more than `num_examples` examples,
    `num_examples` of them are drawn randomly without replacement.

    :param example_file_name: See doc for `get_examples_for_inference`.
    :param num_examples: Same.
    :param example_dir_name: Same.
    :param example_id_file_name: Same.
    :return: example_dict: See doc for `example_io.read_file`.
    """

    error_checking.assert_is_string(example_file_name)

    if example_file_name != '':
        # NOTE(review): `example_dir_name` is validated here although this
        # branch never reads it, while the ID-based branch below uses it
        # without validation -- confirm this is intended.
        error_checking.assert_is_string(example_dir_name)
        error_checking.assert_is_integer(num_examples)
        error_checking.assert_is_greater(num_examples, 0)

        print('Reading data from: "{0:s}"...'.format(example_file_name))
        example_dict = example_io.read_file(example_file_name)

        num_examples_total = len(example_dict[example_utils.VALID_TIMES_KEY])
        desired_indices = numpy.arange(num_examples_total, dtype=int)

        # Subsample only if the file holds more examples than requested.
        if num_examples < num_examples_total:
            desired_indices = numpy.random.choice(
                desired_indices, size=num_examples, replace=False)

        return example_utils.subset_by_index(
            example_dict=example_dict, desired_indices=desired_indices)

    error_checking.assert_is_string(example_id_file_name)

    print('Reading desired example IDs from: "{0:s}"...'.format(
        example_id_file_name))
    example_id_strings = read_example_ids_from_netcdf(example_id_file_name)

    # The time span of the desired IDs determines which files are read.
    valid_times_unix_sec = example_utils.parse_example_ids(
        example_id_strings)[example_utils.VALID_TIMES_KEY]

    example_file_names = example_io.find_many_files(
        directory_name=example_dir_name,
        first_time_unix_sec=numpy.min(valid_times_unix_sec),
        last_time_unix_sec=numpy.max(valid_times_unix_sec))

    example_dicts = []

    for this_file_name in example_file_names:
        print('Reading data from: "{0:s}"...'.format(this_file_name))
        example_dicts.append(example_io.read_file(this_file_name))

    example_dict = example_utils.concat_examples(example_dicts)

    # `allow_missing=False` is passed, so every desired ID is required to be
    # present in the files that were read.
    good_indices = example_utils.find_examples(
        all_id_strings=example_dict[example_utils.EXAMPLE_IDS_KEY],
        desired_id_strings=example_id_strings,
        allow_missing=False)

    return example_utils.subset_by_index(
        example_dict=example_dict, desired_indices=good_indices)
Ejemplo n.º 7
0
def _run(tropical_example_dir_name, non_tropical_example_dir_name, year,
         assorted1_example_dir_name, assorted2_example_dir_name):
    """Splits one year of examples into Assorted1 and Assorted2 sites.

    This is effectively the main method.

    Tropical and non-tropical examples for the given year are merged.  An
    example is assigned to Assorted2 if its squared Euclidean distance in
    lat-long space to at least one Assorted2 site is <= TOLERANCE_DEG2; all
    other examples are assigned to Assorted1.  Each set is written to its own
    file.

    :param tropical_example_dir_name: See documentation at top of file.
    :param non_tropical_example_dir_name: Same.
    :param year: Same.
    :param assorted1_example_dir_name: Same.
    :param assorted2_example_dir_name: Same.
    """

    # Both input files must exist, so they are located before anything is
    # read.
    tropical_file_name = example_io.find_file(
        directory_name=tropical_example_dir_name, year=year,
        raise_error_if_missing=True)
    non_tropical_file_name = example_io.find_file(
        directory_name=non_tropical_example_dir_name, year=year,
        raise_error_if_missing=True)

    print('Reading data from: "{0:s}"...'.format(tropical_file_name))
    tropical_dict = example_io.read_file(tropical_file_name)

    print('Reading data from: "{0:s}"...'.format(non_tropical_file_name))
    non_tropical_dict = example_io.read_file(non_tropical_file_name)

    example_dict = example_utils.concat_examples(
        [tropical_dict, non_tropical_dict])
    del tropical_dict, non_tropical_dict

    metadata_dict = example_utils.parse_example_ids(
        example_dict[example_utils.EXAMPLE_IDS_KEY])
    latitudes_deg_n = metadata_dict[example_utils.LATITUDES_KEY]
    longitudes_deg_e = lng_conversion.convert_lng_positive_in_west(
        metadata_dict[example_utils.LONGITUDES_KEY])

    # One row per point, with columns (latitude, longitude).
    example_coord_matrix = numpy.vstack(
        (latitudes_deg_n, longitudes_deg_e)).T
    site_coord_matrix = numpy.vstack(
        (ASSORTED2_LATITUDES_DEG_N, ASSORTED2_LONGITUDES_DEG_E)).T

    sq_distance_matrix_deg2 = euclidean_distances(
        X=example_coord_matrix, Y=site_coord_matrix, squared=True)

    # An example is in Assorted2 if it is close enough to any Assorted2 site.
    in_assorted2_flags = numpy.any(
        sq_distance_matrix_deg2 <= TOLERANCE_DEG2, axis=1)
    assorted2_indices = numpy.where(in_assorted2_flags)[0]
    assorted1_indices = numpy.where(numpy.invert(in_assorted2_flags))[0]

    # A deep copy is subset here so that `example_dict` is still complete for
    # the Assorted1 subset below (presumably `subset_by_index` modifies its
    # input in place -- TODO confirm).
    assorted2_dict = example_utils.subset_by_index(
        example_dict=copy.deepcopy(example_dict),
        desired_indices=assorted2_indices)
    assorted2_file_name = example_io.find_file(
        directory_name=assorted2_example_dir_name, year=year,
        raise_error_if_missing=False)

    num_assorted2_examples = len(
        assorted2_dict[example_utils.VALID_TIMES_KEY])
    print('Writing {0:d} examples in set Assorted2 to: "{1:s}"...'.format(
        num_assorted2_examples, assorted2_file_name))
    example_io.write_file(
        example_dict=assorted2_dict, netcdf_file_name=assorted2_file_name)

    assorted1_dict = example_utils.subset_by_index(
        example_dict=example_dict, desired_indices=assorted1_indices)
    assorted1_file_name = example_io.find_file(
        directory_name=assorted1_example_dir_name, year=year,
        raise_error_if_missing=False)

    num_assorted1_examples = len(
        assorted1_dict[example_utils.VALID_TIMES_KEY])
    print('Writing {0:d} examples in set Assorted1 to: "{1:s}"...'.format(
        num_assorted1_examples, assorted1_file_name))
    example_io.write_file(
        example_dict=assorted1_dict, netcdf_file_name=assorted1_file_name)