Example #1
def _run(example_file_name, output_dir_name):
    """Plots predictors and targets for one example.

    This is effectively the main method.

    :param example_file_name: See documentation at top of file.
    :param output_dir_name: Same.
    """

    print('Reading data from: "{0:s}"...'.format(example_file_name))
    example_dict = example_io.read_file(example_file_name)
    cloud_layer_counts = example_utils.find_cloud_layers(
        example_dict=example_dict,
        min_path_kg_m02=MIN_CLOUD_LAYER_PATH_KG_M02,
        for_ice=False)[1]

    # Keep only examples with more than one liquid-water cloud layer.
    desired_indices = numpy.where(cloud_layer_counts > 1)[0]
    example_dict = example_utils.subset_by_index(
        example_dict=example_dict, desired_indices=desired_indices)

    liquid_water_paths_kg_m02 = example_utils.get_field_from_dict(
        example_dict=example_dict,
        field_name=example_utils.LIQUID_WATER_PATH_NAME,
        height_m_agl=10.)
    # Sorting the negated values puts liquid-water paths in descending order,
    # so sort_indices[10] is the example with the 11th-largest path.
    sort_indices = numpy.argsort(-1 * liquid_water_paths_kg_m02)
    desired_index = sort_indices[10]

    _do_plotting(example_dict=example_dict,
                 example_index=desired_index,
                 output_dir_name=output_dir_name)
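
A note on the selection above: numpy.argsort only sorts in ascending order, so negating the values first yields indices from largest to smallest. A minimal standalone sketch, with made-up values:

import numpy

liquid_water_paths_kg_m02 = numpy.array([0.3, 1.2, 0.7])

# Negating before argsort gives a descending sort.
sort_indices = numpy.argsort(-1 * liquid_water_paths_kg_m02)
print(sort_indices)  # [1 2 0] -- index 1 has the largest path
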
Example #2

def _run(tropical_example_dir_name, non_tropical_example_dir_name,
         num_histogram_bins, output_dir_name):
    """Plots distribution of each target variable.

    This is effectively the main method.

    :param tropical_example_dir_name: See documentation at top of file.
    :param non_tropical_example_dir_name: Same.
    :param num_histogram_bins: Same.
    :param output_dir_name: Same.
    """

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    first_time_unix_sec = (
        time_conversion.first_and_last_times_in_year(FIRST_YEAR)[0])
    last_time_unix_sec = (
        time_conversion.first_and_last_times_in_year(LAST_YEAR)[-1])

    example_file_names = example_io.find_many_files(
        directory_name=tropical_example_dir_name,
        first_time_unix_sec=first_time_unix_sec,
        last_time_unix_sec=last_time_unix_sec,
        raise_error_if_all_missing=True,
        raise_error_if_any_missing=True)

    example_file_names += example_io.find_many_files(
        directory_name=non_tropical_example_dir_name,
        first_time_unix_sec=first_time_unix_sec,
        last_time_unix_sec=last_time_unix_sec,
        raise_error_if_all_missing=True,
        raise_error_if_any_missing=True)

    example_dicts = []

    for this_file_name in example_file_names:
        print('Reading data from: "{0:s}"...'.format(this_file_name))
        this_example_dict = example_io.read_file(this_file_name)
        this_example_dict = example_utils.subset_by_field(
            example_dict=this_example_dict, field_names=TARGET_NAMES_IN_FILE)

        example_dicts.append(this_example_dict)

    example_dict = example_utils.concat_examples(example_dicts)
    del example_dicts

    letter_label = None
    panel_file_names = []

    for this_target_name in TARGET_NAMES:
        if this_target_name in TARGET_NAMES_IN_FILE:
            these_target_values = example_utils.get_field_from_dict(
                example_dict=example_dict, field_name=this_target_name)
        else:
            down_fluxes_w_m02 = example_utils.get_field_from_dict(
                example_dict=example_dict,
                field_name=example_utils.SHORTWAVE_SURFACE_DOWN_FLUX_NAME)
            up_fluxes_w_m02 = example_utils.get_field_from_dict(
                example_dict=example_dict,
                field_name=example_utils.SHORTWAVE_TOA_UP_FLUX_NAME)
            these_target_values = down_fluxes_w_m02 - up_fluxes_w_m02

        these_target_values = numpy.ravel(these_target_values)

        if letter_label is None:
            letter_label = 'a'
        else:
            letter_label = chr(ord(letter_label) + 1)

        this_file_name = _plot_histogram_one_target(
            target_values=these_target_values,
            target_name=this_target_name,
            num_bins=num_histogram_bins,
            letter_label=letter_label,
            output_dir_name=output_dir_name)
        panel_file_names.append(this_file_name)

    concat_file_name = '{0:s}/target_distributions.jpg'.format(output_dir_name)
    print('Concatenating panels to: "{0:s}"...'.format(concat_file_name))

    imagemagick_utils.concatenate_images(input_file_names=panel_file_names,
                                         output_file_name=concat_file_name,
                                         num_panel_rows=2,
                                         num_panel_columns=2,
                                         border_width_pixels=25)
    imagemagick_utils.trim_whitespace(input_file_name=concat_file_name,
                                      output_file_name=concat_file_name)
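
The letter_label logic above walks through the alphabet one panel at a time, starting from 'a'. The same pattern in isolation, assuming four panels:

letter_label = None

for panel_index in range(4):
    if letter_label is None:
        letter_label = 'a'
    else:
        letter_label = chr(ord(letter_label) + 1)

    print('Panel {0:d} gets letter label "{1:s}"'.format(
        panel_index, letter_label))
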
Example #3
def _run(model_file_name, example_dir_name, first_time_string,
         last_time_string, exclude_summit_greenland, output_file_name):
    """Applies trained neural net in inference mode.

    This is effectively the main method.

    :param model_file_name: See documentation at top of file.
    :param example_dir_name: Same.
    :param first_time_string: Same.
    :param last_time_string: Same.
    :param exclude_summit_greenland: Same.
    :param output_file_name: Same.
    """

    first_time_unix_sec = time_conversion.string_to_unix_sec(
        first_time_string, TIME_FORMAT)
    last_time_unix_sec = time_conversion.string_to_unix_sec(
        last_time_string, TIME_FORMAT)

    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = neural_net.read_model(model_file_name)

    metafile_name = neural_net.find_metafile(
        model_dir_name=os.path.split(model_file_name)[0],
        raise_error_if_missing=True)

    print('Reading metadata from: "{0:s}"...'.format(metafile_name))
    metadata_dict = neural_net.read_metafile(metafile_name)

    generator_option_dict = copy.deepcopy(
        metadata_dict[neural_net.TRAINING_OPTIONS_KEY])
    generator_option_dict[neural_net.EXAMPLE_DIRECTORY_KEY] = example_dir_name
    generator_option_dict[neural_net.FIRST_TIME_KEY] = first_time_unix_sec
    generator_option_dict[neural_net.LAST_TIME_KEY] = last_time_unix_sec

    vector_target_norm_type_string = copy.deepcopy(
        generator_option_dict[neural_net.VECTOR_TARGET_NORM_TYPE_KEY])
    scalar_target_norm_type_string = copy.deepcopy(
        generator_option_dict[neural_net.SCALAR_TARGET_NORM_TYPE_KEY])
    generator_option_dict[neural_net.VECTOR_TARGET_NORM_TYPE_KEY] = None
    generator_option_dict[neural_net.SCALAR_TARGET_NORM_TYPE_KEY] = None

    net_type_string = metadata_dict[neural_net.NET_TYPE_KEY]
    predictor_matrix, target_array, example_id_strings = neural_net.create_data(
        option_dict=generator_option_dict,
        for_inference=True,
        net_type_string=net_type_string,
        exclude_summit_greenland=exclude_summit_greenland)
    print(SEPARATOR_STRING)

    exec_start_time_unix_sec = time.time()
    prediction_array = neural_net.apply_model(
        model_object=model_object,
        predictor_matrix=predictor_matrix,
        num_examples_per_batch=NUM_EXAMPLES_PER_BATCH,
        net_type_string=net_type_string,
        verbose=True)

    print(SEPARATOR_STRING)
    print('Time to apply neural net = {0:.4f} seconds'.format(
        time.time() - exec_start_time_unix_sec))

    vector_target_matrix = target_array[0]
    vector_prediction_matrix = prediction_array[0]

    if len(target_array) == 2:
        scalar_target_matrix = target_array[1]
        scalar_prediction_matrix = prediction_array[1]
    else:
        scalar_target_matrix = None
        scalar_prediction_matrix = None

    target_example_dict = _targets_numpy_to_dict(
        scalar_target_matrix=scalar_target_matrix,
        vector_target_matrix=vector_target_matrix,
        model_metadata_dict=metadata_dict)

    prediction_example_dict = _targets_numpy_to_dict(
        scalar_target_matrix=scalar_prediction_matrix,
        vector_target_matrix=vector_prediction_matrix,
        model_metadata_dict=metadata_dict)

    normalization_file_name = (
        generator_option_dict[neural_net.NORMALIZATION_FILE_KEY])
    print(('Reading training examples (for normalization) from: "{0:s}"...'
           ).format(normalization_file_name))
    training_example_dict = example_io.read_file(normalization_file_name)
    training_example_dict = example_utils.subset_by_height(
        example_dict=training_example_dict,
        heights_m_agl=generator_option_dict[neural_net.HEIGHTS_KEY])

    num_examples = len(example_id_strings)
    num_heights = len(prediction_example_dict[example_utils.HEIGHTS_KEY])

    # Both dicts need predictor fields (here empty) to be valid example dicts
    # for the denormalization routines below.
    this_dict = {
        example_utils.VECTOR_PREDICTOR_NAMES_KEY: [],
        example_utils.VECTOR_PREDICTOR_VALS_KEY:
        numpy.full((num_examples, num_heights, 0), 0.),
        example_utils.SCALAR_PREDICTOR_NAMES_KEY: [],
        example_utils.SCALAR_PREDICTOR_VALS_KEY:
        numpy.full((num_examples, 0), 0.)
    }

    target_example_dict.update(this_dict)
    prediction_example_dict.update(this_dict)

    if vector_target_norm_type_string is not None:
        print('Denormalizing predicted vectors...')

        prediction_example_dict = normalization.denormalize_data(
            new_example_dict=prediction_example_dict,
            training_example_dict=training_example_dict,
            normalization_type_string=vector_target_norm_type_string,
            min_normalized_value=generator_option_dict[
                neural_net.VECTOR_TARGET_MIN_VALUE_KEY],
            max_normalized_value=generator_option_dict[
                neural_net.VECTOR_TARGET_MAX_VALUE_KEY],
            separate_heights=True,
            apply_to_predictors=False,
            apply_to_vector_targets=True,
            apply_to_scalar_targets=False)

    if scalar_target_norm_type_string is not None:
        print('Denormalizing predicted scalars...')

        prediction_example_dict = normalization.denormalize_data(
            new_example_dict=prediction_example_dict,
            training_example_dict=training_example_dict,
            normalization_type_string=scalar_target_norm_type_string,
            min_normalized_value=generator_option_dict[
                neural_net.SCALAR_TARGET_MIN_VALUE_KEY],
            max_normalized_value=generator_option_dict[
                neural_net.SCALAR_TARGET_MAX_VALUE_KEY],
            separate_heights=True,
            apply_to_predictors=False,
            apply_to_vector_targets=False,
            apply_to_scalar_targets=True)

    # If the heating rate was omitted from the neural net's target variables,
    # it must be computed from the predicted fluxes after the fact.
    add_heating_rate = generator_option_dict[neural_net.OMIT_HEATING_RATE_KEY]

    if add_heating_rate:
        pressure_matrix_pascals = _get_unnormalized_pressure(
            model_metadata_dict=metadata_dict,
            example_id_strings=example_id_strings)

        prediction_example_dict = _get_predicted_heating_rates(
            prediction_example_dict=prediction_example_dict,
            pressure_matrix_pascals=pressure_matrix_pascals,
            model_metadata_dict=metadata_dict)

    vector_target_names = (
        generator_option_dict[neural_net.VECTOR_TARGET_NAMES_KEY])

    if example_utils.SHORTWAVE_HEATING_RATE_NAME in vector_target_names:
        heating_rate_index = vector_target_names.index(
            example_utils.SHORTWAVE_HEATING_RATE_NAME)

        heights_m_agl = generator_option_dict[neural_net.HEIGHTS_KEY]

        # Force predicted heating rates to zero at heights where shortwave
        # heating is assumed to be negligible.
        height_indices = numpy.where(
            heights_m_agl >= ZERO_HEATING_HEIGHT_M_AGL)[0]

        vector_target_matrix = (
            prediction_example_dict[example_utils.VECTOR_TARGET_VALS_KEY])
        vector_target_matrix[..., heating_rate_index][..., height_indices] = 0.
        prediction_example_dict[example_utils.VECTOR_TARGET_VALS_KEY] = (
            vector_target_matrix)

    all_heights_m_agl = generator_option_dict[neural_net.HEIGHTS_KEY]
    desired_heights_m_agl = (
        all_heights_m_agl[all_heights_m_agl < MAX_HEIGHT_M_AGL])

    target_example_dict = example_utils.subset_by_height(
        example_dict=target_example_dict, heights_m_agl=desired_heights_m_agl)
    prediction_example_dict = example_utils.subset_by_height(
        example_dict=prediction_example_dict,
        heights_m_agl=desired_heights_m_agl)

    print('Writing target (actual) and predicted values to: "{0:s}"...'.format(
        output_file_name))
    prediction_io.write_file(netcdf_file_name=output_file_name,
                             scalar_target_matrix=target_example_dict[
                                 example_utils.SCALAR_TARGET_VALS_KEY],
                             vector_target_matrix=target_example_dict[
                                 example_utils.VECTOR_TARGET_VALS_KEY],
                             scalar_prediction_matrix=prediction_example_dict[
                                 example_utils.SCALAR_TARGET_VALS_KEY],
                             vector_prediction_matrix=prediction_example_dict[
                                 example_utils.VECTOR_TARGET_VALS_KEY],
                             heights_m_agl=desired_heights_m_agl,
                             example_id_strings=example_id_strings,
                             model_file_name=model_file_name)
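
One subtlety in the code above: vector_target_matrix[..., heating_rate_index] uses basic (integer) indexing, so it returns a view, and assigning into that view at height_indices modifies the original array. A small sketch demonstrating the behaviour, with made-up dimensions:

import numpy

# 2 examples x 4 heights x 1 target variable
vector_target_matrix = numpy.ones((2, 4, 1))
heating_rate_index = 0
height_indices = numpy.array([2, 3], dtype=int)

# Integer indexing returns a view, so this zeroes part of the original array.
vector_target_matrix[..., heating_rate_index][..., height_indices] = 0.
print(vector_target_matrix[:, :, 0])  # last two heights are now zero
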
Example #4
def _run(evaluation_file_names, line_styles, line_colour_strings,
         set_descriptions_verbose, confidence_level, use_log_scale,
         plot_by_height, output_dir_name):
    """Plots model evaluation.

    This is effectively the main method.

    :param evaluation_file_names: See documentation at top of file.
    :param line_styles: Same.
    :param line_colour_strings: Same.
    :param set_descriptions_verbose: Same.
    :param confidence_level: Same.
    :param use_log_scale: Same.
    :param plot_by_height: Same.
    :param output_dir_name: Same.
    """

    # Check input args.
    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    # A negative confidence level on the command line means "do not plot
    # confidence intervals".
    if confidence_level < 0:
        confidence_level = None

    if confidence_level is not None:
        error_checking.assert_is_geq(confidence_level, 0.9)
        error_checking.assert_is_less_than(confidence_level, 1.)

    num_evaluation_sets = len(evaluation_file_names)
    expected_dim = numpy.array([num_evaluation_sets], dtype=int)

    error_checking.assert_is_string_list(line_styles)
    error_checking.assert_is_numpy_array(numpy.array(line_styles),
                                         exact_dimensions=expected_dim)

    error_checking.assert_is_string_list(set_descriptions_verbose)
    error_checking.assert_is_numpy_array(numpy.array(set_descriptions_verbose),
                                         exact_dimensions=expected_dim)

    set_descriptions_verbose = [
        s.replace('_', ' ') for s in set_descriptions_verbose
    ]
    set_descriptions_abbrev = [
        s.lower().replace(' ', '-') for s in set_descriptions_verbose
    ]

    error_checking.assert_is_string_list(line_colour_strings)
    error_checking.assert_is_numpy_array(numpy.array(line_colour_strings),
                                         exact_dimensions=expected_dim)
    line_colours = [
        numpy.fromstring(s, dtype=float, sep='_') / 255
        for s in line_colour_strings
    ]

    for i in range(num_evaluation_sets):
        error_checking.assert_is_numpy_array(line_colours[i],
                                             exact_dimensions=numpy.array(
                                                 [3], dtype=int))
        error_checking.assert_is_geq_numpy_array(line_colours[i], 0.)
        error_checking.assert_is_leq_numpy_array(line_colours[i], 1.)

    # Read files.
    evaluation_tables_xarray = [xarray.Dataset()] * num_evaluation_sets
    prediction_dicts = [dict()] * num_evaluation_sets

    for i in range(num_evaluation_sets):
        print('Reading data from: "{0:s}"...'.format(evaluation_file_names[i]))
        evaluation_tables_xarray[i] = evaluation.read_file(
            evaluation_file_names[i])

        this_prediction_file_name = (
            evaluation_tables_xarray[i].attrs[evaluation.PREDICTION_FILE_KEY])

        print(
            'Reading data from: "{0:s}"...'.format(this_prediction_file_name))
        prediction_dicts[i] = prediction_io.read_file(
            this_prediction_file_name)

    model_file_name = (
        evaluation_tables_xarray[0].attrs[evaluation.MODEL_FILE_KEY])
    model_metafile_name = neural_net.find_metafile(
        model_dir_name=os.path.split(model_file_name)[0],
        raise_error_if_missing=True)

    print('Reading metadata from: "{0:s}"...'.format(model_metafile_name))
    model_metadata_dict = neural_net.read_metafile(model_metafile_name)
    generator_option_dict = model_metadata_dict[
        neural_net.TRAINING_OPTIONS_KEY]

    scalar_target_names = (
        generator_option_dict[neural_net.SCALAR_TARGET_NAMES_KEY])
    vector_target_names = (
        generator_option_dict[neural_net.VECTOR_TARGET_NAMES_KEY])
    heights_m_agl = generator_option_dict[neural_net.HEIGHTS_KEY]

    try:
        t = evaluation_tables_xarray[0]
        aux_target_names = t.coords[evaluation.AUX_TARGET_FIELD_DIM].values
    except KeyError:
        aux_target_names = []

    num_scalar_targets = len(scalar_target_names)
    num_vector_targets = len(vector_target_names)
    num_heights = len(heights_m_agl)
    num_aux_targets = len(aux_target_names)

    example_dict = {
        example_utils.SCALAR_TARGET_NAMES_KEY:
        scalar_target_names,
        example_utils.VECTOR_TARGET_NAMES_KEY:
        vector_target_names,
        example_utils.HEIGHTS_KEY:
        heights_m_agl,
        example_utils.SCALAR_PREDICTOR_NAMES_KEY:
        generator_option_dict[neural_net.SCALAR_PREDICTOR_NAMES_KEY],
        example_utils.VECTOR_PREDICTOR_NAMES_KEY:
        generator_option_dict[neural_net.VECTOR_PREDICTOR_NAMES_KEY]
    }

    normalization_file_name = (
        generator_option_dict[neural_net.NORMALIZATION_FILE_KEY])
    print(('Reading training examples (for climatology) from: "{0:s}"...'
           ).format(normalization_file_name))

    training_example_dict = example_io.read_file(normalization_file_name)
    training_example_dict = example_utils.subset_by_height(
        example_dict=training_example_dict, heights_m_agl=heights_m_agl)
    mean_training_example_dict = normalization.create_mean_example(
        new_example_dict=example_dict,
        training_example_dict=training_example_dict)

    print(SEPARATOR_STRING)

    # Plot the evaluation results.
    _plot_error_distributions(
        prediction_dicts=prediction_dicts,
        model_metadata_dict=model_metadata_dict,
        aux_target_names=aux_target_names,
        set_descriptions_abbrev=set_descriptions_abbrev,
        set_descriptions_verbose=set_descriptions_verbose,
        output_dir_name=output_dir_name)
    print(SEPARATOR_STRING)

    _plot_reliability_by_height(
        evaluation_tables_xarray=evaluation_tables_xarray,
        vector_target_names=vector_target_names,
        heights_m_agl=heights_m_agl,
        set_descriptions_abbrev=set_descriptions_abbrev,
        set_descriptions_verbose=set_descriptions_verbose,
        output_dir_name=output_dir_name)
    print(SEPARATOR_STRING)

    for k in range(num_vector_targets):
        for this_score_name in list(SCORE_NAME_TO_PROFILE_KEY.keys()):
            _plot_score_profile(
                evaluation_tables_xarray=evaluation_tables_xarray,
                line_styles=line_styles,
                line_colours=line_colours,
                set_descriptions_verbose=set_descriptions_verbose,
                confidence_level=confidence_level,
                target_name=vector_target_names[k],
                score_name=this_score_name,
                use_log_scale=use_log_scale,
                output_dir_name=output_dir_name)

    print(SEPARATOR_STRING)

    for k in range(num_scalar_targets):
        _plot_attributes_diagram(
            evaluation_tables_xarray=evaluation_tables_xarray,
            line_styles=line_styles,
            line_colours=line_colours,
            set_descriptions_abbrev=set_descriptions_abbrev,
            set_descriptions_verbose=set_descriptions_verbose,
            confidence_level=confidence_level,
            mean_training_example_dict=mean_training_example_dict,
            target_name=scalar_target_names[k],
            output_dir_name=output_dir_name)

    for k in range(num_aux_targets):
        _plot_attributes_diagram(
            evaluation_tables_xarray=evaluation_tables_xarray,
            line_styles=line_styles,
            line_colours=line_colours,
            set_descriptions_abbrev=set_descriptions_abbrev,
            set_descriptions_verbose=set_descriptions_verbose,
            confidence_level=confidence_level,
            mean_training_example_dict=mean_training_example_dict,
            target_name=aux_target_names[k],
            output_dir_name=output_dir_name)

    if not plot_by_height:
        return

    print(SEPARATOR_STRING)

    for k in range(num_vector_targets):
        for j in range(num_heights):
            _plot_attributes_diagram(
                evaluation_tables_xarray=evaluation_tables_xarray,
                line_styles=line_styles,
                line_colours=line_colours,
                set_descriptions_abbrev=set_descriptions_abbrev,
                set_descriptions_verbose=set_descriptions_verbose,
                confidence_level=confidence_level,
                mean_training_example_dict=mean_training_example_dict,
                height_m_agl=heights_m_agl[j],
                target_name=vector_target_names[k],
                output_dir_name=output_dir_name)

        if k != num_vector_targets - 1:
            print(SEPARATOR_STRING)
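
Line colours arrive as underscore-separated RGB strings and are converted to floats in [0, 1] by dividing by 255. A sketch of the conversion, with a hypothetical colour string:

import numpy

line_colour_string = '255_102_0'
line_colour = numpy.fromstring(line_colour_string, dtype=float, sep='_') / 255
print(line_colour)  # yields [1.0, 0.4, 0.0]

In newer NumPy versions, where text-mode fromstring is deprecated, numpy.array(line_colour_string.split('_'), dtype=float) / 255 is a drop-in alternative.
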
Example #5
def _run(example_file_name, num_examples, choose_max_heating_rate,
         max_noise_k_day01, pressure_cutoffs_pa, pressure_spacings_pa,
         first_interp_method_name, second_interp_method_name, interp_fluxes,
         output_dir_name):
    """Runs interpolation experiment.

    This is effectively the main method.

    :param example_file_name: See documentation at top of file.
    :param num_examples: Same.
    :param choose_max_heating_rate: Same.
    :param max_noise_k_day01: Same.
    :param pressure_cutoffs_pa: Same.
    :param pressure_spacings_pa: Same.
    :param first_interp_method_name: Same.
    :param second_interp_method_name: Same.
    :param interp_fluxes: Same.
    :param output_dir_name: Same.
    """

    if interp_fluxes:
        max_noise_k_day01 = 0.

    error_checking.assert_is_greater(num_examples, 0)
    error_checking.assert_is_geq(max_noise_k_day01, 0.)

    error_checking.assert_is_geq_numpy_array(pressure_cutoffs_pa, 0.)
    error_checking.assert_is_greater_numpy_array(
        numpy.diff(pressure_cutoffs_pa), 0.)
    error_checking.assert_is_greater_numpy_array(pressure_spacings_pa, 0.)

    num_spacings = len(pressure_spacings_pa)
    expected_dim = numpy.array([num_spacings + 1], dtype=int)
    error_checking.assert_is_numpy_array(pressure_cutoffs_pa,
                                         exact_dimensions=expected_dim)

    high_res_pressures_pa = numpy.array([], dtype=float)

    for i in range(num_spacings):
        this_num_pressures = int(
            numpy.ceil(1 +
                       (pressure_cutoffs_pa[i + 1] - pressure_cutoffs_pa[i]) /
                       pressure_spacings_pa[i]))
        these_pressures_pa = numpy.linspace(pressure_cutoffs_pa[i],
                                            pressure_cutoffs_pa[i + 1],
                                            num=this_num_pressures,
                                            dtype=float)

        # Drop the last level of each segment (except the final segment), so
        # that segment boundaries are not duplicated.
        if i != num_spacings - 1:
            these_pressures_pa = these_pressures_pa[:-1]

        high_res_pressures_pa = numpy.concatenate(
            (high_res_pressures_pa, these_pressures_pa))

    print('Number of levels in high-resolution grid = {0:d}'.format(
        len(high_res_pressures_pa)))

    # A pressure of exactly zero has no defined height, so nudge the top level
    # to half the next pressure.
    if high_res_pressures_pa[0] < TOLERANCE:
        high_res_pressures_pa[0] = 0.5 * high_res_pressures_pa[1]

    high_res_pressures_pa = high_res_pressures_pa[::-1]
    high_res_heights_m_asl = standard_atmo.pressure_to_height(
        high_res_pressures_pa)

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    print('Reading data from: "{0:s}"...'.format(example_file_name))
    example_dict = example_io.read_file(example_file_name)

    heating_rate_matrix_k_day01 = example_utils.get_field_from_dict(
        example_dict=example_dict,
        field_name=example_utils.SHORTWAVE_HEATING_RATE_NAME)

    if choose_max_heating_rate:
        hr_criterion_by_example = numpy.max(heating_rate_matrix_k_day01,
                                            axis=1)
    else:
        abs_diff_matrix = numpy.absolute(
            numpy.diff(heating_rate_matrix_k_day01[:, :-1], axis=1))
        hr_criterion_by_example = numpy.max(abs_diff_matrix, axis=1)

    good_indices = numpy.argsort(-1 * hr_criterion_by_example)
    good_indices = good_indices[:num_examples]
    example_dict = example_utils.subset_by_index(example_dict=example_dict,
                                                 desired_indices=good_indices)

    num_examples = len(good_indices)
    max_differences_k_day01 = numpy.full(num_examples, numpy.nan)

    for i in range(num_examples):
        max_differences_k_day01[i] = _run_experiment_one_example(
            example_dict=example_dict,
            example_index=i,
            max_noise_k_day01=max_noise_k_day01,
            high_res_pressures_pa=high_res_pressures_pa,
            high_res_heights_m_asl=high_res_heights_m_asl,
            first_interp_method_name=first_interp_method_name,
            second_interp_method_name=second_interp_method_name,
            interp_fluxes=interp_fluxes,
            output_dir_name=output_dir_name)

    print('Average max difference = {0:.4f} K day^-1'.format(
        numpy.mean(max_differences_k_day01)))
    print('Median max difference = {0:.4f} K day^-1'.format(
        numpy.median(max_differences_k_day01)))
    print('Max max difference = {0:.4f} K day^-1'.format(
        numpy.max(max_differences_k_day01)))
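
The grid-building loop above stitches together numpy.linspace segments with different spacings, dropping each segment's last level (except the final segment's) so that boundaries are not duplicated. A stripped-down version with two hypothetical segments:

import numpy

pressure_cutoffs_pa = numpy.array([0., 100., 1000.])
pressure_spacings_pa = numpy.array([10., 100.])
high_res_pressures_pa = numpy.array([], dtype=float)

for i in range(len(pressure_spacings_pa)):
    this_num_pressures = int(numpy.ceil(
        1 + (pressure_cutoffs_pa[i + 1] - pressure_cutoffs_pa[i]) /
        pressure_spacings_pa[i]
    ))
    these_pressures_pa = numpy.linspace(
        pressure_cutoffs_pa[i], pressure_cutoffs_pa[i + 1],
        num=this_num_pressures, dtype=float)

    if i != len(pressure_spacings_pa) - 1:
        these_pressures_pa = these_pressures_pa[:-1]  # avoid duplicate boundary

    high_res_pressures_pa = numpy.concatenate(
        (high_res_pressures_pa, these_pressures_pa))

print(len(high_res_pressures_pa))  # 10 + 10 = 20 levels, no duplicates
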
Example #6

def _run(model_file_name, example_file_name, num_examples, example_dir_name,
         example_id_file_name, layer_name, neuron_indices, ideal_activation,
         num_iterations, learning_rate, l2_weight, output_file_name):
    """Runs backwards optimization.

    This is effectively the main method.

    :param model_file_name: See documentation at top of file.
    :param example_file_name: Same.
    :param num_examples: Same.
    :param example_dir_name: Same.
    :param example_id_file_name: Same.
    :param layer_name: Same.
    :param neuron_indices: Same.
    :param ideal_activation: Same.
    :param num_iterations: Same.
    :param learning_rate: Same.
    :param l2_weight: Same.
    :param output_file_name: Same.
    """

    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = neural_net.read_model(model_file_name)

    metafile_name = neural_net.find_metafile(
        model_dir_name=os.path.split(model_file_name)[0],
        raise_error_if_missing=True
    )

    print('Reading metadata from: "{0:s}"...'.format(metafile_name))
    metadata_dict = neural_net.read_metafile(metafile_name)

    predictor_matrix, _, example_id_strings = (
        misc_utils.get_examples_for_inference(
            model_metadata_dict=metadata_dict,
            example_file_name=example_file_name,
            num_examples=num_examples, example_dir_name=example_dir_name,
            example_id_file_name=example_id_file_name
        )
    )
    print(SEPARATOR_STRING)

    generator_option_dict = metadata_dict[neural_net.TRAINING_OPTIONS_KEY]
    normalization_file_name = (
        generator_option_dict[neural_net.NORMALIZATION_FILE_KEY]
    )

    print((
        'Reading training examples (for normalization) from: "{0:s}"...'
    ).format(
        normalization_file_name
    ))
    training_example_dict = example_io.read_file(normalization_file_name)
    training_example_dict = example_utils.subset_by_height(
        example_dict=training_example_dict,
        heights_m_agl=generator_option_dict[neural_net.HEIGHTS_KEY]
    )

    num_examples = len(example_id_strings)
    bwo_dict = None

    for i in range(num_examples):
        this_bwo_dict = bwo.optimize_input_for_neuron(
            model_object=model_object,
            init_function_or_matrix=predictor_matrix[i, ...],
            layer_name=layer_name, neuron_indices=neuron_indices,
            ideal_activation=ideal_activation, num_iterations=num_iterations,
            learning_rate=learning_rate, l2_weight=l2_weight
        )

        if i == num_examples - 1:
            print(SEPARATOR_STRING)
        else:
            print(MINOR_SEPARATOR_STRING)

        if bwo_dict is None:
            these_dim = numpy.array(
                (num_examples,) +
                this_bwo_dict[bwo.INITIAL_PREDICTORS_KEY].shape[1:],
                dtype=int
            )

            bwo_dict = {
                bwo.INITIAL_PREDICTORS_KEY: numpy.full(these_dim, numpy.nan),
                bwo.FINAL_PREDICTORS_KEY: numpy.full(these_dim, numpy.nan),
                bwo.INITIAL_ACTIVATIONS_KEY:
                    numpy.full(num_examples, numpy.nan),
                bwo.FINAL_ACTIVATIONS_KEY: numpy.full(num_examples, numpy.nan)
            }

        bwo_dict[bwo.INITIAL_PREDICTORS_KEY][i, ...] = (
            this_bwo_dict[bwo.INITIAL_PREDICTORS_KEY][0, ...]
        )
        bwo_dict[bwo.FINAL_PREDICTORS_KEY][i, ...] = (
            this_bwo_dict[bwo.FINAL_PREDICTORS_KEY][0, ...]
        )
        bwo_dict[bwo.INITIAL_ACTIVATIONS_KEY][i] = (
            this_bwo_dict[bwo.INITIAL_ACTIVATION_KEY]
        )
        bwo_dict[bwo.FINAL_ACTIVATIONS_KEY][i] = (
            this_bwo_dict[bwo.FINAL_ACTIVATION_KEY]
        )

    # If no single example file was given, use the first file in the directory
    # as a template for the example dict.
    if example_file_name == '':
        example_file_name = example_io.find_many_files(
            directory_name=example_dir_name,
            first_time_unix_sec=0, last_time_unix_sec=int(1e12),
            raise_error_if_any_missing=False, raise_error_if_all_missing=True
        )[0]

    first_example_dict = example_io.read_file(example_file_name)
    first_example_dict = example_utils.subset_by_height(
        example_dict=first_example_dict,
        heights_m_agl=generator_option_dict[neural_net.HEIGHTS_KEY]
    )

    net_type_string = metadata_dict[neural_net.NET_TYPE_KEY]

    init_example_dict = copy.deepcopy(first_example_dict)
    this_example_dict = neural_net.predictors_numpy_to_dict(
        predictor_matrix=bwo_dict[bwo.INITIAL_PREDICTORS_KEY],
        example_dict=init_example_dict, net_type_string=net_type_string
    )
    init_example_dict.update(this_example_dict)

    if generator_option_dict[neural_net.PREDICTOR_NORM_TYPE_KEY] is not None:
        init_example_dict = normalization.denormalize_data(
            new_example_dict=init_example_dict,
            training_example_dict=training_example_dict,
            normalization_type_string=
            generator_option_dict[neural_net.PREDICTOR_NORM_TYPE_KEY],
            min_normalized_value=
            generator_option_dict[neural_net.PREDICTOR_MIN_NORM_VALUE_KEY],
            max_normalized_value=
            generator_option_dict[neural_net.PREDICTOR_MAX_NORM_VALUE_KEY],
            separate_heights=True, apply_to_predictors=True,
            apply_to_vector_targets=False, apply_to_scalar_targets=False
        )

    init_scalar_predictor_matrix = (
        init_example_dict[example_utils.SCALAR_PREDICTOR_VALS_KEY]
    )
    init_vector_predictor_matrix = (
        init_example_dict[example_utils.VECTOR_PREDICTOR_VALS_KEY]
    )

    final_example_dict = copy.deepcopy(first_example_dict)
    this_example_dict = neural_net.predictors_numpy_to_dict(
        predictor_matrix=bwo_dict[bwo.FINAL_PREDICTORS_KEY],
        example_dict=final_example_dict, net_type_string=net_type_string
    )
    final_example_dict.update(this_example_dict)

    if generator_option_dict[neural_net.PREDICTOR_NORM_TYPE_KEY] is not None:
        final_example_dict = normalization.denormalize_data(
            new_example_dict=final_example_dict,
            training_example_dict=training_example_dict,
            normalization_type_string=
            generator_option_dict[neural_net.PREDICTOR_NORM_TYPE_KEY],
            min_normalized_value=
            generator_option_dict[neural_net.PREDICTOR_MIN_NORM_VALUE_KEY],
            max_normalized_value=
            generator_option_dict[neural_net.PREDICTOR_MAX_NORM_VALUE_KEY],
            separate_heights=True, apply_to_predictors=True,
            apply_to_vector_targets=False, apply_to_scalar_targets=False
        )

    final_scalar_predictor_matrix = (
        final_example_dict[example_utils.SCALAR_PREDICTOR_VALS_KEY]
    )
    final_vector_predictor_matrix = (
        final_example_dict[example_utils.VECTOR_PREDICTOR_VALS_KEY]
    )

    print('Writing results to file: "{0:s}"...'.format(output_file_name))
    bwo.write_file(
        netcdf_file_name=output_file_name,
        init_scalar_predictor_matrix=init_scalar_predictor_matrix,
        final_scalar_predictor_matrix=final_scalar_predictor_matrix,
        init_vector_predictor_matrix=init_vector_predictor_matrix,
        final_vector_predictor_matrix=final_vector_predictor_matrix,
        initial_activations=bwo_dict[bwo.INITIAL_ACTIVATIONS_KEY],
        final_activations=bwo_dict[bwo.FINAL_ACTIVATIONS_KEY],
        example_id_strings=example_id_strings, model_file_name=model_file_name,
        layer_name=layer_name, neuron_indices=neuron_indices,
        ideal_activation=ideal_activation, num_iterations=num_iterations,
        learning_rate=learning_rate, l2_weight=l2_weight
    )
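
The bwo_dict handling above shows a common accumulation pattern: allocate NaN-filled arrays once the first result reveals the output shapes, then fill them one example at a time. The pattern in isolation, with made-up shapes and a hypothetical key:

import numpy

num_examples = 3
result_dict = None

for i in range(num_examples):
    # Stand-in for one optimization result (shape: 1 x heights x channels).
    this_result_matrix = numpy.random.normal(size=(1, 73, 4))

    if result_dict is None:
        these_dim = (num_examples,) + this_result_matrix.shape[1:]
        result_dict = {'predictors': numpy.full(these_dim, numpy.nan)}

    result_dict['predictors'][i, ...] = this_result_matrix[0, ...]

print(result_dict['predictors'].shape)  # (3, 73, 4)
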
Example #7
def _run(tropical_example_dir_name, non_tropical_example_dir_name,
         output_file_name):
    """Plots all sites wtih data.

    This is effectively the main method.

    :param tropical_example_dir_name: See documentation at top of file.
    :param non_tropical_example_dir_name: Same.
    :param output_file_name: Same.
    """

    first_time_unix_sec = (
        time_conversion.first_and_last_times_in_year(FIRST_YEAR)[0])
    last_time_unix_sec = (
        time_conversion.first_and_last_times_in_year(LAST_YEAR)[-1])

    tropical_file_names = example_io.find_many_files(
        directory_name=tropical_example_dir_name,
        first_time_unix_sec=first_time_unix_sec,
        last_time_unix_sec=last_time_unix_sec,
        raise_error_if_all_missing=True,
        raise_error_if_any_missing=False)

    non_tropical_file_names = example_io.find_many_files(
        directory_name=non_tropical_example_dir_name,
        first_time_unix_sec=first_time_unix_sec,
        last_time_unix_sec=last_time_unix_sec,
        raise_error_if_all_missing=True,
        raise_error_if_any_missing=False)

    latitudes_deg_n = numpy.array([])
    longitudes_deg_e = numpy.array([])

    for this_file_name in tropical_file_names:
        print('Reading data from: "{0:s}"...'.format(this_file_name))
        this_example_dict = example_io.read_file(this_file_name)

        these_latitudes_deg_n = example_utils.get_field_from_dict(
            example_dict=this_example_dict,
            field_name=example_utils.LATITUDE_NAME)
        these_longitudes_deg_e = example_utils.get_field_from_dict(
            example_dict=this_example_dict,
            field_name=example_utils.LONGITUDE_NAME)

        latitudes_deg_n = numpy.concatenate(
            (latitudes_deg_n, these_latitudes_deg_n))
        longitudes_deg_e = numpy.concatenate(
            (longitudes_deg_e, these_longitudes_deg_e))

    for this_file_name in non_tropical_file_names:
        print('Reading data from: "{0:s}"...'.format(this_file_name))
        this_example_dict = example_io.read_file(this_file_name)

        these_latitudes_deg_n = example_utils.get_field_from_dict(
            example_dict=this_example_dict,
            field_name=example_utils.LATITUDE_NAME)
        these_longitudes_deg_e = example_utils.get_field_from_dict(
            example_dict=this_example_dict,
            field_name=example_utils.LONGITUDE_NAME)

        latitudes_deg_n = numpy.concatenate(
            (latitudes_deg_n, these_latitudes_deg_n))
        longitudes_deg_e = numpy.concatenate(
            (longitudes_deg_e, these_longitudes_deg_e))

    coord_matrix = numpy.transpose(
        numpy.vstack((latitudes_deg_n, longitudes_deg_e)))
    coord_matrix = number_rounding.round_to_nearest(coord_matrix,
                                                    LATLNG_TOLERANCE_DEG)
    coord_matrix = numpy.unique(coord_matrix, axis=0)

    latitudes_deg_n = coord_matrix[:, 0]
    longitudes_deg_e = coord_matrix[:, 1]

    figure_object, axes_object, basemap_object = (
        plotting_utils.create_equidist_cylindrical_map(
            min_latitude_deg=MIN_PLOT_LATITUDE_DEG_N,
            max_latitude_deg=MAX_PLOT_LATITUDE_DEG_N,
            min_longitude_deg=MIN_PLOT_LONGITUDE_DEG_E,
            max_longitude_deg=MAX_PLOT_LONGITUDE_DEG_E,
            resolution_string='l'))

    plotting_utils.plot_coastlines(basemap_object=basemap_object,
                                   axes_object=axes_object,
                                   line_colour=BORDER_COLOUR,
                                   line_width=BORDER_WIDTH)
    plotting_utils.plot_countries(basemap_object=basemap_object,
                                  axes_object=axes_object,
                                  line_colour=BORDER_COLOUR,
                                  line_width=BORDER_WIDTH)
    plotting_utils.plot_parallels(basemap_object=basemap_object,
                                  axes_object=axes_object,
                                  num_parallels=NUM_PARALLELS,
                                  line_colour=GRID_LINE_COLOUR,
                                  line_width=GRID_LINE_WIDTH,
                                  font_size=FONT_SIZE)
    plotting_utils.plot_meridians(basemap_object=basemap_object,
                                  axes_object=axes_object,
                                  num_meridians=NUM_MERIDIANS,
                                  line_colour=GRID_LINE_COLOUR,
                                  line_width=GRID_LINE_WIDTH,
                                  font_size=FONT_SIZE)

    arctic_indices = numpy.where(latitudes_deg_n >= 66.5)[0]
    print('Number of Arctic sites = {0:d}'.format(len(arctic_indices)))

    arctic_x_coords, arctic_y_coords = basemap_object(
        longitudes_deg_e[arctic_indices], latitudes_deg_n[arctic_indices])
    axes_object.plot(arctic_x_coords,
                     arctic_y_coords,
                     linestyle='None',
                     marker=MARKER_TYPE,
                     markersize=MARKER_SIZE,
                     markeredgewidth=0,
                     markerfacecolor=ARCTIC_COLOUR,
                     markeredgecolor=ARCTIC_COLOUR)

    mid_latitude_indices = numpy.where(
        numpy.logical_and(latitudes_deg_n >= 30., latitudes_deg_n < 66.5))[0]
    print('Number of mid-latitude sites = {0:d}'.format(
        len(mid_latitude_indices)))

    mid_latitude_x_coords, mid_latitude_y_coords = basemap_object(
        longitudes_deg_e[mid_latitude_indices],
        latitudes_deg_n[mid_latitude_indices])
    axes_object.plot(mid_latitude_x_coords,
                     mid_latitude_y_coords,
                     linestyle='None',
                     marker=MARKER_TYPE,
                     markersize=MARKER_SIZE,
                     markeredgewidth=0,
                     markerfacecolor=MID_LATITUDE_COLOUR,
                     markeredgecolor=MID_LATITUDE_COLOUR)

    tropical_indices = numpy.where(latitudes_deg_n < 30.)[0]
    print('Number of tropical sites = {0:d}'.format(len(tropical_indices)))

    tropical_x_coords, tropical_y_coords = basemap_object(
        longitudes_deg_e[tropical_indices], latitudes_deg_n[tropical_indices])
    axes_object.plot(tropical_x_coords,
                     tropical_y_coords,
                     linestyle='None',
                     marker=MARKER_TYPE,
                     markersize=MARKER_SIZE,
                     markeredgewidth=0,
                     markerfacecolor=TROPICAL_COLOUR,
                     markeredgecolor=TROPICAL_COLOUR)

    file_system_utils.mkdir_recursive_if_necessary(file_name=output_file_name)

    print('Saving figure to: "{0:s}"...'.format(output_file_name))
    figure_object.savefig(output_file_name,
                          dpi=FIGURE_RESOLUTION_DPI,
                          pad_inches=0,
                          bbox_inches='tight')
    pyplot.close(figure_object)
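
The site deduplication above rounds both coordinates to a tolerance and then calls numpy.unique with axis=0, which removes duplicate rows. A minimal sketch, with numpy.round standing in for number_rounding.round_to_nearest and two nearly identical sites:

import numpy

latitudes_deg_n = numpy.array([10.0001, 10.0002, -45.])
longitudes_deg_e = numpy.array([20.0001, 20.0002, 170.])

coord_matrix = numpy.transpose(
    numpy.vstack((latitudes_deg_n, longitudes_deg_e)))
coord_matrix = numpy.round(coord_matrix / 0.001) * 0.001  # round to 0.001 deg
coord_matrix = numpy.unique(coord_matrix, axis=0)
print(len(coord_matrix))  # 2 unique sites remain
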
Example #8
def get_raw_examples(example_file_name, num_examples, example_dir_name,
                     example_id_file_name):
    """Returns raw examples.

    The difference between `get_raw_examples` and `get_examples_for_inference`
    is that `get_raw_examples` returns examples in their raw form, *not*
    pre-processed to be fed through a model for inference.

    :param example_file_name: See doc for `get_examples_for_inference`.
    :param num_examples: Same.
    :param example_dir_name: Same.
    :param example_id_file_name: Same.
    :return: example_dict: See doc for `example_io.read_file`.
    """

    error_checking.assert_is_string(example_file_name)

    # An empty file name is the signal to read specific example IDs instead.
    use_specific_ids = example_file_name == ''

    if use_specific_ids:
        error_checking.assert_is_string(example_id_file_name)

        print('Reading desired example IDs from: "{0:s}"...'.format(
            example_id_file_name))
        example_id_strings = read_example_ids_from_netcdf(example_id_file_name)

        valid_times_unix_sec = example_utils.parse_example_ids(
            example_id_strings)[example_utils.VALID_TIMES_KEY]

        example_file_names = example_io.find_many_files(
            directory_name=example_dir_name,
            first_time_unix_sec=numpy.min(valid_times_unix_sec),
            last_time_unix_sec=numpy.max(valid_times_unix_sec))

        num_files = len(example_file_names)
        example_dicts = [dict()] * num_files

        for i in range(num_files):
            print('Reading data from: "{0:s}"...'.format(
                example_file_names[i]))
            example_dicts[i] = example_io.read_file(example_file_names[i])

        example_dict = example_utils.concat_examples(example_dicts)

        good_indices = example_utils.find_examples(
            all_id_strings=example_dict[example_utils.EXAMPLE_IDS_KEY],
            desired_id_strings=example_id_strings,
            allow_missing=False)

        example_dict = example_utils.subset_by_index(
            example_dict=example_dict, desired_indices=good_indices)
    else:
        error_checking.assert_is_string(example_dir_name)
        error_checking.assert_is_integer(num_examples)
        error_checking.assert_is_greater(num_examples, 0)

        print('Reading data from: "{0:s}"...'.format(example_file_name))
        example_dict = example_io.read_file(example_file_name)

        num_examples_total = len(example_dict[example_utils.VALID_TIMES_KEY])
        desired_indices = numpy.linspace(0,
                                         num_examples_total - 1,
                                         num=num_examples_total,
                                         dtype=int)

        if num_examples < num_examples_total:
            desired_indices = numpy.random.choice(desired_indices,
                                                  size=num_examples,
                                                  replace=False)

        example_dict = example_utils.subset_by_index(
            example_dict=example_dict, desired_indices=desired_indices)

    return example_dict
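
When a file contains more examples than requested, get_raw_examples subsamples uniformly at random without replacement. The core of that logic on its own:

import numpy

num_examples_total = 1000
num_examples = 5

desired_indices = numpy.linspace(
    0, num_examples_total - 1, num=num_examples_total, dtype=int)

if num_examples < num_examples_total:
    desired_indices = numpy.random.choice(
        desired_indices, size=num_examples, replace=False)

print(desired_indices)  # 5 distinct indices in [0, 999]
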
Example #9

def _run(example_dir_name, first_year, last_year, min_percentile_level,
         max_percentile_level, output_file_name):
    """Finds normalization parameters for radiative-transfer data.

    This is effectively the main method.

    :param example_dir_name: See documentation at top of file.
    :param first_year: Same.
    :param last_year: Same.
    :param min_percentile_level: Same.
    :param max_percentile_level: Same.
    :param output_file_name: Same.
    """

    error_checking.assert_is_geq(last_year, first_year)
    years = numpy.linspace(
        first_year, last_year, num=last_year - first_year + 1, dtype=int
    )

    num_years = len(years)
    example_file_names = [None] * num_years

    for i in range(num_years):
        example_file_names[i] = example_io.find_file(
            example_dir_name=example_dir_name, year=years[i],
            raise_error_if_missing=True
        )

    this_example_dict = example_io.read_file(example_file_names[0])
    heights_m_agl = numpy.round(
        this_example_dict[example_io.HEIGHTS_KEY]
    ).astype(int)

    orig_parameter_dict = {
        normalization_params.NUM_VALUES_KEY: 0,
        normalization_params.MEAN_VALUE_KEY: 0.,
        normalization_params.MEAN_OF_SQUARES_KEY: 0.
    }
    field_names = example_io.PREDICTOR_NAMES + example_io.TARGET_NAMES

    z_score_dict_with_height = {}
    z_score_dict_no_height = {}
    frequency_dict_with_height = {}
    frequency_dict_no_height = {}

    for this_field_name in field_names:
        z_score_dict_no_height[this_field_name] = copy.deepcopy(
            orig_parameter_dict
        )
        frequency_dict_no_height[this_field_name] = dict()

        for this_height_m_agl in heights_m_agl:
            z_score_dict_with_height[this_field_name, this_height_m_agl] = (
                copy.deepcopy(orig_parameter_dict)
            )
            frequency_dict_with_height[this_field_name, this_height_m_agl] = (
                dict()
            )

    for i in range(num_years):
        print('Reading data from: "{0:s}"...'.format(example_file_names[i]))
        this_example_dict = example_io.read_file(example_file_names[i])

        for this_field_name in field_names:
            print('Updating normalization params for "{0:s}"...'.format(
                this_field_name
            ))
            this_data_matrix = example_io.get_field_from_dict(
                example_dict=this_example_dict, field_name=this_field_name,
                height_m_agl=None
            )

            this_data_matrix = normalization.convert_to_log_if_necessary(
                physical_values=this_data_matrix, field_name=this_field_name
            )

            z_score_dict_no_height[this_field_name] = (
                normalization_params.update_z_score_params(
                    z_score_param_dict=z_score_dict_no_height[this_field_name],
                    new_data_matrix=this_data_matrix
                )
            )
            frequency_dict_no_height[this_field_name] = (
                normalization_params.update_frequency_dict(
                    frequency_dict=frequency_dict_no_height[this_field_name],
                    new_data_matrix=this_data_matrix,
                    rounding_base=FIELD_TO_ROUNDING_BASE_SCALAR[this_field_name]
                )
            )

            for this_height_m_agl in heights_m_agl:

                # TODO(thunderhoser): Could probably speed up code by skipping
                # this step for scalar fields.
                if this_field_name in SCALAR_FIELD_NAMES:
                    z_score_dict_with_height[
                        this_field_name, this_height_m_agl
                    ] = copy.deepcopy(z_score_dict_no_height[this_field_name])

                    frequency_dict_with_height[
                        this_field_name, this_height_m_agl
                    ] = copy.deepcopy(frequency_dict_no_height[this_field_name])

                    continue

                print((
                    'Updating normalization params for "{0:s}" at {1:d} m '
                    'AGL...'
                ).format(
                    this_field_name, this_height_m_agl
                ))

                this_data_matrix = example_io.get_field_from_dict(
                    example_dict=this_example_dict, field_name=this_field_name,
                    height_m_agl=this_height_m_agl
                )
                this_data_matrix = normalization.convert_to_log_if_necessary(
                    physical_values=this_data_matrix, field_name=this_field_name
                )

                this_dict = z_score_dict_with_height[
                    this_field_name, this_height_m_agl
                ]
                this_dict = normalization_params.update_z_score_params(
                    z_score_param_dict=this_dict,
                    new_data_matrix=this_data_matrix
                )
                z_score_dict_with_height[
                    this_field_name, this_height_m_agl
                ] = this_dict

                this_dict = frequency_dict_with_height[
                    this_field_name, this_height_m_agl
                ]
                this_dict = normalization_params.update_frequency_dict(
                    frequency_dict=this_dict,
                    new_data_matrix=this_data_matrix,
                    rounding_base=FIELD_TO_ROUNDING_BASE_VECTOR[this_field_name]
                )
                frequency_dict_with_height[
                    this_field_name, this_height_m_agl
                ] = this_dict

    norm_table_no_height = normalization_params.finalize_params(
        z_score_dict_dict=z_score_dict_no_height,
        frequency_dict_dict=frequency_dict_no_height,
        min_percentile_level=min_percentile_level,
        max_percentile_level=max_percentile_level
    )

    print((
        'Overall normalization params (not separated by height):\n{0:s}\n\n'
    ).format(
        str(norm_table_no_height)
    ))

    norm_table_with_height = normalization_params.finalize_params(
        z_score_dict_dict=z_score_dict_with_height,
        frequency_dict_dict=frequency_dict_with_height,
        min_percentile_level=min_percentile_level,
        max_percentile_level=max_percentile_level
    )

    print('Normalization params separated by height:\n{0:s}\n\n'.format(
        str(norm_table_with_height)
    ))

    normalization_params.write_file(
        pickle_file_name=output_file_name,
        norm_table_no_height=norm_table_no_height,
        norm_table_with_height=norm_table_with_height
    )
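
The z-score parameters are accumulated incrementally: each dict tracks the number of values, their running mean, and the running mean of squares, from which the standard deviation follows at the end via Var[x] = E[x^2] - E[x]^2. A hedged sketch of what normalization_params.update_z_score_params plausibly does -- the function body and the plain-string keys here are assumptions, not the library's actual code:

import numpy

def update_z_score_params(z_score_param_dict, new_data_matrix):
    """Updates running count, mean, and mean of squares (assumed logic)."""
    num_new = new_data_matrix.size
    num_old = z_score_param_dict['num_values']
    num_total = num_old + num_new

    z_score_param_dict['mean_value'] = (
        num_old * z_score_param_dict['mean_value'] +
        num_new * numpy.mean(new_data_matrix)
    ) / num_total
    z_score_param_dict['mean_of_squares'] = (
        num_old * z_score_param_dict['mean_of_squares'] +
        num_new * numpy.mean(new_data_matrix ** 2)
    ) / num_total
    z_score_param_dict['num_values'] = num_total
    return z_score_param_dict

d = {'num_values': 0, 'mean_value': 0., 'mean_of_squares': 0.}
d = update_z_score_params(d, numpy.array([1., 2., 3.]))
stdev = numpy.sqrt(d['mean_of_squares'] - d['mean_value'] ** 2)
print(d['mean_value'], stdev)  # 2.0 and ~0.816 (population stdev)
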
Example #10

def _run(input_prediction_file_name, average_over_height, scale_by_climo,
         num_examples_per_set, output_dir_name):
    """Finds best and worst heating-rate predictions.

    This is effectively the main method.

    :param input_prediction_file_name: See documentation at top of file.
    :param average_over_height: Same.
    :param scale_by_climo: Same.
    :param num_examples_per_set: Same.
    :param output_dir_name: Same.
    """

    # TODO(thunderhoser): Maybe allow specific height again (e.g., 15 km).

    error_checking.assert_is_greater(num_examples_per_set, 0)

    # Climatological scaling is defined per height, so it cannot be combined
    # with averaging over height.
    scale_by_climo = scale_by_climo and not average_over_height

    print('Reading data from: "{0:s}"...'.format(input_prediction_file_name))
    prediction_dict = prediction_io.read_file(input_prediction_file_name)

    model_file_name = prediction_dict[prediction_io.MODEL_FILE_KEY]
    model_metafile_name = neural_net.find_metafile(
        model_dir_name=os.path.split(model_file_name)[0])

    print(
        'Reading model metadata from: "{0:s}"...'.format(model_metafile_name))
    model_metadata_dict = neural_net.read_metafile(model_metafile_name)
    generator_option_dict = model_metadata_dict[
        neural_net.TRAINING_OPTIONS_KEY]

    vector_target_names = (
        generator_option_dict[neural_net.VECTOR_TARGET_NAMES_KEY])
    hr_index = (vector_target_names.index(
        example_utils.SHORTWAVE_HEATING_RATE_NAME))

    target_matrix_k_day01 = (
        prediction_dict[prediction_io.VECTOR_TARGETS_KEY][..., hr_index])
    prediction_matrix_k_day01 = (
        prediction_dict[prediction_io.VECTOR_PREDICTIONS_KEY][..., hr_index])

    bias_matrix = prediction_matrix_k_day01 - target_matrix_k_day01
    absolute_error_matrix = numpy.absolute(bias_matrix)

    if average_over_height:
        bias_matrix = numpy.mean(bias_matrix, axis=1, keepdims=True)
        absolute_error_matrix = numpy.mean(absolute_error_matrix,
                                           axis=1,
                                           keepdims=True)

    if scale_by_climo:
        normalization_file_name = (
            generator_option_dict[neural_net.NORMALIZATION_FILE_KEY])

        print(('Reading training examples (for climatology) from: "{0:s}"...'
               ).format(normalization_file_name))

        training_example_dict = example_io.read_file(normalization_file_name)
        training_example_dict = example_utils.subset_by_field(
            example_dict=training_example_dict,
            field_names=[example_utils.SHORTWAVE_HEATING_RATE_NAME])
        training_example_dict = example_utils.subset_by_height(
            example_dict=training_example_dict,
            heights_m_agl=generator_option_dict[neural_net.HEIGHTS_KEY])

        dummy_example_dict = {
            example_utils.SCALAR_PREDICTOR_NAMES_KEY: [],
            example_utils.VECTOR_PREDICTOR_NAMES_KEY: [],
            example_utils.SCALAR_TARGET_NAMES_KEY: [],
            example_utils.VECTOR_TARGET_NAMES_KEY:
            [example_utils.SHORTWAVE_HEATING_RATE_NAME],
            example_utils.HEIGHTS_KEY:
            generator_option_dict[neural_net.HEIGHTS_KEY]
        }

        mean_training_example_dict = normalization.create_mean_example(
            new_example_dict=dummy_example_dict,
            training_example_dict=training_example_dict)
        climo_matrix_k_day01 = mean_training_example_dict[
            example_utils.VECTOR_TARGET_VALS_KEY][..., 0]

        bias_matrix = bias_matrix / climo_matrix_k_day01
        absolute_error_matrix = absolute_error_matrix / climo_matrix_k_day01

    print(SEPARATOR_STRING)
    high_bias_indices, low_bias_indices, low_abs_error_indices = (
        misc_utils.find_best_and_worst_predictions(
            bias_matrix=bias_matrix,
            absolute_error_matrix=absolute_error_matrix,
            num_examples_per_set=num_examples_per_set))
    print(SEPARATOR_STRING)

    high_bias_prediction_dict = prediction_io.subset_by_index(
        prediction_dict=copy.deepcopy(prediction_dict),
        desired_indices=high_bias_indices)
    high_bias_file_name = (
        '{0:s}/predictions_high-bias.nc'.format(output_dir_name))

    print('Writing examples with greatest positive bias to: "{0:s}"...'.format(
        high_bias_file_name))
    prediction_io.write_file(
        netcdf_file_name=high_bias_file_name,
        scalar_target_matrix=high_bias_prediction_dict[
            prediction_io.SCALAR_TARGETS_KEY],
        vector_target_matrix=high_bias_prediction_dict[
            prediction_io.VECTOR_TARGETS_KEY],
        scalar_prediction_matrix=high_bias_prediction_dict[
            prediction_io.SCALAR_PREDICTIONS_KEY],
        vector_prediction_matrix=high_bias_prediction_dict[
            prediction_io.VECTOR_PREDICTIONS_KEY],
        heights_m_agl=high_bias_prediction_dict[prediction_io.HEIGHTS_KEY],
        example_id_strings=high_bias_prediction_dict[
            prediction_io.EXAMPLE_IDS_KEY],
        model_file_name=high_bias_prediction_dict[
            prediction_io.MODEL_FILE_KEY])

    low_bias_prediction_dict = prediction_io.subset_by_index(
        prediction_dict=copy.deepcopy(prediction_dict),
        desired_indices=low_bias_indices)
    low_bias_file_name = (
        '{0:s}/predictions_low-bias.nc'.format(output_dir_name))

    print('Writing examples with greatest negative bias to: "{0:s}"...'.format(
        low_bias_file_name))
    prediction_io.write_file(
        netcdf_file_name=low_bias_file_name,
        scalar_target_matrix=low_bias_prediction_dict[
            prediction_io.SCALAR_TARGETS_KEY],
        vector_target_matrix=low_bias_prediction_dict[
            prediction_io.VECTOR_TARGETS_KEY],
        scalar_prediction_matrix=low_bias_prediction_dict[
            prediction_io.SCALAR_PREDICTIONS_KEY],
        vector_prediction_matrix=low_bias_prediction_dict[
            prediction_io.VECTOR_PREDICTIONS_KEY],
        heights_m_agl=low_bias_prediction_dict[prediction_io.HEIGHTS_KEY],
        example_id_strings=low_bias_prediction_dict[
            prediction_io.EXAMPLE_IDS_KEY],
        model_file_name=low_bias_prediction_dict[prediction_io.MODEL_FILE_KEY])

    low_abs_error_prediction_dict = prediction_io.subset_by_index(
        prediction_dict=copy.deepcopy(prediction_dict),
        desired_indices=low_abs_error_indices)
    low_abs_error_file_name = (
        '{0:s}/predictions_low-absolute-error.nc'.format(output_dir_name))

    print(
        'Writing examples with smallest absolute error to: "{0:s}"...'.format(
            low_abs_error_file_name))
    prediction_io.write_file(
        netcdf_file_name=low_abs_error_file_name,
        scalar_target_matrix=low_abs_error_prediction_dict[
            prediction_io.SCALAR_TARGETS_KEY],
        vector_target_matrix=low_abs_error_prediction_dict[
            prediction_io.VECTOR_TARGETS_KEY],
        scalar_prediction_matrix=low_abs_error_prediction_dict[
            prediction_io.SCALAR_PREDICTIONS_KEY],
        vector_prediction_matrix=low_abs_error_prediction_dict[
            prediction_io.VECTOR_PREDICTIONS_KEY],
        heights_m_agl=low_abs_error_prediction_dict[prediction_io.HEIGHTS_KEY],
        example_id_strings=low_abs_error_prediction_dict[
            prediction_io.EXAMPLE_IDS_KEY],
        model_file_name=low_abs_error_prediction_dict[
            prediction_io.MODEL_FILE_KEY])

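    # If errors were scaled by climatology, only the bias- and
    # absolute-error-based subsets are written.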
    if scale_by_climo:
        return

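    # Rank examples by heating rate, using either the mean or the maximum
    # over height.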
    if average_over_height:
        mean_targets_k_day01 = numpy.mean(target_matrix_k_day01, axis=1)
        sort_indices = numpy.argsort(-1 * mean_targets_k_day01)
    else:
        max_targets_k_day01 = numpy.max(target_matrix_k_day01, axis=1)
        sort_indices = numpy.argsort(-1 * max_targets_k_day01)

    large_hr_indices = sort_indices[:num_examples_per_set]
    large_hr_prediction_dict = prediction_io.subset_by_index(
        prediction_dict=copy.deepcopy(prediction_dict),
        desired_indices=large_hr_indices)
    large_hr_file_name = (
        '{0:s}/predictions_large-heating-rate.nc'.format(output_dir_name))

    print('Writing examples with greatest heating rate to: "{0:s}"...'.format(
        large_hr_file_name))
    prediction_io.write_file(
        netcdf_file_name=large_hr_file_name,
        scalar_target_matrix=large_hr_prediction_dict[
            prediction_io.SCALAR_TARGETS_KEY],
        vector_target_matrix=large_hr_prediction_dict[
            prediction_io.VECTOR_TARGETS_KEY],
        scalar_prediction_matrix=large_hr_prediction_dict[
            prediction_io.SCALAR_PREDICTIONS_KEY],
        vector_prediction_matrix=large_hr_prediction_dict[
            prediction_io.VECTOR_PREDICTIONS_KEY],
        heights_m_agl=large_hr_prediction_dict[prediction_io.HEIGHTS_KEY],
        example_id_strings=large_hr_prediction_dict[
            prediction_io.EXAMPLE_IDS_KEY],
        model_file_name=large_hr_prediction_dict[prediction_io.MODEL_FILE_KEY])

    if not average_over_height:
        return

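    # Find the examples with the smallest mean heating rate (ascending sort).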
    mean_targets_k_day01 = numpy.mean(target_matrix_k_day01, axis=1)
    sort_indices = numpy.argsort(mean_targets_k_day01)
    small_hr_indices = sort_indices[:num_examples_per_set]

    small_hr_prediction_dict = prediction_io.subset_by_index(
        prediction_dict=copy.deepcopy(prediction_dict),
        desired_indices=small_hr_indices)
    small_hr_file_name = (
        '{0:s}/predictions_small-heating-rate.nc'.format(output_dir_name))

    print('Writing examples with smallest heating rate to: "{0:s}"...'.format(
        small_hr_file_name))
    prediction_io.write_file(
        netcdf_file_name=small_hr_file_name,
        scalar_target_matrix=small_hr_prediction_dict[
            prediction_io.SCALAR_TARGETS_KEY],
        vector_target_matrix=small_hr_prediction_dict[
            prediction_io.VECTOR_TARGETS_KEY],
        scalar_prediction_matrix=small_hr_prediction_dict[
            prediction_io.SCALAR_PREDICTIONS_KEY],
        vector_prediction_matrix=small_hr_prediction_dict[
            prediction_io.VECTOR_PREDICTIONS_KEY],
        heights_m_agl=small_hr_prediction_dict[prediction_io.HEIGHTS_KEY],
        example_id_strings=small_hr_prediction_dict[
            prediction_io.EXAMPLE_IDS_KEY],
        model_file_name=small_hr_prediction_dict[prediction_io.MODEL_FILE_KEY])
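The subsetting above leans on a common NumPy idiom: argsort on the negated array returns indices in descending order, so the first num_examples_per_set entries index the largest values. A minimal, self-contained sketch with toy values (not from any data set):

import numpy

values = numpy.array([3., 9., 1., 7.])

# Negating the array makes argsort return indices from largest to
# smallest value.
descending_indices = numpy.argsort(-1 * values)
print(descending_indices[:2])  # [1 3] -> the two largest values (9., 7.)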
Example #11
def _run(tropical_example_dir_name, non_tropical_example_dir_name, year,
         assorted1_example_dir_name, assorted2_example_dir_name):
    """Splits examples into Assorted1 and Assorted2 sites.

    This is effectively the main method.

    :param tropical_example_dir_name: See documentation at top of file.
    :param non_tropical_example_dir_name: Same.
    :param year: Same.
    :param assorted1_example_dir_name: Same.
    :param assorted2_example_dir_name: Same.
    """

    tropical_example_file_name = example_io.find_file(
        directory_name=tropical_example_dir_name, year=year,
        raise_error_if_missing=True
    )
    non_tropical_example_file_name = example_io.find_file(
        directory_name=non_tropical_example_dir_name, year=year,
        raise_error_if_missing=True
    )

    print('Reading data from: "{0:s}"...'.format(tropical_example_file_name))
    tropical_example_dict = example_io.read_file(tropical_example_file_name)

    print('Reading data from: "{0:s}"...'.format(
        non_tropical_example_file_name
    ))
    non_tropical_example_dict = example_io.read_file(
        non_tropical_example_file_name
    )

    example_dict = example_utils.concat_examples([
        tropical_example_dict, non_tropical_example_dict
    ])
    del tropical_example_dict, non_tropical_example_dict

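    # Parse example IDs to recover each example's latitude and longitude.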
    example_metadata_dict = example_utils.parse_example_ids(
        example_dict[example_utils.EXAMPLE_IDS_KEY]
    )
    example_latitudes_deg_n = example_metadata_dict[example_utils.LATITUDES_KEY]
    example_longitudes_deg_e = lng_conversion.convert_lng_positive_in_west(
        example_metadata_dict[example_utils.LONGITUDES_KEY]
    )

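    # Compute squared Euclidean distances, in degrees squared, between each
    # example's coordinates and each Assorted2 site.  An example within
    # TOLERANCE_DEG2 of any site is assigned to Assorted2.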
    example_coord_matrix = numpy.transpose(numpy.vstack((
        example_latitudes_deg_n, example_longitudes_deg_e
    )))
    assorted2_coord_matrix = numpy.transpose(numpy.vstack((
        ASSORTED2_LATITUDES_DEG_N, ASSORTED2_LONGITUDES_DEG_E
    )))
    distance_matrix_deg2 = euclidean_distances(
        X=example_coord_matrix, Y=assorted2_coord_matrix, squared=True
    )

    assorted2_flags = numpy.any(distance_matrix_deg2 <= TOLERANCE_DEG2, axis=1)
    assorted2_example_dict = example_utils.subset_by_index(
        example_dict=copy.deepcopy(example_dict),
        desired_indices=numpy.where(assorted2_flags)[0]
    )
    assorted2_example_file_name = example_io.find_file(
        directory_name=assorted2_example_dir_name, year=year,
        raise_error_if_missing=False
    )

    print('Writing {0:d} examples in set Assorted2 to: "{1:s}"...'.format(
        len(assorted2_example_dict[example_utils.VALID_TIMES_KEY]),
        assorted2_example_file_name
    ))
    example_io.write_file(
        example_dict=assorted2_example_dict,
        netcdf_file_name=assorted2_example_file_name
    )

    assorted1_example_dict = example_utils.subset_by_index(
        example_dict=example_dict,
        desired_indices=numpy.where(numpy.invert(assorted2_flags))[0]
    )
    assorted1_example_file_name = example_io.find_file(
        directory_name=assorted1_example_dir_name, year=year,
        raise_error_if_missing=False
    )

    print('Writing {0:d} examples in set Assorted1 to: "{1:s}"...'.format(
        len(assorted1_example_dict[example_utils.VALID_TIMES_KEY]),
        assorted1_example_file_name
    ))
    example_io.write_file(
        example_dict=assorted1_example_dict,
        netcdf_file_name=assorted1_example_file_name
    )
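The site-matching step above reduces to thresholding a pairwise squared-distance matrix. A minimal sketch with hypothetical coordinates and a hypothetical tolerance of 1 deg^2 (the real script uses ASSORTED2_LATITUDES_DEG_N, ASSORTED2_LONGITUDES_DEG_E, and TOLERANCE_DEG2, defined at the top of the file):

import numpy
from sklearn.metrics.pairwise import euclidean_distances

# Each row is (latitude in deg N, longitude in deg E).
example_coord_matrix = numpy.array([
    [0.1, 40.0],    # near the first site
    [55.0, 240.0]   # far from both sites
])
site_coord_matrix = numpy.array([
    [0.0, 40.0],
    [-10.0, 300.0]
])

distance_matrix_deg2 = euclidean_distances(
    X=example_coord_matrix, Y=site_coord_matrix, squared=True
)

# True where an example lies within 1 deg^2 of at least one site.
matching_flags = numpy.any(distance_matrix_deg2 <= 1., axis=1)
print(matching_flags)  # [ True False]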