예제 #1
0
    def test_find_many_files(self):
        """Ensures correct output from find_many_files."""

        these_file_names = example_io.find_many_files(
            directory_name=EXAMPLE_DIR_NAME,
            first_time_unix_sec=FIRST_FILE_TIME_UNIX_SEC,
            last_time_unix_sec=LAST_FILE_TIME_UNIX_SEC,
            raise_error_if_any_missing=False,
            raise_error_if_all_missing=False,
            test_mode=True)

        self.assertTrue(these_file_names == EXAMPLE_FILE_NAMES)
def _run(tropical_example_dir_name, non_tropical_example_dir_name,
         num_histogram_bins, output_dir_name):
    """Plots distribution of each target variable.

    This is effectively the main method.

    :param tropical_example_dir_name: See documentation at top of file.
    :param non_tropical_example_dir_name: Same.
    :param num_histogram_bins: Same.
    :param output_dir_name: Same.
    """

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    first_time_unix_sec = (
        time_conversion.first_and_last_times_in_year(FIRST_YEAR)[0])
    last_time_unix_sec = (
        time_conversion.first_and_last_times_in_year(LAST_YEAR)[-1])

    example_file_names = example_io.find_many_files(
        directory_name=tropical_example_dir_name,
        first_time_unix_sec=first_time_unix_sec,
        last_time_unix_sec=last_time_unix_sec,
        raise_error_if_all_missing=True,
        raise_error_if_any_missing=True)

    example_file_names += example_io.find_many_files(
        directory_name=non_tropical_example_dir_name,
        first_time_unix_sec=first_time_unix_sec,
        last_time_unix_sec=last_time_unix_sec,
        raise_error_if_all_missing=True,
        raise_error_if_any_missing=True)

    example_dicts = []

    for this_file_name in example_file_names:
        print('Reading data from: "{0:s}"...'.format(this_file_name))
        this_example_dict = example_io.read_file(this_file_name)
        this_example_dict = example_utils.subset_by_field(
            example_dict=this_example_dict, field_names=TARGET_NAMES_IN_FILE)

        example_dicts.append(this_example_dict)

    example_dict = example_utils.concat_examples(example_dicts)
    del example_dicts

    letter_label = None
    panel_file_names = []

    for this_target_name in TARGET_NAMES:
        if this_target_name in TARGET_NAMES_IN_FILE:
            these_target_values = example_utils.get_field_from_dict(
                example_dict=example_dict, field_name=this_target_name)
        else:
            down_fluxes_w_m02 = example_utils.get_field_from_dict(
                example_dict=example_dict,
                field_name=example_utils.SHORTWAVE_SURFACE_DOWN_FLUX_NAME)
            up_fluxes_w_m02 = example_utils.get_field_from_dict(
                example_dict=example_dict,
                field_name=example_utils.SHORTWAVE_TOA_UP_FLUX_NAME)
            these_target_values = down_fluxes_w_m02 - up_fluxes_w_m02

        these_target_values = numpy.ravel(these_target_values)

        if letter_label is None:
            letter_label = 'a'
        else:
            letter_label = chr(ord(letter_label) + 1)

        this_file_name = _plot_histogram_one_target(
            target_values=these_target_values,
            target_name=this_target_name,
            num_bins=num_histogram_bins,
            letter_label=letter_label,
            output_dir_name=output_dir_name)
        panel_file_names.append(this_file_name)

    concat_file_name = '{0:s}/target_distributions.jpg'.format(output_dir_name)
    print('Concatenating panels to: "{0:s}"...'.format(concat_file_name))

    imagemagick_utils.concatenate_images(input_file_names=panel_file_names,
                                         output_file_name=concat_file_name,
                                         num_panel_rows=2,
                                         num_panel_columns=2,
                                         border_width_pixels=25)
    imagemagick_utils.trim_whitespace(input_file_name=concat_file_name,
                                      output_file_name=concat_file_name)
예제 #3
0
def _run(tropical_example_dir_name, non_tropical_example_dir_name,
         output_file_name):
    """Plots all sites wtih data.

    This is effectively the main method.

    :param tropical_example_dir_name: See documentation at top of file.
    :param non_tropical_example_dir_name: Same.
    :param output_file_name: Same.
    """

    first_time_unix_sec = (
        time_conversion.first_and_last_times_in_year(FIRST_YEAR)[0])
    last_time_unix_sec = (
        time_conversion.first_and_last_times_in_year(LAST_YEAR)[-1])

    tropical_file_names = example_io.find_many_files(
        directory_name=tropical_example_dir_name,
        first_time_unix_sec=first_time_unix_sec,
        last_time_unix_sec=last_time_unix_sec,
        raise_error_if_all_missing=True,
        raise_error_if_any_missing=False)

    non_tropical_file_names = example_io.find_many_files(
        directory_name=non_tropical_example_dir_name,
        first_time_unix_sec=first_time_unix_sec,
        last_time_unix_sec=last_time_unix_sec,
        raise_error_if_all_missing=True,
        raise_error_if_any_missing=False)

    latitudes_deg_n = numpy.array([])
    longitudes_deg_e = numpy.array([])

    for this_file_name in tropical_file_names:
        print('Reading data from: "{0:s}"...'.format(this_file_name))
        this_example_dict = example_io.read_file(this_file_name)

        these_latitudes_deg_n = example_utils.get_field_from_dict(
            example_dict=this_example_dict,
            field_name=example_utils.LATITUDE_NAME)
        these_longitudes_deg_e = example_utils.get_field_from_dict(
            example_dict=this_example_dict,
            field_name=example_utils.LONGITUDE_NAME)

        latitudes_deg_n = numpy.concatenate(
            (latitudes_deg_n, these_latitudes_deg_n))
        longitudes_deg_e = numpy.concatenate(
            (longitudes_deg_e, these_longitudes_deg_e))

    for this_file_name in non_tropical_file_names:
        print('Reading data from: "{0:s}"...'.format(this_file_name))
        this_example_dict = example_io.read_file(this_file_name)

        these_latitudes_deg_n = example_utils.get_field_from_dict(
            example_dict=this_example_dict,
            field_name=example_utils.LATITUDE_NAME)
        these_longitudes_deg_e = example_utils.get_field_from_dict(
            example_dict=this_example_dict,
            field_name=example_utils.LONGITUDE_NAME)

        latitudes_deg_n = numpy.concatenate(
            (latitudes_deg_n, these_latitudes_deg_n))
        longitudes_deg_e = numpy.concatenate(
            (longitudes_deg_e, these_longitudes_deg_e))

    coord_matrix = numpy.transpose(
        numpy.vstack((latitudes_deg_n, longitudes_deg_e)))
    coord_matrix = number_rounding.round_to_nearest(coord_matrix,
                                                    LATLNG_TOLERANCE_DEG)
    coord_matrix = numpy.unique(coord_matrix, axis=0)

    latitudes_deg_n = coord_matrix[:, 0]
    longitudes_deg_e = coord_matrix[:, 1]

    figure_object, axes_object, basemap_object = (
        plotting_utils.create_equidist_cylindrical_map(
            min_latitude_deg=MIN_PLOT_LATITUDE_DEG_N,
            max_latitude_deg=MAX_PLOT_LATITUDE_DEG_N,
            min_longitude_deg=MIN_PLOT_LONGITUDE_DEG_E,
            max_longitude_deg=MAX_PLOT_LONGITUDE_DEG_E,
            resolution_string='l'))

    plotting_utils.plot_coastlines(basemap_object=basemap_object,
                                   axes_object=axes_object,
                                   line_colour=BORDER_COLOUR,
                                   line_width=BORDER_WIDTH)
    plotting_utils.plot_countries(basemap_object=basemap_object,
                                  axes_object=axes_object,
                                  line_colour=BORDER_COLOUR,
                                  line_width=BORDER_WIDTH)
    plotting_utils.plot_parallels(basemap_object=basemap_object,
                                  axes_object=axes_object,
                                  num_parallels=NUM_PARALLELS,
                                  line_colour=GRID_LINE_COLOUR,
                                  line_width=GRID_LINE_WIDTH,
                                  font_size=FONT_SIZE)
    plotting_utils.plot_meridians(basemap_object=basemap_object,
                                  axes_object=axes_object,
                                  num_meridians=NUM_MERIDIANS,
                                  line_colour=GRID_LINE_COLOUR,
                                  line_width=GRID_LINE_WIDTH,
                                  font_size=FONT_SIZE)

    arctic_indices = numpy.where(latitudes_deg_n >= 66.5)[0]
    print(len(arctic_indices))

    arctic_x_coords, arctic_y_coords = basemap_object(
        longitudes_deg_e[arctic_indices], latitudes_deg_n[arctic_indices])
    axes_object.plot(arctic_x_coords,
                     arctic_y_coords,
                     linestyle='None',
                     marker=MARKER_TYPE,
                     markersize=MARKER_SIZE,
                     markeredgewidth=0,
                     markerfacecolor=ARCTIC_COLOUR,
                     markeredgecolor=ARCTIC_COLOUR)

    mid_latitude_indices = numpy.where(
        numpy.logical_and(latitudes_deg_n >= 30., latitudes_deg_n < 66.5))[0]
    print(len(mid_latitude_indices))

    mid_latitude_x_coords, mid_latitude_y_coords = basemap_object(
        longitudes_deg_e[mid_latitude_indices],
        latitudes_deg_n[mid_latitude_indices])
    axes_object.plot(mid_latitude_x_coords,
                     mid_latitude_y_coords,
                     linestyle='None',
                     marker=MARKER_TYPE,
                     markersize=MARKER_SIZE,
                     markeredgewidth=0,
                     markerfacecolor=MID_LATITUDE_COLOUR,
                     markeredgecolor=MID_LATITUDE_COLOUR)

    tropical_indices = numpy.where(latitudes_deg_n < 30.)[0]
    print(len(tropical_indices))

    tropical_x_coords, tropical_y_coords = basemap_object(
        longitudes_deg_e[tropical_indices], latitudes_deg_n[tropical_indices])
    axes_object.plot(tropical_x_coords,
                     tropical_y_coords,
                     linestyle='None',
                     marker=MARKER_TYPE,
                     markersize=MARKER_SIZE,
                     markeredgewidth=0,
                     markerfacecolor=TROPICAL_COLOUR,
                     markeredgecolor=TROPICAL_COLOUR)

    file_system_utils.mkdir_recursive_if_necessary(file_name=output_file_name)

    print('Saving figure to: "{0:s}"...'.format(output_file_name))
    figure_object.savefig(output_file_name,
                          dpi=FIGURE_RESOLUTION_DPI,
                          pad_inches=0,
                          bbox_inches='tight')
    pyplot.close(figure_object)
def _run(model_file_name, example_file_name, num_examples, example_dir_name,
         example_id_file_name, layer_name, neuron_indices, ideal_activation,
         num_iterations, learning_rate, l2_weight, output_file_name):
    """Runs backwards optimization.

    This is effectively the main method.

    :param model_file_name: See documentation at top of file.
    :param example_file_name: Same.
    :param num_examples: Same.
    :param example_dir_name: Same.
    :param example_id_file_name: Same.
    :param layer_name: Same.
    :param neuron_indices: Same.
    :param ideal_activation: Same.
    :param num_iterations: Same.
    :param learning_rate: Same.
    :param l2_weight: Same.
    :param output_file_name: Same.
    """

    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = neural_net.read_model(model_file_name)

    metafile_name = neural_net.find_metafile(
        model_dir_name=os.path.split(model_file_name)[0],
        raise_error_if_missing=True
    )

    print('Reading metadata from: "{0:s}"...'.format(metafile_name))
    metadata_dict = neural_net.read_metafile(metafile_name)

    predictor_matrix, _, example_id_strings = (
        misc_utils.get_examples_for_inference(
            model_metadata_dict=metadata_dict,
            example_file_name=example_file_name,
            num_examples=num_examples, example_dir_name=example_dir_name,
            example_id_file_name=example_id_file_name
        )
    )
    print(SEPARATOR_STRING)

    generator_option_dict = metadata_dict[neural_net.TRAINING_OPTIONS_KEY]
    normalization_file_name = (
        generator_option_dict[neural_net.NORMALIZATION_FILE_KEY]
    )

    print((
        'Reading training examples (for normalization) from: "{0:s}"...'
    ).format(
        normalization_file_name
    ))
    training_example_dict = example_io.read_file(normalization_file_name)
    training_example_dict = example_utils.subset_by_height(
        example_dict=training_example_dict,
        heights_m_agl=generator_option_dict[neural_net.HEIGHTS_KEY]
    )

    num_examples = len(example_id_strings)
    bwo_dict = None

    for i in range(num_examples):
        this_bwo_dict = bwo.optimize_input_for_neuron(
            model_object=model_object,
            init_function_or_matrix=predictor_matrix[i, ...],
            layer_name=layer_name, neuron_indices=neuron_indices,
            ideal_activation=ideal_activation, num_iterations=num_iterations,
            learning_rate=learning_rate, l2_weight=l2_weight
        )

        if i == num_examples - 1:
            print(SEPARATOR_STRING)
        else:
            print(MINOR_SEPARATOR_STRING)

        if bwo_dict is None:
            these_dim = numpy.array(
                (num_examples,) +
                this_bwo_dict[bwo.INITIAL_PREDICTORS_KEY].shape[1:],
                dtype=int
            )

            bwo_dict = {
                bwo.INITIAL_PREDICTORS_KEY: numpy.full(these_dim, numpy.nan),
                bwo.FINAL_PREDICTORS_KEY: numpy.full(these_dim, numpy.nan),
                bwo.INITIAL_ACTIVATIONS_KEY:
                    numpy.full(num_examples, numpy.nan),
                bwo.FINAL_ACTIVATIONS_KEY: numpy.full(num_examples, numpy.nan)
            }

        bwo_dict[bwo.INITIAL_PREDICTORS_KEY][i, ...] = (
            this_bwo_dict[bwo.INITIAL_PREDICTORS_KEY][0, ...]
        )
        bwo_dict[bwo.FINAL_PREDICTORS_KEY][i, ...] = (
            this_bwo_dict[bwo.FINAL_PREDICTORS_KEY][0, ...]
        )
        bwo_dict[bwo.INITIAL_ACTIVATIONS_KEY][i] = (
            this_bwo_dict[bwo.INITIAL_ACTIVATION_KEY]
        )
        bwo_dict[bwo.FINAL_ACTIVATIONS_KEY][i] = (
            this_bwo_dict[bwo.FINAL_ACTIVATION_KEY]
        )

    if example_file_name == '':
        example_file_name = example_io.find_many_files(
            directory_name=example_dir_name,
            first_time_unix_sec=0, last_time_unix_sec=int(1e12),
            raise_error_if_any_missing=False, raise_error_if_all_missing=True
        )[0]

    first_example_dict = example_io.read_file(example_file_name)
    first_example_dict = example_utils.subset_by_height(
        example_dict=first_example_dict,
        heights_m_agl=generator_option_dict[neural_net.HEIGHTS_KEY]
    )

    net_type_string = metadata_dict[neural_net.NET_TYPE_KEY]

    init_example_dict = copy.deepcopy(first_example_dict)
    this_example_dict = neural_net.predictors_numpy_to_dict(
        predictor_matrix=bwo_dict[bwo.INITIAL_PREDICTORS_KEY],
        example_dict=init_example_dict, net_type_string=net_type_string
    )
    init_example_dict.update(this_example_dict)

    if generator_option_dict[neural_net.PREDICTOR_NORM_TYPE_KEY] is not None:
        init_example_dict = normalization.denormalize_data(
            new_example_dict=init_example_dict,
            training_example_dict=training_example_dict,
            normalization_type_string=
            generator_option_dict[neural_net.PREDICTOR_NORM_TYPE_KEY],
            min_normalized_value=
            generator_option_dict[neural_net.PREDICTOR_MIN_NORM_VALUE_KEY],
            max_normalized_value=
            generator_option_dict[neural_net.PREDICTOR_MAX_NORM_VALUE_KEY],
            separate_heights=True, apply_to_predictors=True,
            apply_to_vector_targets=False, apply_to_scalar_targets=False
        )

    init_scalar_predictor_matrix = (
        init_example_dict[example_utils.SCALAR_PREDICTOR_VALS_KEY]
    )
    init_vector_predictor_matrix = (
        init_example_dict[example_utils.VECTOR_PREDICTOR_VALS_KEY]
    )

    final_example_dict = copy.deepcopy(first_example_dict)
    this_example_dict = neural_net.predictors_numpy_to_dict(
        predictor_matrix=bwo_dict[bwo.FINAL_PREDICTORS_KEY],
        example_dict=final_example_dict, net_type_string=net_type_string
    )
    final_example_dict.update(this_example_dict)

    if generator_option_dict[neural_net.PREDICTOR_NORM_TYPE_KEY] is not None:
        final_example_dict = normalization.denormalize_data(
            new_example_dict=final_example_dict,
            training_example_dict=training_example_dict,
            normalization_type_string=
            generator_option_dict[neural_net.PREDICTOR_NORM_TYPE_KEY],
            min_normalized_value=
            generator_option_dict[neural_net.PREDICTOR_MIN_NORM_VALUE_KEY],
            max_normalized_value=
            generator_option_dict[neural_net.PREDICTOR_MAX_NORM_VALUE_KEY],
            separate_heights=True, apply_to_predictors=True,
            apply_to_vector_targets=False, apply_to_scalar_targets=False
        )

    final_scalar_predictor_matrix = (
        final_example_dict[example_utils.SCALAR_PREDICTOR_VALS_KEY]
    )
    final_vector_predictor_matrix = (
        final_example_dict[example_utils.VECTOR_PREDICTOR_VALS_KEY]
    )

    print('Writing results to file: "{0:s}"...'.format(output_file_name))
    bwo.write_file(
        netcdf_file_name=output_file_name,
        init_scalar_predictor_matrix=init_scalar_predictor_matrix,
        final_scalar_predictor_matrix=final_scalar_predictor_matrix,
        init_vector_predictor_matrix=init_vector_predictor_matrix,
        final_vector_predictor_matrix=final_vector_predictor_matrix,
        initial_activations=bwo_dict[bwo.INITIAL_ACTIVATIONS_KEY],
        final_activations=bwo_dict[bwo.FINAL_ACTIVATIONS_KEY],
        example_id_strings=example_id_strings, model_file_name=model_file_name,
        layer_name=layer_name, neuron_indices=neuron_indices,
        ideal_activation=ideal_activation, num_iterations=num_iterations,
        learning_rate=learning_rate, l2_weight=l2_weight
    )
예제 #5
0
def get_raw_examples(example_file_name, num_examples, example_dir_name,
                     example_id_file_name):
    """Returns raw examples.

    The difference between `get_raw_examples` and `get_examples_for_inference`
    is that `get_raw_examples` returns examples in their raw form, *not*
    pre-processed to be fed through a model for inference.

    :param example_file_name: See doc for `get_examples_for_inference`.
    :param num_examples: Same.
    :param example_dir_name: Same.
    :param example_id_file_name: Same.
    :return: example_dict: See doc for `example_io.read_file`.
    """

    error_checking.assert_is_string(example_file_name)
    use_specific_ids = example_file_name == ''

    if use_specific_ids:
        error_checking.assert_is_string(example_id_file_name)

        print('Reading desired example IDs from: "{0:s}"...'.format(
            example_id_file_name))
        example_id_strings = read_example_ids_from_netcdf(example_id_file_name)

        valid_times_unix_sec = example_utils.parse_example_ids(
            example_id_strings)[example_utils.VALID_TIMES_KEY]

        example_file_names = example_io.find_many_files(
            directory_name=example_dir_name,
            first_time_unix_sec=numpy.min(valid_times_unix_sec),
            last_time_unix_sec=numpy.max(valid_times_unix_sec))

        num_files = len(example_file_names)
        example_dicts = [dict()] * num_files

        for i in range(num_files):
            print('Reading data from: "{0:s}"...'.format(
                example_file_names[i]))
            example_dicts[i] = example_io.read_file(example_file_names[i])

        example_dict = example_utils.concat_examples(example_dicts)

        good_indices = example_utils.find_examples(
            all_id_strings=example_dict[example_utils.EXAMPLE_IDS_KEY],
            desired_id_strings=example_id_strings,
            allow_missing=False)

        example_dict = example_utils.subset_by_index(
            example_dict=example_dict, desired_indices=good_indices)
    else:
        error_checking.assert_is_string(example_dir_name)
        error_checking.assert_is_integer(num_examples)
        error_checking.assert_is_greater(num_examples, 0)

        print('Reading data from: "{0:s}"...'.format(example_file_name))
        example_dict = example_io.read_file(example_file_name)

        num_examples_total = len(example_dict[example_utils.VALID_TIMES_KEY])
        desired_indices = numpy.linspace(0,
                                         num_examples_total - 1,
                                         num=num_examples_total,
                                         dtype=int)

        if num_examples < num_examples_total:
            desired_indices = numpy.random.choice(desired_indices,
                                                  size=num_examples,
                                                  replace=False)

        example_dict = example_utils.subset_by_index(
            example_dict=example_dict, desired_indices=desired_indices)

    return example_dict