Example 1
def _run(first_batch_number, last_batch_number):
    """Counts number of training examples.

    This is effectively the main method.

    :param first_batch_number: See documentation at top of file.
    :param last_batch_number: Same.
    """

    example_file_names = trainval_io.find_downsized_3d_example_files(
        top_directory_name=TOP_EXAMPLE_DIR_NAME, shuffled=True,
        first_batch_number=first_batch_number,
        last_batch_number=last_batch_number)

    num_examples = 0

    for this_file_name in example_file_names:
        print 'Reading data from: "{0:s}"...'.format(this_file_name)

        this_example_dict = trainval_io.read_downsized_3d_examples(
            netcdf_file_name=this_file_name, metadata_only=True)

        this_num_examples = len(
            this_example_dict[trainval_io.TARGET_TIMES_KEY])
        num_examples += this_num_examples

        print 'Number of examples so far = {0:d}\n'.format(num_examples)
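
Each `_run` in this listing is described as "effectively the main method", so the script presumably exposes it through a thin argparse wrapper. A minimal sketch (the flag names are assumptions, not taken from the source file):

import argparse

# Hypothetical entry point; the flag names are assumed, not from the source.
if __name__ == '__main__':
    PARSER = argparse.ArgumentParser()
    PARSER.add_argument('--first_batch_number', type=int, required=True)
    PARSER.add_argument('--last_batch_number', type=int, required=True)
    ARGS = PARSER.parse_args()

    _run(first_batch_number=ARGS.first_batch_number,
         last_batch_number=ARGS.last_batch_number)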
Example 2
def _read_input_examples(example_file_name, cnn_metadata_dict, num_examples,
                         example_indices):
    """Reads input examples (images to be reconstructed).

    :param example_file_name: See documentation at top of file.
    :param cnn_metadata_dict: Dictionary returned by
        `traditional_cnn.read_model_metadata`.
    :param num_examples: See documentation at top of file.
    :param example_indices: Same.
    :return: actual_image_matrix: E-by-M-by-N-by-C numpy array with actual
        images (input examples to CNN).
    """

    print 'Reading input examples (images) from: "{0:s}"...'.format(
        example_file_name)
    example_dict = trainval_io.read_downsized_3d_examples(
        netcdf_file_name=example_file_name,
        predictor_names_to_keep=cnn_metadata_dict[
            traditional_cnn.NARR_PREDICTOR_NAMES_KEY],
        num_half_rows_to_keep=cnn_metadata_dict[
            traditional_cnn.NUM_ROWS_IN_HALF_GRID_KEY],
        num_half_columns_to_keep=cnn_metadata_dict[
            traditional_cnn.NUM_COLUMNS_IN_HALF_GRID_KEY])

    actual_image_matrix = example_dict[trainval_io.PREDICTOR_MATRIX_KEY]

    if num_examples is not None:
        num_examples_total = actual_image_matrix.shape[0]
        example_indices = numpy.linspace(0,
                                         num_examples_total - 1,
                                         num=num_examples_total,
                                         dtype=int)

        num_examples = min([num_examples, num_examples_total])
        example_indices = numpy.random.choice(example_indices,
                                              size=num_examples,
                                              replace=False)

    return actual_image_matrix[example_indices, ...]
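
The linspace-then-random.choice idiom above, repeated throughout these examples, simply draws `num_examples` distinct indices from 0 ... `num_examples_total` - 1. A more direct equivalent, as a sketch:

import numpy

num_examples_total = 1000  # e.g.
num_examples = 16

# Passing an int to numpy.random.choice samples from numpy.arange(n).
example_indices = numpy.random.choice(
    num_examples_total, size=num_examples, replace=False)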
Example 3
def _find_baseline_and_test_examples(top_example_dir_name, first_time_string,
                                     last_time_string, num_baseline_examples,
                                     num_test_examples, cnn_model_object,
                                     cnn_metadata_dict):
    """Finds examples for baseline and test sets.

    :param top_example_dir_name: See documentation at top of file.
    :param first_time_string: Same.
    :param last_time_string: Same.
    :param num_baseline_examples: Same.
    :param num_test_examples: Same.
    :param cnn_model_object: Trained CNN (instance of `keras.models.Model`).
    :param cnn_metadata_dict: Dictionary returned by
        `traditional_cnn.read_model_metadata`.
    :return: baseline_image_matrix: B-by-M-by-N-by-C numpy array of baseline
        images (input examples for the CNN), where B = number of baseline
        examples.
    :return: test_image_matrix: T-by-M-by-N-by-C numpy array of test images
        (input examples for the CNN), where T = number of test examples.
    """

    first_time_unix_sec = time_conversion.string_to_unix_sec(
        first_time_string, TIME_FORMAT)
    last_time_unix_sec = time_conversion.string_to_unix_sec(
        last_time_string, TIME_FORMAT)

    example_file_names = trainval_io.find_downsized_3d_example_files(
        top_directory_name=top_example_dir_name,
        shuffled=False,
        first_target_time_unix_sec=first_time_unix_sec,
        last_target_time_unix_sec=last_time_unix_sec)

    file_indices = numpy.array([], dtype=int)
    file_position_indices = numpy.array([], dtype=int)
    cold_front_probabilities = numpy.array([], dtype=float)

    for k in range(len(example_file_names)):
        print 'Reading data from: "{0:s}"...'.format(example_file_names[k])
        this_example_dict = trainval_io.read_downsized_3d_examples(
            netcdf_file_name=example_file_names[k],
            metadata_only=False,
            predictor_names_to_keep=cnn_metadata_dict[
                traditional_cnn.NARR_PREDICTOR_NAMES_KEY],
            num_half_rows_to_keep=cnn_metadata_dict[
                traditional_cnn.NUM_ROWS_IN_HALF_GRID_KEY],
            num_half_columns_to_keep=cnn_metadata_dict[
                traditional_cnn.NUM_COLUMNS_IN_HALF_GRID_KEY],
            first_time_to_keep_unix_sec=first_time_unix_sec,
            last_time_to_keep_unix_sec=last_time_unix_sec)

        this_num_examples = len(
            this_example_dict[trainval_io.TARGET_TIMES_KEY])
        if this_num_examples == 0:
            continue

        these_file_indices = numpy.full(this_num_examples, k, dtype=int)
        these_position_indices = numpy.linspace(0,
                                                this_num_examples - 1,
                                                num=this_num_examples,
                                                dtype=int)

        these_cold_front_probs = _get_cnn_predictions(
            cnn_model_object=cnn_model_object,
            predictor_matrix=this_example_dict[
                trainval_io.PREDICTOR_MATRIX_KEY],
            target_class=front_utils.COLD_FRONT_INTEGER_ID,
            verbose=True)
        print '\n'

        file_indices = numpy.concatenate((file_indices, these_file_indices))
        file_position_indices = numpy.concatenate(
            (file_position_indices, these_position_indices))
        cold_front_probabilities = numpy.concatenate(
            (cold_front_probabilities, these_cold_front_probs))

    print SEPARATOR_STRING

    # Find test set.
    test_indices = numpy.argsort(
        -1 * cold_front_probabilities)[:num_test_examples]
    file_indices_for_test = file_indices[test_indices]
    file_position_indices_for_test = file_position_indices[test_indices]

    print 'Cold-front probabilities for the {0:d} test examples are:'.format(
        num_test_examples)
    for i in test_indices:
        print cold_front_probabilities[i]
    print SEPARATOR_STRING

    # Find baseline set.
    # Candidate baseline examples are all examples *not* in the test set.
    # (Drawing candidates from only the first num_baseline_examples indices
    # would make the sample fail whenever a test example fell in that range.)
    num_examples_total = len(cold_front_probabilities)
    baseline_indices = numpy.linspace(0,
                                      num_examples_total - 1,
                                      num=num_examples_total,
                                      dtype=int)

    baseline_indices = (set(baseline_indices.tolist()) -
                        set(test_indices.tolist()))
    baseline_indices = numpy.array(list(baseline_indices), dtype=int)
    baseline_indices = numpy.random.choice(baseline_indices,
                                           size=num_baseline_examples,
                                           replace=False)

    file_indices_for_baseline = file_indices[baseline_indices]
    file_position_indices_for_baseline = file_position_indices[
        baseline_indices]

    print('Cold-front probabilities for the {0:d} baseline examples are:'
          ).format(num_baseline_examples)
    for i in baseline_indices:
        print cold_front_probabilities[i]
    print SEPARATOR_STRING

    # Read test and baseline sets.
    baseline_image_matrix = None
    test_image_matrix = None

    for k in range(len(example_file_names)):
        if not (k in file_indices_for_test or k in file_indices_for_baseline):
            continue

        print 'Reading data from: "{0:s}"...'.format(example_file_names[k])
        this_example_dict = trainval_io.read_downsized_3d_examples(
            netcdf_file_name=example_file_names[k],
            metadata_only=False,
            predictor_names_to_keep=cnn_metadata_dict[
                traditional_cnn.NARR_PREDICTOR_NAMES_KEY],
            num_half_rows_to_keep=cnn_metadata_dict[
                traditional_cnn.NUM_ROWS_IN_HALF_GRID_KEY],
            num_half_columns_to_keep=cnn_metadata_dict[
                traditional_cnn.NUM_COLUMNS_IN_HALF_GRID_KEY],
            first_time_to_keep_unix_sec=first_time_unix_sec,
            last_time_to_keep_unix_sec=last_time_unix_sec)

        this_predictor_matrix = this_example_dict[
            trainval_io.PREDICTOR_MATRIX_KEY]

        if baseline_image_matrix is None:
            baseline_image_matrix = numpy.full(
                (num_baseline_examples, ) + this_predictor_matrix.shape[1:],
                numpy.nan)
            test_image_matrix = numpy.full(
                (num_test_examples, ) + this_predictor_matrix.shape[1:],
                numpy.nan)

        these_baseline_indices = numpy.where(file_indices_for_baseline == k)[0]
        if len(these_baseline_indices) > 0:
            baseline_image_matrix[these_baseline_indices, ...] = (
                this_predictor_matrix[
                    file_position_indices_for_baseline[these_baseline_indices],
                    ...])

        these_test_indices = numpy.where(file_indices_for_test == k)[0]
        if len(these_test_indices) > 0:
            test_image_matrix[these_test_indices, ...] = (
                this_predictor_matrix[
                    file_position_indices_for_test[these_test_indices], ...])

    return baseline_image_matrix, test_image_matrix
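
The pair of arrays `file_indices` and `file_position_indices` is the key bookkeeping device here: example j in the concatenated set lives in file `file_indices[j]`, at row `file_position_indices[j]` within that file, so each selected example can be re-read in the second pass without holding every file in memory. A toy illustration (values invented):

import numpy

file_indices = numpy.array([0, 0, 1, 1, 1], dtype=int)
file_position_indices = numpy.array([0, 1, 0, 1, 2], dtype=int)
test_indices = numpy.array([1, 4], dtype=int)  # picked by probability ranking

print file_indices[test_indices]           # files to re-read: [0 1]
print file_position_indices[test_indices]  # rows within those files: [1 2]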
Example 4
def _trainval_generator(top_input_dir_name, first_time_unix_sec,
                        last_time_unix_sec, narr_predictor_names,
                        num_half_rows, num_half_columns,
                        num_examples_per_batch, cnn_model_object,
                        cnn_feature_layer_name):
    """Generates training or validation examples for upconvnet on the fly.

    :param top_input_dir_name: Name of top-level directory with downsized 3-D
        examples (two spatial dimensions).  Files therein will be found by
        `training_validation_io.find_downsized_3d_example_files` (with
        `shuffled = True`) and read by
        `training_validation_io.read_downsized_3d_examples`.
    :param first_time_unix_sec: First valid time.  Only examples with valid time
        in `first_time_unix_sec`...`last_time_unix_sec` will be kept.
    :param last_time_unix_sec: See above.
    :param narr_predictor_names: See doc for
        `training_validation_io.read_downsized_3d_examples`.
    :param num_half_rows: Same.
    :param num_half_columns: Same.
    :param num_examples_per_batch: Number of examples in each batch.
    :param cnn_model_object: Trained CNN model (instance of
        `keras.models.Model`).  This will be used to turn images stored in
        `top_input_dir_name` into scalar features.
    :param cnn_feature_layer_name: The "scalar features" will be the set of
        activations from this layer.
    :return: feature_matrix: E-by-Z numpy array of scalar features.  These are
        the "predictors" for the upconv network.
    :return: target_matrix: E-by-M-by-N-by-C numpy array of target images.
        These are the predictors for the CNN and the targets for the upconv
        network.
    """

    error_checking.assert_is_integer(num_examples_per_batch)
    error_checking.assert_is_geq(num_examples_per_batch, 10)

    partial_cnn_model_object = cnn.model_to_feature_generator(
        model_object=cnn_model_object,
        output_layer_name=cnn_feature_layer_name)

    example_file_names = trainval_io.find_downsized_3d_example_files(
        top_directory_name=top_input_dir_name,
        shuffled=True,
        first_batch_number=0,
        last_batch_number=LARGE_INTEGER)
    shuffle(example_file_names)

    num_files = len(example_file_names)
    file_index = 0
    batch_indices = numpy.linspace(0,
                                   num_examples_per_batch - 1,
                                   num=num_examples_per_batch,
                                   dtype=int)

    num_predictors = len(narr_predictor_names)
    num_examples_in_memory = 0
    full_target_matrix = None

    while True:
        while num_examples_in_memory < num_examples_per_batch:
            print 'Reading data from: "{0:s}"...'.format(
                example_file_names[file_index])

            this_example_dict = trainval_io.read_downsized_3d_examples(
                netcdf_file_name=example_file_names[file_index],
                predictor_names_to_keep=narr_predictor_names,
                num_half_rows_to_keep=num_half_rows,
                num_half_columns_to_keep=num_half_columns,
                first_time_to_keep_unix_sec=first_time_unix_sec,
                last_time_to_keep_unix_sec=last_time_unix_sec)

            file_index += 1
            if file_index >= num_files:
                file_index = 0

            this_num_examples = len(
                this_example_dict[trainval_io.TARGET_TIMES_KEY])
            if this_num_examples == 0:
                continue

            if full_target_matrix is None or full_target_matrix.size == 0:
                full_target_matrix = (
                    this_example_dict[trainval_io.PREDICTOR_MATRIX_KEY] + 0.)
            else:
                full_target_matrix = numpy.concatenate(
                    (full_target_matrix,
                     this_example_dict[trainval_io.PREDICTOR_MATRIX_KEY]),
                    axis=0)

            num_examples_in_memory = full_target_matrix.shape[0]

        target_matrix = full_target_matrix[
            batch_indices, ...].astype('float32')
        feature_matrix = partial_cnn_model_object.predict(
            target_matrix, batch_size=num_examples_per_batch)

        for i in range(num_examples_per_batch):
            for m in range(num_predictors):
                target_matrix[i, ..., m] = (
                    target_matrix[i, ..., m] -
                    numpy.mean(target_matrix[i, ..., m]))

        num_examples_in_memory = 0
        full_target_matrix = None

        yield (feature_matrix, target_matrix)
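
Because `_trainval_generator` yields `(feature_matrix, target_matrix)` batches forever, it can be handed straight to Keras. A usage sketch, assuming the upconvnet model and the batch/epoch counts are defined elsewhere (every name outside the generator call is a placeholder):

# Sketch only: `upconvnet_model_object` and the batch/epoch counts are
# placeholders, not names from the source file.
upconvnet_model_object.fit_generator(
    generator=_trainval_generator(
        top_input_dir_name=top_training_dir_name,
        first_time_unix_sec=first_training_time_unix_sec,
        last_time_unix_sec=last_training_time_unix_sec,
        narr_predictor_names=narr_predictor_names,
        num_half_rows=num_half_rows,
        num_half_columns=num_half_columns,
        num_examples_per_batch=num_examples_per_batch,
        cnn_model_object=cnn_model_object,
        cnn_feature_layer_name=cnn_feature_layer_name),
    steps_per_epoch=num_training_batches_per_epoch,
    epochs=num_epochs)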
Example 5
def _run(example_file_name, top_front_line_dir_name, num_examples,
         example_indices, thetaw_colour_map_name, thetaw_max_colour_percentile,
         output_dir_name):
    """Plots one or more input examples.

    This is effectively the main method.

    :param example_file_name: See documentation at top of file.
    :param top_front_line_dir_name: Same.
    :param num_examples: Same.
    :param example_indices: Same.
    :param thetaw_colour_map_name: Same.
    :param thetaw_max_colour_percentile: Same.
    :param output_dir_name: Same.
    """

    if num_examples <= 0:
        num_examples = None

    if num_examples is None:
        error_checking.assert_is_geq_numpy_array(example_indices, 0)
    else:
        error_checking.assert_is_greater(num_examples, 0)

    error_checking.assert_is_geq(thetaw_max_colour_percentile, 0)
    error_checking.assert_is_leq(thetaw_max_colour_percentile, 100)
    thetaw_colour_map_object = pyplot.cm.get_cmap(thetaw_colour_map_name)

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    print 'Reading normalized examples from: "{0:s}"...'.format(
        example_file_name)

    example_dict = trainval_io.read_downsized_3d_examples(
        netcdf_file_name=example_file_name,
        num_half_rows_to_keep=NUM_HALF_ROWS,
        num_half_columns_to_keep=NUM_HALF_COLUMNS,
        predictor_names_to_keep=NARR_PREDICTOR_NAMES)

    # TODO(thunderhoser): This is a HACK (assuming that normalization method is
    # z-score and not min-max).
    mean_value_matrix = example_dict[trainval_io.FIRST_NORM_PARAM_KEY]
    standard_deviation_matrix = example_dict[trainval_io.SECOND_NORM_PARAM_KEY]

    normalization_dict = {
        ml_utils.MIN_VALUE_MATRIX_KEY: None,
        ml_utils.MAX_VALUE_MATRIX_KEY: None,
        ml_utils.MEAN_VALUE_MATRIX_KEY: mean_value_matrix,
        ml_utils.STDEV_MATRIX_KEY: standard_deviation_matrix
    }

    example_dict[trainval_io.PREDICTOR_MATRIX_KEY] = (
        ml_utils.denormalize_predictors(
            predictor_matrix=example_dict[trainval_io.PREDICTOR_MATRIX_KEY],
            normalization_dict=normalization_dict))

    narr_latitude_matrix_deg, narr_longitude_matrix_deg = (
        nwp_model_utils.get_latlng_grid_point_matrices(
            model_name=nwp_model_utils.NARR_MODEL_NAME))

    narr_rotation_cos_matrix, narr_rotation_sin_matrix = (
        nwp_model_utils.get_wind_rotation_angles(
            latitudes_deg=narr_latitude_matrix_deg,
            longitudes_deg=narr_longitude_matrix_deg,
            model_name=nwp_model_utils.NARR_MODEL_NAME))

    # If num_examples is specified, draw a random subset; otherwise keep the
    # user-specified example_indices.  (Computing example_indices outside this
    # branch would clobber the user-specified indices.)
    if num_examples is not None:
        num_examples_total = len(example_dict[trainval_io.TARGET_TIMES_KEY])
        example_indices = numpy.linspace(0,
                                         num_examples_total - 1,
                                         num=num_examples_total,
                                         dtype=int)

        num_examples = min([num_examples, num_examples_total])
        example_indices = numpy.random.choice(example_indices,
                                              size=num_examples,
                                              replace=False)

    thetaw_index = NARR_PREDICTOR_NAMES.index(
        processed_narr_io.WET_BULB_THETA_NAME)
    u_wind_index = NARR_PREDICTOR_NAMES.index(
        processed_narr_io.U_WIND_GRID_RELATIVE_NAME)
    v_wind_index = NARR_PREDICTOR_NAMES.index(
        processed_narr_io.V_WIND_GRID_RELATIVE_NAME)

    for i in example_indices:
        this_center_row_index = example_dict[trainval_io.ROW_INDICES_KEY][i]
        this_first_row_index = this_center_row_index - NUM_HALF_ROWS
        this_last_row_index = this_center_row_index + NUM_HALF_ROWS

        this_center_column_index = example_dict[
            trainval_io.COLUMN_INDICES_KEY][i]
        this_first_column_index = this_center_column_index - NUM_HALF_COLUMNS
        this_last_column_index = this_center_column_index + NUM_HALF_COLUMNS

        this_u_wind_matrix_m_s01 = example_dict[
            trainval_io.PREDICTOR_MATRIX_KEY][i, ..., u_wind_index]
        this_v_wind_matrix_m_s01 = example_dict[
            trainval_io.PREDICTOR_MATRIX_KEY][i, ..., v_wind_index]
        this_cos_matrix = narr_rotation_cos_matrix[
            this_first_row_index:(this_last_row_index + 1),
            this_first_column_index:(this_last_column_index + 1)]
        this_sin_matrix = narr_rotation_sin_matrix[
            this_first_row_index:(this_last_row_index + 1),
            this_first_column_index:(this_last_column_index + 1)]

        this_u_wind_matrix_m_s01, this_v_wind_matrix_m_s01 = (
            nwp_model_utils.rotate_winds_to_earth_relative(
                u_winds_grid_relative_m_s01=this_u_wind_matrix_m_s01,
                v_winds_grid_relative_m_s01=this_v_wind_matrix_m_s01,
                rotation_angle_cosines=this_cos_matrix,
                rotation_angle_sines=this_sin_matrix))

        _, axes_object, basemap_object = nwp_plotting.init_basemap(
            model_name=nwp_model_utils.NARR_MODEL_NAME,
            first_row_in_full_grid=this_first_row_index,
            last_row_in_full_grid=this_last_row_index,
            first_column_in_full_grid=this_first_column_index,
            last_column_in_full_grid=this_last_column_index,
            resolution_string='i')

        plotting_utils.plot_coastlines(basemap_object=basemap_object,
                                       axes_object=axes_object,
                                       line_colour=BORDER_COLOUR,
                                       line_width=BORDER_WIDTH)
        plotting_utils.plot_countries(basemap_object=basemap_object,
                                      axes_object=axes_object,
                                      line_colour=BORDER_COLOUR,
                                      line_width=BORDER_WIDTH)
        plotting_utils.plot_states_and_provinces(basemap_object=basemap_object,
                                                 axes_object=axes_object,
                                                 line_colour=BORDER_COLOUR,
                                                 line_width=BORDER_WIDTH)
        plotting_utils.plot_parallels(
            basemap_object=basemap_object,
            axes_object=axes_object,
            bottom_left_lat_deg=-90.,
            upper_right_lat_deg=90.,
            parallel_spacing_deg=PARALLEL_SPACING_DEG)
        plotting_utils.plot_meridians(
            basemap_object=basemap_object,
            axes_object=axes_object,
            bottom_left_lng_deg=0.,
            upper_right_lng_deg=360.,
            meridian_spacing_deg=MERIDIAN_SPACING_DEG)

        this_thetaw_matrix_kelvins = example_dict[
            trainval_io.PREDICTOR_MATRIX_KEY][i, ..., thetaw_index]

        this_min_value = numpy.percentile(this_thetaw_matrix_kelvins,
                                          100. - thetaw_max_colour_percentile)
        this_max_value = numpy.percentile(this_thetaw_matrix_kelvins,
                                          thetaw_max_colour_percentile)

        nwp_plotting.plot_subgrid(
            field_matrix=this_thetaw_matrix_kelvins,
            model_name=nwp_model_utils.NARR_MODEL_NAME,
            axes_object=axes_object,
            basemap_object=basemap_object,
            colour_map=thetaw_colour_map_object,
            min_value_in_colour_map=this_min_value,
            max_value_in_colour_map=this_max_value,
            first_row_in_full_grid=this_first_row_index,
            first_column_in_full_grid=this_first_column_index)

        colour_bar_object = plotting_utils.add_linear_colour_bar(
            axes_object_or_list=axes_object,
            values_to_colour=this_thetaw_matrix_kelvins,
            colour_map=thetaw_colour_map_object,
            colour_min=this_min_value,
            colour_max=this_max_value,
            orientation='vertical',
            extend_min=True,
            extend_max=True,
            fraction_of_axis_length=0.8)

        colour_bar_object.set_label('Wet-bulb potential temperature (K)')

        nwp_plotting.plot_wind_barbs_on_subgrid(
            u_wind_matrix_m_s01=this_u_wind_matrix_m_s01,
            v_wind_matrix_m_s01=this_v_wind_matrix_m_s01,
            model_name=nwp_model_utils.NARR_MODEL_NAME,
            axes_object=axes_object,
            basemap_object=basemap_object,
            first_row_in_full_grid=this_first_row_index,
            first_column_in_full_grid=this_first_column_index,
            barb_length=WIND_BARB_LENGTH,
            empty_barb_radius=EMPTY_WIND_BARB_RADIUS,
            fill_empty_barb=False,
            colour_map=WIND_COLOUR_MAP_OBJECT,
            colour_minimum_kt=MIN_COLOUR_WIND_SPEED_KT,
            colour_maximum_kt=MAX_COLOUR_WIND_SPEED_KT)

        this_front_file_name = fronts_io.find_file_for_one_time(
            top_directory_name=top_front_line_dir_name,
            file_type=fronts_io.POLYLINE_FILE_TYPE,
            valid_time_unix_sec=example_dict[trainval_io.TARGET_TIMES_KEY][i])

        print time_conversion.unix_sec_to_string(
            example_dict[trainval_io.TARGET_TIMES_KEY][i], '%Y-%m-%d-%H')

        this_polyline_table = fronts_io.read_polylines_from_file(
            this_front_file_name)
        this_num_fronts = len(this_polyline_table.index)

        for j in range(this_num_fronts):
            this_front_type_string = this_polyline_table[
                front_utils.FRONT_TYPE_COLUMN].values[j]

            if this_front_type_string == front_utils.WARM_FRONT_STRING_ID:
                this_colour = WARM_FRONT_COLOUR
            else:
                this_colour = COLD_FRONT_COLOUR

            front_plotting.plot_front_with_markers(
                line_latitudes_deg=this_polyline_table[
                    front_utils.LATITUDES_COLUMN].values[j],
                line_longitudes_deg=this_polyline_table[
                    front_utils.LONGITUDES_COLUMN].values[j],
                axes_object=axes_object,
                basemap_object=basemap_object,
                front_type_string=this_polyline_table[
                    front_utils.FRONT_TYPE_COLUMN].values[j],
                marker_colour=this_colour,
                marker_size=FRONT_MARKER_SIZE,
                marker_spacing_metres=FRONT_SPACING_METRES)

        this_output_file_name = '{0:s}/example{1:06d}.jpg'.format(
            output_dir_name, i)

        print 'Saving figure to: "{0:s}"...'.format(this_output_file_name)
        pyplot.savefig(this_output_file_name, dpi=FIGURE_RESOLUTION_DPI)
        pyplot.close()
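
The "HACK" flagged in the TODO above inverts z-score normalization: if z = (x - mean) / stdev, then x = mean + stdev * z. A minimal sketch of that inversion (a hypothetical helper, not the actual `ml_utils.denormalize_predictors` implementation):

def _denormalize_z_scores(normalized_matrix, mean_matrix, stdev_matrix):
    # Inverts z = (x - mean) / stdev, elementwise over the predictor grid.
    return mean_matrix + stdev_matrix * normalized_matrix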
Example 6
def _read_examples(top_example_dir_name, first_time_string, last_time_string,
                   num_examples, model_metadata_dict):
    """Reads learning examples for either training or validation.

    :param top_example_dir_name: See doc for either `top_training_dir_name` or
        `top_validn_dir_name` at top of file.
    :param first_time_string: Same.
    :param last_time_string: Same.
    :param num_examples: See doc for either `num_training_examples` or
        `num_validn_examples` at top of file.
    :param model_metadata_dict: Dictionary (created by
        `traditional_cnn.read_model_metadata`) for original model, whose
        architecture will be mostly copied to train the new models.
    :return: predictor_matrix: E-by-M-by-N-by-C numpy array of predictor values
        (images).
    :return: target_values: length-E numpy array of target values (integer
        class labels).
    """

    error_checking.assert_is_geq(num_examples, 100)

    first_time_unix_sec = time_conversion.string_to_unix_sec(
        first_time_string, INPUT_TIME_FORMAT)
    last_time_unix_sec = time_conversion.string_to_unix_sec(
        last_time_string, INPUT_TIME_FORMAT)

    example_file_names = trainval_io.find_downsized_3d_example_files(
        top_directory_name=top_example_dir_name, shuffled=True,
        first_batch_number=0, last_batch_number=LARGE_INTEGER)
    random.shuffle(example_file_names)

    predictor_matrix = None
    target_matrix = None

    for this_example_file_name in example_file_names:
        print 'Reading data from: "{0:s}"...'.format(this_example_file_name)

        this_example_dict = trainval_io.read_downsized_3d_examples(
            netcdf_file_name=this_example_file_name,
            predictor_names_to_keep=model_metadata_dict[
                traditional_cnn.NARR_PREDICTOR_NAMES_KEY],
            num_half_rows_to_keep=model_metadata_dict[
                traditional_cnn.NUM_ROWS_IN_HALF_GRID_KEY],
            num_half_columns_to_keep=model_metadata_dict[
                traditional_cnn.NUM_COLUMNS_IN_HALF_GRID_KEY],
            first_time_to_keep_unix_sec=first_time_unix_sec,
            last_time_to_keep_unix_sec=last_time_unix_sec)

        this_predictor_matrix = this_example_dict[
            trainval_io.PREDICTOR_MATRIX_KEY]
        this_target_matrix = this_example_dict[
            trainval_io.TARGET_MATRIX_KEY]

        if predictor_matrix is None:
            predictor_matrix = this_predictor_matrix + 0.
            target_matrix = this_target_matrix + 0
        else:
            predictor_matrix = numpy.concatenate(
                (predictor_matrix, this_predictor_matrix), axis=0)
            target_matrix = numpy.concatenate(
                (target_matrix, this_target_matrix), axis=0)

        if predictor_matrix.shape[0] > num_examples:
            predictor_matrix = predictor_matrix[:num_examples, ...]
            target_matrix = target_matrix[:num_examples, ...]

        num_examples_by_class = numpy.sum(target_matrix, axis=0)
        print 'Number of examples in each class: {0:s}\n'.format(
            str(num_examples_by_class))

        if predictor_matrix.shape[0] >= num_examples:
            break

    return predictor_matrix, numpy.argmax(target_matrix, axis=1)
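
The final `numpy.argmax(target_matrix, axis=1)` collapses one-hot target rows into integer class labels, which is also why the column sums printed in the loop give per-class example counts. A tiny worked example:

import numpy

target_matrix = numpy.array([[1, 0, 0],
                             [0, 0, 1],
                             [0, 1, 0]], dtype=int)

print numpy.sum(target_matrix, axis=0)     # examples per class: [1 1 1]
print numpy.argmax(target_matrix, axis=1)  # integer labels: [0 2 1]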
Example 7
def _run(model_file_name, example_file_name, num_examples, example_indices,
         component_type_string, target_class, layer_name, ideal_activation,
         neuron_indices, channel_index, output_file_name):
    """Creates saliency map for each example, based on the same CNN.

    This is effectively the main method.

    :param model_file_name: See documentation at top of file.
    :param example_file_name: Same.
    :param num_examples: Same.
    :param example_indices: Same.
    :param component_type_string: Same.
    :param target_class: Same.
    :param layer_name: Same.
    :param ideal_activation: Same.
    :param neuron_indices: Same.
    :param channel_index: Same.
    :param output_file_name: Same.
    """

    if num_examples <= 0:
        num_examples = None

    if num_examples is None:
        error_checking.assert_is_geq_numpy_array(example_indices, 0)
    else:
        error_checking.assert_is_greater(num_examples, 0)

    print 'Reading model from: "{0:s}"...'.format(model_file_name)
    model_object = traditional_cnn.read_keras_model(model_file_name)

    model_metafile_name = traditional_cnn.find_metafile(
        model_file_name=model_file_name)

    print 'Reading model metadata from: "{0:s}"...'.format(model_metafile_name)
    model_metadata_dict = traditional_cnn.read_model_metadata(
        model_metafile_name)

    print 'Reading normalized examples from: "{0:s}"...'.format(
        example_file_name)
    example_dict = trainval_io.read_downsized_3d_examples(
        netcdf_file_name=example_file_name,
        predictor_names_to_keep=model_metadata_dict[
            traditional_cnn.NARR_PREDICTOR_NAMES_KEY],
        num_half_rows_to_keep=model_metadata_dict[
            traditional_cnn.NUM_ROWS_IN_HALF_GRID_KEY],
        num_half_columns_to_keep=model_metadata_dict[
            traditional_cnn.NUM_COLUMNS_IN_HALF_GRID_KEY])

    predictor_matrix = example_dict[trainval_io.PREDICTOR_MATRIX_KEY]
    if num_examples is not None:
        num_examples_total = predictor_matrix.shape[0]
        example_indices = numpy.linspace(0,
                                         num_examples_total - 1,
                                         num=num_examples_total,
                                         dtype=int)

        num_examples = min([num_examples, num_examples_total])
        example_indices = numpy.random.choice(example_indices,
                                              size=num_examples,
                                              replace=False)

    predictor_matrix = predictor_matrix[example_indices, ...]

    if component_type_string == CLASS_COMPONENT_TYPE_STRING:
        print 'Computing saliency maps for target class {0:d}...'.format(
            target_class)

        saliency_matrix = (
            gg_saliency_maps.get_saliency_maps_for_class_activation(
                model_object=model_object,
                target_class=target_class,
                list_of_input_matrices=[predictor_matrix])[0])

    elif component_type_string == NEURON_COMPONENT_TYPE_STRING:
        print('Computing saliency maps for neuron {0:s} in layer "{1:s}"...'
              ).format(str(neuron_indices), layer_name)

        saliency_matrix = (
            gg_saliency_maps.get_saliency_maps_for_neuron_activation(
                model_object=model_object,
                layer_name=layer_name,
                neuron_indices=neuron_indices,
                list_of_input_matrices=[predictor_matrix],
                ideal_activation=ideal_activation)[0])

    else:
        print('Computing saliency maps for channel {0:d} in layer "{1:s}"...'
              ).format(channel_index, layer_name)

        saliency_matrix = (
            gg_saliency_maps.get_saliency_maps_for_channel_activation(
                model_object=model_object,
                layer_name=layer_name,
                channel_index=channel_index,
                list_of_input_matrices=[predictor_matrix],
                stat_function_for_neuron_activations=K.max,
                ideal_activation=ideal_activation)[0])

    print 'Writing results to: "{0:s}"...'.format(output_file_name)
    ge_saliency_maps.write_file(pickle_file_name=output_file_name,
                                normalized_predictor_matrix=predictor_matrix,
                                saliency_matrix=saliency_matrix,
                                model_file_name=model_file_name,
                                component_type_string=component_type_string,
                                target_class=target_class,
                                layer_name=layer_name,
                                ideal_activation=ideal_activation,
                                neuron_indices=neuron_indices,
                                channel_index=channel_index)
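
For the class-based case, `gg_saliency_maps.get_saliency_maps_for_class_activation` presumably differentiates the class activation with respect to the input pixels. A hedged sketch of that core computation in raw Keras (an assumption about the internals, not the actual gewittergefahr code):

from keras import backend as K

def _sketch_class_saliency(model_object, target_class, predictor_matrix):
    # d(class probability) / d(input pixel), evaluated in test mode.
    activation_tensor = model_object.output[:, target_class]
    gradient_tensor = K.gradients(activation_tensor, model_object.input)[0]
    gradient_function = K.function(
        [model_object.input, K.learning_phase()], [gradient_tensor])
    return gradient_function([predictor_matrix, 0])[0]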
Example 8
def _run(model_file_name, example_file_name, num_examples, example_indices,
         component_type_string, target_class, layer_name, ideal_activation,
         neuron_indices, channel_index, num_iterations, learning_rate,
         output_file_name):
    """Runs backwards optimization on a trained CNN.

    This is effectively the main method.

    :param model_file_name: See documentation at top of file.
    :param example_file_name: Same.
    :param num_examples: Same.
    :param example_indices: Same.
    :param component_type_string: Same.
    :param target_class: Same.
    :param layer_name: Same.
    :param ideal_activation: Same.
    :param neuron_indices: Same.
    :param channel_index: Same.
    :param num_iterations: Same.
    :param learning_rate: Same.
    :param output_file_name: Same.
    """

    if num_examples <= 0:
        num_examples = None

    print 'Reading model from: "{0:s}"...'.format(model_file_name)
    model_object = traditional_cnn.read_keras_model(model_file_name)

    model_metafile_name = traditional_cnn.find_metafile(
        model_file_name=model_file_name)

    print 'Reading model metadata from: "{0:s}"...'.format(model_metafile_name)
    model_metadata_dict = traditional_cnn.read_model_metadata(
        model_metafile_name)

    print 'Reading normalized examples from: "{0:s}"...'.format(
        example_file_name)
    example_dict = trainval_io.read_downsized_3d_examples(
        netcdf_file_name=example_file_name,
        predictor_names_to_keep=model_metadata_dict[
            traditional_cnn.NARR_PREDICTOR_NAMES_KEY],
        num_half_rows_to_keep=model_metadata_dict[
            traditional_cnn.NUM_ROWS_IN_HALF_GRID_KEY],
        num_half_columns_to_keep=model_metadata_dict[
            traditional_cnn.NUM_COLUMNS_IN_HALF_GRID_KEY])

    predictor_matrix = example_dict[trainval_io.PREDICTOR_MATRIX_KEY]

    if num_examples is None:
        error_checking.assert_is_geq_numpy_array(example_indices, 0)
        num_examples = len(example_indices)
    else:
        error_checking.assert_is_greater(num_examples, 0)

        num_examples_total = predictor_matrix.shape[0]
        example_indices = numpy.linspace(0,
                                         num_examples_total - 1,
                                         num=num_examples_total,
                                         dtype=int)

        num_examples = min([num_examples, num_examples_total])
        example_indices = numpy.random.choice(example_indices,
                                              size=num_examples,
                                              replace=False)

    predictor_matrix = predictor_matrix[example_indices, ...]
    optimized_predictor_matrix = numpy.full(predictor_matrix.shape, numpy.nan)
    print SEPARATOR_STRING

    for i in range(num_examples):
        if component_type_string == CLASS_COMPONENT_TYPE_STRING:
            print(
                'Optimizing image {0:d} of {1:d} for target class {2:d}...'
            ).format(i + 1, num_examples, target_class)

            optimized_predictor_matrix[i, ...] = (
                backwards_opt.optimize_input_for_class(
                    model_object=model_object,
                    target_class=target_class,
                    init_function_or_matrices=[predictor_matrix[[i], ...]],
                    num_iterations=num_iterations,
                    learning_rate=learning_rate)[0])

        elif component_type_string == NEURON_COMPONENT_TYPE_STRING:
            print(
                'Optimizing image {0:d} of {1:d} for neuron {2:s} in layer '
                '"{3:s}"...').format(i + 1, num_examples, str(neuron_indices),
                                     layer_name)

            optimized_predictor_matrix[i, ...] = (
                backwards_opt.optimize_input_for_neuron(
                    model_object=model_object,
                    layer_name=layer_name,
                    neuron_indices=neuron_indices,
                    init_function_or_matrices=[predictor_matrix[[i], ...]],
                    num_iterations=num_iterations,
                    learning_rate=learning_rate,
                    ideal_activation=ideal_activation)[0])

        else:
            print(
                'Optimizing image {0:d} of {1:d} for channel {2:d} in layer '
                '"{3:s}"...').format(i + 1, num_examples, channel_index,
                                     layer_name)

            optimized_predictor_matrix[i, ...] = (
                backwards_opt.optimize_input_for_channel(
                    model_object=model_object,
                    layer_name=layer_name,
                    channel_index=channel_index,
                    init_function_or_matrices=[predictor_matrix[[i], ...]],
                    stat_function_for_neuron_activations=K.max,
                    num_iterations=num_iterations,
                    learning_rate=learning_rate,
                    ideal_activation=ideal_activation)[0])

        print SEPARATOR_STRING

    print 'Writing results to: "{0:s}"...'.format(output_file_name)
    backwards_opt.write_results(
        pickle_file_name=output_file_name,
        list_of_optimized_input_matrices=[optimized_predictor_matrix],
        model_file_name=model_file_name,
        init_function_name_or_matrices=[predictor_matrix],
        num_iterations=num_iterations,
        learning_rate=learning_rate,
        component_type_string=component_type_string,
        target_class=target_class,
        layer_name=layer_name,
        neuron_indices=neuron_indices,
        channel_index=channel_index,
        ideal_activation=ideal_activation)
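
Backwards optimization is gradient ascent on the input rather than the weights: starting from a real example, the image is nudged repeatedly in the direction that raises the chosen activation. A sketch under that assumption (not the actual `backwards_opt` code):

from keras import backend as K

def _sketch_optimize_for_class(model_object, target_class, init_matrix,
                               num_iterations, learning_rate):
    activation_tensor = model_object.output[:, target_class]
    gradient_tensor = K.gradients(activation_tensor, model_object.input)[0]
    step_function = K.function(
        [model_object.input, K.learning_phase()], [gradient_tensor])

    current_matrix = init_matrix + 0.
    for _ in range(num_iterations):
        # Move the *image* uphill along the activation gradient.
        current_matrix += learning_rate * step_function([current_matrix, 0])[0]

    return current_matrix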
Example 9
def _read_examples(top_example_dir_name, first_time_string, last_time_string,
                   num_times, num_examples_per_time, model_metadata_dict):
    """Reads learning examples.

    These and the trained model are the main inputs to the permutation test.

    :param top_example_dir_name: See documentation at top of file.
    :param first_time_string: Same.
    :param last_time_string: Same.
    :param num_times: Same.
    :param num_examples_per_time: Same.
    :param model_metadata_dict: Dictionary with metadata for trained model
        (created by `traditional_cnn.read_model_metadata`).
    :return: predictor_matrix: E-by-M-by-N-by-C numpy array of predictor values
        (images).
    :return: target_values: length-E numpy array of target values (integer
        class labels).
    """

    error_checking.assert_is_greater(num_times, 0)
    error_checking.assert_is_geq(num_examples_per_time, 10)

    first_time_unix_sec = time_conversion.string_to_unix_sec(
        first_time_string, INPUT_TIME_FORMAT)
    last_time_unix_sec = time_conversion.string_to_unix_sec(
        last_time_string, INPUT_TIME_FORMAT)

    example_file_names = trainval_io.find_downsized_3d_example_files(
        top_directory_name=top_example_dir_name,
        shuffled=False,
        first_target_time_unix_sec=first_time_unix_sec,
        last_target_time_unix_sec=last_time_unix_sec)

    num_times = min([num_times, len(example_file_names)])
    random.shuffle(example_file_names)
    example_file_names = example_file_names[:num_times]

    predictor_matrix = None
    target_matrix = None

    for i in range(num_times):
        print 'Reading data from: "{0:s}"...'.format(example_file_names[i])

        this_example_dict = trainval_io.read_downsized_3d_examples(
            netcdf_file_name=example_file_names[i],
            predictor_names_to_keep=model_metadata_dict[
                traditional_cnn.NARR_PREDICTOR_NAMES_KEY],
            num_half_rows_to_keep=model_metadata_dict[
                traditional_cnn.NUM_ROWS_IN_HALF_GRID_KEY],
            num_half_columns_to_keep=model_metadata_dict[
                traditional_cnn.NUM_COLUMNS_IN_HALF_GRID_KEY],
            first_time_to_keep_unix_sec=first_time_unix_sec,
            last_time_to_keep_unix_sec=last_time_unix_sec)

        this_num_examples_total = this_example_dict[
            trainval_io.PREDICTOR_MATRIX_KEY].shape[0]
        this_num_examples_to_keep = min(
            [num_examples_per_time, this_num_examples_total])

        these_example_indices = numpy.linspace(0,
                                               this_num_examples_total - 1,
                                               num=this_num_examples_total,
                                               dtype=int)
        these_example_indices = numpy.random.choice(
            these_example_indices,
            size=this_num_examples_to_keep,
            replace=False)

        this_predictor_matrix = this_example_dict[
            trainval_io.PREDICTOR_MATRIX_KEY][these_example_indices, ...]
        this_target_matrix = this_example_dict[trainval_io.TARGET_MATRIX_KEY][
            these_example_indices, ...]

        if predictor_matrix is None:
            predictor_matrix = this_predictor_matrix + 0.
            target_matrix = this_target_matrix + 0
        else:
            predictor_matrix = numpy.concatenate(
                (predictor_matrix, this_predictor_matrix), axis=0)
            target_matrix = numpy.concatenate(
                (target_matrix, this_target_matrix), axis=0)

        num_examples_by_class = numpy.sum(target_matrix, axis=0)
        print 'Number of examples in each class: {0:s}\n'.format(
            str(num_examples_by_class))

    return predictor_matrix, numpy.argmax(target_matrix, axis=1)
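
The docstring notes that these examples feed a permutation test. One step of that test permutes a single predictor channel across examples and measures how much skill drops; a sketch of that step (a hypothetical helper, not from the source):

import numpy

def _sketch_permutation_step(model_object, predictor_matrix, target_values,
                             channel_index):
    # Shuffle channel `channel_index` across examples, then re-score the CNN.
    shuffled_matrix = predictor_matrix + 0.
    shuffled_matrix[..., channel_index] = numpy.random.permutation(
        shuffled_matrix[..., channel_index])

    class_probability_matrix = model_object.predict(
        shuffled_matrix, batch_size=1024)
    return numpy.mean(
        numpy.argmax(class_probability_matrix, axis=1) == target_values)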
Example 10
def _run(model_file_name, example_file_name, num_examples, example_indices,
         layer_names, top_output_dir_name):
    """Plots feature maps for each example and CNN layer.

    This is effectively the main method.

    :param model_file_name: See documentation at top of file.
    :param example_file_name: Same.
    :param num_examples: Same.
    :param example_indices: Same.
    :param layer_names: Same.
    :param top_output_dir_name: Same.
    """

    if num_examples <= 0:
        num_examples = None

    if num_examples is None:
        error_checking.assert_is_geq_numpy_array(example_indices, 0)
    else:
        error_checking.assert_is_greater(num_examples, 0)

    print 'Reading model from: "{0:s}"...'.format(model_file_name)
    model_object = traditional_cnn.read_keras_model(model_file_name)

    model_metafile_name = traditional_cnn.find_metafile(
        model_file_name=model_file_name)

    print 'Reading model metadata from: "{0:s}"...'.format(model_metafile_name)
    model_metadata_dict = traditional_cnn.read_model_metadata(
        model_metafile_name)

    print 'Reading normalized examples from: "{0:s}"...'.format(
        example_file_name)
    example_dict = trainval_io.read_downsized_3d_examples(
        netcdf_file_name=example_file_name,
        predictor_names_to_keep=model_metadata_dict[
            traditional_cnn.NARR_PREDICTOR_NAMES_KEY],
        num_half_rows_to_keep=model_metadata_dict[
            traditional_cnn.NUM_ROWS_IN_HALF_GRID_KEY],
        num_half_columns_to_keep=model_metadata_dict[
            traditional_cnn.NUM_COLUMNS_IN_HALF_GRID_KEY])

    print SEPARATOR_STRING
    predictor_matrix = example_dict[trainval_io.PREDICTOR_MATRIX_KEY]

    if num_examples is not None:
        num_examples_total = predictor_matrix.shape[0]
        example_indices = numpy.linspace(0,
                                         num_examples_total - 1,
                                         num=num_examples_total,
                                         dtype=int)

        num_examples = min([num_examples, num_examples_total])
        example_indices = numpy.random.choice(example_indices,
                                              size=num_examples,
                                              replace=False)

    predictor_matrix = predictor_matrix[example_indices, ...]
    num_examples = predictor_matrix.shape[0]

    num_layers = len(layer_names)
    feature_matrix_by_layer = [None] * num_layers

    for k in range(num_layers):
        print 'Creating feature maps for layer "{0:s}"...'.format(
            layer_names[k])

        this_partial_model_object = cnn.model_to_feature_generator(
            model_object=model_object, feature_layer_name=layer_names[k])

        feature_matrix_by_layer[k] = this_partial_model_object.predict(
            predictor_matrix, batch_size=num_examples)

    print SEPARATOR_STRING

    for k in range(num_layers):
        this_output_dir_name = '{0:s}/{1:s}'.format(top_output_dir_name,
                                                    layer_names[k])
        file_system_utils.mkdir_recursive_if_necessary(
            directory_name=this_output_dir_name)

        _plot_feature_maps_one_layer(feature_matrix=feature_matrix_by_layer[k],
                                     layer_name=layer_names[k],
                                     output_dir_name=this_output_dir_name)
        print SEPARATOR_STRING
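
`cnn.model_to_feature_generator` presumably builds a truncated model whose output is the named layer's activations. A sketch of the standard Keras way to do that (an assumption, not the actual gewittergefahr implementation):

from keras.models import Model

def _sketch_feature_model(model_object, layer_name):
    # Same inputs as the trained CNN, but stop at the named layer.
    return Model(
        inputs=model_object.input,
        outputs=model_object.get_layer(name=layer_name).output)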