Example #1
def _vertex_list_to_polygon_list(vertex_rows, vertex_columns):
    """This method is the inverse of `_polygon_list_to_vertex_list`.

    P = number of polygons

    :param vertex_rows: See doc for `_polygon_list_to_vertex_list`.
    :param vertex_columns: Same.
    :return: polygon_objects_grid_coords: Same.
    :return: polygon_to_first_vertex_indices: length-P numpy array of indices.
        If polygon_to_first_vertex_indices[j] = i, the first vertex in the
        [j]th polygon is the [i]th vertex in the input arrays.
    :raises: ValueError: if row and column lists have NaNs at different
        locations.
    """

    if len(vertex_rows) == 0:
        return [], numpy.array([], dtype=int)

    nan_row_indices = numpy.where(numpy.isnan(vertex_rows))[0]
    nan_column_indices = numpy.where(numpy.isnan(vertex_columns))[0]

    if not numpy.array_equal(nan_row_indices, nan_column_indices):
        error_string = (
            'Row ({0:s}) and column ({1:s}) lists have NaNs at different '
            'locations.'
        ).format(str(nan_row_indices), str(nan_column_indices))

        raise ValueError(error_string)

    polygon_to_first_vertex_indices = numpy.concatenate(
        (numpy.array([0], dtype=int), nan_row_indices + 1))

    vertex_rows_by_polygon = general_utils.split_array_by_nan(vertex_rows)
    vertex_columns_by_polygon = general_utils.split_array_by_nan(
        vertex_columns)

    num_polygons = len(vertex_rows_by_polygon)
    polygon_objects_grid_coords = []

    for i in range(num_polygons):
        this_polygon_object = polygons.vertex_arrays_to_polygon_object(
            exterior_x_coords=vertex_columns_by_polygon[i],
            exterior_y_coords=vertex_rows_by_polygon[i])

        polygon_objects_grid_coords.append(this_polygon_object)

    return polygon_objects_grid_coords, polygon_to_first_vertex_indices
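
Every example on this page hinges on `general_utils.split_array_by_nan`, whose implementation is not shown. The sketch below is a minimal implementation consistent with how the examples call it, not the library's actual code:

import numpy


def split_array_by_nan(input_array):
    """Splits a 1-D array into a list of sub-arrays at NaN separators."""
    input_array = numpy.asarray(input_array, dtype=float)
    nan_indices = numpy.where(numpy.isnan(input_array))[0]

    # numpy.split cuts *before* each given index, so cut just after each NaN;
    # the mask then strips the trailing NaN left in every chunk but the last.
    chunks = numpy.split(input_array, nan_indices + 1)
    return [c[~numpy.isnan(c)] for c in chunks]


# With vertex arrays like those used above, one NaN yields two polygons:
vertex_rows = numpy.array([0., 0., 5., 5., 0., numpy.nan, 10., 10., 20.])
print(split_array_by_nan(vertex_rows))
# [array([0., 0., 5., 5., 0.]), array([10., 10., 20.])]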
Example #2

    def test_split_array_by_nan_1nan(self):
        """Ensures correct output from split_array_by_nan.

        In this case, input array has one NaN.
        """

        this_list_of_arrays = general_utils.split_array_by_nan(
            ARRAY_WITH_ONE_NAN)
        self.assertTrue(
            len(this_list_of_arrays) == len(ARRAY_WITH_ONE_NAN_AS_LIST))

        for i in range(len(this_list_of_arrays)):
            self.assertTrue(
                numpy.allclose(this_list_of_arrays[i],
                               ARRAY_WITH_ONE_NAN_AS_LIST[i]))
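
The fixtures `ARRAY_WITH_ONE_NAN` and `ARRAY_WITH_ONE_NAN_AS_LIST` are defined at the top of the test file, which this excerpt omits. A hypothetical pair consistent with the assertions above:

import numpy

# Hypothetical values (the real test file defines its own): one NaN
# separator implies exactly two sub-arrays after the split.
ARRAY_WITH_ONE_NAN = numpy.array([1., 2., numpy.nan, 3., 4.])
ARRAY_WITH_ONE_NAN_AS_LIST = [numpy.array([1., 2.]), numpy.array([3., 4.])]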
Example #3

def _run(model_file_name, component_type_string, target_class, layer_name,
         neuron_indices_flattened, channel_indices, top_example_dir_name,
         first_spc_date_string, last_spc_date_string, output_file_name):
    """Creates activation maps for one class, neuron, or channel of a CNN.

    This is effectively the main method.

    :param model_file_name: See documentation at top of file.
    :param component_type_string: Same.
    :param target_class: Same.
    :param layer_name: Same.
    :param neuron_indices_flattened: Same.
    :param channel_indices: Same.
    :param top_example_dir_name: Same.
    :param first_spc_date_string: Same.
    :param last_spc_date_string: Same.
    :param output_file_name: Same.
    """

    # Check input args.
    file_system_utils.mkdir_recursive_if_necessary(file_name=output_file_name)
    model_interpretation.check_component_type(component_type_string)

    if component_type_string == CHANNEL_COMPONENT_TYPE_STRING:
        error_checking.assert_is_geq_numpy_array(channel_indices, 0)
    if component_type_string == NEURON_COMPONENT_TYPE_STRING:
        neuron_indices_flattened = neuron_indices_flattened.astype(float)
        neuron_indices_flattened[neuron_indices_flattened < 0] = numpy.nan
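        # Negative entries are sentinels separating neuron-index groups:
        # converting them to NaN lets split_array_by_nan recover the groups.
        # E.g., [0, 0, 2, -1, 0, 1, 2] splits into [0, 0, 2] and [0, 1, 2],
        # which stack into a 2-by-3 neuron_index_matrix.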

        neuron_indices_2d_list = general_utils.split_array_by_nan(
            neuron_indices_flattened)
        neuron_index_matrix = numpy.array(neuron_indices_2d_list, dtype=int)
    else:
        neuron_index_matrix = None

    # Read model and metadata.
    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = cnn.read_model(model_file_name)

    metadata_file_name = '{0:s}/model_metadata.p'.format(
        os.path.split(model_file_name)[0])

    print('Reading metadata from: "{0:s}"...'.format(metadata_file_name))
    model_metadata_dict = cnn.read_model_metadata(metadata_file_name)
    training_option_dict = model_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY]

    # Create generator.
    example_file_names = input_examples.find_many_example_files(
        top_directory_name=top_example_dir_name,
        shuffled=False,
        first_spc_date_string=first_spc_date_string,
        last_spc_date_string=last_spc_date_string,
        raise_error_if_any_missing=False)

    training_option_dict[trainval_io.SAMPLING_FRACTIONS_KEY] = None
    training_option_dict[trainval_io.EXAMPLE_FILES_KEY] = example_file_names
    training_option_dict[trainval_io.FIRST_STORM_TIME_KEY] = (
        time_conversion.get_start_of_spc_date(first_spc_date_string))
    training_option_dict[trainval_io.LAST_STORM_TIME_KEY] = (
        time_conversion.get_end_of_spc_date(last_spc_date_string))

    if model_metadata_dict[cnn.LAYER_OPERATIONS_KEY] is not None:
        generator_object = testing_io.gridrad_generator_2d_reduced(
            option_dict=training_option_dict,
            list_of_operation_dicts=model_metadata_dict[
                cnn.LAYER_OPERATIONS_KEY],
            num_examples_total=LARGE_INTEGER)

    elif model_metadata_dict[cnn.CONV_2D3D_KEY]:
        generator_object = testing_io.myrorss_generator_2d3d(
            option_dict=training_option_dict, num_examples_total=LARGE_INTEGER)
    else:
        generator_object = testing_io.generator_2d_or_3d(
            option_dict=training_option_dict, num_examples_total=LARGE_INTEGER)

    # Compute activation for each example (storm object) and model component.
    full_id_strings = []
    storm_times_unix_sec = numpy.array([], dtype=int)
    activation_matrix = None

    print(SEPARATOR_STRING)

    for _ in range(len(example_file_names)):
        try:
            this_storm_object_dict = next(generator_object)
        except StopIteration:
            break

        this_list_of_input_matrices = this_storm_object_dict[
            testing_io.INPUT_MATRICES_KEY]
        these_id_strings = this_storm_object_dict[testing_io.FULL_IDS_KEY]
        these_times_unix_sec = this_storm_object_dict[
            testing_io.STORM_TIMES_KEY]

        full_id_strings += these_id_strings
        storm_times_unix_sec = numpy.concatenate(
            (storm_times_unix_sec, these_times_unix_sec))

        if component_type_string == CLASS_COMPONENT_TYPE_STRING:
            print('Computing activations for target class {0:d}...'.format(
                target_class))

            this_activation_matrix = (
                model_activation.get_class_activation_for_examples(
                    model_object=model_object,
                    target_class=target_class,
                    list_of_input_matrices=this_list_of_input_matrices))

            this_activation_matrix = numpy.reshape(
                this_activation_matrix, (len(this_activation_matrix), 1))

        elif component_type_string == NEURON_COMPONENT_TYPE_STRING:
            this_activation_matrix = None

            for j in range(neuron_index_matrix.shape[0]):
                print((
                    'Computing activations for neuron {0:s} in layer "{1:s}"...'
                ).format(str(neuron_index_matrix[j, :]), layer_name))

                these_activations = (
                    model_activation.get_neuron_activation_for_examples(
                        model_object=model_object,
                        layer_name=layer_name,
                        neuron_indices=neuron_index_matrix[j, :],
                        list_of_input_matrices=this_list_of_input_matrices))

                these_activations = numpy.reshape(these_activations,
                                                  (len(these_activations), 1))

                if this_activation_matrix is None:
                    this_activation_matrix = these_activations + 0.
                else:
                    this_activation_matrix = numpy.concatenate(
                        (this_activation_matrix, these_activations), axis=1)
        else:
            this_activation_matrix = None

            for this_channel_index in channel_indices:
                print(('Computing activations for channel {0:d} in layer '
                       '"{1:s}"...').format(this_channel_index, layer_name))

                these_activations = (
                    model_activation.get_channel_activation_for_examples(
                        model_object=model_object,
                        layer_name=layer_name,
                        channel_index=this_channel_index,
                        list_of_input_matrices=this_list_of_input_matrices,
                        stat_function_for_neuron_activations=K.max))

                these_activations = numpy.reshape(these_activations,
                                                  (len(these_activations), 1))

                if this_activation_matrix is None:
                    this_activation_matrix = these_activations + 0.
                else:
                    this_activation_matrix = numpy.concatenate(
                        (this_activation_matrix, these_activations), axis=1)

        if activation_matrix is None:
            activation_matrix = this_activation_matrix + 0.
        else:
            activation_matrix = numpy.concatenate(
                (activation_matrix, this_activation_matrix), axis=0)

        print(SEPARATOR_STRING)

    print('Writing activations to file: "{0:s}"...'.format(output_file_name))
    model_activation.write_file(pickle_file_name=output_file_name,
                                activation_matrix=activation_matrix,
                                full_id_strings=full_id_strings,
                                storm_times_unix_sec=storm_times_unix_sec,
                                model_file_name=model_file_name,
                                component_type_string=component_type_string,
                                target_class=target_class,
                                layer_name=layer_name,
                                neuron_index_matrix=neuron_index_matrix,
                                channel_indices=channel_indices)
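
A pattern worth noting in this example: the activation matrix is grown across generator batches by starting from None and concatenating along the example axis. A standalone sketch of that pattern, with hypothetical batch shapes:

import numpy

activation_matrix = None

for this_batch in [numpy.ones((2, 3)), numpy.zeros((1, 3))]:
    if activation_matrix is None:
        activation_matrix = this_batch + 0.  # "+ 0." forces a copy
    else:
        activation_matrix = numpy.concatenate(
            (activation_matrix, this_batch), axis=0)

print(activation_matrix.shape)  # (3, 3)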
Example #4
def _run(top_linkage_dir_name, spc_date_string, min_lead_times_sec,
         max_lead_times_sec, min_link_distances_metres,
         max_link_distances_metres, event_type_string,
         wind_speed_percentile_level, wind_speed_cutoffs_kt,
         top_output_dir_name):
    """Computes target value for ea storm object, lead-time window, and buffer.

    This is effectively the main method.

    :param top_linkage_dir_name: See documentation at top of file.
    :param spc_date_string: Same.
    :param min_lead_times_sec: Same.
    :param max_lead_times_sec: Same.
    :param min_link_distances_metres: Same.
    :param max_link_distances_metres: Same.
    :param event_type_string: Same.
    :param wind_speed_percentile_level: Same.
    :param wind_speed_cutoffs_kt: Same.
    :param top_output_dir_name: Same.
    """

    num_lead_time_windows = len(min_lead_times_sec)
    error_checking.assert_is_numpy_array(
        max_lead_times_sec,
        exact_dimensions=numpy.array([num_lead_time_windows])
    )

    num_distance_buffers = len(min_link_distances_metres)
    error_checking.assert_is_numpy_array(
        max_link_distances_metres,
        exact_dimensions=numpy.array([num_distance_buffers])
    )

    linkage_file_name = linkage.find_linkage_file(
        top_directory_name=top_linkage_dir_name,
        event_type_string=event_type_string, spc_date_string=spc_date_string)

    print('Reading data from: "{0:s}"...'.format(linkage_file_name))
    storm_to_events_table = linkage.read_linkage_file(linkage_file_name)

    if event_type_string == linkage.WIND_EVENT_STRING:
        list_of_cutoff_arrays_kt = general_utils.split_array_by_nan(
            wind_speed_cutoffs_kt)
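        # wind_speed_cutoffs_kt uses the same NaN-delimiter convention:
        # e.g., [30, 50, NaN, 50] encodes two cutoff sets, {30, 50} kt
        # and {50} kt.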
        num_cutoff_sets = len(list_of_cutoff_arrays_kt)
    else:
        list_of_cutoff_arrays_kt = None
        num_cutoff_sets = 1

    target_names = []

    for i in range(num_lead_time_windows):
        for j in range(num_distance_buffers):
            for k in range(num_cutoff_sets):
                if event_type_string == linkage.WIND_EVENT_STRING:
                    this_target_name = target_val_utils.target_params_to_name(
                        min_lead_time_sec=min_lead_times_sec[i],
                        max_lead_time_sec=max_lead_times_sec[i],
                        min_link_distance_metres=min_link_distances_metres[j],
                        max_link_distance_metres=max_link_distances_metres[j],
                        wind_speed_percentile_level=wind_speed_percentile_level,
                        wind_speed_cutoffs_kt=list_of_cutoff_arrays_kt[k])

                    target_names.append(this_target_name)
                    print('Computing values for "{0:s}"...'.format(
                        target_names[-1]))

                    storm_to_events_table = (
                        target_val_utils.create_wind_classification_targets(
                            storm_to_winds_table=storm_to_events_table,
                            min_lead_time_sec=min_lead_times_sec[i],
                            max_lead_time_sec=max_lead_times_sec[i],
                            min_link_distance_metres=min_link_distances_metres[
                                j],
                            max_link_distance_metres=max_link_distances_metres[
                                j],
                            percentile_level=wind_speed_percentile_level,
                            class_cutoffs_kt=list_of_cutoff_arrays_kt[k])
                    )
                else:
                    this_target_name = target_val_utils.target_params_to_name(
                        min_lead_time_sec=min_lead_times_sec[i],
                        max_lead_time_sec=max_lead_times_sec[i],
                        min_link_distance_metres=min_link_distances_metres[j],
                        max_link_distance_metres=max_link_distances_metres[j])

                    target_names.append(this_target_name)
                    print('Computing values for "{0:s}"...'.format(
                        target_names[-1]))

                    storm_to_events_table = (
                        target_val_utils.create_tornado_targets(
                            storm_to_tornadoes_table=storm_to_events_table,
                            min_lead_time_sec=min_lead_times_sec[i],
                            max_lead_time_sec=max_lead_times_sec[i],
                            min_link_distance_metres=min_link_distances_metres[
                                j],
                            max_link_distance_metres=max_link_distances_metres[
                                j]
                        )
                    )

    target_file_name = target_val_utils.find_target_file(
        top_directory_name=top_output_dir_name,
        event_type_string=event_type_string, spc_date_string=spc_date_string,
        raise_error_if_missing=False)

    print('Writing target values to: "{0:s}"...'.format(target_file_name))
    target_val_utils.write_target_values(
        storm_to_events_table=storm_to_events_table, target_names=target_names,
        netcdf_file_name=target_file_name)
Example #5

def _compute_targets_one_day(storm_to_events_table, spc_date_string,
                             min_lead_times_sec, max_lead_times_sec,
                             min_link_distances_metres,
                             max_link_distances_metres, event_type_string,
                             wind_speed_percentile_level,
                             wind_speed_cutoffs_kt, top_output_dir_name):
    """Computes target values for one SPC date.

    :param storm_to_events_table: pandas DataFrame returned by
        `linkage.read_linkage_file`.
    :param spc_date_string: SPC date (format "yyyymmdd").
    :param min_lead_times_sec: See documentation at top of file.
    :param max_lead_times_sec: Same.
    :param min_link_distances_metres: Same.
    :param max_link_distances_metres: Same.
    :param event_type_string: Same.
    :param wind_speed_percentile_level: Same.
    :param wind_speed_cutoffs_kt: Same.
    :param top_output_dir_name: Same.
    """

    num_lead_time_windows = len(min_lead_times_sec)
    num_distance_buffers = len(min_link_distances_metres)

    if event_type_string == linkage.WIND_EVENT_STRING:
        list_of_cutoff_arrays_kt = general_utils.split_array_by_nan(
            wind_speed_cutoffs_kt)
        num_cutoff_sets = len(list_of_cutoff_arrays_kt)
    else:
        list_of_cutoff_arrays_kt = None
        num_cutoff_sets = 1

    target_names = []

    for i in range(num_lead_time_windows):
        for j in range(num_distance_buffers):
            for k in range(num_cutoff_sets):
                if event_type_string == linkage.WIND_EVENT_STRING:
                    this_target_name = target_val_utils.target_params_to_name(
                        min_lead_time_sec=min_lead_times_sec[i],
                        max_lead_time_sec=max_lead_times_sec[i],
                        min_link_distance_metres=min_link_distances_metres[j],
                        max_link_distance_metres=max_link_distances_metres[j],
                        wind_speed_percentile_level=wind_speed_percentile_level,
                        wind_speed_cutoffs_kt=list_of_cutoff_arrays_kt[k])

                    target_names.append(this_target_name)

                    print(('Computing labels for "{0:s}" on SPC date {1:s}...'
                           ).format(this_target_name, spc_date_string))

                    storm_to_events_table = (
                        target_val_utils.create_wind_classification_targets(
                            storm_to_winds_table=storm_to_events_table,
                            min_lead_time_sec=min_lead_times_sec[i],
                            max_lead_time_sec=max_lead_times_sec[i],
                            min_link_distance_metres=min_link_distances_metres[
                                j],
                            max_link_distance_metres=max_link_distances_metres[
                                j],
                            percentile_level=wind_speed_percentile_level,
                            class_cutoffs_kt=list_of_cutoff_arrays_kt[k]))
                else:
                    genesis_only = (event_type_string ==
                                    linkage.TORNADOGENESIS_EVENT_STRING)

                    this_target_name = target_val_utils.target_params_to_name(
                        min_lead_time_sec=min_lead_times_sec[i],
                        max_lead_time_sec=max_lead_times_sec[i],
                        min_link_distance_metres=min_link_distances_metres[j],
                        max_link_distance_metres=max_link_distances_metres[j],
                        genesis_only=genesis_only)

                    target_names.append(this_target_name)

                    print(('Computing labels for "{0:s}" on SPC date {1:s}...'
                           ).format(this_target_name, spc_date_string))

                    storm_to_events_table = (
                        target_val_utils.create_tornado_targets(
                            storm_to_tornadoes_table=storm_to_events_table,
                            min_lead_time_sec=min_lead_times_sec[i],
                            max_lead_time_sec=max_lead_times_sec[i],
                            min_link_distance_metres=min_link_distances_metres[
                                j],
                            max_link_distance_metres=max_link_distances_metres[
                                j],
                            genesis_only=genesis_only))

    target_file_name = target_val_utils.find_target_file(
        top_directory_name=top_output_dir_name,
        event_type_string=event_type_string,
        spc_date_string=spc_date_string,
        raise_error_if_missing=False)

    print('Writing target values to: "{0:s}"...'.format(target_file_name))
    target_val_utils.write_target_values(
        storm_to_events_table=storm_to_events_table,
        target_names=target_names,
        netcdf_file_name=target_file_name)