def _vertex_list_to_polygon_list(vertex_rows, vertex_columns):
    """This method is the inverse of `_polygon_list_to_vertex_list`.

    P = number of polygons

    :param vertex_rows: See doc for `_polygon_list_to_vertex_list`.
    :param vertex_columns: Same.
    :return: polygon_objects_grid_coords: Same.
    :return: polygon_to_first_vertex_indices: length-P numpy array of indices.
        If polygon_to_first_vertex_indices[j] = i, the first vertex in the
        [j]th polygon is the [i]th vertex in the input arrays.
    :raises: ValueError: if row and column lists have NaN's at different
        locations.
    """

    if len(vertex_rows) == 0:
        return [], numpy.array([], dtype=int)

    nan_row_indices = numpy.where(numpy.isnan(vertex_rows))[0]
    nan_column_indices = numpy.where(numpy.isnan(vertex_columns))[0]

    if not numpy.array_equal(nan_row_indices, nan_column_indices):
        error_string = (
            'Row ({0:s}) and column ({1:s}) lists have NaNs at different '
            'locations.'
        ).format(str(nan_row_indices), str(nan_column_indices))

        raise ValueError(error_string)

    polygon_to_first_vertex_indices = numpy.concatenate((
        numpy.array([0], dtype=int), nan_row_indices + 1
    ))

    vertex_rows_by_polygon = general_utils.split_array_by_nan(vertex_rows)
    vertex_columns_by_polygon = general_utils.split_array_by_nan(
        vertex_columns)

    num_polygons = len(vertex_rows_by_polygon)
    polygon_objects_grid_coords = []

    for i in range(num_polygons):
        this_polygon_object = polygons.vertex_arrays_to_polygon_object(
            exterior_x_coords=vertex_columns_by_polygon[i],
            exterior_y_coords=vertex_rows_by_polygon[i])

        polygon_objects_grid_coords.append(this_polygon_object)

    return polygon_objects_grid_coords, polygon_to_first_vertex_indices
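def _demo_vertex_list_to_polygon_list():
    """Illustrative sketch of the NaN-delimited vertex format.

    This function is not called anywhere; the values are hypothetical.  It
    shows the input/output contract of `_vertex_list_to_polygon_list` for two
    triangles: consecutive polygons are separated by a single NaN in each
    coordinate array, and the returned index array points at the first vertex
    of each polygon.
    """

    # Two triangles, flattened into one array per coordinate.
    vertex_rows = numpy.array([0., 0., 5., numpy.nan, 10., 10., 20.])
    vertex_columns = numpy.array([0., 5., 0., numpy.nan, 30., 40., 30.])

    polygon_objects, first_vertex_indices = _vertex_list_to_polygon_list(
        vertex_rows=vertex_rows, vertex_columns=vertex_columns)

    # Expected: two polygon objects, with first vertices at indices 0 and 4
    # (index 4 is the element just past the NaN separator).
    assert len(polygon_objects) == 2
    assert numpy.array_equal(first_vertex_indices, numpy.array([0, 4]))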
def test_split_array_by_nan_1nan(self):
    """Ensures correct output from split_array_by_nan.

    In this case, input array has one NaN.
    """

    this_list_of_arrays = general_utils.split_array_by_nan(
        ARRAY_WITH_ONE_NAN)

    self.assertTrue(
        len(this_list_of_arrays) == len(ARRAY_WITH_ONE_NAN_AS_LIST))

    for i in range(len(this_list_of_arrays)):
        self.assertTrue(numpy.allclose(
            this_list_of_arrays[i], ARRAY_WITH_ONE_NAN_AS_LIST[i]
        ))
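def test_split_array_by_nan_sketch(self):
    """Illustrative sketch of the split-by-NaN contract.

    Unlike the test above, this one uses hypothetical inline constants (not
    the module-level ones), so the expected behaviour is visible in place:
    the NaN acts only as a separator and does not appear in the output.
    """

    input_array = numpy.array([1., 2., numpy.nan, 3., 4.])
    expected_arrays = [numpy.array([1., 2.]), numpy.array([3., 4.])]

    actual_arrays = general_utils.split_array_by_nan(input_array)
    self.assertTrue(len(actual_arrays) == len(expected_arrays))

    for i in range(len(actual_arrays)):
        self.assertTrue(numpy.allclose(actual_arrays[i], expected_arrays[i]))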
def _run(model_file_name, component_type_string, target_class, layer_name,
         neuron_indices_flattened, channel_indices, top_example_dir_name,
         first_spc_date_string, last_spc_date_string, output_file_name):
    """Creates activation maps for one class, neuron, or channel of a CNN.

    This is effectively the main method.

    :param model_file_name: See documentation at top of file.
    :param component_type_string: Same.
    :param target_class: Same.
    :param layer_name: Same.
    :param neuron_indices_flattened: Same.
    :param channel_indices: Same.
    :param top_example_dir_name: Same.
    :param first_spc_date_string: Same.
    :param last_spc_date_string: Same.
    :param output_file_name: Same.
    """

    # Check input args.
    file_system_utils.mkdir_recursive_if_necessary(file_name=output_file_name)
    model_interpretation.check_component_type(component_type_string)

    if component_type_string == CHANNEL_COMPONENT_TYPE_STRING:
        error_checking.assert_is_geq_numpy_array(channel_indices, 0)

    if component_type_string == NEURON_COMPONENT_TYPE_STRING:
        neuron_indices_flattened = neuron_indices_flattened.astype(float)
        neuron_indices_flattened[neuron_indices_flattened < 0] = numpy.nan

        neuron_indices_2d_list = general_utils.split_array_by_nan(
            neuron_indices_flattened)
        neuron_index_matrix = numpy.array(neuron_indices_2d_list, dtype=int)
    else:
        neuron_index_matrix = None

    # Read model and metadata.
    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = cnn.read_model(model_file_name)

    metadata_file_name = '{0:s}/model_metadata.p'.format(
        os.path.split(model_file_name)[0])

    print('Reading metadata from: "{0:s}"...'.format(metadata_file_name))
    model_metadata_dict = cnn.read_model_metadata(metadata_file_name)
    training_option_dict = model_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY]

    # Create generator.
    example_file_names = input_examples.find_many_example_files(
        top_directory_name=top_example_dir_name, shuffled=False,
        first_spc_date_string=first_spc_date_string,
        last_spc_date_string=last_spc_date_string,
        raise_error_if_any_missing=False)

    training_option_dict[trainval_io.SAMPLING_FRACTIONS_KEY] = None
    training_option_dict[trainval_io.EXAMPLE_FILES_KEY] = example_file_names
    training_option_dict[trainval_io.FIRST_STORM_TIME_KEY] = (
        time_conversion.get_start_of_spc_date(first_spc_date_string))
    training_option_dict[trainval_io.LAST_STORM_TIME_KEY] = (
        time_conversion.get_end_of_spc_date(last_spc_date_string))

    if model_metadata_dict[cnn.LAYER_OPERATIONS_KEY] is not None:
        generator_object = testing_io.gridrad_generator_2d_reduced(
            option_dict=training_option_dict,
            list_of_operation_dicts=model_metadata_dict[
                cnn.LAYER_OPERATIONS_KEY],
            num_examples_total=LARGE_INTEGER)
    elif model_metadata_dict[cnn.CONV_2D3D_KEY]:
        generator_object = testing_io.myrorss_generator_2d3d(
            option_dict=training_option_dict,
            num_examples_total=LARGE_INTEGER)
    else:
        generator_object = testing_io.generator_2d_or_3d(
            option_dict=training_option_dict,
            num_examples_total=LARGE_INTEGER)

    # Compute activation for each example (storm object) and model component.
    full_id_strings = []
    storm_times_unix_sec = numpy.array([], dtype=int)
    activation_matrix = None

    print(SEPARATOR_STRING)

    for _ in range(len(example_file_names)):
        try:
            this_storm_object_dict = next(generator_object)
        except StopIteration:
            break

        this_list_of_input_matrices = this_storm_object_dict[
            testing_io.INPUT_MATRICES_KEY]
        these_id_strings = this_storm_object_dict[testing_io.FULL_IDS_KEY]
        these_times_unix_sec = this_storm_object_dict[
            testing_io.STORM_TIMES_KEY]

        full_id_strings += these_id_strings
        storm_times_unix_sec = numpy.concatenate(
            (storm_times_unix_sec, these_times_unix_sec))

        if component_type_string == CLASS_COMPONENT_TYPE_STRING:
            print('Computing activations for target class {0:d}...'.format(
                target_class))

            this_activation_matrix = (
                model_activation.get_class_activation_for_examples(
                    model_object=model_object, target_class=target_class,
                    list_of_input_matrices=this_list_of_input_matrices))

            this_activation_matrix = numpy.reshape(
                this_activation_matrix, (len(this_activation_matrix), 1))

        elif component_type_string == NEURON_COMPONENT_TYPE_STRING:
            this_activation_matrix = None

            for j in range(neuron_index_matrix.shape[0]):
                print((
                    'Computing activations for neuron {0:s} in layer '
                    '"{1:s}"...'
                ).format(str(neuron_index_matrix[j, :]), layer_name))

                these_activations = (
                    model_activation.get_neuron_activation_for_examples(
                        model_object=model_object, layer_name=layer_name,
                        neuron_indices=neuron_index_matrix[j, :],
                        list_of_input_matrices=this_list_of_input_matrices))

                these_activations = numpy.reshape(
                    these_activations, (len(these_activations), 1))

                if this_activation_matrix is None:
                    this_activation_matrix = these_activations + 0.
                else:
                    this_activation_matrix = numpy.concatenate(
                        (this_activation_matrix, these_activations), axis=1)
        else:
            this_activation_matrix = None

            for this_channel_index in channel_indices:
                print((
                    'Computing activations for channel {0:d} in layer '
                    '"{1:s}"...'
                ).format(this_channel_index, layer_name))

                these_activations = (
                    model_activation.get_channel_activation_for_examples(
                        model_object=model_object, layer_name=layer_name,
                        channel_index=this_channel_index,
                        list_of_input_matrices=this_list_of_input_matrices,
                        stat_function_for_neuron_activations=K.max))

                these_activations = numpy.reshape(
                    these_activations, (len(these_activations), 1))

                if this_activation_matrix is None:
                    this_activation_matrix = these_activations + 0.
                else:
                    this_activation_matrix = numpy.concatenate(
                        (this_activation_matrix, these_activations), axis=1)

        if activation_matrix is None:
            activation_matrix = this_activation_matrix + 0.
        else:
            activation_matrix = numpy.concatenate(
                (activation_matrix, this_activation_matrix), axis=0)

        print(SEPARATOR_STRING)

    print('Writing activations to file: "{0:s}"...'.format(output_file_name))
    model_activation.write_file(
        pickle_file_name=output_file_name,
        activation_matrix=activation_matrix,
        full_id_strings=full_id_strings,
        storm_times_unix_sec=storm_times_unix_sec,
        model_file_name=model_file_name,
        component_type_string=component_type_string,
        target_class=target_class, layer_name=layer_name,
        neuron_index_matrix=neuron_index_matrix,
        channel_indices=channel_indices)
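def _demo_neuron_index_parsing():
    """Illustrative sketch of how flattened neuron indices are parsed.

    This function is not called anywhere; the values are hypothetical.  On
    the command line, indices for several neurons arrive as one flat list
    with -1 as the separator.  `_run` converts the -1's to NaN and splits on
    NaN, so each row of the resulting matrix indexes one neuron.
    """

    # Hypothetical input: two neurons, each indexed by (row, column, channel).
    neuron_indices_flattened = numpy.array(
        [0, 0, 1, -1, 2, 3, 0], dtype=float)
    neuron_indices_flattened[neuron_indices_flattened < 0] = numpy.nan

    neuron_indices_2d_list = general_utils.split_array_by_nan(
        neuron_indices_flattened)
    neuron_index_matrix = numpy.array(neuron_indices_2d_list, dtype=int)

    # Expected: a 2-by-3 matrix, one row per neuron.
    assert numpy.array_equal(
        neuron_index_matrix, numpy.array([[0, 0, 1], [2, 3, 0]], dtype=int))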
def _run(top_linkage_dir_name, spc_date_string, min_lead_times_sec,
         max_lead_times_sec, min_link_distances_metres,
         max_link_distances_metres, event_type_string,
         wind_speed_percentile_level, wind_speed_cutoffs_kt,
         top_output_dir_name):
    """Computes target value for each storm object, lead-time window, and
    distance buffer.

    This is effectively the main method.

    :param top_linkage_dir_name: See documentation at top of file.
    :param spc_date_string: Same.
    :param min_lead_times_sec: Same.
    :param max_lead_times_sec: Same.
    :param min_link_distances_metres: Same.
    :param max_link_distances_metres: Same.
    :param event_type_string: Same.
    :param wind_speed_percentile_level: Same.
    :param wind_speed_cutoffs_kt: Same.
    :param top_output_dir_name: Same.
    """

    num_lead_time_windows = len(min_lead_times_sec)
    error_checking.assert_is_numpy_array(
        max_lead_times_sec,
        exact_dimensions=numpy.array([num_lead_time_windows])
    )

    num_distance_buffers = len(min_link_distances_metres)
    error_checking.assert_is_numpy_array(
        max_link_distances_metres,
        exact_dimensions=numpy.array([num_distance_buffers])
    )

    linkage_file_name = linkage.find_linkage_file(
        top_directory_name=top_linkage_dir_name,
        event_type_string=event_type_string, spc_date_string=spc_date_string)

    print('Reading data from: "{0:s}"...'.format(linkage_file_name))
    storm_to_events_table = linkage.read_linkage_file(linkage_file_name)

    if event_type_string == linkage.WIND_EVENT_STRING:
        list_of_cutoff_arrays_kt = general_utils.split_array_by_nan(
            wind_speed_cutoffs_kt)

        # The number of cutoff sets is the number of NaN-delimited
        # sub-arrays, not the length of the flat input array.
        num_cutoff_sets = len(list_of_cutoff_arrays_kt)
    else:
        list_of_cutoff_arrays_kt = None
        num_cutoff_sets = 1

    target_names = []

    for i in range(num_lead_time_windows):
        for j in range(num_distance_buffers):
            for k in range(num_cutoff_sets):
                if event_type_string == linkage.WIND_EVENT_STRING:
                    this_target_name = target_val_utils.target_params_to_name(
                        min_lead_time_sec=min_lead_times_sec[i],
                        max_lead_time_sec=max_lead_times_sec[i],
                        min_link_distance_metres=min_link_distances_metres[j],
                        max_link_distance_metres=max_link_distances_metres[j],
                        wind_speed_percentile_level=
                        wind_speed_percentile_level,
                        wind_speed_cutoffs_kt=list_of_cutoff_arrays_kt[k])

                    target_names.append(this_target_name)
                    print('Computing values for "{0:s}"...'.format(
                        target_names[-1]))

                    storm_to_events_table = (
                        target_val_utils.create_wind_classification_targets(
                            storm_to_winds_table=storm_to_events_table,
                            min_lead_time_sec=min_lead_times_sec[i],
                            max_lead_time_sec=max_lead_times_sec[i],
                            min_link_distance_metres=
                            min_link_distances_metres[j],
                            max_link_distance_metres=
                            max_link_distances_metres[j],
                            percentile_level=wind_speed_percentile_level,
                            class_cutoffs_kt=list_of_cutoff_arrays_kt[k])
                    )
                else:
                    this_target_name = target_val_utils.target_params_to_name(
                        min_lead_time_sec=min_lead_times_sec[i],
                        max_lead_time_sec=max_lead_times_sec[i],
                        min_link_distance_metres=min_link_distances_metres[j],
                        max_link_distance_metres=max_link_distances_metres[j])

                    target_names.append(this_target_name)
                    print('Computing values for "{0:s}"...'.format(
                        target_names[-1]))

                    storm_to_events_table = (
                        target_val_utils.create_tornado_targets(
                            storm_to_tornadoes_table=storm_to_events_table,
                            min_lead_time_sec=min_lead_times_sec[i],
                            max_lead_time_sec=max_lead_times_sec[i],
                            min_link_distance_metres=
                            min_link_distances_metres[j],
                            max_link_distance_metres=
                            max_link_distances_metres[j])
                    )

    target_file_name = target_val_utils.find_target_file(
        top_directory_name=top_output_dir_name,
        event_type_string=event_type_string, spc_date_string=spc_date_string,
        raise_error_if_missing=False)

    print('Writing target values to: "{0:s}"...'.format(target_file_name))
    target_val_utils.write_target_values(
        storm_to_events_table=storm_to_events_table,
        target_names=target_names, netcdf_file_name=target_file_name)
def _compute_targets_one_day(
        storm_to_events_table, spc_date_string, min_lead_times_sec,
        max_lead_times_sec, min_link_distances_metres,
        max_link_distances_metres, event_type_string,
        wind_speed_percentile_level, wind_speed_cutoffs_kt,
        top_output_dir_name):
    """Computes target values for one SPC date.

    :param storm_to_events_table: pandas DataFrame returned by
        `linkage.read_linkage_file`.
    :param spc_date_string: SPC date (format "yyyymmdd").
    :param min_lead_times_sec: See documentation at top of file.
    :param max_lead_times_sec: Same.
    :param min_link_distances_metres: Same.
    :param max_link_distances_metres: Same.
    :param event_type_string: Same.
    :param wind_speed_percentile_level: Same.
    :param wind_speed_cutoffs_kt: Same.
    :param top_output_dir_name: Same.
    """

    num_lead_time_windows = len(min_lead_times_sec)
    num_distance_buffers = len(min_link_distances_metres)

    if event_type_string == linkage.WIND_EVENT_STRING:
        list_of_cutoff_arrays_kt = general_utils.split_array_by_nan(
            wind_speed_cutoffs_kt)

        # The number of cutoff sets is the number of NaN-delimited
        # sub-arrays, not the length of the flat input array.
        num_cutoff_sets = len(list_of_cutoff_arrays_kt)
    else:
        list_of_cutoff_arrays_kt = None
        num_cutoff_sets = 1

    target_names = []

    for i in range(num_lead_time_windows):
        for j in range(num_distance_buffers):
            for k in range(num_cutoff_sets):
                if event_type_string == linkage.WIND_EVENT_STRING:
                    this_target_name = target_val_utils.target_params_to_name(
                        min_lead_time_sec=min_lead_times_sec[i],
                        max_lead_time_sec=max_lead_times_sec[i],
                        min_link_distance_metres=min_link_distances_metres[j],
                        max_link_distance_metres=max_link_distances_metres[j],
                        wind_speed_percentile_level=
                        wind_speed_percentile_level,
                        wind_speed_cutoffs_kt=list_of_cutoff_arrays_kt[k])

                    target_names.append(this_target_name)
                    print((
                        'Computing labels for "{0:s}" on SPC date {1:s}...'
                    ).format(this_target_name, spc_date_string))

                    storm_to_events_table = (
                        target_val_utils.create_wind_classification_targets(
                            storm_to_winds_table=storm_to_events_table,
                            min_lead_time_sec=min_lead_times_sec[i],
                            max_lead_time_sec=max_lead_times_sec[i],
                            min_link_distance_metres=
                            min_link_distances_metres[j],
                            max_link_distance_metres=
                            max_link_distances_metres[j],
                            percentile_level=wind_speed_percentile_level,
                            class_cutoffs_kt=list_of_cutoff_arrays_kt[k])
                    )
                else:
                    genesis_only = (
                        event_type_string ==
                        linkage.TORNADOGENESIS_EVENT_STRING
                    )

                    this_target_name = target_val_utils.target_params_to_name(
                        min_lead_time_sec=min_lead_times_sec[i],
                        max_lead_time_sec=max_lead_times_sec[i],
                        min_link_distance_metres=min_link_distances_metres[j],
                        max_link_distance_metres=max_link_distances_metres[j],
                        genesis_only=genesis_only)

                    target_names.append(this_target_name)
                    print((
                        'Computing labels for "{0:s}" on SPC date {1:s}...'
                    ).format(this_target_name, spc_date_string))

                    storm_to_events_table = (
                        target_val_utils.create_tornado_targets(
                            storm_to_tornadoes_table=storm_to_events_table,
                            min_lead_time_sec=min_lead_times_sec[i],
                            max_lead_time_sec=max_lead_times_sec[i],
                            min_link_distance_metres=
                            min_link_distances_metres[j],
                            max_link_distance_metres=
                            max_link_distances_metres[j],
                            genesis_only=genesis_only)
                    )

    target_file_name = target_val_utils.find_target_file(
        top_directory_name=top_output_dir_name,
        event_type_string=event_type_string, spc_date_string=spc_date_string,
        raise_error_if_missing=False)

    print('Writing target values to: "{0:s}"...'.format(target_file_name))
    target_val_utils.write_target_values(
        storm_to_events_table=storm_to_events_table,
        target_names=target_names, netcdf_file_name=target_file_name)
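def _demo_target_enumeration():
    """Illustrative sketch of how many targets one SPC date produces.

    This function is not called anywhere; the dimensions are hypothetical.
    `_compute_targets_one_day` names one target variable per combination of
    lead-time window, distance buffer, and (for wind events) cutoff set, so
    the triple loop below yields 2 * 2 * 3 = 12 targets.
    """

    num_lead_time_windows = 2  # e.g., 0-3600 s and 3600-7200 s
    num_distance_buffers = 2   # e.g., 0-5000 m and 5000-10000 m
    num_cutoff_sets = 3        # three NaN-delimited sets of wind cutoffs

    num_targets = 0
    for _ in range(num_lead_time_windows):
        for _ in range(num_distance_buffers):
            for _ in range(num_cutoff_sets):
                num_targets += 1

    assert num_targets == 12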