def _find_input_files(
        top_input_dir_name, first_spc_date_string, last_spc_date_string):
    """Finds input files (containing unshuffled examples).

    :param top_input_dir_name: See documentation at top of file.
    :param first_spc_date_string: Same.
    :param last_spc_date_string: Same.
    :return: input_example_file_names: 1-D list of paths to input files.
    :return: num_input_examples: Total number of examples in these files.
    """

    input_example_file_names = input_examples.find_many_example_files(
        top_directory_name=top_input_dir_name, shuffled=False,
        first_spc_date_string=first_spc_date_string,
        last_spc_date_string=last_spc_date_string,
        raise_error_if_any_missing=False)

    num_input_examples = 0

    for this_file_name in input_example_file_names:
        # Fixed: original used a Python-2 print statement, which is a syntax
        # error in Python 3 and inconsistent with the rest of the file.
        print('Reading data from: "{0:s}"...'.format(this_file_name))

        # Reading metadata only (not full predictor arrays) keeps this cheap.
        this_example_dict = input_examples.read_example_file(
            netcdf_file_name=this_file_name, metadata_only=True)

        num_input_examples += len(
            this_example_dict[input_examples.STORM_IDS_KEY])

    return input_example_file_names, num_input_examples
def _run(input_cnn_file_name, input_upconvnet_file_name, cnn_feature_layer_name,
         top_training_dir_name, first_training_time_string,
         last_training_time_string, top_validation_dir_name,
         first_validation_time_string, last_validation_time_string,
         num_examples_per_batch, num_epochs, num_training_batches_per_epoch,
         num_validation_batches_per_epoch, output_dir_name):
    """Trains upconvnet.

    This is effectively the main method.

    :param input_cnn_file_name: See documentation at top of file.
    :param input_upconvnet_file_name: Same.
    :param cnn_feature_layer_name: Same.
    :param top_training_dir_name: Same.
    :param first_training_time_string: Same.
    :param last_training_time_string: Same.
    :param top_validation_dir_name: Same.
    :param first_validation_time_string: Same.
    :param last_validation_time_string: Same.
    :param num_examples_per_batch: Same.
    :param num_epochs: Same.
    :param num_training_batches_per_epoch: Same.
    :param num_validation_batches_per_epoch: Same.
    :param output_dir_name: Same.
    """

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    # Convert the four time limits from strings to Unix seconds.
    training_start_unix_sec = time_conversion.string_to_unix_sec(
        first_training_time_string, TIME_FORMAT)
    training_end_unix_sec = time_conversion.string_to_unix_sec(
        last_training_time_string, TIME_FORMAT)
    validation_start_unix_sec = time_conversion.string_to_unix_sec(
        first_validation_time_string, TIME_FORMAT)
    validation_end_unix_sec = time_conversion.string_to_unix_sec(
        last_validation_time_string, TIME_FORMAT)

    # Locate shuffled training and validation files.
    training_file_names = input_examples.find_many_example_files(
        top_directory_name=top_training_dir_name, shuffled=True,
        first_batch_number=FIRST_BATCH_NUMBER,
        last_batch_number=LAST_BATCH_NUMBER,
        raise_error_if_any_missing=False)

    validation_file_names = input_examples.find_many_example_files(
        top_directory_name=top_validation_dir_name, shuffled=True,
        first_batch_number=FIRST_BATCH_NUMBER,
        last_batch_number=LAST_BATCH_NUMBER,
        raise_error_if_any_missing=False)

    # Read the trained CNN whose features the upconvnet will learn to invert.
    print('Reading trained CNN from: "{0:s}"...'.format(input_cnn_file_name))
    cnn_model_object = cnn.read_model(input_cnn_file_name)
    cnn_model_object.summary()
    print(SEPARATOR_STRING)

    cnn_metafile_name = cnn.find_metafile(
        model_file_name=input_cnn_file_name, raise_error_if_missing=True)

    print('Reading CNN metadata from: "{0:s}"...'.format(cnn_metafile_name))
    cnn_metadata_dict = cnn.read_model_metadata(cnn_metafile_name)

    # Read the (untrained) upconvnet architecture.
    print('Reading upconvnet architecture from: "{0:s}"...'.format(
        input_upconvnet_file_name))

    upconvnet_model_object = cnn.read_model(input_upconvnet_file_name)

    # TODO(thunderhoser): This is a HACK.
    upconvnet_model_object.compile(
        loss=keras.losses.mean_squared_error,
        optimizer=keras.optimizers.Adam())

    upconvnet_model_object.summary()
    print(SEPARATOR_STRING)

    # "foo.h5" is a dummy model path, used only so find_metafile resolves the
    # metafile location inside the output directory.
    upconvnet_metafile_name = cnn.find_metafile(
        model_file_name='{0:s}/foo.h5'.format(output_dir_name),
        raise_error_if_missing=False)

    print('Writing upconvnet metadata to: "{0:s}"...'.format(
        upconvnet_metafile_name))

    upconvnet.write_model_metadata(
        cnn_file_name=input_cnn_file_name,
        cnn_feature_layer_name=cnn_feature_layer_name, num_epochs=num_epochs,
        num_examples_per_batch=num_examples_per_batch,
        num_training_batches_per_epoch=num_training_batches_per_epoch,
        training_example_file_names=training_file_names,
        first_training_time_unix_sec=training_start_unix_sec,
        last_training_time_unix_sec=training_end_unix_sec,
        num_validation_batches_per_epoch=num_validation_batches_per_epoch,
        validation_example_file_names=validation_file_names,
        first_validation_time_unix_sec=validation_start_unix_sec,
        last_validation_time_unix_sec=validation_end_unix_sec,
        pickle_file_name=upconvnet_metafile_name)
    print(SEPARATOR_STRING)

    upconvnet.train_upconvnet(
        upconvnet_model_object=upconvnet_model_object,
        output_dir_name=output_dir_name, cnn_model_object=cnn_model_object,
        cnn_metadata_dict=cnn_metadata_dict,
        cnn_feature_layer_name=cnn_feature_layer_name, num_epochs=num_epochs,
        num_examples_per_batch=num_examples_per_batch,
        num_training_batches_per_epoch=num_training_batches_per_epoch,
        training_example_file_names=training_file_names,
        first_training_time_unix_sec=training_start_unix_sec,
        last_training_time_unix_sec=training_end_unix_sec,
        num_validation_batches_per_epoch=num_validation_batches_per_epoch,
        validation_example_file_names=validation_file_names,
        first_validation_time_unix_sec=validation_start_unix_sec,
        last_validation_time_unix_sec=validation_end_unix_sec)
def _run(model_file_name, top_example_dir_name, first_spc_date_string,
         last_spc_date_string, num_examples, do_backwards_test,
         separate_radar_heights, downsampling_keys, downsampling_values,
         num_bootstrap_reps, output_file_name):
    """Runs permutation test for predictor importance.

    This is effectively the main method.

    :param model_file_name: See documentation at top of file.
    :param top_example_dir_name: Same.
    :param first_spc_date_string: Same.
    :param last_spc_date_string: Same.
    :param num_examples: Same.
    :param do_backwards_test: Same.
    :param separate_radar_heights: Same.
    :param downsampling_keys: Same.
    :param downsampling_values: Same.
    :param num_bootstrap_reps: Same.
    :param output_file_name: Same.
    """

    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = cnn.read_model(model_file_name)
    metafile_name = cnn.find_metafile(model_file_name=model_file_name)

    print('Reading metadata from: "{0:s}"...'.format(metafile_name))
    cnn_metadata_dict = cnn.read_model_metadata(metafile_name)

    # NOTE(review): a single key/value pair is treated as "no downsampling" --
    # presumably the input args default to a one-element dummy list; confirm
    # against the argument definitions at top of file.
    if len(downsampling_keys) > 1:
        downsampling_dict = dict(
            list(zip(downsampling_keys, downsampling_values)))
    else:
        downsampling_dict = None

    # Reuse the CNN's training options to generate testing data, overriding
    # the sampling fractions, file list, time limits, and batch size below.
    # This mutates the dict inside cnn_metadata_dict in place.
    training_option_dict = cnn_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY]
    training_option_dict[
        trainval_io.SAMPLING_FRACTIONS_KEY] = downsampling_dict

    example_file_names = input_examples.find_many_example_files(
        top_directory_name=top_example_dir_name, shuffled=False,
        first_spc_date_string=first_spc_date_string,
        last_spc_date_string=last_spc_date_string,
        raise_error_if_any_missing=False)

    training_option_dict[trainval_io.EXAMPLE_FILES_KEY] = example_file_names
    training_option_dict[trainval_io.FIRST_STORM_TIME_KEY] = (
        time_conversion.get_start_of_spc_date(first_spc_date_string))
    training_option_dict[trainval_io.LAST_STORM_TIME_KEY] = (
        time_conversion.get_end_of_spc_date(last_spc_date_string))
    training_option_dict[trainval_io.NUM_EXAMPLES_PER_BATCH_KEY] = (
        NUM_EXAMPLES_PER_BATCH)

    # Choose the generator matching the CNN's input format.
    if cnn_metadata_dict[cnn.LAYER_OPERATIONS_KEY] is not None:
        generator_object = testing_io.gridrad_generator_2d_reduced(
            option_dict=training_option_dict,
            desired_num_examples=num_examples,
            list_of_operation_dicts=cnn_metadata_dict[
                cnn.LAYER_OPERATIONS_KEY])
    elif cnn_metadata_dict[cnn.CONV_2D3D_KEY]:
        generator_object = testing_io.myrorss_generator_2d3d(
            option_dict=training_option_dict,
            desired_num_examples=num_examples)
    else:
        generator_object = testing_io.generator_2d_or_3d(
            option_dict=training_option_dict,
            desired_num_examples=num_examples)

    full_storm_id_strings = []
    storm_times_unix_sec = numpy.array([], dtype=int)
    target_values = numpy.array([], dtype=int)
    predictor_matrices = None

    print(SEPARATOR_STRING)

    # Drain the generator (at most one batch per example file), accumulating
    # storm IDs, times, target values, and predictor matrices.
    for _ in range(len(example_file_names)):
        try:
            this_storm_object_dict = next(generator_object)
            print(SEPARATOR_STRING)
        except StopIteration:
            break

        full_storm_id_strings += this_storm_object_dict[
            testing_io.FULL_IDS_KEY]
        storm_times_unix_sec = numpy.concatenate(
            (storm_times_unix_sec,
             this_storm_object_dict[testing_io.STORM_TIMES_KEY]))

        these_target_values = this_storm_object_dict[
            testing_io.TARGET_ARRAY_KEY]

        # Collapse a one-hot target matrix to integer class labels.
        if len(these_target_values.shape) > 1:
            these_target_values = numpy.argmax(these_target_values, axis=1)

        target_values = numpy.concatenate((target_values, these_target_values))

        these_predictor_matrices = this_storm_object_dict[
            testing_io.INPUT_MATRICES_KEY]

        # Concatenate each predictor matrix along the example axis.
        if predictor_matrices is None:
            predictor_matrices = copy.deepcopy(these_predictor_matrices)
        else:
            for k in range(len(predictor_matrices)):
                predictor_matrices[k] = numpy.concatenate(
                    (predictor_matrices[k], these_predictor_matrices[k]))

    print(SEPARATOR_STRING)

    correlation_matrix, predictor_names = correlation.get_pearson_correlations(
        predictor_matrices=predictor_matrices,
        cnn_metadata_dict=cnn_metadata_dict,
        separate_radar_heights=separate_radar_heights)
    print(SEPARATOR_STRING)

    num_predictors = len(predictor_names)

    # Print upper triangle only (includes the trivial i == j diagonal).
    for i in range(num_predictors):
        for j in range(i, num_predictors):
            print(('Pearson correlation between "{0:s}" and "{1:s}" = {2:.3f}'
                   ).format(predictor_names[i], predictor_names[j],
                            correlation_matrix[i, j]))

    print(SEPARATOR_STRING)

    if do_backwards_test:
        result_dict = permutation.run_backwards_test(
            model_object=model_object, predictor_matrices=predictor_matrices,
            target_values=target_values, cnn_metadata_dict=cnn_metadata_dict,
            cost_function=permutation_utils.negative_auc_function,
            separate_radar_heights=separate_radar_heights,
            num_bootstrap_reps=num_bootstrap_reps)
    else:
        result_dict = permutation.run_forward_test(
            model_object=model_object, predictor_matrices=predictor_matrices,
            target_values=target_values, cnn_metadata_dict=cnn_metadata_dict,
            cost_function=permutation_utils.negative_auc_function,
            separate_radar_heights=separate_radar_heights,
            num_bootstrap_reps=num_bootstrap_reps)

    print(SEPARATOR_STRING)

    # Attach provenance (model path, targets, storm IDs/times) to the results.
    result_dict[permutation_utils.MODEL_FILE_KEY] = model_file_name
    result_dict[permutation_utils.TARGET_VALUES_KEY] = target_values
    result_dict[permutation_utils.FULL_IDS_KEY] = full_storm_id_strings
    result_dict[permutation_utils.STORM_TIMES_KEY] = storm_times_unix_sec

    print('Writing results to: "{0:s}"...'.format(output_file_name))
    permutation_utils.write_results(
        result_dict=result_dict, pickle_file_name=output_file_name)
def _run(input_model_file_name, sounding_field_names,
         normalization_type_string, normalization_param_file_name,
         min_normalized_value, max_normalized_value, target_name,
         downsampling_classes, downsampling_fractions, monitor_string,
         weight_loss_function, x_translations_pixels, y_translations_pixels,
         ccw_rotation_angles_deg, noise_standard_deviation, num_noisings,
         flip_in_x, flip_in_y, top_training_dir_name,
         first_training_time_string, last_training_time_string,
         num_examples_per_train_batch, top_validation_dir_name,
         first_validation_time_string, last_validation_time_string,
         num_examples_per_validn_batch, num_epochs,
         num_training_batches_per_epoch, num_validation_batches_per_epoch,
         output_dir_name):
    """Trains CNN with 2-D and 3-D MYRORSS images.

    This is effectively the main method.

    :param input_model_file_name: See documentation at top of file.
    :param sounding_field_names: Same.
    :param normalization_type_string: Same.
    :param normalization_param_file_name: Same.
    :param min_normalized_value: Same.
    :param max_normalized_value: Same.
    :param target_name: Same.
    :param downsampling_classes: Same.
    :param downsampling_fractions: Same.
    :param monitor_string: Same.
    :param weight_loss_function: Same.
    :param x_translations_pixels: Same.
    :param y_translations_pixels: Same.
    :param ccw_rotation_angles_deg: Same.
    :param noise_standard_deviation: Same.
    :param num_noisings: Same.
    :param flip_in_x: Same.
    :param flip_in_y: Same.
    :param top_training_dir_name: Same.
    :param first_training_time_string: Same.
    :param last_training_time_string: Same.
    :param num_examples_per_train_batch: Same.
    :param top_validation_dir_name: Same.
    :param first_validation_time_string: Same.
    :param last_validation_time_string: Same.
    :param num_examples_per_validn_batch: Same.
    :param num_epochs: Same.
    :param num_training_batches_per_epoch: Same.
    :param num_validation_batches_per_epoch: Same.
    :param output_dir_name: Same.
    """

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    # argument_file_name = '{0:s}/input_args.p'.format(output_dir_name)
    # print('Writing input args to: "{0:s}"...'.format(argument_file_name))
    #
    # argument_file_handle = open(argument_file_name, 'wb')
    # pickle.dump(INPUT_ARG_OBJECT.__dict__, argument_file_handle)
    # argument_file_handle.close()
    #
    # return

    # Process input args.
    first_training_time_unix_sec = time_conversion.string_to_unix_sec(
        first_training_time_string, TIME_FORMAT)
    last_training_time_unix_sec = time_conversion.string_to_unix_sec(
        last_training_time_string, TIME_FORMAT)
    first_validation_time_unix_sec = time_conversion.string_to_unix_sec(
        first_validation_time_string, TIME_FORMAT)
    last_validation_time_unix_sec = time_conversion.string_to_unix_sec(
        last_validation_time_string, TIME_FORMAT)

    # An empty or 'None' first element means "do not use soundings".
    if sounding_field_names[0] in ['', 'None']:
        sounding_field_names = None

    # A single class/fraction pair is treated as "no downsampling".
    if len(downsampling_classes) > 1:
        downsampling_dict = dict(
            list(zip(downsampling_classes, downsampling_fractions)))
    else:
        downsampling_dict = None

    # NOTE(review): assumes x- and y-translations are equal-length numeric
    # arrays, so that a single (0, 0) pair disables translation augmentation
    # -- confirm against the input-arg definitions.
    if (len(x_translations_pixels) == 1 and
            x_translations_pixels + y_translations_pixels == 0):
        x_translations_pixels = None
        y_translations_pixels = None

    # A single zero angle disables rotation augmentation.
    if len(ccw_rotation_angles_deg) == 1 and ccw_rotation_angles_deg[0] == 0:
        ccw_rotation_angles_deg = None

    if num_noisings <= 0:
        num_noisings = 0
        noise_standard_deviation = None

    # Set output locations.
    output_model_file_name = '{0:s}/model.h5'.format(output_dir_name)
    history_file_name = '{0:s}/model_history.csv'.format(output_dir_name)
    tensorboard_dir_name = '{0:s}/tensorboard'.format(output_dir_name)
    model_metafile_name = '{0:s}/model_metadata.p'.format(output_dir_name)

    # Find training and validation files.
    training_file_names = input_examples.find_many_example_files(
        top_directory_name=top_training_dir_name, shuffled=True,
        first_batch_number=FIRST_BATCH_NUMBER,
        last_batch_number=LAST_BATCH_NUMBER,
        raise_error_if_any_missing=False)

    validation_file_names = input_examples.find_many_example_files(
        top_directory_name=top_validation_dir_name, shuffled=True,
        first_batch_number=FIRST_BATCH_NUMBER,
        last_batch_number=LAST_BATCH_NUMBER,
        raise_error_if_any_missing=False)

    # Read architecture.
    print(
        'Reading architecture from: "{0:s}"...'.format(input_model_file_name))
    model_object = cnn.read_model(input_model_file_name)
    # model_object = keras.models.clone_model(model_object)

    # TODO(thunderhoser): This is a HACK.
    model_object.compile(
        loss=keras.losses.binary_crossentropy,
        optimizer=keras.optimizers.Adam(),
        metrics=cnn_setup.DEFAULT_METRIC_FUNCTION_LIST)

    print(SEPARATOR_STRING)
    model_object.summary()
    print(SEPARATOR_STRING)

    # Write metadata.
    metadata_dict = {
        cnn.NUM_EPOCHS_KEY: num_epochs,
        cnn.NUM_TRAINING_BATCHES_KEY: num_training_batches_per_epoch,
        cnn.NUM_VALIDATION_BATCHES_KEY: num_validation_batches_per_epoch,
        cnn.MONITOR_STRING_KEY: monitor_string,
        cnn.WEIGHT_LOSS_FUNCTION_KEY: weight_loss_function,
        cnn.CONV_2D3D_KEY: True,
        cnn.VALIDATION_FILES_KEY: validation_file_names,
        cnn.FIRST_VALIDN_TIME_KEY: first_validation_time_unix_sec,
        cnn.LAST_VALIDN_TIME_KEY: last_validation_time_unix_sec,
        cnn.NUM_EX_PER_VALIDN_BATCH_KEY: num_examples_per_validn_batch
    }

    if isinstance(model_object.input, list):
        list_of_input_tensors = model_object.input
    else:
        list_of_input_tensors = [model_object.input]

    # Two input tensors => separate reflectivity input, so the reflectivity
    # grid is half the size of the first input tensor's grid.
    upsample_refl = len(list_of_input_tensors) == 2
    num_grid_rows = list_of_input_tensors[0].get_shape().as_list()[1]
    num_grid_columns = list_of_input_tensors[0].get_shape().as_list()[2]

    if upsample_refl:
        num_grid_rows = int(numpy.round(num_grid_rows / 2))
        num_grid_columns = int(numpy.round(num_grid_columns / 2))

    training_option_dict = {
        trainval_io.EXAMPLE_FILES_KEY: training_file_names,
        trainval_io.TARGET_NAME_KEY: target_name,
        trainval_io.FIRST_STORM_TIME_KEY: first_training_time_unix_sec,
        trainval_io.LAST_STORM_TIME_KEY: last_training_time_unix_sec,
        trainval_io.NUM_EXAMPLES_PER_BATCH_KEY: num_examples_per_train_batch,
        trainval_io.RADAR_FIELDS_KEY:
            input_examples.AZIMUTHAL_SHEAR_FIELD_NAMES,
        trainval_io.RADAR_HEIGHTS_KEY: REFLECTIVITY_HEIGHTS_M_AGL,
        trainval_io.SOUNDING_FIELDS_KEY: sounding_field_names,
        trainval_io.SOUNDING_HEIGHTS_KEY: SOUNDING_HEIGHTS_M_AGL,
        trainval_io.NUM_ROWS_KEY: num_grid_rows,
        trainval_io.NUM_COLUMNS_KEY: num_grid_columns,
        trainval_io.NORMALIZATION_TYPE_KEY: normalization_type_string,
        trainval_io.NORMALIZATION_FILE_KEY: normalization_param_file_name,
        trainval_io.MIN_NORMALIZED_VALUE_KEY: min_normalized_value,
        trainval_io.MAX_NORMALIZED_VALUE_KEY: max_normalized_value,
        trainval_io.BINARIZE_TARGET_KEY: False,
        trainval_io.SAMPLING_FRACTIONS_KEY: downsampling_dict,
        trainval_io.LOOP_ONCE_KEY: False,
        trainval_io.X_TRANSLATIONS_KEY: x_translations_pixels,
        trainval_io.Y_TRANSLATIONS_KEY: y_translations_pixels,
        trainval_io.ROTATION_ANGLES_KEY: ccw_rotation_angles_deg,
        trainval_io.NOISE_STDEV_KEY: noise_standard_deviation,
        trainval_io.NUM_NOISINGS_KEY: num_noisings,
        trainval_io.FLIP_X_KEY: flip_in_x,
        trainval_io.FLIP_Y_KEY: flip_in_y,
        trainval_io.UPSAMPLE_REFLECTIVITY_KEY: upsample_refl
    }

    print('Writing metadata to: "{0:s}"...'.format(model_metafile_name))
    cnn.write_model_metadata(
        pickle_file_name=model_metafile_name, metadata_dict=metadata_dict,
        training_option_dict=training_option_dict)

    cnn.train_cnn_2d3d_myrorss(
        model_object=model_object, model_file_name=output_model_file_name,
        history_file_name=history_file_name,
        tensorboard_dir_name=tensorboard_dir_name, num_epochs=num_epochs,
        num_training_batches_per_epoch=num_training_batches_per_epoch,
        training_option_dict=training_option_dict,
        monitor_string=monitor_string,
        weight_loss_function=weight_loss_function,
        num_validation_batches_per_epoch=num_validation_batches_per_epoch,
        validation_file_names=validation_file_names,
        first_validn_time_unix_sec=first_validation_time_unix_sec,
        last_validn_time_unix_sec=last_validation_time_unix_sec,
        num_examples_per_validn_batch=num_examples_per_validn_batch)
def _run(model_file_name, top_example_dir_name, first_spc_date_string,
         last_spc_date_string, num_examples, num_bootstrap_reps,
         confidence_level, class_fraction_keys, class_fraction_values,
         output_dir_name):
    """Evaluates CNN (convolutional neural net) predictions.

    This is effectively the main method.

    :param model_file_name: See documentation at top of file.
    :param top_example_dir_name: Same.
    :param first_spc_date_string: Same.
    :param last_spc_date_string: Same.
    :param num_examples: Same.
    :param num_bootstrap_reps: Same.
    :param confidence_level: Same.
    :param class_fraction_keys: Same.
    :param class_fraction_values: Same.
    :param output_dir_name: Same.
    :raises: ValueError: if the model does multi-class classification.
    """

    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = cnn.read_model(model_file_name)

    num_output_neurons = (
        model_object.layers[-1].output.get_shape().as_list()[-1]
    )

    # This script handles only binary classification (<= 2 output neurons).
    if num_output_neurons > 2:
        error_string = (
            'The model has {0:d} output neurons, which suggests {0:d}-class '
            'classification. This script handles only binary classification.'
        ).format(num_output_neurons)

        raise ValueError(error_string)

    soundings_only = False

    if isinstance(model_object.input, list):
        list_of_input_tensors = model_object.input
    else:
        list_of_input_tensors = [model_object.input]

    # One input tensor with a single spatial dimension => sounding-only model.
    if len(list_of_input_tensors) == 1:
        these_spatial_dim = numpy.array(
            list_of_input_tensors[0].get_shape().as_list()[1:-1], dtype=int)
        soundings_only = len(these_spatial_dim) == 1

    model_directory_name, _ = os.path.split(model_file_name)
    model_metafile_name = '{0:s}/model_metadata.p'.format(model_directory_name)

    print('Reading metadata from: "{0:s}"...'.format(model_metafile_name))
    model_metadata_dict = cnn.read_model_metadata(model_metafile_name)
    training_option_dict = model_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY]

    # A single key/value pair is treated as "no downsampling".
    if len(class_fraction_keys) > 1:
        class_to_sampling_fraction_dict = dict(
            list(zip(class_fraction_keys, class_fraction_values)))
    else:
        class_to_sampling_fraction_dict = None

    # Reuse the CNN's training options to generate evaluation data, overriding
    # sampling fractions, file list, and time limits (mutates in place).
    training_option_dict[
        trainval_io.SAMPLING_FRACTIONS_KEY] = class_to_sampling_fraction_dict

    example_file_names = input_examples.find_many_example_files(
        top_directory_name=top_example_dir_name, shuffled=False,
        first_spc_date_string=first_spc_date_string,
        last_spc_date_string=last_spc_date_string,
        raise_error_if_any_missing=False)

    training_option_dict[trainval_io.EXAMPLE_FILES_KEY] = example_file_names
    training_option_dict[trainval_io.FIRST_STORM_TIME_KEY] = (
        time_conversion.get_start_of_spc_date(first_spc_date_string))
    training_option_dict[trainval_io.LAST_STORM_TIME_KEY] = (
        time_conversion.get_end_of_spc_date(last_spc_date_string))

    # Choose the generator matching the model's input format.
    if soundings_only:
        generator_object = testing_io.sounding_generator(
            option_dict=training_option_dict, num_examples_total=num_examples)
    elif model_metadata_dict[cnn.LAYER_OPERATIONS_KEY] is not None:
        generator_object = testing_io.gridrad_generator_2d_reduced(
            option_dict=training_option_dict,
            list_of_operation_dicts=model_metadata_dict[
                cnn.LAYER_OPERATIONS_KEY],
            num_examples_total=num_examples)
    elif model_metadata_dict[cnn.CONV_2D3D_KEY]:
        generator_object = testing_io.myrorss_generator_2d3d(
            option_dict=training_option_dict, num_examples_total=num_examples)
    else:
        generator_object = testing_io.generator_2d_or_3d(
            option_dict=training_option_dict, num_examples_total=num_examples)

    include_soundings = (
        training_option_dict[trainval_io.SOUNDING_FIELDS_KEY] is not None)

    forecast_probabilities = numpy.array([])
    observed_labels = numpy.array([], dtype=int)

    # Drain the generator (at most one batch per example file), applying the
    # model to each batch and accumulating probabilities and labels.
    for _ in range(len(example_file_names)):
        try:
            this_storm_object_dict = next(generator_object)
            print(SEPARATOR_STRING)
        except StopIteration:
            break

        observed_labels = numpy.concatenate(
            (observed_labels,
             this_storm_object_dict[testing_io.TARGET_ARRAY_KEY]))

        if soundings_only:
            these_predictor_matrices = [
                this_storm_object_dict[testing_io.SOUNDING_MATRIX_KEY]
            ]
        else:
            these_predictor_matrices = this_storm_object_dict[
                testing_io.INPUT_MATRICES_KEY]

        # By convention the sounding matrix, when present, is last.
        if include_soundings:
            this_sounding_matrix = these_predictor_matrices[-1]
        else:
            this_sounding_matrix = None

        # Dispatch to the apply-function matching the model's input format.
        if soundings_only:
            this_probability_matrix = cnn.apply_cnn_soundings_only(
                model_object=model_object,
                sounding_matrix=this_sounding_matrix, verbose=True)
        elif model_metadata_dict[cnn.CONV_2D3D_KEY]:
            if training_option_dict[trainval_io.UPSAMPLE_REFLECTIVITY_KEY]:
                this_probability_matrix = cnn.apply_2d_or_3d_cnn(
                    model_object=model_object,
                    radar_image_matrix=these_predictor_matrices[0],
                    sounding_matrix=this_sounding_matrix, verbose=True)
            else:
                this_probability_matrix = cnn.apply_2d3d_cnn(
                    model_object=model_object,
                    reflectivity_matrix_dbz=these_predictor_matrices[0],
                    azimuthal_shear_matrix_s01=these_predictor_matrices[1],
                    sounding_matrix=this_sounding_matrix, verbose=True)
        else:
            this_probability_matrix = cnn.apply_2d_or_3d_cnn(
                model_object=model_object,
                radar_image_matrix=these_predictor_matrices[0],
                sounding_matrix=this_sounding_matrix, verbose=True)

        print(SEPARATOR_STRING)

        # Keep only the positive-class (last-column) probability.
        forecast_probabilities = numpy.concatenate(
            (forecast_probabilities, this_probability_matrix[:, -1]))

    model_eval_helper.run_evaluation(
        forecast_probabilities=forecast_probabilities,
        observed_labels=observed_labels,
        num_bootstrap_reps=num_bootstrap_reps,
        confidence_level=confidence_level, output_dir_name=output_dir_name)
def _run(top_example_dir_name, first_spc_date_string, last_spc_date_string,
         use_low_level, use_mid_level, num_radar_rows, num_radar_columns,
         output_dir_name):
    """Uses az-shear thresholds to make probabilistic tornado predictions.

    This is effectively the main method.

    :param top_example_dir_name: See documentation at top of file.
    :param first_spc_date_string: Same.
    :param last_spc_date_string: Same.
    :param use_low_level: Same.
    :param use_mid_level: Same.
    :param num_radar_rows: Same.
    :param num_radar_columns: Same.
    :param output_dir_name: Same.
    :raises: ValueError: if `use_low_level == use_mid_level == False`.
    """

    # Non-positive grid size means "use the full grid in the files".
    if num_radar_rows <= 0 or num_radar_columns <= 0:
        num_reflectivity_rows = None
        num_reflectivity_columns = None
    else:
        num_reflectivity_rows = int(numpy.round(num_radar_rows / 2))
        num_reflectivity_columns = int(numpy.round(num_radar_columns / 2))

    if not (use_low_level or use_mid_level):
        error_string = (
            'At least one of `{0:s}` and `{1:s}` must be true.'
        ).format(LOW_LEVEL_ARG_NAME, MID_LEVEL_ARG_NAME)

        raise ValueError(error_string)

    radar_field_names = []
    if use_low_level:
        radar_field_names.append(radar_utils.LOW_LEVEL_SHEAR_NAME)
    if use_mid_level:
        radar_field_names.append(radar_utils.MID_LEVEL_SHEAR_NAME)

    example_file_names = input_examples.find_many_example_files(
        top_directory_name=top_example_dir_name, shuffled=False,
        first_spc_date_string=first_spc_date_string,
        last_spc_date_string=last_spc_date_string,
        raise_error_if_any_missing=False)

    option_dict = {
        trainval_io.EXAMPLE_FILES_KEY: example_file_names,
        trainval_io.NUM_EXAMPLES_PER_BATCH_KEY: NUM_EXAMPLES_PER_BATCH,
        trainval_io.FIRST_STORM_TIME_KEY:
            time_conversion.get_start_of_spc_date(first_spc_date_string),
        trainval_io.LAST_STORM_TIME_KEY:
            time_conversion.get_end_of_spc_date(last_spc_date_string),
        trainval_io.RADAR_FIELDS_KEY: radar_field_names,
        trainval_io.RADAR_HEIGHTS_KEY: numpy.array([1000], dtype=int),
        trainval_io.NUM_ROWS_KEY: num_reflectivity_rows,
        trainval_io.NUM_COLUMNS_KEY: num_reflectivity_columns,
        trainval_io.UPSAMPLE_REFLECTIVITY_KEY: False,
        trainval_io.SOUNDING_FIELDS_KEY: None,
        trainval_io.SOUNDING_HEIGHTS_KEY: None,
        trainval_io.NORMALIZATION_TYPE_KEY: None,
        trainval_io.TARGET_NAME_KEY: TARGET_NAME,
        trainval_io.BINARIZE_TARGET_KEY: False,
        trainval_io.SAMPLING_FRACTIONS_KEY: None
    }

    generator_object = testing_io.myrorss_generator_2d3d(
        option_dict=option_dict, desired_num_examples=LARGE_INTEGER)

    full_storm_id_strings = []
    storm_times_unix_sec = numpy.array([], dtype=int)
    predictor_values = numpy.array([], dtype=float)
    observed_labels = numpy.array([], dtype=int)

    # Drain the generator, accumulating storm IDs, times, max az-shear
    # values, and observed labels.
    while True:
        try:
            this_storm_object_dict = next(generator_object)
            print(SEPARATOR_STRING)
        except StopIteration:
            break

        full_storm_id_strings += this_storm_object_dict[
            testing_io.FULL_IDS_KEY]
        storm_times_unix_sec = numpy.concatenate((
            storm_times_unix_sec,
            this_storm_object_dict[testing_io.STORM_TIMES_KEY]
        ))

        this_shear_matrix_s01 = this_storm_object_dict[
            testing_io.INPUT_MATRICES_KEY][1]

        # Removed leftover debugging statement that printed the raw matrix
        # shape with no label.

        # Predictor = max azimuthal shear over each storm-centered grid.
        these_predictor_values = numpy.max(
            this_shear_matrix_s01, axis=(1, 2, 3)
        )
        predictor_values = numpy.concatenate((
            predictor_values, these_predictor_values
        ))

        observed_labels = numpy.concatenate((
            observed_labels,
            this_storm_object_dict[testing_io.TARGET_ARRAY_KEY]
        ))

    # Convert ranks of max shear to forecast probabilities in (0, 1].
    forecast_probabilities = (
        rankdata(predictor_values, method='average') / len(predictor_values)
    )
    forecast_probabilities = numpy.reshape(
        forecast_probabilities, (len(forecast_probabilities), 1)
    )
    class_probability_matrix = numpy.hstack((
        1. - forecast_probabilities, forecast_probabilities
    ))

    output_file_name = prediction_io.find_ungridded_file(
        directory_name=output_dir_name, raise_error_if_missing=False)

    print('Writing results to: "{0:s}"...'.format(output_file_name))
    prediction_io.write_ungridded_predictions(
        netcdf_file_name=output_file_name,
        class_probability_matrix=class_probability_matrix,
        observed_labels=observed_labels, storm_ids=full_storm_id_strings,
        storm_times_unix_sec=storm_times_unix_sec, target_name=TARGET_NAME,
        model_file_name='None')
def _run(input_cnn_file_name, input_upconvnet_file_name, cnn_feature_layer_name,
         top_training_dir_name, first_training_time_string,
         last_training_time_string, top_validation_dir_name,
         first_validation_time_string, last_validation_time_string,
         num_examples_per_batch, num_epochs, num_training_batches_per_epoch,
         num_validation_batches_per_epoch, output_model_file_name):
    """Trains upconvnet.

    This is effectively the main method.

    :param input_cnn_file_name: See documentation at top of file.
    :param input_upconvnet_file_name: Same.
    :param cnn_feature_layer_name: Same.
    :param top_training_dir_name: Same.
    :param first_training_time_string: Same.
    :param last_training_time_string: Same.
    :param top_validation_dir_name: Same.
    :param first_validation_time_string: Same.
    :param last_validation_time_string: Same.
    :param num_examples_per_batch: Same.
    :param num_epochs: Same.
    :param num_training_batches_per_epoch: Same.
    :param num_validation_batches_per_epoch: Same.
    :param output_model_file_name: Same.
    """

    # Process input args, then find training and validation files.
    first_training_time_unix_sec = time_conversion.string_to_unix_sec(
        first_training_time_string, TIME_FORMAT)
    last_training_time_unix_sec = time_conversion.string_to_unix_sec(
        last_training_time_string, TIME_FORMAT)
    first_validation_time_unix_sec = time_conversion.string_to_unix_sec(
        first_validation_time_string, TIME_FORMAT)
    last_validation_time_unix_sec = time_conversion.string_to_unix_sec(
        last_validation_time_string, TIME_FORMAT)

    training_file_names = input_examples.find_many_example_files(
        top_directory_name=top_training_dir_name, shuffled=True,
        first_batch_number=FIRST_BATCH_NUMBER,
        last_batch_number=LAST_BATCH_NUMBER,
        raise_error_if_any_missing=False)

    validation_file_names = input_examples.find_many_example_files(
        top_directory_name=top_validation_dir_name, shuffled=True,
        first_batch_number=FIRST_BATCH_NUMBER,
        last_batch_number=LAST_BATCH_NUMBER,
        raise_error_if_any_missing=False)

    # Fixed: all prints in this function were Python-2 print statements,
    # which are syntax errors in Python 3 and inconsistent with the rest of
    # the file; converted to print() calls (same output).
    print('Reading trained CNN from: "{0:s}"...'.format(input_cnn_file_name))
    cnn_model_object = cnn.read_model(input_cnn_file_name)
    cnn_model_object.summary()
    print(SEPARATOR_STRING)

    # The CNN metafile lives next to the CNN model file.
    cnn_metafile_name = '{0:s}/model_metadata.p'.format(
        os.path.split(input_cnn_file_name)[0])

    print('Reading CNN metadata from: "{0:s}"...'.format(cnn_metafile_name))
    cnn_metadata_dict = cnn.read_model_metadata(cnn_metafile_name)

    print('Reading upconvnet architecture from: "{0:s}"...'.format(
        input_upconvnet_file_name))

    upconvnet_model_object = cnn.read_model(input_upconvnet_file_name)
    upconvnet_model_object = keras.models.clone_model(upconvnet_model_object)

    # TODO(thunderhoser): This is a HACK.
    upconvnet_model_object.compile(
        loss=keras.losses.mean_squared_error,
        optimizer=keras.optimizers.Adam())

    print(SEPARATOR_STRING)
    upconvnet_model_object.summary()
    print(SEPARATOR_STRING)

    upconvnet_metafile_name = '{0:s}/model_metadata.p'.format(
        os.path.split(output_model_file_name)[0])

    print('Writing upconvnet metadata to: "{0:s}"...'.format(
        upconvnet_metafile_name))

    upconvnet.write_model_metadata(
        cnn_file_name=input_cnn_file_name,
        cnn_feature_layer_name=cnn_feature_layer_name, num_epochs=num_epochs,
        num_examples_per_batch=num_examples_per_batch,
        num_training_batches_per_epoch=num_training_batches_per_epoch,
        training_example_file_names=training_file_names,
        first_training_time_unix_sec=first_training_time_unix_sec,
        last_training_time_unix_sec=last_training_time_unix_sec,
        num_validation_batches_per_epoch=num_validation_batches_per_epoch,
        validation_example_file_names=validation_file_names,
        first_validation_time_unix_sec=first_validation_time_unix_sec,
        last_validation_time_unix_sec=last_validation_time_unix_sec,
        pickle_file_name=upconvnet_metafile_name)
    print(SEPARATOR_STRING)

    upconvnet.train_upconvnet(
        upconvnet_model_object=upconvnet_model_object,
        output_model_file_name=output_model_file_name,
        cnn_model_object=cnn_model_object,
        cnn_feature_layer_name=cnn_feature_layer_name,
        cnn_metadata_dict=cnn_metadata_dict, num_epochs=num_epochs,
        num_examples_per_batch=num_examples_per_batch,
        num_training_batches_per_epoch=num_training_batches_per_epoch,
        training_example_file_names=training_file_names,
        first_training_time_unix_sec=first_training_time_unix_sec,
        last_training_time_unix_sec=last_training_time_unix_sec,
        num_validation_batches_per_epoch=num_validation_batches_per_epoch,
        validation_example_file_names=validation_file_names,
        first_validation_time_unix_sec=first_validation_time_unix_sec,
        last_validation_time_unix_sec=last_validation_time_unix_sec)
def _run(upconvnet_file_name, top_example_dir_name, first_spc_date_string,
         last_spc_date_string, num_examples_per_date, downsampling_keys,
         downsampling_values, top_output_dir_name):
    """Makes predictions from trained upconvnet.

    This is effectively the main method.

    :param upconvnet_file_name: See documentation at top of file.
    :param top_example_dir_name: Same.
    :param first_spc_date_string: Same.
    :param last_spc_date_string: Same.
    :param num_examples_per_date: Same.
    :param downsampling_keys: Same.
    :param downsampling_values: Same.
    :param top_output_dir_name: Same.
    """

    # Read the trained upconvnet and its metadata.
    print('Reading upconvnet from: "{0:s}"...'.format(upconvnet_file_name))
    ucn_model_object = cnn.read_model(upconvnet_file_name)
    ucn_metafile_name = cnn.find_metafile(upconvnet_file_name)

    print('Reading upconvnet metadata from: "{0:s}"...'.format(
        ucn_metafile_name))
    ucn_metadata_dict = upconvnet.read_model_metadata(ucn_metafile_name)

    # Read the CNN that the upconvnet inverts, plus its metadata.
    cnn_file_name = ucn_metadata_dict[upconvnet.CNN_FILE_KEY]
    print('Reading CNN from: "{0:s}"...'.format(cnn_file_name))
    cnn_model_object = cnn.read_model(cnn_file_name)
    cnn_metafile_name = cnn.find_metafile(cnn_file_name)

    print('Reading CNN metadata from: "{0:s}"...'.format(cnn_metafile_name))
    cnn_metadata_dict = cnn.read_model_metadata(cnn_metafile_name)
    training_option_dict = cnn_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY]

    # A single-element key list means "no downsampling".
    if len(downsampling_keys) > 1:
        downsampling_dict = dict(zip(downsampling_keys, downsampling_values))
    else:
        downsampling_dict = None

    # Override the CNN's training options so the generator reads the examples
    # requested here rather than the original training set.
    training_option_dict[
        trainval_io.SAMPLING_FRACTIONS_KEY] = downsampling_dict
    training_option_dict[trainval_io.NUM_EXAMPLES_PER_BATCH_KEY] = (
        NUM_EXAMPLES_PER_BATCH)
    training_option_dict[
        trainval_io.FIRST_STORM_TIME_KEY] = EARLY_TIME_UNIX_SEC
    training_option_dict[trainval_io.LAST_STORM_TIME_KEY] = LATE_TIME_UNIX_SEC

    # Find example files for the requested SPC-date range.
    example_file_names = input_examples.find_many_example_files(
        top_directory_name=top_example_dir_name, shuffled=False,
        first_spc_date_string=first_spc_date_string,
        last_spc_date_string=last_spc_date_string,
        raise_error_if_any_missing=False)

    # Apply the upconvnet to each file in turn.
    cnn_feature_layer_name = ucn_metadata_dict[
        upconvnet.CNN_FEATURE_LAYER_KEY]

    for this_file_name in example_file_names:
        _apply_upconvnet_one_file(
            example_file_name=this_file_name,
            num_examples=num_examples_per_date,
            upconvnet_model_object=ucn_model_object,
            cnn_model_object=cnn_model_object,
            cnn_metadata_dict=cnn_metadata_dict,
            cnn_feature_layer_name=cnn_feature_layer_name,
            upconvnet_file_name=upconvnet_file_name,
            top_output_dir_name=top_output_dir_name)

        print(SEPARATOR_STRING)
def _run(top_example_dir_name, first_spc_date_string, last_spc_date_string,
         min_percentile_level, max_percentile_level, num_radar_rows,
         num_radar_columns, output_file_name):
    """Finds normalization parameters for GridRad data.

    This is effectively the main method.

    :param top_example_dir_name: See documentation at top of file.
    :param first_spc_date_string: Same.
    :param last_spc_date_string: Same.
    :param min_percentile_level: Same.
    :param max_percentile_level: Same.
    :param num_radar_rows: Same.
    :param num_radar_columns: Same.
    :param output_file_name: Same.
    """

    # Non-positive grid dimensions mean "keep the full grid".
    if num_radar_rows <= 0:
        num_radar_rows = None
    if num_radar_columns <= 0:
        num_radar_columns = None

    first_time_unix_sec = time_conversion.get_start_of_spc_date(
        first_spc_date_string)
    last_time_unix_sec = time_conversion.get_end_of_spc_date(
        last_spc_date_string)

    # example_file_names = input_examples.find_many_example_files(
    #     top_directory_name=top_example_dir_name, shuffled=True,
    #     first_batch_number=0, last_batch_number=LARGE_INTEGER,
    #     raise_error_if_any_missing=False)

    example_file_names = input_examples.find_many_example_files(
        top_directory_name=top_example_dir_name, shuffled=False,
        first_spc_date_string=first_spc_date_string,
        last_spc_date_string=last_spc_date_string,
        raise_error_if_any_missing=False)

    # Read metadata from the first file to learn which fields/heights exist.
    this_example_dict = input_examples.read_example_file(
        netcdf_file_name=example_file_names[0], read_all_target_vars=True)

    sounding_field_names = this_example_dict[
        input_examples.SOUNDING_FIELDS_KEY]
    sounding_heights_m_agl = this_example_dict[
        input_examples.SOUNDING_HEIGHTS_KEY]

    # -1 is a sentinel meaning reflectivity and azimuthal shear are stored as
    # separate matrices (REFL_IMAGE_MATRIX_KEY present), rather than one
    # radar-image matrix whose rank determines the dimensionality.
    if input_examples.REFL_IMAGE_MATRIX_KEY in this_example_dict:
        num_radar_dimensions = -1
    else:
        num_radar_dimensions = (len(
            this_example_dict[input_examples.RADAR_IMAGE_MATRIX_KEY].shape
        ) - 2)

    # Build the list of field/height pairs for which params will be computed.
    # TODO(thunderhoser): Put this in separate method.
    if num_radar_dimensions == 3:
        # 3-D examples: every field exists at every height.
        radar_field_names = this_example_dict[input_examples.RADAR_FIELDS_KEY]
        radar_heights_m_agl = this_example_dict[
            input_examples.RADAR_HEIGHTS_KEY]

        radar_field_name_by_pair = []
        radar_height_by_pair_m_agl = numpy.array([], dtype=int)

        for this_field_name in radar_field_names:
            radar_field_name_by_pair += (
                [this_field_name] * len(radar_heights_m_agl))
            radar_height_by_pair_m_agl = numpy.concatenate(
                (radar_height_by_pair_m_agl, radar_heights_m_agl))

    elif num_radar_dimensions == 2:
        # 2-D examples: field/height pairs are stored explicitly.
        radar_field_name_by_pair = this_example_dict[
            input_examples.RADAR_FIELDS_KEY]
        radar_height_by_pair_m_agl = this_example_dict[
            input_examples.RADAR_HEIGHTS_KEY]

        radar_field_names = list(set(radar_field_name_by_pair))
        radar_field_names.sort()
    else:
        # Separate reflectivity (many heights) and az-shear (one height each).
        az_shear_field_names = this_example_dict[
            input_examples.RADAR_FIELDS_KEY]
        radar_field_names = [radar_utils.REFL_NAME] + az_shear_field_names

        refl_heights_m_agl = this_example_dict[
            input_examples.RADAR_HEIGHTS_KEY]
        radar_field_name_by_pair = (
            [radar_utils.REFL_NAME] * len(refl_heights_m_agl) +
            az_shear_field_names)

        az_shear_heights_m_agl = numpy.full(
            len(az_shear_field_names), radar_utils.SHEAR_HEIGHT_M_ASL)
        radar_height_by_pair_m_agl = numpy.concatenate(
            (refl_heights_m_agl, az_shear_heights_m_agl)).astype(int)

    # Initialize parameters.  Each accumulator tracks the value count, running
    # mean, and running mean of squares, which later yield mean and stdev.
    orig_parameter_dict = {
        NUM_VALUES_KEY: 0,
        MEAN_VALUE_KEY: 0.,
        MEAN_OF_SQUARES_KEY: 0.
    }

    radar_z_score_dict_no_height = {}
    radar_z_score_dict_with_height = {}
    radar_freq_dict_no_height = {}
    num_radar_fields = len(radar_field_names)
    num_radar_field_height_pairs = len(radar_field_name_by_pair)

    for j in range(num_radar_fields):
        radar_z_score_dict_no_height[radar_field_names[j]] = copy.deepcopy(
            orig_parameter_dict)
        radar_freq_dict_no_height[radar_field_names[j]] = {}

    # Keys of this dict are (field name, height) tuples.
    for k in range(num_radar_field_height_pairs):
        radar_z_score_dict_with_height[
            radar_field_name_by_pair[k], radar_height_by_pair_m_agl[k]
        ] = copy.deepcopy(orig_parameter_dict)

    sounding_z_score_dict_no_height = {}
    sounding_z_score_dict_with_height = {}
    sounding_freq_dict_no_height = {}
    num_sounding_fields = len(sounding_field_names)
    num_sounding_heights = len(sounding_heights_m_agl)

    for j in range(num_sounding_fields):
        sounding_z_score_dict_no_height[sounding_field_names[j]] = (
            copy.deepcopy(orig_parameter_dict))
        sounding_freq_dict_no_height[sounding_field_names[j]] = {}

        for k in range(num_sounding_heights):
            sounding_z_score_dict_with_height[
                sounding_field_names[j], sounding_heights_m_agl[k]
            ] = copy.deepcopy(orig_parameter_dict)

    # Accumulate statistics over all example files.
    for this_example_file_name in example_file_names:
        print('Reading data from: "{0:s}"...'.format(this_example_file_name))
        this_example_dict = input_examples.read_example_file(
            netcdf_file_name=this_example_file_name,
            read_all_target_vars=True,
            num_rows_to_keep=num_radar_rows,
            num_columns_to_keep=num_radar_columns,
            first_time_to_keep_unix_sec=first_time_unix_sec,
            last_time_to_keep_unix_sec=last_time_unix_sec)

        this_num_examples = len(
            this_example_dict[input_examples.FULL_IDS_KEY])
        if this_num_examples == 0:
            continue

        # Update per-field params (all heights pooled together).
        for j in range(num_radar_fields):
            print('Updating normalization params for "{0:s}"...'.format(
                radar_field_names[j]))

            if num_radar_dimensions == 3:
                this_field_index = this_example_dict[
                    input_examples.RADAR_FIELDS_KEY].index(
                        radar_field_names[j])
                this_radar_matrix = this_example_dict[
                    input_examples.RADAR_IMAGE_MATRIX_KEY][
                        ..., this_field_index]
            elif num_radar_dimensions == 2:
                # A field may appear at several heights; take all channels.
                all_field_names = numpy.array(
                    this_example_dict[input_examples.RADAR_FIELDS_KEY])
                these_field_indices = numpy.where(
                    all_field_names == radar_field_names[j])[0]
                this_radar_matrix = this_example_dict[
                    input_examples.RADAR_IMAGE_MATRIX_KEY][
                        ..., these_field_indices]
            else:
                if radar_field_names[j] == radar_utils.REFL_NAME:
                    this_radar_matrix = this_example_dict[
                        input_examples.REFL_IMAGE_MATRIX_KEY][..., 0]
                else:
                    this_field_index = this_example_dict[
                        input_examples.RADAR_FIELDS_KEY].index(
                            radar_field_names[j])
                    this_radar_matrix = this_example_dict[
                        input_examples.AZ_SHEAR_IMAGE_MATRIX_KEY][
                            ..., this_field_index]

            radar_z_score_dict_no_height[radar_field_names[j]] = (
                _update_z_score_params(
                    z_score_param_dict=radar_z_score_dict_no_height[
                        radar_field_names[j]],
                    new_data_matrix=this_radar_matrix))

            radar_freq_dict_no_height[radar_field_names[j]] = (
                _update_frequency_dict(
                    frequency_dict=radar_freq_dict_no_height[
                        radar_field_names[j]],
                    new_data_matrix=this_radar_matrix,
                    rounding_base=RADAR_INTERVAL_DICT[radar_field_names[j]]))

        # Update per-field-and-height params.
        for k in range(num_radar_field_height_pairs):
            print(('Updating normalization params for "{0:s}" at {1:d} metres '
                   'AGL...').format(radar_field_name_by_pair[k],
                                    radar_height_by_pair_m_agl[k]))

            if num_radar_dimensions == 3:
                this_field_index = this_example_dict[
                    input_examples.RADAR_FIELDS_KEY].index(
                        radar_field_name_by_pair[k])
                this_height_index = numpy.where(
                    this_example_dict[input_examples.RADAR_HEIGHTS_KEY] ==
                    radar_height_by_pair_m_agl[k])[0][0]
                this_radar_matrix = this_example_dict[
                    input_examples.RADAR_IMAGE_MATRIX_KEY][
                        ..., this_height_index, this_field_index]
            elif num_radar_dimensions == 2:
                # Channel index is where both field and height match.
                all_field_names = numpy.array(
                    this_example_dict[input_examples.RADAR_FIELDS_KEY])
                all_heights_m_agl = this_example_dict[
                    input_examples.RADAR_HEIGHTS_KEY]
                this_index = numpy.where(
                    numpy.logical_and(
                        all_field_names == radar_field_name_by_pair[k],
                        all_heights_m_agl == radar_height_by_pair_m_agl[k]
                    ))[0][0]
                this_radar_matrix = this_example_dict[
                    input_examples.RADAR_IMAGE_MATRIX_KEY][..., this_index]
            else:
                if radar_field_name_by_pair[k] == radar_utils.REFL_NAME:
                    this_height_index = numpy.where(
                        this_example_dict[input_examples.RADAR_HEIGHTS_KEY] ==
                        radar_height_by_pair_m_agl[k])[0][0]
                    this_radar_matrix = this_example_dict[
                        input_examples.REFL_IMAGE_MATRIX_KEY][
                            ..., this_height_index, 0]
                else:
                    this_field_index = this_example_dict[
                        input_examples.RADAR_FIELDS_KEY].index(
                            radar_field_name_by_pair[k])
                    this_radar_matrix = this_example_dict[
                        input_examples.AZ_SHEAR_IMAGE_MATRIX_KEY][
                            ..., this_field_index]

            radar_z_score_dict_with_height[
                radar_field_name_by_pair[k], radar_height_by_pair_m_agl[k]
            ] = _update_z_score_params(
                z_score_param_dict=radar_z_score_dict_with_height[
                    radar_field_name_by_pair[k],
                    radar_height_by_pair_m_agl[k]],
                new_data_matrix=this_radar_matrix)

        # Update sounding params (per field, then per field-and-height).
        for j in range(num_sounding_fields):
            print('Updating normalization params for "{0:s}"...'.format(
                sounding_field_names[j]))

            this_field_index = this_example_dict[
                input_examples.SOUNDING_FIELDS_KEY].index(
                    sounding_field_names[j])
            this_sounding_matrix = this_example_dict[
                input_examples.SOUNDING_MATRIX_KEY][..., this_field_index]

            sounding_z_score_dict_no_height[sounding_field_names[j]] = (
                _update_z_score_params(
                    z_score_param_dict=sounding_z_score_dict_no_height[
                        sounding_field_names[j]],
                    new_data_matrix=this_sounding_matrix))

            sounding_freq_dict_no_height[sounding_field_names[j]] = (
                _update_frequency_dict(
                    frequency_dict=sounding_freq_dict_no_height[
                        sounding_field_names[j]],
                    new_data_matrix=this_sounding_matrix,
                    rounding_base=SOUNDING_INTERVAL_DICT[
                        sounding_field_names[j]]))

            for k in range(num_sounding_heights):
                this_height_index = numpy.where(
                    this_example_dict[input_examples.SOUNDING_HEIGHTS_KEY] ==
                    sounding_heights_m_agl[k])[0][0]
                this_sounding_matrix = this_example_dict[
                    input_examples.SOUNDING_MATRIX_KEY][
                        ..., this_height_index, this_field_index]

                print(('Updating normalization params for "{0:s}" at {1:d} m '
                       'AGL...').format(sounding_field_names[j],
                                        sounding_heights_m_agl[k]))

                sounding_z_score_dict_with_height[
                    sounding_field_names[j], sounding_heights_m_agl[k]
                ] = _update_z_score_params(
                    z_score_param_dict=sounding_z_score_dict_with_height[
                        sounding_field_names[j], sounding_heights_m_agl[k]],
                    new_data_matrix=this_sounding_matrix)

        print(SEPARATOR_STRING)

    # Convert dictionaries to pandas DataFrames.
    radar_table_no_height = _convert_normalization_params(
        z_score_dict_dict=radar_z_score_dict_no_height,
        frequency_dict_dict=radar_freq_dict_no_height,
        min_percentile_level=min_percentile_level,
        max_percentile_level=max_percentile_level)

    print('Normalization params for each radar field:\n{0:s}\n\n'.format(
        str(radar_table_no_height)))

    radar_table_with_height = _convert_normalization_params(
        z_score_dict_dict=radar_z_score_dict_with_height)

    print(('Normalization params for each radar field/height pair:\n{0:s}\n\n'
           ).format(str(radar_table_with_height)))

    sounding_table_no_height = _convert_normalization_params(
        z_score_dict_dict=sounding_z_score_dict_no_height,
        frequency_dict_dict=sounding_freq_dict_no_height,
        min_percentile_level=min_percentile_level,
        max_percentile_level=max_percentile_level)

    print('Normalization params for each sounding field:\n{0:s}\n\n'.format(
        str(sounding_table_no_height)))

    sounding_table_with_height = _convert_normalization_params(
        z_score_dict_dict=sounding_z_score_dict_with_height)

    print(
        ('Normalization params for each sounding field/height pair:\n{0:s}\n\n'
         ).format(str(sounding_table_with_height)))

    print('Writing normalization params to file: "{0:s}"...'.format(
        output_file_name))
    dl_utils.write_normalization_params(
        pickle_file_name=output_file_name,
        radar_table_no_height=radar_table_no_height,
        radar_table_with_height=radar_table_with_height,
        sounding_table_no_height=sounding_table_no_height,
        sounding_table_with_height=sounding_table_with_height)
def _run(model_file_name, component_type_string, target_class, layer_name,
         neuron_indices_flattened, channel_indices, top_example_dir_name,
         first_spc_date_string, last_spc_date_string, output_file_name):
    """Creates activation maps for one class, neuron, or channel of a CNN.

    This is effectively the main method.

    :param model_file_name: See documentation at top of file.
    :param component_type_string: Same.
    :param target_class: Same.
    :param layer_name: Same.
    :param neuron_indices_flattened: Same.
    :param channel_indices: Same.
    :param top_example_dir_name: Same.
    :param first_spc_date_string: Same.
    :param last_spc_date_string: Same.
    :param output_file_name: Same.
    """

    # Check input args.
    file_system_utils.mkdir_recursive_if_necessary(file_name=output_file_name)
    model_interpretation.check_component_type(component_type_string)

    if component_type_string == CHANNEL_COMPONENT_TYPE_STRING:
        error_checking.assert_is_geq_numpy_array(channel_indices, 0)

    if component_type_string == NEURON_COMPONENT_TYPE_STRING:
        # Negative entries act as separators between neuron-index sets: they
        # are turned into NaN, the array is split at NaNs, and each piece
        # becomes one row of neuron_index_matrix.
        neuron_indices_flattened = neuron_indices_flattened.astype(float)
        neuron_indices_flattened[neuron_indices_flattened < 0] = numpy.nan

        neuron_indices_2d_list = general_utils.split_array_by_nan(
            neuron_indices_flattened)
        neuron_index_matrix = numpy.array(neuron_indices_2d_list, dtype=int)
    else:
        neuron_index_matrix = None

    # Read model and metadata.
    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = cnn.read_model(model_file_name)

    # Metadata file is assumed to live beside the model file.
    metadata_file_name = '{0:s}/model_metadata.p'.format(
        os.path.split(model_file_name)[0])

    print('Reading metadata from: "{0:s}"...'.format(metadata_file_name))
    model_metadata_dict = cnn.read_model_metadata(metadata_file_name)
    training_option_dict = model_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY]

    # Create generator.
    example_file_names = input_examples.find_many_example_files(
        top_directory_name=top_example_dir_name, shuffled=False,
        first_spc_date_string=first_spc_date_string,
        last_spc_date_string=last_spc_date_string,
        raise_error_if_any_missing=False)

    # Override training options so the generator reads the files found above,
    # with no class-based downsampling.
    training_option_dict[trainval_io.SAMPLING_FRACTIONS_KEY] = None
    training_option_dict[trainval_io.EXAMPLE_FILES_KEY] = example_file_names
    training_option_dict[trainval_io.FIRST_STORM_TIME_KEY] = (
        time_conversion.get_start_of_spc_date(first_spc_date_string))
    training_option_dict[trainval_io.LAST_STORM_TIME_KEY] = (
        time_conversion.get_end_of_spc_date(last_spc_date_string))

    if model_metadata_dict[cnn.LAYER_OPERATIONS_KEY] is not None:
        generator_object = testing_io.gridrad_generator_2d_reduced(
            option_dict=training_option_dict,
            list_of_operation_dicts=model_metadata_dict[
                cnn.LAYER_OPERATIONS_KEY],
            num_examples_total=LARGE_INTEGER)
    elif model_metadata_dict[cnn.CONV_2D3D_KEY]:
        generator_object = testing_io.myrorss_generator_2d3d(
            option_dict=training_option_dict,
            num_examples_total=LARGE_INTEGER)
    else:
        generator_object = testing_io.generator_2d_or_3d(
            option_dict=training_option_dict,
            num_examples_total=LARGE_INTEGER)

    # Compute activation for each example (storm object) and model component.
    full_id_strings = []
    storm_times_unix_sec = numpy.array([], dtype=int)
    activation_matrix = None
    print(SEPARATOR_STRING)

    # One generator call per example file; stop early if the generator is
    # exhausted.
    for _ in range(len(example_file_names)):
        try:
            this_storm_object_dict = next(generator_object)
        except StopIteration:
            break

        this_list_of_input_matrices = this_storm_object_dict[
            testing_io.INPUT_MATRICES_KEY]
        these_id_strings = this_storm_object_dict[testing_io.FULL_IDS_KEY]
        these_times_unix_sec = this_storm_object_dict[
            testing_io.STORM_TIMES_KEY]

        full_id_strings += these_id_strings
        storm_times_unix_sec = numpy.concatenate(
            (storm_times_unix_sec, these_times_unix_sec))

        if component_type_string == CLASS_COMPONENT_TYPE_STRING:
            print('Computing activations for target class {0:d}...'.format(
                target_class))

            this_activation_matrix = (
                model_activation.get_class_activation_for_examples(
                    model_object=model_object, target_class=target_class,
                    list_of_input_matrices=this_list_of_input_matrices))

            # Reshape to E x 1 (one column for the single component).
            this_activation_matrix = numpy.reshape(
                this_activation_matrix, (len(this_activation_matrix), 1))

        elif component_type_string == NEURON_COMPONENT_TYPE_STRING:
            this_activation_matrix = None

            # One column per neuron-index set.
            for j in range(neuron_index_matrix.shape[0]):
                print((
                    'Computing activations for neuron {0:s} in layer "{1:s}"...'
                ).format(str(neuron_index_matrix[j, :]), layer_name))

                these_activations = (
                    model_activation.get_neuron_activation_for_examples(
                        model_object=model_object, layer_name=layer_name,
                        neuron_indices=neuron_index_matrix[j, :],
                        list_of_input_matrices=this_list_of_input_matrices))

                these_activations = numpy.reshape(
                    these_activations, (len(these_activations), 1))

                if this_activation_matrix is None:
                    # "+ 0." forces a copy of the array.
                    this_activation_matrix = these_activations + 0.
                else:
                    this_activation_matrix = numpy.concatenate(
                        (this_activation_matrix, these_activations), axis=1)
        else:
            this_activation_matrix = None

            # One column per channel.
            for this_channel_index in channel_indices:
                print(('Computing activations for channel {0:d} in layer '
                       '"{1:s}"...').format(this_channel_index, layer_name))

                these_activations = (
                    model_activation.get_channel_activation_for_examples(
                        model_object=model_object, layer_name=layer_name,
                        channel_index=this_channel_index,
                        list_of_input_matrices=this_list_of_input_matrices,
                        stat_function_for_neuron_activations=K.max))

                these_activations = numpy.reshape(
                    these_activations, (len(these_activations), 1))

                if this_activation_matrix is None:
                    this_activation_matrix = these_activations + 0.
                else:
                    this_activation_matrix = numpy.concatenate(
                        (this_activation_matrix, these_activations), axis=1)

        # Append this batch's rows (examples) to the full matrix.
        if activation_matrix is None:
            activation_matrix = this_activation_matrix + 0.
        else:
            activation_matrix = numpy.concatenate(
                (activation_matrix, this_activation_matrix), axis=0)

        print(SEPARATOR_STRING)

    print('Writing activations to file: "{0:s}"...'.format(output_file_name))
    model_activation.write_file(
        pickle_file_name=output_file_name,
        activation_matrix=activation_matrix,
        full_id_strings=full_id_strings,
        storm_times_unix_sec=storm_times_unix_sec,
        model_file_name=model_file_name,
        component_type_string=component_type_string,
        target_class=target_class,
        layer_name=layer_name,
        neuron_index_matrix=neuron_index_matrix,
        channel_indices=channel_indices)
def _run(model_file_name, top_example_dir_name, first_spc_date_string,
         last_spc_date_string, num_examples, class_fraction_keys,
         class_fraction_values, num_bootstrap_iters,
         bootstrap_confidence_level, output_file_name):
    """Runs permutation test for predictor importance.

    This is effectively the main method.

    :param model_file_name: See documentation at top of file.
    :param top_example_dir_name: Same.
    :param first_spc_date_string: Same.
    :param last_spc_date_string: Same.
    :param num_examples: Same.
    :param class_fraction_keys: Same.
    :param class_fraction_values: Same.
    :param num_bootstrap_iters: Same.
    :param bootstrap_confidence_level: Same.
    :param output_file_name: Same.
    """

    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = cnn.read_model(model_file_name)

    # Metadata file is assumed to live beside the model file.
    model_directory_name, _ = os.path.split(model_file_name)
    metadata_file_name = '{0:s}/model_metadata.p'.format(model_directory_name)

    print('Reading metadata from: "{0:s}"...'.format(metadata_file_name))
    model_metadata_dict = cnn.read_model_metadata(metadata_file_name)
    training_option_dict = model_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY]

    # A single-element key list means "no downsampling".
    if len(class_fraction_keys) > 1:
        class_to_sampling_fraction_dict = dict(
            list(zip(class_fraction_keys, class_fraction_values)))
    else:
        class_to_sampling_fraction_dict = None

    training_option_dict[
        trainval_io.SAMPLING_FRACTIONS_KEY] = class_to_sampling_fraction_dict

    example_file_names = input_examples.find_many_example_files(
        top_directory_name=top_example_dir_name, shuffled=False,
        first_spc_date_string=first_spc_date_string,
        last_spc_date_string=last_spc_date_string,
        raise_error_if_any_missing=False)

    # Override training options so the generator reads the files found above.
    training_option_dict[trainval_io.EXAMPLE_FILES_KEY] = example_file_names
    training_option_dict[trainval_io.FIRST_STORM_TIME_KEY] = (
        time_conversion.get_start_of_spc_date(first_spc_date_string))
    training_option_dict[trainval_io.LAST_STORM_TIME_KEY] = (
        time_conversion.get_end_of_spc_date(last_spc_date_string))

    # Choose the generator matching the model's input format.
    if model_metadata_dict[cnn.LAYER_OPERATIONS_KEY] is not None:
        generator_object = testing_io.gridrad_generator_2d_reduced(
            option_dict=training_option_dict,
            list_of_operation_dicts=model_metadata_dict[
                cnn.LAYER_OPERATIONS_KEY],
            num_examples_total=num_examples)
    elif model_metadata_dict[cnn.CONV_2D3D_KEY]:
        generator_object = testing_io.myrorss_generator_2d3d(
            option_dict=training_option_dict,
            num_examples_total=num_examples)
    else:
        generator_object = testing_io.generator_2d_or_3d(
            option_dict=training_option_dict,
            num_examples_total=num_examples)

    # Accumulate examples (IDs, times, targets, predictors) over all batches.
    full_id_strings = []
    storm_times_unix_sec = numpy.array([], dtype=int)
    target_values = numpy.array([], dtype=int)
    list_of_predictor_matrices = None
    print(SEPARATOR_STRING)

    for _ in range(len(example_file_names)):
        try:
            this_storm_object_dict = next(generator_object)
            print(SEPARATOR_STRING)
        except StopIteration:
            break

        full_id_strings += this_storm_object_dict[testing_io.FULL_IDS_KEY]
        storm_times_unix_sec = numpy.concatenate(
            (storm_times_unix_sec,
             this_storm_object_dict[testing_io.STORM_TIMES_KEY]))

        these_target_values = this_storm_object_dict[
            testing_io.TARGET_ARRAY_KEY]

        # Target array may be one-hot-encoded (2-D); collapse to class labels.
        if len(these_target_values.shape) > 1:
            these_target_values = numpy.argmax(these_target_values, axis=1)

        target_values = numpy.concatenate(
            (target_values, these_target_values))

        these_predictor_matrices = this_storm_object_dict[
            testing_io.INPUT_MATRICES_KEY]

        if list_of_predictor_matrices is None:
            list_of_predictor_matrices = copy.deepcopy(
                these_predictor_matrices)
        else:
            # Concatenate along the example (first) axis, matrix by matrix.
            for k in range(len(list_of_predictor_matrices)):
                list_of_predictor_matrices[k] = numpy.concatenate(
                    (list_of_predictor_matrices[k],
                     these_predictor_matrices[k]))

    predictor_names_by_matrix = _create_predictor_names(
        model_metadata_dict=model_metadata_dict,
        list_of_predictor_matrices=list_of_predictor_matrices)

    for i in range(len(predictor_names_by_matrix)):
        print('Predictors in {0:d}th matrix:\n{1:s}\n'.format(
            i + 1, str(predictor_names_by_matrix[i])))

    print(SEPARATOR_STRING)

    list_of_layer_operation_dicts = model_metadata_dict[
        cnn.LAYER_OPERATIONS_KEY]

    # For layer-operation models, also report pairwise Pearson correlations
    # between predictors.
    if list_of_layer_operation_dicts is not None:
        correlation_matrix, predictor_names = _get_pearson_correlations(
            list_of_predictor_matrices=list_of_predictor_matrices,
            predictor_names_by_matrix=predictor_names_by_matrix,
            sounding_heights_m_agl=training_option_dict[
                trainval_io.SOUNDING_HEIGHTS_KEY])

        # Print only the upper triangle (j >= i), since the matrix is
        # symmetric.
        for i in range(len(predictor_names)):
            for j in range(i, len(predictor_names)):
                print((
                    'Pearson correlation between "{0:s}" and "{1:s}" = {2:.4f}'
                ).format(predictor_names[i], predictor_names[j],
                         correlation_matrix[i, j]))

            print('\n')

    # Choose the prediction function matching the model's input format.
    if model_metadata_dict[cnn.CONV_2D3D_KEY]:
        prediction_function = permutation.prediction_function_2d3d_cnn
    else:
        num_radar_dimensions = len(list_of_predictor_matrices[0].shape) - 2

        if num_radar_dimensions == 2:
            prediction_function = permutation.prediction_function_2d_cnn
        else:
            prediction_function = permutation.prediction_function_3d_cnn

    print(SEPARATOR_STRING)

    result_dict = permutation.run_permutation_test(
        model_object=model_object,
        list_of_input_matrices=list_of_predictor_matrices,
        predictor_names_by_matrix=predictor_names_by_matrix,
        target_values=target_values,
        prediction_function=prediction_function,
        cost_function=permutation.negative_auc_function,
        num_bootstrap_iters=num_bootstrap_iters,
        bootstrap_confidence_level=bootstrap_confidence_level)
    print(SEPARATOR_STRING)

    # Attach provenance info before writing.
    result_dict[permutation.MODEL_FILE_KEY] = model_file_name
    result_dict[permutation.TARGET_VALUES_KEY] = target_values
    result_dict[permutation.FULL_IDS_KEY] = full_id_strings
    result_dict[permutation.STORM_TIMES_KEY] = storm_times_unix_sec

    print('Writing results to: "{0:s}"...'.format(output_file_name))
    permutation.write_results(
        result_dict=result_dict, pickle_file_name=output_file_name)
def _run(input_model_file_name, sounding_field_names,
         normalization_type_string, normalization_param_file_name,
         min_normalized_value, max_normalized_value, target_name,
         downsampling_classes, downsampling_fractions, monitor_string,
         weight_loss_function, top_training_dir_name,
         first_training_time_string, last_training_time_string,
         num_examples_per_train_batch, top_validation_dir_name,
         first_validation_time_string, last_validation_time_string,
         num_examples_per_validn_batch, num_epochs,
         num_training_batches_per_epoch, num_validation_batches_per_epoch,
         output_dir_name):
    """Trains CNN with soundings only.

    This is effectively the main method.

    :param input_model_file_name: See documentation at top of file.
    :param sounding_field_names: Same.
    :param normalization_type_string: Same.
    :param normalization_param_file_name: Same.
    :param min_normalized_value: Same.
    :param max_normalized_value: Same.
    :param target_name: Same.
    :param downsampling_classes: Same.
    :param downsampling_fractions: Same.
    :param monitor_string: Same.
    :param weight_loss_function: Same.
    :param top_training_dir_name: Same.
    :param first_training_time_string: Same.
    :param last_training_time_string: Same.
    :param num_examples_per_train_batch: Same.
    :param top_validation_dir_name: Same.
    :param first_validation_time_string: Same.
    :param last_validation_time_string: Same.
    :param num_examples_per_validn_batch: Same.
    :param num_epochs: Same.
    :param num_training_batches_per_epoch: Same.
    :param num_validation_batches_per_epoch: Same.
    :param output_dir_name: Same.
    """

    # Convert time strings to Unix seconds.
    training_start_time_unix_sec = time_conversion.string_to_unix_sec(
        first_training_time_string, TIME_FORMAT)
    training_end_time_unix_sec = time_conversion.string_to_unix_sec(
        last_training_time_string, TIME_FORMAT)
    validation_start_time_unix_sec = time_conversion.string_to_unix_sec(
        first_validation_time_string, TIME_FORMAT)
    validation_end_time_unix_sec = time_conversion.string_to_unix_sec(
        last_validation_time_string, TIME_FORMAT)

    # A single-element class list means "no downsampling".
    if len(downsampling_classes) > 1:
        class_to_fraction_dict = dict(
            zip(downsampling_classes, downsampling_fractions))
    else:
        class_to_fraction_dict = None

    # Set up output locations.
    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    model_file_name = '{0:s}/model.h5'.format(output_dir_name)
    history_file_name = '{0:s}/model_history.csv'.format(output_dir_name)
    tensorboard_dir_name = '{0:s}/tensorboard'.format(output_dir_name)
    metafile_name = '{0:s}/model_metadata.p'.format(output_dir_name)

    # Find training and validation files (shuffled batch files).
    training_file_names = input_examples.find_many_example_files(
        top_directory_name=top_training_dir_name, shuffled=True,
        first_batch_number=FIRST_BATCH_NUMBER,
        last_batch_number=LAST_BATCH_NUMBER,
        raise_error_if_any_missing=False)

    validation_file_names = input_examples.find_many_example_files(
        top_directory_name=top_validation_dir_name, shuffled=True,
        first_batch_number=FIRST_BATCH_NUMBER,
        last_batch_number=LAST_BATCH_NUMBER,
        raise_error_if_any_missing=False)

    # Read architecture.
    print('Reading architecture from: "{0:s}"...'.format(input_model_file_name))
    model_object = cnn.read_model(input_model_file_name)

    # TODO(thunderhoser): This is a HACK (recompiling the model read from
    # disk).
    model_object.compile(
        loss=keras.losses.binary_crossentropy,
        optimizer=keras.optimizers.Adam(),
        metrics=cnn_setup.DEFAULT_METRIC_FUNCTION_LIST)

    print(SEPARATOR_STRING)
    model_object.summary()
    print(SEPARATOR_STRING)

    # Write metadata.
    metadata_dict = {
        cnn.NUM_EPOCHS_KEY: num_epochs,
        cnn.NUM_TRAINING_BATCHES_KEY: num_training_batches_per_epoch,
        cnn.NUM_VALIDATION_BATCHES_KEY: num_validation_batches_per_epoch,
        cnn.MONITOR_STRING_KEY: monitor_string,
        cnn.WEIGHT_LOSS_FUNCTION_KEY: weight_loss_function,
        cnn.CONV_2D3D_KEY: False,
        cnn.VALIDATION_FILES_KEY: validation_file_names,
        cnn.FIRST_VALIDN_TIME_KEY: validation_start_time_unix_sec,
        cnn.LAST_VALIDN_TIME_KEY: validation_end_time_unix_sec,
        cnn.NUM_EX_PER_VALIDN_BATCH_KEY: num_examples_per_validn_batch
    }

    training_option_dict = {
        trainval_io.EXAMPLE_FILES_KEY: training_file_names,
        trainval_io.NUM_EXAMPLES_PER_BATCH_KEY: num_examples_per_train_batch,
        trainval_io.FIRST_STORM_TIME_KEY: training_start_time_unix_sec,
        trainval_io.LAST_STORM_TIME_KEY: training_end_time_unix_sec,
        trainval_io.SOUNDING_FIELDS_KEY: sounding_field_names,
        trainval_io.SOUNDING_HEIGHTS_KEY: SOUNDING_HEIGHTS_M_AGL,
        trainval_io.NORMALIZATION_TYPE_KEY: normalization_type_string,
        trainval_io.NORMALIZATION_FILE_KEY: normalization_param_file_name,
        trainval_io.MIN_NORMALIZED_VALUE_KEY: min_normalized_value,
        trainval_io.MAX_NORMALIZED_VALUE_KEY: max_normalized_value,
        trainval_io.TARGET_NAME_KEY: target_name,
        trainval_io.BINARIZE_TARGET_KEY: False,
        trainval_io.SAMPLING_FRACTIONS_KEY: class_to_fraction_dict,
        trainval_io.LOOP_ONCE_KEY: False
    }

    print('Writing metadata to: "{0:s}"...'.format(metafile_name))
    cnn.write_model_metadata(
        pickle_file_name=metafile_name,
        metadata_dict=metadata_dict,
        training_option_dict=training_option_dict)

    # Train.
    cnn.train_cnn_with_soundings(
        model_object=model_object,
        model_file_name=model_file_name,
        history_file_name=history_file_name,
        tensorboard_dir_name=tensorboard_dir_name,
        num_epochs=num_epochs,
        num_training_batches_per_epoch=num_training_batches_per_epoch,
        training_option_dict=training_option_dict,
        monitor_string=monitor_string,
        weight_loss_function=weight_loss_function,
        num_validation_batches_per_epoch=num_validation_batches_per_epoch,
        validation_file_names=validation_file_names,
        first_validn_time_unix_sec=validation_start_time_unix_sec,
        last_validn_time_unix_sec=validation_end_time_unix_sec,
        num_examples_per_validn_batch=num_examples_per_validn_batch)
def _run(input_model_file_name, radar_field_name_by_channel,
         layer_op_name_by_channel, min_height_by_channel_m_agl,
         max_height_by_channel_m_agl, sounding_field_names,
         normalization_type_string, normalization_param_file_name,
         min_normalized_value, max_normalized_value, target_name,
         shuffle_target, downsampling_classes, downsampling_fractions,
         monitor_string, weight_loss_function, x_translations_pixels,
         y_translations_pixels, ccw_rotation_angles_deg,
         noise_standard_deviation, num_noisings, flip_in_x, flip_in_y,
         top_training_dir_name, first_training_time_string,
         last_training_time_string, num_examples_per_train_batch,
         top_validation_dir_name, first_validation_time_string,
         last_validation_time_string, num_examples_per_validn_batch,
         num_epochs, num_training_batches_per_epoch,
         num_validation_batches_per_epoch, output_dir_name):
    """Trains CNN with 2-D GridRad images.

    This is effectively the main method.

    :param input_model_file_name: See documentation at top of file.
    :param radar_field_name_by_channel: Same.
    :param layer_op_name_by_channel: Same.
    :param min_height_by_channel_m_agl: Same.
    :param max_height_by_channel_m_agl: Same.
    :param sounding_field_names: Same.
    :param normalization_type_string: Same.
    :param normalization_param_file_name: Same.
    :param min_normalized_value: Same.
    :param max_normalized_value: Same.
    :param target_name: Same.
    :param shuffle_target: Same.
    :param downsampling_classes: Same.
    :param downsampling_fractions: Same.
    :param monitor_string: Same.
    :param weight_loss_function: Same.
    :param x_translations_pixels: Same.
    :param y_translations_pixels: Same.
    :param ccw_rotation_angles_deg: Same.
    :param noise_standard_deviation: Same.
    :param num_noisings: Same.
    :param flip_in_x: Same.
    :param flip_in_y: Same.
    :param top_training_dir_name: Same.
    :param first_training_time_string: Same.
    :param last_training_time_string: Same.
    :param num_examples_per_train_batch: Same.
    :param top_validation_dir_name: Same.
    :param first_validation_time_string: Same.
    :param last_validation_time_string: Same.
    :param num_examples_per_validn_batch: Same.
    :param num_epochs: Same.
    :param num_training_batches_per_epoch: Same.
    :param num_validation_batches_per_epoch: Same.
    :param output_dir_name: Same.
    """

    # Process input args.
    first_training_time_unix_sec = time_conversion.string_to_unix_sec(
        first_training_time_string, TIME_FORMAT)
    last_training_time_unix_sec = time_conversion.string_to_unix_sec(
        last_training_time_string, TIME_FORMAT)

    # Empty string or 'None' is the command-line sentinel for "no validation":
    # validation times/batches are zeroed out and no validation files are used.
    if top_validation_dir_name in ['', 'None']:
        top_validation_dir_name = None
        num_validation_batches_per_epoch = 0
        first_validation_time_unix_sec = 0
        last_validation_time_unix_sec = 0
    else:
        first_validation_time_unix_sec = time_conversion.string_to_unix_sec(
            first_validation_time_string, TIME_FORMAT)
        last_validation_time_unix_sec = time_conversion.string_to_unix_sec(
            last_validation_time_string, TIME_FORMAT)

    # Same sentinel convention for soundings: first element '' or 'None'
    # disables sounding input entirely.
    if sounding_field_names[0] in ['', 'None']:
        sounding_field_names = None

    # With <= 1 downsampling class there is nothing to downsample.
    if len(downsampling_classes) > 1:
        class_to_sampling_fraction_dict = dict(
            list(zip(downsampling_classes, downsampling_fractions)))
    else:
        class_to_sampling_fraction_dict = None

    # A single pair of zero translations means translation-based data
    # augmentation is disabled.
    # NOTE(review): `x_translations_pixels + y_translations_pixels == 0`
    # assumes the two arrays have equal length (here 1) — confirm upstream.
    if (len(x_translations_pixels) == 1 and
            x_translations_pixels + y_translations_pixels == 0):
        x_translations_pixels = None
        y_translations_pixels = None

    # A single zero angle means rotation-based data augmentation is disabled.
    if len(ccw_rotation_angles_deg) == 1 and ccw_rotation_angles_deg[0] == 0:
        ccw_rotation_angles_deg = None

    # Non-positive noising count disables noise-based data augmentation.
    if num_noisings <= 0:
        num_noisings = 0
        noise_standard_deviation = None

    # All four per-channel arrays must have one entry per radar channel.
    num_channels = len(radar_field_name_by_channel)
    expected_dimensions = numpy.array([num_channels], dtype=int)

    error_checking.assert_is_numpy_array(numpy.array(layer_op_name_by_channel),
                                         exact_dimensions=expected_dimensions)
    error_checking.assert_is_numpy_array(min_height_by_channel_m_agl,
                                         exact_dimensions=expected_dimensions)
    error_checking.assert_is_numpy_array(max_height_by_channel_m_agl,
                                         exact_dimensions=expected_dimensions)

    # `[{}] * num_channels` creates shared references, but every slot is
    # reassigned in the loop below, so the sharing is harmless.
    list_of_layer_operation_dicts = [{}] * num_channels

    for m in range(num_channels):
        list_of_layer_operation_dicts[m] = {
            input_examples.RADAR_FIELD_KEY: radar_field_name_by_channel[m],
            input_examples.OPERATION_NAME_KEY: layer_op_name_by_channel[m],
            input_examples.MIN_HEIGHT_KEY: min_height_by_channel_m_agl[m],
            input_examples.MAX_HEIGHT_KEY: max_height_by_channel_m_agl[m]
        }

    # Set output locations.
    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    output_model_file_name = '{0:s}/model.h5'.format(output_dir_name)
    history_file_name = '{0:s}/model_history.csv'.format(output_dir_name)
    tensorboard_dir_name = '{0:s}/tensorboard'.format(output_dir_name)
    model_metafile_name = '{0:s}/model_metadata.p'.format(output_dir_name)

    # Find training and validation files (shuffled example files, indexed by
    # batch number rather than date).
    training_file_names = input_examples.find_many_example_files(
        top_directory_name=top_training_dir_name, shuffled=True,
        first_batch_number=FIRST_BATCH_NUMBER,
        last_batch_number=LAST_BATCH_NUMBER,
        raise_error_if_any_missing=False)

    if top_validation_dir_name is None:
        validation_file_names = []
    else:
        validation_file_names = input_examples.find_many_example_files(
            top_directory_name=top_validation_dir_name, shuffled=True,
            first_batch_number=FIRST_BATCH_NUMBER,
            last_batch_number=LAST_BATCH_NUMBER,
            raise_error_if_any_missing=False)

    # Read architecture.
    print(
        'Reading architecture from: "{0:s}"...'.format(input_model_file_name))

    model_object = cnn.read_model(input_model_file_name)
    # model_object = clone_model(model_object)

    # TODO(thunderhoser): This is a HACK.
    model_object.compile(loss=keras.losses.binary_crossentropy,
                         optimizer=keras.optimizers.Adam(),
                         metrics=cnn_setup.DEFAULT_METRIC_FUNCTION_LIST)

    print(SEPARATOR_STRING)
    model_object.summary()
    print(SEPARATOR_STRING)

    # NOTE(review): printing weights of layer 'radar_conv2d_2' appears to be a
    # debugging aid — confirm whether it should stay.
    print(K.eval(model_object.get_layer(name='radar_conv2d_2').weights[0]))
    print(SEPARATOR_STRING)

    # Write metadata.
    metadata_dict = {
        cnn.NUM_EPOCHS_KEY: num_epochs,
        cnn.NUM_TRAINING_BATCHES_KEY: num_training_batches_per_epoch,
        cnn.NUM_VALIDATION_BATCHES_KEY: num_validation_batches_per_epoch,
        cnn.MONITOR_STRING_KEY: monitor_string,
        cnn.WEIGHT_LOSS_FUNCTION_KEY: weight_loss_function,
        cnn.CONV_2D3D_KEY: False,
        cnn.VALIDATION_FILES_KEY: validation_file_names,
        cnn.FIRST_VALIDN_TIME_KEY: first_validation_time_unix_sec,
        cnn.LAST_VALIDN_TIME_KEY: last_validation_time_unix_sec,
        cnn.NUM_EX_PER_VALIDN_BATCH_KEY: num_examples_per_validn_batch
    }

    # Grid dimensions come from the model's (first) input tensor:
    # axis 1 = rows, axis 2 = columns.
    input_tensor = model_object.input
    if isinstance(input_tensor, list):
        input_tensor = input_tensor[0]

    num_grid_rows = input_tensor.get_shape().as_list()[1]
    num_grid_columns = input_tensor.get_shape().as_list()[2]

    training_option_dict = {
        trainval_io.EXAMPLE_FILES_KEY: training_file_names,
        trainval_io.NUM_EXAMPLES_PER_BATCH_KEY: num_examples_per_train_batch,
        trainval_io.FIRST_STORM_TIME_KEY: first_training_time_unix_sec,
        trainval_io.LAST_STORM_TIME_KEY: last_training_time_unix_sec,
        trainval_io.SOUNDING_FIELDS_KEY: sounding_field_names,
        trainval_io.SOUNDING_HEIGHTS_KEY: SOUNDING_HEIGHTS_M_AGL,
        trainval_io.NORMALIZATION_TYPE_KEY: normalization_type_string,
        trainval_io.NORMALIZATION_FILE_KEY: normalization_param_file_name,
        trainval_io.MIN_NORMALIZED_VALUE_KEY: min_normalized_value,
        trainval_io.MAX_NORMALIZED_VALUE_KEY: max_normalized_value,
        trainval_io.TARGET_NAME_KEY: target_name,
        trainval_io.SHUFFLE_TARGET_KEY: shuffle_target,
        trainval_io.NUM_ROWS_KEY: num_grid_rows,
        trainval_io.NUM_COLUMNS_KEY: num_grid_columns,
        trainval_io.BINARIZE_TARGET_KEY: False,
        trainval_io.SAMPLING_FRACTIONS_KEY: class_to_sampling_fraction_dict,
        trainval_io.LOOP_ONCE_KEY: False,
        trainval_io.X_TRANSLATIONS_KEY: x_translations_pixels,
        trainval_io.Y_TRANSLATIONS_KEY: y_translations_pixels,
        trainval_io.ROTATION_ANGLES_KEY: ccw_rotation_angles_deg,
        trainval_io.NOISE_STDEV_KEY: noise_standard_deviation,
        trainval_io.NUM_NOISINGS_KEY: num_noisings,
        trainval_io.FLIP_X_KEY: flip_in_x,
        trainval_io.FLIP_Y_KEY: flip_in_y
    }

    print('Writing metadata to: "{0:s}"...'.format(model_metafile_name))
    cnn.write_model_metadata(
        pickle_file_name=model_metafile_name, metadata_dict=metadata_dict,
        training_option_dict=training_option_dict,
        list_of_layer_operation_dicts=list_of_layer_operation_dicts)

    cnn.train_cnn_gridrad_2d_reduced(
        model_object=model_object, model_file_name=output_model_file_name,
        history_file_name=history_file_name,
        tensorboard_dir_name=tensorboard_dir_name, num_epochs=num_epochs,
        num_training_batches_per_epoch=num_training_batches_per_epoch,
        training_option_dict=training_option_dict,
        list_of_layer_operation_dicts=list_of_layer_operation_dicts,
        monitor_string=monitor_string,
        weight_loss_function=weight_loss_function,
        num_validation_batches_per_epoch=num_validation_batches_per_epoch,
        validation_file_names=validation_file_names,
        first_validn_time_unix_sec=first_validation_time_unix_sec,
        last_validn_time_unix_sec=last_validation_time_unix_sec,
        num_examples_per_validn_batch=num_examples_per_validn_batch)
def _write_metadata_one_cnn(model_object, argument_dict):
    """Writes metadata for one CNN to file.

    :param model_object: Untrained CNN (instance of `keras.models.Model` or
        `keras.models.Sequential`).
    :param argument_dict: See doc for `_train_one_cnn`.
    :return: metadata_dict: See doc for `cnn.write_model_metadata`.
    :return: training_option_dict: Same.
    """

    # Imports are local (function-scope), presumably so this helper can be
    # shipped to worker processes without importing heavy deps at module load.
    from gewittergefahr.deep_learning import cnn
    from gewittergefahr.deep_learning import input_examples
    from gewittergefahr.deep_learning import \
        training_validation_io as trainval_io
    from gewittergefahr.scripts import deep_learning_helper as dl_helper

    # Read input args.
    sounding_field_names = argument_dict[dl_helper.SOUNDING_FIELDS_ARG_NAME]
    radar_field_name_by_channel = argument_dict[RADAR_FIELDS_KEY]
    layer_op_name_by_channel = argument_dict[LAYER_OPERATIONS_KEY]
    min_height_by_channel_m_agl = argument_dict[MIN_HEIGHTS_KEY]
    max_height_by_channel_m_agl = argument_dict[MAX_HEIGHTS_KEY]

    normalization_type_string = argument_dict[
        dl_helper.NORMALIZATION_TYPE_ARG_NAME]
    normalization_file_name = argument_dict[
        dl_helper.NORMALIZATION_FILE_ARG_NAME]
    min_normalized_value = argument_dict[dl_helper.MIN_NORM_VALUE_ARG_NAME]
    max_normalized_value = argument_dict[dl_helper.MAX_NORM_VALUE_ARG_NAME]

    target_name = argument_dict[dl_helper.TARGET_NAME_ARG_NAME]
    downsampling_classes = numpy.array(
        argument_dict[dl_helper.DOWNSAMPLING_CLASSES_ARG_NAME], dtype=int)
    downsampling_fractions = numpy.array(
        argument_dict[dl_helper.DOWNSAMPLING_FRACTIONS_ARG_NAME], dtype=float)
    monitor_string = argument_dict[dl_helper.MONITOR_ARG_NAME]
    weight_loss_function = bool(argument_dict[dl_helper.WEIGHT_LOSS_ARG_NAME])

    # Data-augmentation options.
    x_translations_pixels = numpy.array(
        argument_dict[dl_helper.X_TRANSLATIONS_ARG_NAME], dtype=int)
    y_translations_pixels = numpy.array(
        argument_dict[dl_helper.Y_TRANSLATIONS_ARG_NAME], dtype=int)
    ccw_rotation_angles_deg = numpy.array(
        argument_dict[dl_helper.ROTATION_ANGLES_ARG_NAME], dtype=float)
    noise_standard_deviation = argument_dict[dl_helper.NOISE_STDEV_ARG_NAME]
    num_noisings = argument_dict[dl_helper.NUM_NOISINGS_ARG_NAME]
    flip_in_x = bool(argument_dict[dl_helper.FLIP_X_ARG_NAME])
    flip_in_y = bool(argument_dict[dl_helper.FLIP_Y_ARG_NAME])

    top_training_dir_name = argument_dict[dl_helper.TRAINING_DIR_ARG_NAME]
    first_training_time_string = argument_dict[
        dl_helper.FIRST_TRAINING_TIME_ARG_NAME]
    last_training_time_string = argument_dict[
        dl_helper.LAST_TRAINING_TIME_ARG_NAME]
    num_examples_per_train_batch = argument_dict[
        dl_helper.NUM_EX_PER_TRAIN_ARG_NAME]

    top_validation_dir_name = argument_dict[dl_helper.VALIDATION_DIR_ARG_NAME]
    first_validation_time_string = argument_dict[
        dl_helper.FIRST_VALIDATION_TIME_ARG_NAME]
    last_validation_time_string = argument_dict[
        dl_helper.LAST_VALIDATION_TIME_ARG_NAME]
    num_examples_per_validn_batch = argument_dict[
        dl_helper.NUM_EX_PER_VALIDN_ARG_NAME]

    num_epochs = argument_dict[dl_helper.NUM_EPOCHS_ARG_NAME]
    num_training_batches_per_epoch = argument_dict[
        dl_helper.NUM_TRAINING_BATCHES_ARG_NAME]
    num_validation_batches_per_epoch = argument_dict[
        dl_helper.NUM_VALIDATION_BATCHES_ARG_NAME]
    output_dir_name = argument_dict[dl_helper.OUTPUT_DIR_ARG_NAME]

    # Process input args.
    first_training_time_unix_sec = time_conversion.string_to_unix_sec(
        first_training_time_string, TIME_FORMAT)
    last_training_time_unix_sec = time_conversion.string_to_unix_sec(
        last_training_time_string, TIME_FORMAT)
    first_validation_time_unix_sec = time_conversion.string_to_unix_sec(
        first_validation_time_string, TIME_FORMAT)
    last_validation_time_unix_sec = time_conversion.string_to_unix_sec(
        last_validation_time_string, TIME_FORMAT)

    # First element '' or 'None' is the sentinel for "no soundings".
    if sounding_field_names[0] in ['', 'None']:
        sounding_field_names = None

    num_channels = len(radar_field_name_by_channel)

    # `[{}] * num_channels` creates shared references, but every slot is
    # reassigned in the loop below, so the sharing is harmless.
    layer_operation_dicts = [{}] * num_channels

    for k in range(num_channels):
        layer_operation_dicts[k] = {
            input_examples.RADAR_FIELD_KEY: radar_field_name_by_channel[k],
            input_examples.OPERATION_NAME_KEY: layer_op_name_by_channel[k],
            input_examples.MIN_HEIGHT_KEY: min_height_by_channel_m_agl[k],
            input_examples.MAX_HEIGHT_KEY: max_height_by_channel_m_agl[k]
        }

    # With <= 1 downsampling class there is nothing to downsample.
    if len(downsampling_classes) > 1:
        downsampling_dict = dict(
            list(zip(downsampling_classes, downsampling_fractions)))
    else:
        downsampling_dict = None

    # Translation-based augmentation is on iff there is more than one
    # translation or a non-zero translation in either direction.
    translate_flag = (len(x_translations_pixels) > 1 or
                      x_translations_pixels[0] != 0 or
                      y_translations_pixels[0] != 0)

    if not translate_flag:
        x_translations_pixels = None
        y_translations_pixels = None

    # A single zero angle means rotation-based augmentation is disabled.
    if len(ccw_rotation_angles_deg) == 1 and ccw_rotation_angles_deg[0] == 0:
        ccw_rotation_angles_deg = None

    # Non-positive noising count disables noise-based augmentation.
    if num_noisings <= 0:
        num_noisings = 0
        noise_standard_deviation = None

    # Find training and validation files (shuffled example files, indexed by
    # batch number rather than date).
    training_file_names = input_examples.find_many_example_files(
        top_directory_name=top_training_dir_name, shuffled=True,
        first_batch_number=FIRST_BATCH_NUMBER,
        last_batch_number=LAST_BATCH_NUMBER,
        raise_error_if_any_missing=False)

    validation_file_names = input_examples.find_many_example_files(
        top_directory_name=top_validation_dir_name, shuffled=True,
        first_batch_number=FIRST_BATCH_NUMBER,
        last_batch_number=LAST_BATCH_NUMBER,
        raise_error_if_any_missing=False)

    # Write metadata.
    metadata_dict = {
        cnn.NUM_EPOCHS_KEY: num_epochs,
        cnn.NUM_TRAINING_BATCHES_KEY: num_training_batches_per_epoch,
        cnn.NUM_VALIDATION_BATCHES_KEY: num_validation_batches_per_epoch,
        cnn.MONITOR_STRING_KEY: monitor_string,
        cnn.WEIGHT_LOSS_FUNCTION_KEY: weight_loss_function,
        cnn.CONV_2D3D_KEY: False,
        cnn.VALIDATION_FILES_KEY: validation_file_names,
        cnn.FIRST_VALIDN_TIME_KEY: first_validation_time_unix_sec,
        cnn.LAST_VALIDN_TIME_KEY: last_validation_time_unix_sec,
        cnn.LAYER_OPERATIONS_KEY: layer_operation_dicts,
        cnn.NUM_EX_PER_VALIDN_BATCH_KEY: num_examples_per_validn_batch
    }

    # Grid dimensions come from the model's (first) input tensor:
    # axis 1 = rows, axis 2 = columns.
    input_tensor = model_object.input
    if isinstance(input_tensor, list):
        input_tensor = input_tensor[0]

    num_grid_rows = input_tensor.get_shape().as_list()[1]
    num_grid_columns = input_tensor.get_shape().as_list()[2]

    training_option_dict = {
        trainval_io.EXAMPLE_FILES_KEY: training_file_names,
        trainval_io.TARGET_NAME_KEY: target_name,
        trainval_io.FIRST_STORM_TIME_KEY: first_training_time_unix_sec,
        trainval_io.LAST_STORM_TIME_KEY: last_training_time_unix_sec,
        trainval_io.NUM_EXAMPLES_PER_BATCH_KEY: num_examples_per_train_batch,
        trainval_io.SOUNDING_FIELDS_KEY: sounding_field_names,
        trainval_io.SOUNDING_HEIGHTS_KEY: SOUNDING_HEIGHTS_M_AGL,
        trainval_io.NUM_ROWS_KEY: num_grid_rows,
        trainval_io.NUM_COLUMNS_KEY: num_grid_columns,
        trainval_io.NORMALIZATION_TYPE_KEY: normalization_type_string,
        trainval_io.NORMALIZATION_FILE_KEY: normalization_file_name,
        trainval_io.MIN_NORMALIZED_VALUE_KEY: min_normalized_value,
        trainval_io.MAX_NORMALIZED_VALUE_KEY: max_normalized_value,
        trainval_io.BINARIZE_TARGET_KEY: False,
        trainval_io.SAMPLING_FRACTIONS_KEY: downsampling_dict,
        trainval_io.LOOP_ONCE_KEY: False,
        trainval_io.X_TRANSLATIONS_KEY: x_translations_pixels,
        trainval_io.Y_TRANSLATIONS_KEY: y_translations_pixels,
        trainval_io.ROTATION_ANGLES_KEY: ccw_rotation_angles_deg,
        trainval_io.NOISE_STDEV_KEY: noise_standard_deviation,
        trainval_io.NUM_NOISINGS_KEY: num_noisings,
        trainval_io.FLIP_X_KEY: flip_in_x,
        trainval_io.FLIP_Y_KEY: flip_in_y
    }

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)
    metafile_name = '{0:s}/model_metadata.p'.format(output_dir_name)

    print('Writing metadata to: "{0:s}"...'.format(metafile_name))
    cnn.write_model_metadata(pickle_file_name=metafile_name,
                             metadata_dict=metadata_dict,
                             training_option_dict=training_option_dict)

    return metadata_dict, training_option_dict
def _run(input_model_file_name, radar_field_names, sounding_field_names,
         normalization_type_string, normalization_param_file_name,
         min_normalized_value, max_normalized_value, downsampling_keys,
         downsampling_fractions, monitor_string, weight_loss_function,
         refl_masking_threshold_dbz, x_translations_pixels,
         y_translations_pixels, ccw_rotation_angles_deg,
         noise_standard_deviation, num_noisings, flip_in_x, flip_in_y,
         top_training_dir_name, first_training_time_string,
         last_training_time_string, top_validation_dir_name,
         first_validation_time_string, last_validation_time_string,
         num_examples_per_batch, num_epochs, num_training_batches_per_epoch,
         num_validation_batches_per_epoch, output_dir_name):
    """Trains CNN with native (3-D) GridRad images.

    This is effectively the main method.

    :param input_model_file_name: See documentation at top of file.
    :param radar_field_names: Same.
    :param sounding_field_names: Same.
    :param normalization_type_string: Same.
    :param normalization_param_file_name: Same.
    :param min_normalized_value: Same.
    :param max_normalized_value: Same.
    :param downsampling_keys: Same.
    :param downsampling_fractions: Same.
    :param monitor_string: Same.
    :param weight_loss_function: Same.
    :param refl_masking_threshold_dbz: Same.
    :param x_translations_pixels: Same.
    :param y_translations_pixels: Same.
    :param ccw_rotation_angles_deg: Same.
    :param noise_standard_deviation: Same.
    :param num_noisings: Same.
    :param flip_in_x: Same.
    :param flip_in_y: Same.
    :param top_training_dir_name: Same.
    :param first_training_time_string: Same.
    :param last_training_time_string: Same.
    :param top_validation_dir_name: Same.
    :param first_validation_time_string: Same.
    :param last_validation_time_string: Same.
    :param num_examples_per_batch: Same.
    :param num_epochs: Same.
    :param num_training_batches_per_epoch: Same.
    :param num_validation_batches_per_epoch: Same.
    :param output_dir_name: Same.
    """

    # Process input args.
    first_training_time_unix_sec = time_conversion.string_to_unix_sec(
        first_training_time_string, TIME_FORMAT)
    last_training_time_unix_sec = time_conversion.string_to_unix_sec(
        last_training_time_string, TIME_FORMAT)
    first_validation_time_unix_sec = time_conversion.string_to_unix_sec(
        first_validation_time_string, TIME_FORMAT)
    last_validation_time_unix_sec = time_conversion.string_to_unix_sec(
        last_validation_time_string, TIME_FORMAT)

    # First element '' or 'None' is the sentinel for "no soundings".
    if sounding_field_names[0] in ['', 'None']:
        sounding_field_names = None

    # With <= 1 downsampling class there is nothing to downsample.
    if len(downsampling_keys) > 1:
        class_to_sampling_fraction_dict = dict(
            zip(downsampling_keys, downsampling_fractions))
    else:
        class_to_sampling_fraction_dict = None

    # A single pair of zero translations means translation-based data
    # augmentation is disabled.
    if (len(x_translations_pixels) == 1 and
            x_translations_pixels + y_translations_pixels == 0):
        x_translations_pixels = None
        y_translations_pixels = None

    # A single zero angle means rotation-based data augmentation is disabled.
    if len(ccw_rotation_angles_deg) == 1 and ccw_rotation_angles_deg[0] == 0:
        ccw_rotation_angles_deg = None

    # Non-positive noising count disables noise-based data augmentation.
    if num_noisings <= 0:
        num_noisings = 0
        noise_standard_deviation = None

    # Set output locations.
    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    output_model_file_name = '{0:s}/model.h5'.format(output_dir_name)
    history_file_name = '{0:s}/model_history.csv'.format(output_dir_name)
    tensorboard_dir_name = '{0:s}/tensorboard'.format(output_dir_name)
    model_metafile_name = '{0:s}/model_metadata.p'.format(output_dir_name)

    # Find training and validation files (shuffled example files, indexed by
    # batch number rather than date).
    training_file_names = input_examples.find_many_example_files(
        top_directory_name=top_training_dir_name, shuffled=True,
        first_batch_number=FIRST_BATCH_NUMBER,
        last_batch_number=LAST_BATCH_NUMBER,
        raise_error_if_any_missing=False)

    validation_file_names = input_examples.find_many_example_files(
        top_directory_name=top_validation_dir_name, shuffled=True,
        first_batch_number=FIRST_BATCH_NUMBER,
        last_batch_number=LAST_BATCH_NUMBER,
        raise_error_if_any_missing=False)

    # Read architecture.  (Print calls below were converted from Python-2
    # print statements, for consistency with the rest of this file and
    # Python-3 compatibility.)
    print('Reading architecture from: "{0:s}"...'.format(
        input_model_file_name))

    model_object = cnn.read_model(input_model_file_name)

    # Cloning discards the optimizer/training state of the loaded model; the
    # compile call below re-creates it.
    model_object = keras.models.clone_model(model_object)

    # TODO(thunderhoser): This is a HACK.
    model_object.compile(loss=keras.losses.binary_crossentropy,
                         optimizer=keras.optimizers.Adam(),
                         metrics=cnn_setup.DEFAULT_METRIC_FUNCTION_LIST)

    print(SEPARATOR_STRING)
    model_object.summary()
    print(SEPARATOR_STRING)

    # Write metadata.  The target name is read from the first training file,
    # so all training files are assumed to share one target variable.
    this_example_dict = input_examples.read_example_file(
        netcdf_file_name=training_file_names[0], metadata_only=True)
    target_name = this_example_dict[input_examples.TARGET_NAME_KEY]

    metadata_dict = {
        cnn.TARGET_NAME_KEY: target_name,
        cnn.NUM_EPOCHS_KEY: num_epochs,
        cnn.NUM_TRAINING_BATCHES_KEY: num_training_batches_per_epoch,
        cnn.NUM_VALIDATION_BATCHES_KEY: num_validation_batches_per_epoch,
        cnn.MONITOR_STRING_KEY: monitor_string,
        cnn.WEIGHT_LOSS_FUNCTION_KEY: weight_loss_function,
        cnn.USE_2D3D_CONVOLUTION_KEY: False,
        cnn.VALIDATION_FILES_KEY: validation_file_names,
        cnn.FIRST_VALIDN_TIME_KEY: first_validation_time_unix_sec,
        cnn.LAST_VALIDN_TIME_KEY: last_validation_time_unix_sec
    }

    # Grid dimensions come from the model's (first) input tensor:
    # axis 1 = rows, axis 2 = columns.
    input_tensor = model_object.input
    if isinstance(input_tensor, list):
        input_tensor = input_tensor[0]

    num_grid_rows = input_tensor.get_shape().as_list()[1]
    num_grid_columns = input_tensor.get_shape().as_list()[2]

    training_option_dict = {
        trainval_io.EXAMPLE_FILES_KEY: training_file_names,
        trainval_io.FIRST_STORM_TIME_KEY: first_training_time_unix_sec,
        trainval_io.LAST_STORM_TIME_KEY: last_training_time_unix_sec,
        trainval_io.NUM_EXAMPLES_PER_BATCH_KEY: num_examples_per_batch,
        trainval_io.RADAR_FIELDS_KEY: radar_field_names,
        trainval_io.RADAR_HEIGHTS_KEY: RADAR_HEIGHTS_M_AGL,
        trainval_io.SOUNDING_FIELDS_KEY: sounding_field_names,
        trainval_io.SOUNDING_HEIGHTS_KEY: SOUNDING_HEIGHTS_M_AGL,
        trainval_io.NUM_ROWS_KEY: num_grid_rows,
        trainval_io.NUM_COLUMNS_KEY: num_grid_columns,
        trainval_io.NORMALIZATION_TYPE_KEY: normalization_type_string,
        trainval_io.NORMALIZATION_FILE_KEY: normalization_param_file_name,
        trainval_io.MIN_NORMALIZED_VALUE_KEY: min_normalized_value,
        trainval_io.MAX_NORMALIZED_VALUE_KEY: max_normalized_value,
        trainval_io.BINARIZE_TARGET_KEY: False,
        trainval_io.SAMPLING_FRACTIONS_KEY: class_to_sampling_fraction_dict,
        trainval_io.LOOP_ONCE_KEY: False,
        trainval_io.REFLECTIVITY_MASK_KEY: refl_masking_threshold_dbz,
        trainval_io.X_TRANSLATIONS_KEY: x_translations_pixels,
        trainval_io.Y_TRANSLATIONS_KEY: y_translations_pixels,
        trainval_io.ROTATION_ANGLES_KEY: ccw_rotation_angles_deg,
        trainval_io.NOISE_STDEV_KEY: noise_standard_deviation,
        trainval_io.NUM_NOISINGS_KEY: num_noisings,
        trainval_io.FLIP_X_KEY: flip_in_x,
        trainval_io.FLIP_Y_KEY: flip_in_y
    }

    print('Writing metadata to: "{0:s}"...'.format(model_metafile_name))
    cnn.write_model_metadata(pickle_file_name=model_metafile_name,
                             metadata_dict=metadata_dict,
                             training_option_dict=training_option_dict)

    cnn.train_cnn_2d_or_3d(
        model_object=model_object, model_file_name=output_model_file_name,
        history_file_name=history_file_name,
        tensorboard_dir_name=tensorboard_dir_name, num_epochs=num_epochs,
        num_training_batches_per_epoch=num_training_batches_per_epoch,
        training_option_dict=training_option_dict,
        monitor_string=monitor_string,
        weight_loss_function=weight_loss_function,
        num_validation_batches_per_epoch=num_validation_batches_per_epoch,
        validation_file_names=validation_file_names,
        first_validn_time_unix_sec=first_validation_time_unix_sec,
        last_validn_time_unix_sec=last_validation_time_unix_sec)
def _write_metadata_one_model(argument_dict):
    """Writes metadata for one upconvnet to file.

    :param argument_dict: See doc for `_train_one_upconvnet`.
    :return: metadata_dict: See doc for `upconvnet.write_model_metadata`.
    """

    from gewittergefahr.deep_learning import cnn
    from gewittergefahr.deep_learning import upconvnet
    from gewittergefahr.deep_learning import input_examples
    from gewittergefahr.scripts import train_upconvnet

    # Shorthand accessor; raises KeyError on any missing argument, exactly
    # like direct subscripting.
    arg = argument_dict.__getitem__

    # Unpack input args.
    cnn_file_name = arg(train_upconvnet.CNN_FILE_ARG_NAME)
    cnn_feature_layer_name = arg(train_upconvnet.FEATURE_LAYER_ARG_NAME)

    top_training_dir_name = arg(train_upconvnet.TRAINING_DIR_ARG_NAME)
    top_validation_dir_name = arg(train_upconvnet.VALIDATION_DIR_ARG_NAME)

    num_examples_per_batch = arg(train_upconvnet.NUM_EX_PER_BATCH_ARG_NAME)
    num_epochs = arg(train_upconvnet.NUM_EPOCHS_ARG_NAME)
    num_training_batches_per_epoch = arg(
        train_upconvnet.NUM_TRAINING_BATCHES_ARG_NAME)
    num_validation_batches_per_epoch = arg(
        train_upconvnet.NUM_VALIDATION_BATCHES_ARG_NAME)
    output_dir_name = arg(train_upconvnet.OUTPUT_DIR_ARG_NAME)

    # Convert the four time strings to Unix seconds.
    (first_training_time_unix_sec, last_training_time_unix_sec,
     first_validation_time_unix_sec, last_validation_time_unix_sec) = [
         time_conversion.string_to_unix_sec(arg(this_key), TIME_FORMAT)
         for this_key in [
             train_upconvnet.FIRST_TRAINING_TIME_ARG_NAME,
             train_upconvnet.LAST_TRAINING_TIME_ARG_NAME,
             train_upconvnet.FIRST_VALIDATION_TIME_ARG_NAME,
             train_upconvnet.LAST_VALIDATION_TIME_ARG_NAME
         ]
    ]

    # Find training and validation files (shuffled example files).
    training_file_names, validation_file_names = [
        input_examples.find_many_example_files(
            top_directory_name=this_dir_name, shuffled=True,
            first_batch_number=FIRST_BATCH_NUMBER,
            last_batch_number=LAST_BATCH_NUMBER,
            raise_error_if_any_missing=False)
        for this_dir_name in [top_training_dir_name, top_validation_dir_name]
    ]

    # Write metadata.
    upconvnet_metafile_name = cnn.find_metafile(
        model_file_name='{0:s}/foo.h5'.format(output_dir_name),
        raise_error_if_missing=False
    )

    print('Writing upconvnet metadata to: "{0:s}"...'.format(
        upconvnet_metafile_name
    ))

    return upconvnet.write_model_metadata(
        cnn_file_name=cnn_file_name,
        cnn_feature_layer_name=cnn_feature_layer_name,
        num_epochs=num_epochs, num_examples_per_batch=num_examples_per_batch,
        num_training_batches_per_epoch=num_training_batches_per_epoch,
        training_example_file_names=training_file_names,
        first_training_time_unix_sec=first_training_time_unix_sec,
        last_training_time_unix_sec=last_training_time_unix_sec,
        num_validation_batches_per_epoch=num_validation_batches_per_epoch,
        validation_example_file_names=validation_file_names,
        first_validation_time_unix_sec=first_validation_time_unix_sec,
        last_validation_time_unix_sec=last_validation_time_unix_sec,
        pickle_file_name=upconvnet_metafile_name)
def _run(model_file_name, top_example_dir_name, first_spc_date_string,
         last_spc_date_string, num_examples, class_fraction_keys,
         class_fraction_values, output_dir_name):
    """Makes predictions from trained CNN.

    This is effectively the main method.

    :param model_file_name: See documentation at top of file.
    :param top_example_dir_name: Same.
    :param first_spc_date_string: Same.
    :param last_spc_date_string: Same.
    :param num_examples: Same.
    :param class_fraction_keys: Same.
    :param class_fraction_values: Same.
    :param output_dir_name: Same.
    :raises: ValueError: if the model does multi-class classification.
    """

    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = cnn.read_model(model_file_name)

    # Last axis of the final layer's output = number of output neurons.
    num_output_neurons = (
        model_object.layers[-1].output.get_shape().as_list()[-1]
    )

    # This script handles only binary classification (<= 2 output neurons).
    if num_output_neurons > 2:
        error_string = (
            'The model has {0:d} output neurons, which suggests {0:d}-class '
            'classification. This script handles only binary classification.'
        ).format(num_output_neurons)

        raise ValueError(error_string)

    soundings_only = False

    if isinstance(model_object.input, list):
        list_of_input_tensors = model_object.input
    else:
        list_of_input_tensors = [model_object.input]

    # A single input tensor with one spatial dimension (height only) means the
    # model takes soundings and no radar images.
    if len(list_of_input_tensors) == 1:
        these_spatial_dim = numpy.array(
            list_of_input_tensors[0].get_shape().as_list()[1:-1], dtype=int
        )
        soundings_only = len(these_spatial_dim) == 1

    # Metadata is expected to live next to the model file.
    model_directory_name, _ = os.path.split(model_file_name)
    model_metafile_name = '{0:s}/model_metadata.p'.format(model_directory_name)

    print('Reading metadata from: "{0:s}"...'.format(model_metafile_name))
    model_metadata_dict = cnn.read_model_metadata(model_metafile_name)
    training_option_dict = model_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY]

    # With <= 1 class fraction there is nothing to downsample.
    if len(class_fraction_keys) > 1:
        downsampling_dict = dict(list(zip(
            class_fraction_keys, class_fraction_values
        )))
    else:
        downsampling_dict = None

    # Re-use the training options, overriding files, time range, batch size,
    # and sampling fractions for the inference period.
    training_option_dict[trainval_io.SAMPLING_FRACTIONS_KEY] = downsampling_dict

    example_file_names = input_examples.find_many_example_files(
        top_directory_name=top_example_dir_name, shuffled=False,
        first_spc_date_string=first_spc_date_string,
        last_spc_date_string=last_spc_date_string,
        raise_error_if_any_missing=False)

    training_option_dict[trainval_io.EXAMPLE_FILES_KEY] = example_file_names
    training_option_dict[trainval_io.FIRST_STORM_TIME_KEY] = (
        time_conversion.get_start_of_spc_date(first_spc_date_string)
    )
    training_option_dict[trainval_io.LAST_STORM_TIME_KEY] = (
        time_conversion.get_end_of_spc_date(last_spc_date_string)
    )
    training_option_dict[trainval_io.NUM_EXAMPLES_PER_BATCH_KEY] = (
        NUM_EXAMPLES_PER_BATCH
    )

    # Pick the generator matching the model's input type: soundings-only,
    # reduced 2-D GridRad (layer operations), hybrid 2-D/3-D MYRORSS, or
    # plain 2-D/3-D.
    if soundings_only:
        generator_object = testing_io.sounding_generator(
            option_dict=training_option_dict,
            desired_num_examples=num_examples)
    elif model_metadata_dict[cnn.LAYER_OPERATIONS_KEY] is not None:
        generator_object = testing_io.gridrad_generator_2d_reduced(
            option_dict=training_option_dict,
            desired_num_examples=num_examples,
            list_of_operation_dicts=model_metadata_dict[
                cnn.LAYER_OPERATIONS_KEY]
        )
    elif model_metadata_dict[cnn.CONV_2D3D_KEY]:
        generator_object = testing_io.myrorss_generator_2d3d(
            option_dict=training_option_dict,
            desired_num_examples=num_examples)
    else:
        generator_object = testing_io.generator_2d_or_3d(
            option_dict=training_option_dict,
            desired_num_examples=num_examples)

    include_soundings = (
        training_option_dict[trainval_io.SOUNDING_FIELDS_KEY] is not None
    )

    # Accumulators, extended batch by batch in the loop below.
    full_storm_id_strings = []
    storm_times_unix_sec = numpy.array([], dtype=int)
    observed_labels = numpy.array([], dtype=int)
    class_probability_matrix = None

    # Drain the generator, applying the CNN to each batch.
    while True:
        try:
            this_storm_object_dict = next(generator_object)
            print(SEPARATOR_STRING)
        except StopIteration:
            break

        full_storm_id_strings += this_storm_object_dict[testing_io.FULL_IDS_KEY]
        storm_times_unix_sec = numpy.concatenate((
            storm_times_unix_sec,
            this_storm_object_dict[testing_io.STORM_TIMES_KEY]
        ))
        observed_labels = numpy.concatenate((
            observed_labels, this_storm_object_dict[testing_io.TARGET_ARRAY_KEY]
        ))

        if soundings_only:
            these_predictor_matrices = [
                this_storm_object_dict[testing_io.SOUNDING_MATRIX_KEY]
            ]
        else:
            these_predictor_matrices = this_storm_object_dict[
                testing_io.INPUT_MATRICES_KEY]

        # When soundings are included, the sounding matrix is by convention
        # the last predictor matrix.
        if include_soundings:
            this_sounding_matrix = these_predictor_matrices[-1]
        else:
            this_sounding_matrix = None

        # Dispatch to the apply-function matching the generator chosen above.
        if soundings_only:
            this_probability_matrix = cnn.apply_cnn_soundings_only(
                model_object=model_object, sounding_matrix=this_sounding_matrix,
                verbose=True)
        elif model_metadata_dict[cnn.CONV_2D3D_KEY]:
            if training_option_dict[trainval_io.UPSAMPLE_REFLECTIVITY_KEY]:
                this_probability_matrix = cnn.apply_2d_or_3d_cnn(
                    model_object=model_object,
                    radar_image_matrix=these_predictor_matrices[0],
                    sounding_matrix=this_sounding_matrix, verbose=True)
            else:
                this_probability_matrix = cnn.apply_2d3d_cnn(
                    model_object=model_object,
                    reflectivity_matrix_dbz=these_predictor_matrices[0],
                    azimuthal_shear_matrix_s01=these_predictor_matrices[1],
                    sounding_matrix=this_sounding_matrix, verbose=True)
        else:
            this_probability_matrix = cnn.apply_2d_or_3d_cnn(
                model_object=model_object,
                radar_image_matrix=these_predictor_matrices[0],
                sounding_matrix=this_sounding_matrix, verbose=True)

        print(SEPARATOR_STRING)

        # `+ 0.` yields a new array (defensive copy of the first batch).
        if class_probability_matrix is None:
            class_probability_matrix = this_probability_matrix + 0.
        else:
            class_probability_matrix = numpy.concatenate(
                (class_probability_matrix, this_probability_matrix), axis=0
            )

    output_file_name = prediction_io.find_ungridded_file(
        directory_name=output_dir_name, raise_error_if_missing=False)

    print('Writing results to: "{0:s}"...'.format(output_file_name))
    prediction_io.write_ungridded_predictions(
        netcdf_file_name=output_file_name,
        class_probability_matrix=class_probability_matrix,
        observed_labels=observed_labels, storm_ids=full_storm_id_strings,
        storm_times_unix_sec=storm_times_unix_sec,
        target_name=training_option_dict[trainval_io.TARGET_NAME_KEY],
        model_file_name=model_file_name
    )