def _run(model_file_name, top_example_dir_name, first_time_string,
         last_time_string, num_times, num_examples_per_time, output_file_name):
    """Runs permutation test for predictor importance.

    This is effectively the main method.

    :param model_file_name: See documentation at top of file.
    :param top_example_dir_name: Same.
    :param first_time_string: Same.
    :param last_time_string: Same.
    :param num_times: Same.
    :param num_examples_per_time: Same.
    :param output_file_name: Same.
    """

    # NOTE: all prints use the single-argument function form, which behaves
    # identically under Python 2 (parenthesized expression) and Python 3.
    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = traditional_cnn.read_keras_model(model_file_name)

    model_metafile_name = traditional_cnn.find_metafile(
        model_file_name=model_file_name)

    print('Reading model metadata from: "{0:s}"...'.format(
        model_metafile_name))
    model_metadata_dict = traditional_cnn.read_model_metadata(
        model_metafile_name)
    print(SEPARATOR_STRING)

    predictor_matrix, target_values = _read_examples(
        top_example_dir_name=top_example_dir_name,
        first_time_string=first_time_string,
        last_time_string=last_time_string, num_times=num_times,
        num_examples_per_time=num_examples_per_time,
        model_metadata_dict=model_metadata_dict)
    print(SEPARATOR_STRING)

    narr_predictor_names = model_metadata_dict[
        traditional_cnn.NARR_PREDICTOR_NAMES_KEY]

    result_dict = permutation.run_permutation_test(
        model_object=model_object,
        list_of_input_matrices=[predictor_matrix],
        predictor_names_by_matrix=[narr_predictor_names],
        target_values=target_values,
        prediction_function=_prediction_function,
        cost_function=permutation.cross_entropy_function)
    print(SEPARATOR_STRING)

    print('Writing results to: "{0:s}"...'.format(output_file_name))
    permutation.write_results(
        result_dict=result_dict, pickle_file_name=output_file_name)
def _run(upconvnet_file_name, top_example_dir_name, first_time_string,
         last_time_string, num_baseline_examples, num_test_examples,
         percent_svd_variance_to_keep, top_output_dir_name):
    """Runs novelty detection.

    :param upconvnet_file_name: See documentation at top of file.
    :param top_example_dir_name: Same.
    :param first_time_string: Same.
    :param last_time_string: Same.
    :param num_baseline_examples: Same.
    :param num_test_examples: Same.
    :param percent_svd_variance_to_keep: Same.
    :param top_output_dir_name: Same.
    """

    # Read upconvnet and metadata.
    ucn_metafile_name = traditional_cnn.find_metafile(
        model_file_name=upconvnet_file_name, raise_error_if_missing=True)

    print('Reading trained upconvnet from: "{0:s}"...'.format(
        upconvnet_file_name))
    ucn_model_object = traditional_cnn.read_keras_model(upconvnet_file_name)

    print('Reading upconvnet metadata from: "{0:s}"...'.format(
        ucn_metafile_name))
    ucn_metadata_dict = upconvnet.read_model_metadata(ucn_metafile_name)

    # Read the CNN whose feature space is used for novelty detection.  Its
    # path is stored in the upconvnet metadata.
    cnn_file_name = ucn_metadata_dict[upconvnet.CNN_FILE_NAME_KEY]
    cnn_metafile_name = traditional_cnn.find_metafile(
        model_file_name=cnn_file_name, raise_error_if_missing=True)

    print('Reading trained CNN from: "{0:s}"...'.format(cnn_file_name))
    cnn_model_object = traditional_cnn.read_keras_model(cnn_file_name)

    print('Reading CNN metadata from: "{0:s}"...'.format(cnn_metafile_name))
    cnn_metadata_dict = traditional_cnn.read_model_metadata(cnn_metafile_name)
    print(SEPARATOR_STRING)

    baseline_image_matrix, test_image_matrix = (
        _find_baseline_and_test_examples(
            top_example_dir_name=top_example_dir_name,
            first_time_string=first_time_string,
            last_time_string=last_time_string,
            num_baseline_examples=num_baseline_examples,
            num_test_examples=num_test_examples,
            cnn_model_object=cnn_model_object,
            cnn_metadata_dict=cnn_metadata_dict)
    )
    print(SEPARATOR_STRING)

    novelty_dict = novelty_detection.do_novelty_detection(
        baseline_image_matrix=baseline_image_matrix,
        test_image_matrix=test_image_matrix,
        cnn_model_object=cnn_model_object,
        cnn_feature_layer_name=traditional_cnn.get_flattening_layer(
            cnn_model_object),
        ucn_model_object=ucn_model_object,
        num_novel_test_images=num_test_examples,
        norm_function=None, denorm_function=None,
        percent_svd_variance_to_keep=percent_svd_variance_to_keep)
    print(SEPARATOR_STRING)

    novelty_dict[novelty_detection.UCN_FILE_NAME_KEY] = upconvnet_file_name
    novelty_file_name = '{0:s}/novelty_results.p'.format(top_output_dir_name)

    print('Writing results to: "{0:s}"...\n'.format(novelty_file_name))
    novelty_detection.write_results(
        novelty_dict=novelty_dict, pickle_file_name=novelty_file_name)

    # Plot results for each test example.
    for i in range(num_test_examples):
        _plot_results(
            novelty_dict=novelty_dict,
            narr_predictor_names=cnn_metadata_dict[
                traditional_cnn.NARR_PREDICTOR_NAMES_KEY],
            test_index=i, top_output_dir_name=top_output_dir_name)

        print('\n')
def _run(model_file_name, first_time_string, last_time_string,
         randomize_times, num_target_times, use_isotonic_regression,
         top_narr_directory_name, top_frontal_grid_dir_name, output_dir_name):
    """Applies traditional CNN to full grids.

    This is effectively the main method.

    :param model_file_name: See documentation at top of file.
    :param first_time_string: Same.
    :param last_time_string: Same.
    :param randomize_times: Same.
    :param num_target_times: Same.
    :param use_isotonic_regression: Same.
    :param top_narr_directory_name: Same.
    :param top_frontal_grid_dir_name: Same.
    :param output_dir_name: Same.
    """

    first_time_unix_sec = time_conversion.string_to_unix_sec(
        first_time_string, INPUT_TIME_FORMAT)
    last_time_unix_sec = time_conversion.string_to_unix_sec(
        last_time_string, INPUT_TIME_FORMAT)

    target_times_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=first_time_unix_sec,
        end_time_unix_sec=last_time_unix_sec,
        time_interval_sec=NARR_TIME_INTERVAL_SEC, include_endpoint=True)

    if randomize_times:
        # Subsample target times at random (shuffle in place, take prefix).
        error_checking.assert_is_leq(
            num_target_times, len(target_times_unix_sec))
        numpy.random.shuffle(target_times_unix_sec)
        target_times_unix_sec = target_times_unix_sec[:num_target_times]

    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = traditional_cnn.read_keras_model(model_file_name)

    model_metafile_name = traditional_cnn.find_metafile(
        model_file_name=model_file_name, raise_error_if_missing=True)

    print('Reading model metadata from: "{0:s}"...'.format(
        model_metafile_name))
    model_metadata_dict = traditional_cnn.read_model_metadata(
        model_metafile_name)

    if use_isotonic_regression:
        isotonic_file_name = isotonic_regression.find_model_file(
            base_model_file_name=model_file_name, raise_error_if_missing=True)

        print('Reading isotonic-regression models from: "{0:s}"...'.format(
            isotonic_file_name))
        isotonic_model_object_by_class = (
            isotonic_regression.read_model_for_each_class(isotonic_file_name)
        )
    else:
        isotonic_model_object_by_class = None

    # A model with no lead time takes 3-D examples; otherwise 4-D (with an
    # extra time dimension).
    if model_metadata_dict[traditional_cnn.NUM_LEAD_TIME_STEPS_KEY] is None:
        num_dimensions = 3
    else:
        num_dimensions = 4

    num_classes = len(model_metadata_dict[traditional_cnn.CLASS_FRACTIONS_KEY])
    num_target_times = len(target_times_unix_sec)
    print(SEPARATOR_STRING)

    for i in range(num_target_times):
        if num_dimensions == 3:
            (this_class_probability_matrix, this_target_matrix
            ) = traditional_cnn.apply_model_to_3d_example(
                model_object=model_object,
                target_time_unix_sec=target_times_unix_sec[i],
                top_narr_directory_name=top_narr_directory_name,
                top_frontal_grid_dir_name=top_frontal_grid_dir_name,
                narr_predictor_names=model_metadata_dict[
                    traditional_cnn.NARR_PREDICTOR_NAMES_KEY],
                pressure_level_mb=model_metadata_dict[
                    traditional_cnn.PRESSURE_LEVEL_KEY],
                dilation_distance_metres=model_metadata_dict[
                    traditional_cnn.DILATION_DISTANCE_FOR_TARGET_KEY],
                num_rows_in_half_grid=model_metadata_dict[
                    traditional_cnn.NUM_ROWS_IN_HALF_GRID_KEY],
                num_columns_in_half_grid=model_metadata_dict[
                    traditional_cnn.NUM_COLUMNS_IN_HALF_GRID_KEY],
                num_classes=num_classes,
                isotonic_model_object_by_class=isotonic_model_object_by_class,
                narr_mask_matrix=model_metadata_dict[
                    traditional_cnn.NARR_MASK_MATRIX_KEY])
        else:
            (this_class_probability_matrix, this_target_matrix
            ) = traditional_cnn.apply_model_to_4d_example(
                model_object=model_object,
                target_time_unix_sec=target_times_unix_sec[i],
                predictor_time_step_offsets=model_metadata_dict[
                    traditional_cnn.PREDICTOR_TIME_STEP_OFFSETS_KEY],
                num_lead_time_steps=model_metadata_dict[
                    traditional_cnn.NUM_LEAD_TIME_STEPS_KEY],
                top_narr_directory_name=top_narr_directory_name,
                top_frontal_grid_dir_name=top_frontal_grid_dir_name,
                narr_predictor_names=model_metadata_dict[
                    traditional_cnn.NARR_PREDICTOR_NAMES_KEY],
                pressure_level_mb=model_metadata_dict[
                    traditional_cnn.PRESSURE_LEVEL_KEY],
                dilation_distance_metres=model_metadata_dict[
                    traditional_cnn.DILATION_DISTANCE_FOR_TARGET_KEY],
                num_rows_in_half_grid=model_metadata_dict[
                    traditional_cnn.NUM_ROWS_IN_HALF_GRID_KEY],
                num_columns_in_half_grid=model_metadata_dict[
                    traditional_cnn.NUM_COLUMNS_IN_HALF_GRID_KEY],
                num_classes=num_classes,
                isotonic_model_object_by_class=isotonic_model_object_by_class,
                narr_mask_matrix=model_metadata_dict[
                    traditional_cnn.NARR_MASK_MATRIX_KEY])

        # Masked grid cells are labeled -1; treat them as "no front" (0).
        this_target_matrix[this_target_matrix == -1] = 0
        print(MINOR_SEPARATOR_STRING)

        this_prediction_file_name = ml_utils.find_gridded_prediction_file(
            directory_name=output_dir_name,
            first_target_time_unix_sec=target_times_unix_sec[i],
            last_target_time_unix_sec=target_times_unix_sec[i],
            raise_error_if_missing=False)

        print('Writing gridded predictions to file: "{0:s}"...'.format(
            this_prediction_file_name))

        ml_utils.write_gridded_predictions(
            pickle_file_name=this_prediction_file_name,
            class_probability_matrix=this_class_probability_matrix,
            target_times_unix_sec=target_times_unix_sec[[i]],
            model_file_name=model_file_name,
            used_isotonic_regression=use_isotonic_regression,
            target_matrix=this_target_matrix)

        if i != num_target_times - 1:
            print(SEPARATOR_STRING)
def _run(model_file_name, first_eval_time_string, last_eval_time_string,
         num_times, num_examples_per_time, dilation_distance_metres,
         use_isotonic_regression, top_narr_directory_name,
         top_frontal_grid_dir_name, output_dir_name):
    """Evaluates CNN trained by patch classification.

    This is effectively the main method.

    :param model_file_name: See documentation at top of file.
    :param first_eval_time_string: Same.
    :param last_eval_time_string: Same.
    :param num_times: Same.
    :param num_examples_per_time: Same.
    :param dilation_distance_metres: Same.
    :param use_isotonic_regression: Same.
    :param top_narr_directory_name: Same.
    :param top_frontal_grid_dir_name: Same.
    :param output_dir_name: Same.
    """

    first_eval_time_unix_sec = time_conversion.string_to_unix_sec(
        first_eval_time_string, INPUT_TIME_FORMAT)
    last_eval_time_unix_sec = time_conversion.string_to_unix_sec(
        last_eval_time_string, INPUT_TIME_FORMAT)

    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = traditional_cnn.read_keras_model(model_file_name)

    model_metafile_name = traditional_cnn.find_metafile(
        model_file_name=model_file_name, raise_error_if_missing=True)

    print('Reading model metadata from: "{0:s}"...'.format(
        model_metafile_name))
    model_metadata_dict = traditional_cnn.read_model_metadata(
        model_metafile_name)

    # A negative dilation distance means "use the dilation distance from
    # training" (stored in the model metadata).  `+ 0.` forces a float copy.
    if dilation_distance_metres < 0:
        dilation_distance_metres = model_metadata_dict[
            traditional_cnn.DILATION_DISTANCE_FOR_TARGET_KEY] + 0.

    if use_isotonic_regression:
        isotonic_file_name = isotonic_regression.find_model_file(
            base_model_file_name=model_file_name, raise_error_if_missing=True)

        print('Reading isotonic-regression models from: "{0:s}"...'.format(
            isotonic_file_name))
        isotonic_model_object_by_class = (
            isotonic_regression.read_model_for_each_class(isotonic_file_name))
    else:
        isotonic_model_object_by_class = None

    num_classes = len(model_metadata_dict[traditional_cnn.CLASS_FRACTIONS_KEY])
    print(SEPARATOR_STRING)

    class_probability_matrix, observed_labels = (
        eval_utils.downsized_examples_to_eval_pairs(
            model_object=model_object,
            first_target_time_unix_sec=first_eval_time_unix_sec,
            last_target_time_unix_sec=last_eval_time_unix_sec,
            num_target_times_to_sample=num_times,
            num_examples_per_time=num_examples_per_time,
            top_narr_directory_name=top_narr_directory_name,
            top_frontal_grid_dir_name=top_frontal_grid_dir_name,
            narr_predictor_names=model_metadata_dict[
                traditional_cnn.NARR_PREDICTOR_NAMES_KEY],
            pressure_level_mb=model_metadata_dict[
                traditional_cnn.PRESSURE_LEVEL_KEY],
            dilation_distance_metres=dilation_distance_metres,
            num_rows_in_half_grid=model_metadata_dict[
                traditional_cnn.NUM_ROWS_IN_HALF_GRID_KEY],
            num_columns_in_half_grid=model_metadata_dict[
                traditional_cnn.NUM_COLUMNS_IN_HALF_GRID_KEY],
            num_classes=num_classes,
            predictor_time_step_offsets=model_metadata_dict[
                traditional_cnn.PREDICTOR_TIME_STEP_OFFSETS_KEY],
            num_lead_time_steps=model_metadata_dict[
                traditional_cnn.NUM_LEAD_TIME_STEPS_KEY],
            isotonic_model_object_by_class=isotonic_model_object_by_class,
            narr_mask_matrix=model_metadata_dict[
                traditional_cnn.NARR_MASK_MATRIX_KEY]))
    print(SEPARATOR_STRING)

    model_eval_helper.run_evaluation(
        class_probability_matrix=class_probability_matrix,
        observed_labels=observed_labels, output_dir_name=output_dir_name)
def _run(input_file_name, colour_map_name, min_colour_percentile,
         max_colour_percentile, same_cmap_for_all_predictors,
         top_output_dir_name):
    """Plots results of backwards optimization.

    This is effectively the main method.

    :param input_file_name: See documentation at top of file.
    :param colour_map_name: Same.
    :param min_colour_percentile: Same.
    :param max_colour_percentile: Same.
    :param same_cmap_for_all_predictors: Same.
    :param top_output_dir_name: Same.
    """

    original_output_dir_name = '{0:s}/original'.format(top_output_dir_name)
    optimized_output_dir_name = '{0:s}/optimized'.format(top_output_dir_name)

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=original_output_dir_name)
    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=optimized_output_dir_name)

    error_checking.assert_is_geq(min_colour_percentile, 0.)
    error_checking.assert_is_leq(max_colour_percentile, 100.)
    error_checking.assert_is_greater(
        max_colour_percentile, min_colour_percentile)

    colour_map_object = pyplot.cm.get_cmap(colour_map_name)

    print('Reading data from: "{0:s}"...'.format(input_file_name))
    this_list, bwo_metadata_dict = bwo.read_results(input_file_name)

    optimized_predictor_matrix = this_list[0]
    num_examples = optimized_predictor_matrix.shape[0]
    del this_list  # Release the wrapper list; only the matrix is needed.

    original_predictor_matrix = bwo_metadata_dict[bwo.INIT_FUNCTION_KEY][0]

    model_metafile_name = traditional_cnn.find_metafile(
        model_file_name=bwo_metadata_dict[bwo.MODEL_FILE_NAME_KEY])

    print('Reading metadata from: "{0:s}"...'.format(model_metafile_name))
    model_metadata_dict = traditional_cnn.read_model_metadata(
        model_metafile_name)

    narr_predictor_names = model_metadata_dict[
        traditional_cnn.NARR_PREDICTOR_NAMES_KEY]
    num_predictors = len(narr_predictor_names)

    # Plot wind barbs only if wind components are among the predictors
    # (`get_wind_indices` raises ValueError otherwise).
    try:
        example_plotting.get_wind_indices(narr_predictor_names)
        plot_wind_barbs = True
    except ValueError:
        plot_wind_barbs = False

    for i in range(num_examples):
        # Colour limits are computed over the original *and* optimized
        # versions of the example, so both figures share a colour scheme.
        this_combined_matrix = numpy.concatenate(
            (original_predictor_matrix[i, ...],
             optimized_predictor_matrix[i, ...]),
            axis=0)

        if same_cmap_for_all_predictors:
            this_min_colour_value = numpy.percentile(
                this_combined_matrix, min_colour_percentile)
            this_max_colour_value = numpy.percentile(
                this_combined_matrix, max_colour_percentile)

            this_min_cval_by_predictor = numpy.full(
                num_predictors, this_min_colour_value)
            this_max_cval_by_predictor = numpy.full(
                num_predictors, this_max_colour_value)
        else:
            this_min_cval_by_predictor = numpy.full(num_predictors, numpy.nan)
            this_max_cval_by_predictor = this_min_cval_by_predictor + 0.

            for k in range(num_predictors):
                this_min_cval_by_predictor[k] = numpy.percentile(
                    this_combined_matrix[..., k], min_colour_percentile)
                this_max_cval_by_predictor[k] = numpy.percentile(
                    this_combined_matrix[..., k], max_colour_percentile)

        this_figure_file_name = '{0:s}/example{1:06d}_original.jpg'.format(
            original_output_dir_name, i)

        if plot_wind_barbs:
            example_plotting.plot_many_predictors_with_barbs(
                predictor_matrix=original_predictor_matrix[i, ...],
                predictor_names=narr_predictor_names,
                cmap_object_by_predictor=[colour_map_object] * num_predictors,
                min_colour_value_by_predictor=this_min_cval_by_predictor,
                max_colour_value_by_predictor=this_max_cval_by_predictor)
        else:
            example_plotting.plot_many_predictors_sans_barbs(
                predictor_matrix=original_predictor_matrix[i, ...],
                predictor_names=narr_predictor_names,
                cmap_object_by_predictor=[colour_map_object] * num_predictors,
                min_colour_value_by_predictor=this_min_cval_by_predictor,
                max_colour_value_by_predictor=this_max_cval_by_predictor)

        print('Saving figure to: "{0:s}"...'.format(this_figure_file_name))
        pyplot.savefig(this_figure_file_name, dpi=FIGURE_RESOLUTION_DPI)
        pyplot.close()

        this_figure_file_name = '{0:s}/example{1:06d}_optimized.jpg'.format(
            optimized_output_dir_name, i)

        if plot_wind_barbs:
            example_plotting.plot_many_predictors_with_barbs(
                predictor_matrix=optimized_predictor_matrix[i, ...],
                predictor_names=narr_predictor_names,
                cmap_object_by_predictor=[colour_map_object] * num_predictors,
                min_colour_value_by_predictor=this_min_cval_by_predictor,
                max_colour_value_by_predictor=this_max_cval_by_predictor)
        else:
            example_plotting.plot_many_predictors_sans_barbs(
                predictor_matrix=optimized_predictor_matrix[i, ...],
                predictor_names=narr_predictor_names,
                cmap_object_by_predictor=[colour_map_object] * num_predictors,
                min_colour_value_by_predictor=this_min_cval_by_predictor,
                max_colour_value_by_predictor=this_max_cval_by_predictor)

        print('Saving figure to: "{0:s}"...'.format(this_figure_file_name))
        pyplot.savefig(this_figure_file_name, dpi=FIGURE_RESOLUTION_DPI)
        pyplot.close()
def _run(input_cnn_file_name, use_batch_norm_for_out_layer, use_transposed_conv,
         use_conv_for_out_layer, smoothing_radius_px, top_training_dir_name,
         first_training_time_string, last_training_time_string,
         top_validation_dir_name, first_validation_time_string,
         last_validation_time_string, num_examples_per_batch, num_epochs,
         num_training_batches_per_epoch, num_validation_batches_per_epoch,
         output_model_file_name):
    """Trains upconvnet.

    This is effectively the main method.

    :param input_cnn_file_name: See documentation at top of file.
    :param use_batch_norm_for_out_layer: Same.
    :param use_transposed_conv: Same.
    :param use_conv_for_out_layer: Same.
    :param smoothing_radius_px: Same.
    :param top_training_dir_name: Same.
    :param first_training_time_string: Same.
    :param last_training_time_string: Same.
    :param top_validation_dir_name: Same.
    :param first_validation_time_string: Same.
    :param last_validation_time_string: Same.
    :param num_examples_per_batch: Same.
    :param num_epochs: Same.
    :param num_training_batches_per_epoch: Same.
    :param num_validation_batches_per_epoch: Same.
    :param output_model_file_name: Same.
    """

    first_training_time_unix_sec = time_conversion.string_to_unix_sec(
        first_training_time_string, TIME_FORMAT)
    last_training_time_unix_sec = time_conversion.string_to_unix_sec(
        last_training_time_string, TIME_FORMAT)

    first_validation_time_unix_sec = time_conversion.string_to_unix_sec(
        first_validation_time_string, TIME_FORMAT)
    last_validation_time_unix_sec = time_conversion.string_to_unix_sec(
        last_validation_time_string, TIME_FORMAT)

    # A non-positive radius means "no smoothing".
    if smoothing_radius_px <= 0:
        smoothing_radius_px = None

    print('Reading trained CNN from: "{0:s}"...'.format(input_cnn_file_name))
    cnn_model_object = traditional_cnn.read_keras_model(input_cnn_file_name)

    cnn_metafile_name = traditional_cnn.find_metafile(
        model_file_name=input_cnn_file_name, raise_error_if_missing=True)

    print('Reading CNN metadata from: "{0:s}"...'.format(cnn_metafile_name))
    cnn_metadata_dict = traditional_cnn.read_model_metadata(cnn_metafile_name)

    # The upconvnet's input is the CNN's flattened feature vector; recover the
    # pre-flattening spatial dimensions from the flattening layer's input.
    cnn_feature_layer_name = traditional_cnn.get_flattening_layer(
        cnn_model_object)
    cnn_feature_layer_object = cnn_model_object.get_layer(
        name=cnn_feature_layer_name)
    cnn_feature_dimensions = numpy.array(
        cnn_feature_layer_object.input.shape[1:], dtype=int)

    num_input_features = numpy.prod(cnn_feature_dimensions)
    first_num_rows = cnn_feature_dimensions[0]
    first_num_columns = cnn_feature_dimensions[1]

    # The upconvnet must reconstruct images with the same number of channels
    # as the CNN's input.
    num_output_channels = numpy.array(
        cnn_model_object.input.shape[1:], dtype=int
    )[-1]

    ucn_metafile_name = traditional_cnn.find_metafile(
        model_file_name=output_model_file_name, raise_error_if_missing=False)

    print('Writing upconvnet metadata to: "{0:s}"...'.format(
        ucn_metafile_name))
    upconvnet.write_model_metadata(
        pickle_file_name=ucn_metafile_name,
        top_training_dir_name=top_training_dir_name,
        first_training_time_unix_sec=first_training_time_unix_sec,
        last_training_time_unix_sec=last_training_time_unix_sec,
        cnn_model_file_name=input_cnn_file_name,
        cnn_feature_layer_name=cnn_feature_layer_name, num_epochs=num_epochs,
        num_examples_per_batch=num_examples_per_batch,
        num_training_batches_per_epoch=num_training_batches_per_epoch,
        num_validation_batches_per_epoch=num_validation_batches_per_epoch,
        top_validation_dir_name=top_validation_dir_name,
        first_validation_time_unix_sec=first_validation_time_unix_sec,
        last_validation_time_unix_sec=last_validation_time_unix_sec)
    print(SEPARATOR_STRING)

    ucn_model_object = upconvnet.create_net(
        num_input_features=num_input_features, first_num_rows=first_num_rows,
        first_num_columns=first_num_columns,
        upsampling_factors=UPSAMPLING_FACTORS,
        num_output_channels=num_output_channels,
        use_activation_for_out_layer=False,
        use_bn_for_out_layer=use_batch_norm_for_out_layer,
        use_transposed_conv=use_transposed_conv,
        use_conv_for_out_layer=use_conv_for_out_layer,
        smoothing_radius_px=smoothing_radius_px)
    print(SEPARATOR_STRING)

    upconvnet.train_upconvnet(
        ucn_model_object=ucn_model_object,
        top_training_dir_name=top_training_dir_name,
        first_training_time_unix_sec=first_training_time_unix_sec,
        last_training_time_unix_sec=last_training_time_unix_sec,
        cnn_model_object=cnn_model_object,
        cnn_feature_layer_name=cnn_feature_layer_name,
        cnn_metadata_dict=cnn_metadata_dict,
        num_examples_per_batch=num_examples_per_batch, num_epochs=num_epochs,
        num_training_batches_per_epoch=num_training_batches_per_epoch,
        output_model_file_name=output_model_file_name,
        num_validation_batches_per_epoch=num_validation_batches_per_epoch,
        top_validation_dir_name=top_validation_dir_name,
        first_validation_time_unix_sec=first_validation_time_unix_sec,
        last_validation_time_unix_sec=last_validation_time_unix_sec)
def _run(orig_model_file_name, top_training_dir_name,
         first_training_time_string, last_training_time_string,
         num_training_examples, top_validn_dir_name, first_validn_time_string,
         last_validn_time_string, num_validn_examples, narr_predictor_names,
         num_training_examples_per_batch, num_epochs, min_loss_decrease,
         min_percentage_loss_decrease, num_steps_for_loss_decrease,
         output_file_name):
    """Runs sequential forward selection.

    This is effectively the main method.

    :param orig_model_file_name: See documentation at top of file.
    :param top_training_dir_name: Same.
    :param first_training_time_string: Same.
    :param last_training_time_string: Same.
    :param num_training_examples: Same.
    :param top_validn_dir_name: Same.
    :param first_validn_time_string: Same.
    :param last_validn_time_string: Same.
    :param num_validn_examples: Same.
    :param narr_predictor_names: Same.
    :param num_training_examples_per_batch: Same.
    :param num_epochs: Same.
    :param min_loss_decrease: Same.
    :param min_percentage_loss_decrease: Same.
    :param num_steps_for_loss_decrease: Same.
    :param output_file_name: Same.
    """

    print('Reading original model from: "{0:s}"...'.format(
        orig_model_file_name))
    orig_model_object = traditional_cnn.read_keras_model(orig_model_file_name)

    model_metafile_name = traditional_cnn.find_metafile(
        model_file_name=orig_model_file_name)

    print('Reading model metadata from: "{0:s}"...'.format(
        model_metafile_name))
    model_metadata_dict = traditional_cnn.read_model_metadata(
        model_metafile_name)
    print(SEPARATOR_STRING)

    training_predictor_matrix, training_target_values = _read_examples(
        top_example_dir_name=top_training_dir_name,
        first_time_string=first_training_time_string,
        last_time_string=last_training_time_string,
        num_examples=num_training_examples,
        model_metadata_dict=model_metadata_dict)
    print(SEPARATOR_STRING)

    validn_predictor_matrix, validn_target_values = _read_examples(
        top_example_dir_name=top_validn_dir_name,
        first_time_string=first_validn_time_string,
        last_time_string=last_validn_time_string,
        num_examples=num_validn_examples,
        model_metadata_dict=model_metadata_dict)
    print(SEPARATOR_STRING)

    # TODO(thunderhoser): I could make the code more efficient by making
    # `narr_predictor_names` an input arg to `_read_examples`.

    # An empty or "None" first element means "use all predictors from the
    # original model".
    if narr_predictor_names[0] in ['', 'None']:
        narr_predictor_names = model_metadata_dict[
            traditional_cnn.NARR_PREDICTOR_NAMES_KEY]

    training_function = sequential_selection.create_training_function(
        num_training_examples_per_batch=num_training_examples_per_batch,
        num_epochs=num_epochs)

    result_dict = sequential_selection.run_sfs(
        list_of_training_matrices=[training_predictor_matrix],
        training_target_values=training_target_values,
        list_of_validation_matrices=[validn_predictor_matrix],
        validation_target_values=validn_target_values,
        predictor_names_by_matrix=[narr_predictor_names],
        model_builder=_create_model_builder(orig_model_object),
        training_function=training_function,
        min_loss_decrease=min_loss_decrease,
        min_percentage_loss_decrease=min_percentage_loss_decrease,
        num_steps_for_loss_decrease=num_steps_for_loss_decrease)
    print(SEPARATOR_STRING)

    # Record the input args alongside the results, for provenance.
    result_dict.update({
        ORIG_MODEL_FILE_ARG_NAME: orig_model_file_name,
        TRAINING_DIR_ARG_NAME: top_training_dir_name,
        FIRST_TRAINING_TIME_ARG_NAME: first_training_time_string,
        LAST_TRAINING_TIME_ARG_NAME: last_training_time_string,
        NUM_TRAINING_EXAMPLES_ARG_NAME: num_training_examples,
        VALIDN_DIR_ARG_NAME: top_validn_dir_name,
        FIRST_VALIDN_TIME_ARG_NAME: first_validn_time_string,
        LAST_VALIDN_TIME_ARG_NAME: last_validn_time_string,
        NUM_VALIDN_EXAMPLES_ARG_NAME: num_validn_examples
    })

    print('Writing results to: "{0:s}"...'.format(output_file_name))
    sequential_selection.write_results(
        result_dict=result_dict, pickle_file_name=output_file_name)
def _run(model_file_name, example_file_name, num_examples, example_indices,
         component_type_string, target_class, layer_name, ideal_activation,
         neuron_indices, channel_index, output_file_name):
    """Creates saliency map for each example, based on the same CNN.

    This is effectively the main method.

    :param model_file_name: See documentation at top of file.
    :param example_file_name: Same.
    :param num_examples: Same.
    :param example_indices: Same.
    :param component_type_string: Same.
    :param target_class: Same.
    :param layer_name: Same.
    :param ideal_activation: Same.
    :param neuron_indices: Same.
    :param channel_index: Same.
    :param output_file_name: Same.
    """

    # A non-positive `num_examples` means "use the explicit example_indices".
    if num_examples <= 0:
        num_examples = None

    if num_examples is None:
        error_checking.assert_is_geq_numpy_array(example_indices, 0)
    else:
        error_checking.assert_is_greater(num_examples, 0)

    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = traditional_cnn.read_keras_model(model_file_name)

    model_metafile_name = traditional_cnn.find_metafile(
        model_file_name=model_file_name)

    print('Reading model metadata from: "{0:s}"...'.format(
        model_metafile_name))
    model_metadata_dict = traditional_cnn.read_model_metadata(
        model_metafile_name)

    print('Reading normalized examples from: "{0:s}"...'.format(
        example_file_name))
    example_dict = trainval_io.read_downsized_3d_examples(
        netcdf_file_name=example_file_name,
        predictor_names_to_keep=model_metadata_dict[
            traditional_cnn.NARR_PREDICTOR_NAMES_KEY],
        num_half_rows_to_keep=model_metadata_dict[
            traditional_cnn.NUM_ROWS_IN_HALF_GRID_KEY],
        num_half_columns_to_keep=model_metadata_dict[
            traditional_cnn.NUM_COLUMNS_IN_HALF_GRID_KEY])

    predictor_matrix = example_dict[trainval_io.PREDICTOR_MATRIX_KEY]

    if num_examples is not None:
        # Draw `num_examples` example indices at random, without replacement.
        num_examples_total = predictor_matrix.shape[0]
        example_indices = numpy.linspace(
            0, num_examples_total - 1, num=num_examples_total, dtype=int)

        num_examples = min([num_examples, num_examples_total])
        example_indices = numpy.random.choice(
            example_indices, size=num_examples, replace=False)

    predictor_matrix = predictor_matrix[example_indices, ...]

    if component_type_string == CLASS_COMPONENT_TYPE_STRING:
        print('Computing saliency maps for target class {0:d}...'.format(
            target_class))

        saliency_matrix = (
            gg_saliency_maps.get_saliency_maps_for_class_activation(
                model_object=model_object, target_class=target_class,
                list_of_input_matrices=[predictor_matrix])[0])

    elif component_type_string == NEURON_COMPONENT_TYPE_STRING:
        # Bug fix: `.format` was chained onto the print call (outside the
        # parens), which works only via the Python-2 print-statement parse.
        print('Computing saliency maps for neuron {0:s} in layer '
              '"{1:s}"...'.format(str(neuron_indices), layer_name))

        saliency_matrix = (
            gg_saliency_maps.get_saliency_maps_for_neuron_activation(
                model_object=model_object, layer_name=layer_name,
                neuron_indices=neuron_indices,
                list_of_input_matrices=[predictor_matrix],
                ideal_activation=ideal_activation)[0])

    else:
        print('Computing saliency maps for channel {0:d} in layer '
              '"{1:s}"...'.format(channel_index, layer_name))

        saliency_matrix = (
            gg_saliency_maps.get_saliency_maps_for_channel_activation(
                model_object=model_object, layer_name=layer_name,
                channel_index=channel_index,
                list_of_input_matrices=[predictor_matrix],
                stat_function_for_neuron_activations=K.max,
                ideal_activation=ideal_activation)[0])

    print('Writing results to: "{0:s}"...'.format(output_file_name))
    ge_saliency_maps.write_file(
        pickle_file_name=output_file_name,
        normalized_predictor_matrix=predictor_matrix,
        saliency_matrix=saliency_matrix, model_file_name=model_file_name,
        component_type_string=component_type_string, target_class=target_class,
        layer_name=layer_name, ideal_activation=ideal_activation,
        neuron_indices=neuron_indices, channel_index=channel_index)
def _run(model_file_name, example_file_name, num_examples, example_indices,
         component_type_string, target_class, layer_name, ideal_activation,
         neuron_indices, channel_index, num_iterations, learning_rate,
         output_file_name):
    """Runs backwards optimization on a trained CNN.

    This is effectively the main method.

    :param model_file_name: See documentation at top of file.
    :param example_file_name: Same.
    :param num_examples: Same.
    :param example_indices: Same.
    :param component_type_string: Same.
    :param target_class: Same.
    :param layer_name: Same.
    :param ideal_activation: Same.
    :param neuron_indices: Same.
    :param channel_index: Same.
    :param num_iterations: Same.
    :param learning_rate: Same.
    :param output_file_name: Same.
    """

    # A non-positive `num_examples` means "use the explicit example_indices".
    if num_examples <= 0:
        num_examples = None

    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = traditional_cnn.read_keras_model(model_file_name)

    model_metafile_name = traditional_cnn.find_metafile(
        model_file_name=model_file_name)

    print('Reading model metadata from: "{0:s}"...'.format(
        model_metafile_name))
    model_metadata_dict = traditional_cnn.read_model_metadata(
        model_metafile_name)

    print('Reading normalized examples from: "{0:s}"...'.format(
        example_file_name))
    example_dict = trainval_io.read_downsized_3d_examples(
        netcdf_file_name=example_file_name,
        predictor_names_to_keep=model_metadata_dict[
            traditional_cnn.NARR_PREDICTOR_NAMES_KEY],
        num_half_rows_to_keep=model_metadata_dict[
            traditional_cnn.NUM_ROWS_IN_HALF_GRID_KEY],
        num_half_columns_to_keep=model_metadata_dict[
            traditional_cnn.NUM_COLUMNS_IN_HALF_GRID_KEY])

    predictor_matrix = example_dict[trainval_io.PREDICTOR_MATRIX_KEY]

    if num_examples is None:
        error_checking.assert_is_geq_numpy_array(example_indices, 0)
        num_examples = len(example_indices)
    else:
        error_checking.assert_is_greater(num_examples, 0)

        # Draw `num_examples` example indices at random, without replacement.
        num_examples_total = predictor_matrix.shape[0]
        example_indices = numpy.linspace(
            0, num_examples_total - 1, num=num_examples_total, dtype=int)

        num_examples = min([num_examples, num_examples_total])
        example_indices = numpy.random.choice(
            example_indices, size=num_examples, replace=False)

    predictor_matrix = predictor_matrix[example_indices, ...]
    optimized_predictor_matrix = numpy.full(predictor_matrix.shape, numpy.nan)
    print(SEPARATOR_STRING)

    for i in range(num_examples):
        if component_type_string == CLASS_COMPONENT_TYPE_STRING:
            # Bug fix (here and below): `.format` was chained onto the print
            # call, outside the parens — works only via the Python-2
            # print-statement parse.  Moved inside.
            print((
                'Optimizing {0:d}th of {1:d} images for target class {2:d}...'
            ).format(i + 1, num_examples, target_class))

            optimized_predictor_matrix[i, ...] = (
                backwards_opt.optimize_input_for_class(
                    model_object=model_object, target_class=target_class,
                    init_function_or_matrices=[predictor_matrix[[i], ...]],
                    num_iterations=num_iterations,
                    learning_rate=learning_rate)[0])

        elif component_type_string == NEURON_COMPONENT_TYPE_STRING:
            print((
                'Optimizing {0:d}th of {1:d} images for neuron {2:s} in '
                'layer "{3:s}"...'
            ).format(i + 1, num_examples, str(neuron_indices), layer_name))

            optimized_predictor_matrix[i, ...] = (
                backwards_opt.optimize_input_for_neuron(
                    model_object=model_object, layer_name=layer_name,
                    neuron_indices=neuron_indices,
                    init_function_or_matrices=[predictor_matrix[[i], ...]],
                    num_iterations=num_iterations,
                    learning_rate=learning_rate,
                    ideal_activation=ideal_activation)[0])

        else:
            print((
                'Optimizing {0:d}th of {1:d} images for channel {2:d} in '
                'layer "{3:s}"...'
            ).format(i + 1, num_examples, channel_index, layer_name))

            optimized_predictor_matrix[i, ...] = (
                backwards_opt.optimize_input_for_channel(
                    model_object=model_object, layer_name=layer_name,
                    channel_index=channel_index,
                    init_function_or_matrices=[predictor_matrix[[i], ...]],
                    stat_function_for_neuron_activations=K.max,
                    num_iterations=num_iterations,
                    learning_rate=learning_rate,
                    ideal_activation=ideal_activation)[0])

    print(SEPARATOR_STRING)
    print('Writing results to: "{0:s}"...'.format(output_file_name))

    backwards_opt.write_results(
        pickle_file_name=output_file_name,
        list_of_optimized_input_matrices=[optimized_predictor_matrix],
        model_file_name=model_file_name,
        init_function_name_or_matrices=[predictor_matrix],
        num_iterations=num_iterations, learning_rate=learning_rate,
        component_type_string=component_type_string, target_class=target_class,
        layer_name=layer_name, neuron_indices=neuron_indices,
        channel_index=channel_index, ideal_activation=ideal_activation)
def _run(input_file_name, predictor_colour_map_name,
         min_colour_prctile_for_predictors, max_colour_prctile_for_predictors,
         saliency_colour_map_name, max_colour_prctile_for_saliency,
         saliency_contour_line_width, num_saliency_contours, output_dir_name):
    """Plots saliency maps.  This is effectively the main method.

    For each example in the input file, plots one figure (all predictors with
    saliency contours overlaid) and saves it as a JPEG in `output_dir_name`.

    :param input_file_name: See documentation at top of file.
    :param predictor_colour_map_name: Same.
    :param min_colour_prctile_for_predictors: Same.
    :param max_colour_prctile_for_predictors: Same.
    :param saliency_colour_map_name: Same.
    :param max_colour_prctile_for_saliency: Same.
    :param saliency_contour_line_width: Same.
    :param num_saliency_contours: Same.
    :param output_dir_name: Same.
    """

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    # Percentile limits must define a valid, non-empty colour range.
    error_checking.assert_is_geq(min_colour_prctile_for_predictors, 0.)
    error_checking.assert_is_leq(max_colour_prctile_for_predictors, 100.)
    error_checking.assert_is_greater(max_colour_prctile_for_predictors,
                                     min_colour_prctile_for_predictors)
    error_checking.assert_is_geq(max_colour_prctile_for_saliency, 0.)
    error_checking.assert_is_leq(max_colour_prctile_for_saliency, 100.)
    error_checking.assert_is_geq(num_saliency_contours, 2)

    # Force an odd contour count, so contours are symmetric about zero with
    # the same number on each side.
    num_saliency_contours = 1 + int(
        number_rounding.floor_to_nearest(num_saliency_contours, 2))
    half_num_saliency_contours = (num_saliency_contours - 1) // 2

    predictor_colour_map_object = pyplot.cm.get_cmap(predictor_colour_map_name)
    saliency_colour_map_object = pyplot.cm.get_cmap(saliency_colour_map_name)

    print('Reading data from: "{0:s}"...'.format(input_file_name))
    predictor_matrix, saliency_matrix, saliency_metadata_dict = (
        saliency_maps.read_file(input_file_name))

    model_metafile_name = traditional_cnn.find_metafile(
        model_file_name=saliency_metadata_dict[
            saliency_maps.MODEL_FILE_NAME_KEY])

    print('Reading metadata from: "{0:s}"...'.format(model_metafile_name))
    model_metadata_dict = traditional_cnn.read_model_metadata(
        model_metafile_name)

    narr_predictor_names = model_metadata_dict[
        traditional_cnn.NARR_PREDICTOR_NAMES_KEY]
    num_predictors = len(narr_predictor_names)
    num_examples = predictor_matrix.shape[0]

    for i in range(num_examples):
        # Per-predictor colour limits, computed from this example's own values
        # at the requested percentiles.
        this_min_cval_by_predictor = numpy.full(num_predictors, numpy.nan)
        this_max_cval_by_predictor = numpy.full(num_predictors, numpy.nan)

        for k in range(num_predictors):
            this_min_cval_by_predictor[k] = numpy.percentile(
                predictor_matrix[i, ..., k], min_colour_prctile_for_predictors)
            this_max_cval_by_predictor[k] = numpy.percentile(
                predictor_matrix[i, ..., k], max_colour_prctile_for_predictors)

        _, these_axes_objects = example_plotting.plot_many_predictors_sans_barbs(
            predictor_matrix=predictor_matrix[i, ...],
            predictor_names=narr_predictor_names,
            cmap_object_by_predictor=
            [predictor_colour_map_object] * num_predictors,
            min_colour_value_by_predictor=this_min_cval_by_predictor,
            max_colour_value_by_predictor=this_max_cval_by_predictor)

        # Outermost contour sits at the chosen percentile of absolute
        # saliency; the rest are evenly spaced down to zero.
        this_max_abs_contour_level = numpy.percentile(
            numpy.absolute(saliency_matrix[i, ...]),
            max_colour_prctile_for_saliency)
        this_contour_interval = (
            this_max_abs_contour_level / half_num_saliency_contours)

        saliency_plotting.plot_many_2d_grids(
            saliency_matrix_3d=saliency_matrix[i, ...],
            axes_objects_2d_list=these_axes_objects,
            colour_map_object=saliency_colour_map_object,
            max_absolute_contour_level=this_max_abs_contour_level,
            contour_interval=this_contour_interval,
            line_width=saliency_contour_line_width)

        this_figure_file_name = '{0:s}/example{1:06d}_saliency.jpg'.format(
            output_dir_name, i)

        print('Saving figure to: "{0:s}"...'.format(this_figure_file_name))
        pyplot.savefig(this_figure_file_name, dpi=FIGURE_RESOLUTION_DPI)
        pyplot.close()
def _run(upconvnet_file_name, example_file_name, num_examples, example_indices,
         top_output_dir_name):
    """Applies upconvnet to one or more examples.

    This is effectively the main method.

    :param upconvnet_file_name: See documentation at top of file.
    :param example_file_name: Same.
    :param num_examples: Same.
    :param example_indices: Same.
    :param top_output_dir_name: Same.
    """

    # Check input args.  A non-positive `num_examples` means "ignore it and
    # use the explicit example_indices instead".
    if num_examples <= 0:
        num_examples = None

    if num_examples is None:
        error_checking.assert_is_geq_numpy_array(example_indices, 0)
    else:
        error_checking.assert_is_greater(num_examples, 0)

    # Read upconvnet and metadata.
    ucn_metafile_name = traditional_cnn.find_metafile(
        model_file_name=upconvnet_file_name, raise_error_if_missing=True)

    print('Reading trained upconvnet from: "{0:s}"...'.format(
        upconvnet_file_name))
    ucn_model_object = traditional_cnn.read_keras_model(upconvnet_file_name)

    print('Reading upconvnet metadata from: "{0:s}"...'.format(
        ucn_metafile_name))
    ucn_metadata_dict = upconvnet.read_model_metadata(ucn_metafile_name)

    # Read the CNN used to encode the examples, along with its metadata.  The
    # CNN file name is stored in the upconvnet's metadata.
    cnn_file_name = ucn_metadata_dict[upconvnet.CNN_FILE_NAME_KEY]
    cnn_metafile_name = traditional_cnn.find_metafile(
        model_file_name=cnn_file_name, raise_error_if_missing=True)

    print('Reading trained CNN from: "{0:s}"...'.format(cnn_file_name))
    cnn_model_object = traditional_cnn.read_keras_model(cnn_file_name)

    print('Reading CNN metadata from: "{0:s}"...'.format(cnn_metafile_name))
    cnn_metadata_dict = traditional_cnn.read_model_metadata(cnn_metafile_name)
    print(SEPARATOR_STRING)

    actual_image_matrix = _read_input_examples(
        example_file_name=example_file_name,
        cnn_metadata_dict=cnn_metadata_dict, num_examples=num_examples,
        example_indices=example_indices)
    print(SEPARATOR_STRING)

    # Reconstruct each example from its CNN encoding.
    reconstructed_image_matrix = upconvnet.apply_upconvnet(
        actual_image_matrix=actual_image_matrix,
        cnn_model_object=cnn_model_object, ucn_model_object=ucn_model_object)
    print(SEPARATOR_STRING)

    _plot_examples(
        actual_image_matrix=actual_image_matrix,
        reconstructed_image_matrix=reconstructed_image_matrix,
        narr_predictor_names=cnn_metadata_dict[
            traditional_cnn.NARR_PREDICTOR_NAMES_KEY],
        top_output_dir_name=top_output_dir_name)
def _run(model_file_name, example_file_name, num_examples, example_indices,
         layer_names, top_output_dir_name):
    """Plots feature maps for each example and CNN layer.

    This is effectively the main method.

    :param model_file_name: See documentation at top of file.
    :param example_file_name: Same.
    :param num_examples: Same.
    :param example_indices: Same.
    :param layer_names: Same.
    :param top_output_dir_name: Same.
    """

    # A non-positive `num_examples` means "ignore it and use the explicit
    # example_indices instead".
    if num_examples <= 0:
        num_examples = None

    if num_examples is None:
        error_checking.assert_is_geq_numpy_array(example_indices, 0)
    else:
        error_checking.assert_is_greater(num_examples, 0)

    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = traditional_cnn.read_keras_model(model_file_name)

    model_metafile_name = traditional_cnn.find_metafile(
        model_file_name=model_file_name)

    print('Reading model metadata from: "{0:s}"...'.format(
        model_metafile_name))
    model_metadata_dict = traditional_cnn.read_model_metadata(
        model_metafile_name)

    print('Reading normalized examples from: "{0:s}"...'.format(
        example_file_name))
    example_dict = trainval_io.read_downsized_3d_examples(
        netcdf_file_name=example_file_name,
        predictor_names_to_keep=model_metadata_dict[
            traditional_cnn.NARR_PREDICTOR_NAMES_KEY],
        num_half_rows_to_keep=model_metadata_dict[
            traditional_cnn.NUM_ROWS_IN_HALF_GRID_KEY],
        num_half_columns_to_keep=model_metadata_dict[
            traditional_cnn.NUM_COLUMNS_IN_HALF_GRID_KEY])
    print(SEPARATOR_STRING)

    predictor_matrix = example_dict[trainval_io.PREDICTOR_MATRIX_KEY]

    if num_examples is not None:
        # Randomly subsample `num_examples` examples without replacement
        # (capped at the number available).
        num_examples_total = predictor_matrix.shape[0]
        example_indices = numpy.arange(num_examples_total)

        num_examples = min(num_examples, num_examples_total)
        example_indices = numpy.random.choice(
            example_indices, size=num_examples, replace=False)

        predictor_matrix = predictor_matrix[example_indices, ...]

    num_examples = predictor_matrix.shape[0]
    num_layers = len(layer_names)
    feature_matrix_by_layer = [None] * num_layers

    # One partial model per layer, truncated at that layer, so `predict`
    # returns the layer's feature maps.
    for k in range(num_layers):
        print('Creating feature maps for layer "{0:s}"...'.format(
            layer_names[k]))

        this_partial_model_object = cnn.model_to_feature_generator(
            model_object=model_object, feature_layer_name=layer_names[k])

        feature_matrix_by_layer[k] = this_partial_model_object.predict(
            predictor_matrix, batch_size=num_examples)

    print(SEPARATOR_STRING)

    # Plot each layer's feature maps in its own subdirectory.
    for k in range(num_layers):
        this_output_dir_name = '{0:s}/{1:s}'.format(
            top_output_dir_name, layer_names[k])
        file_system_utils.mkdir_recursive_if_necessary(
            directory_name=this_output_dir_name)

        _plot_feature_maps_one_layer(
            feature_matrix=feature_matrix_by_layer[k],
            layer_name=layer_names[k],
            output_dir_name=this_output_dir_name)
        print(SEPARATOR_STRING)