def _run(example_file_name, output_dir_name):
    """Plots predictors and targets for one example.

    This is effectively the main method.

    :param example_file_name: See documentation at top of file.
    :param output_dir_name: Same.
    """

    print('Reading data from: "{0:s}"...'.format(example_file_name))
    example_dict = example_io.read_file(example_file_name)

    cloud_layer_counts = example_utils.find_cloud_layers(
        example_dict=example_dict, min_path_kg_m02=MIN_CLOUD_LAYER_PATH_KG_M02,
        for_ice=False
    )[1]

    desired_indices = numpy.where(cloud_layer_counts > 1)[0]
    example_dict = example_utils.subset_by_index(
        example_dict=example_dict, desired_indices=desired_indices
    )

    liquid_water_paths_kg_m02 = example_utils.get_field_from_dict(
        example_dict=example_dict,
        field_name=example_utils.LIQUID_WATER_PATH_NAME, height_m_agl=10.
    )

    sort_indices = numpy.argsort(-1 * liquid_water_paths_kg_m02)
    desired_index = sort_indices[10]

    _do_plotting(
        example_dict=example_dict, example_index=desired_index,
        output_dir_name=output_dir_name
    )
def _run(tropical_example_dir_name, non_tropical_example_dir_name,
         num_histogram_bins, output_dir_name):
    """Plots distribution of each target variable.

    This is effectively the main method.

    :param tropical_example_dir_name: See documentation at top of file.
    :param non_tropical_example_dir_name: Same.
    :param num_histogram_bins: Same.
    :param output_dir_name: Same.
    """

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name
    )

    first_time_unix_sec = (
        time_conversion.first_and_last_times_in_year(FIRST_YEAR)[0]
    )
    last_time_unix_sec = (
        time_conversion.first_and_last_times_in_year(LAST_YEAR)[-1]
    )

    example_file_names = example_io.find_many_files(
        directory_name=tropical_example_dir_name,
        first_time_unix_sec=first_time_unix_sec,
        last_time_unix_sec=last_time_unix_sec,
        raise_error_if_all_missing=True, raise_error_if_any_missing=True
    )
    example_file_names += example_io.find_many_files(
        directory_name=non_tropical_example_dir_name,
        first_time_unix_sec=first_time_unix_sec,
        last_time_unix_sec=last_time_unix_sec,
        raise_error_if_all_missing=True, raise_error_if_any_missing=True
    )

    example_dicts = []

    for this_file_name in example_file_names:
        print('Reading data from: "{0:s}"...'.format(this_file_name))
        this_example_dict = example_io.read_file(this_file_name)
        this_example_dict = example_utils.subset_by_field(
            example_dict=this_example_dict, field_names=TARGET_NAMES_IN_FILE
        )
        example_dicts.append(this_example_dict)

    example_dict = example_utils.concat_examples(example_dicts)
    del example_dicts

    letter_label = None
    panel_file_names = []

    for this_target_name in TARGET_NAMES:
        if this_target_name in TARGET_NAMES_IN_FILE:
            these_target_values = example_utils.get_field_from_dict(
                example_dict=example_dict, field_name=this_target_name
            )
        else:
            # Derived target: surface downwelling flux minus TOA upwelling
            # flux.
            down_fluxes_w_m02 = example_utils.get_field_from_dict(
                example_dict=example_dict,
                field_name=example_utils.SHORTWAVE_SURFACE_DOWN_FLUX_NAME
            )
            up_fluxes_w_m02 = example_utils.get_field_from_dict(
                example_dict=example_dict,
                field_name=example_utils.SHORTWAVE_TOA_UP_FLUX_NAME
            )
            these_target_values = down_fluxes_w_m02 - up_fluxes_w_m02

        these_target_values = numpy.ravel(these_target_values)

        if letter_label is None:
            letter_label = 'a'
        else:
            letter_label = chr(ord(letter_label) + 1)

        this_file_name = _plot_histogram_one_target(
            target_values=these_target_values, target_name=this_target_name,
            num_bins=num_histogram_bins, letter_label=letter_label,
            output_dir_name=output_dir_name
        )
        panel_file_names.append(this_file_name)

    concat_file_name = '{0:s}/target_distributions.jpg'.format(output_dir_name)
    print('Concatenating panels to: "{0:s}"...'.format(concat_file_name))

    imagemagick_utils.concatenate_images(
        input_file_names=panel_file_names, output_file_name=concat_file_name,
        num_panel_rows=2, num_panel_columns=2, border_width_pixels=25
    )
    imagemagick_utils.trim_whitespace(
        input_file_name=concat_file_name, output_file_name=concat_file_name
    )
def _run(model_file_name, example_dir_name, first_time_string,
         last_time_string, exclude_summit_greenland, output_file_name):
    """Applies trained neural net in inference mode.

    This is effectively the main method.

    :param model_file_name: See documentation at top of file.
    :param example_dir_name: Same.
    :param first_time_string: Same.
    :param last_time_string: Same.
    :param exclude_summit_greenland: Same.
    :param output_file_name: Same.
    """

    first_time_unix_sec = time_conversion.string_to_unix_sec(
        first_time_string, TIME_FORMAT
    )
    last_time_unix_sec = time_conversion.string_to_unix_sec(
        last_time_string, TIME_FORMAT
    )

    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = neural_net.read_model(model_file_name)

    metafile_name = neural_net.find_metafile(
        model_dir_name=os.path.split(model_file_name)[0],
        raise_error_if_missing=True
    )

    print('Reading metadata from: "{0:s}"...'.format(metafile_name))
    metadata_dict = neural_net.read_metafile(metafile_name)

    generator_option_dict = copy.deepcopy(
        metadata_dict[neural_net.TRAINING_OPTIONS_KEY]
    )
    generator_option_dict[neural_net.EXAMPLE_DIRECTORY_KEY] = example_dir_name
    generator_option_dict[neural_net.FIRST_TIME_KEY] = first_time_unix_sec
    generator_option_dict[neural_net.LAST_TIME_KEY] = last_time_unix_sec

    # Targets are read in physical units here (normalization turned off), so
    # only the predictions need to be denormalized later.
    vector_target_norm_type_string = copy.deepcopy(
        generator_option_dict[neural_net.VECTOR_TARGET_NORM_TYPE_KEY]
    )
    scalar_target_norm_type_string = copy.deepcopy(
        generator_option_dict[neural_net.SCALAR_TARGET_NORM_TYPE_KEY]
    )
    generator_option_dict[neural_net.VECTOR_TARGET_NORM_TYPE_KEY] = None
    generator_option_dict[neural_net.SCALAR_TARGET_NORM_TYPE_KEY] = None

    net_type_string = metadata_dict[neural_net.NET_TYPE_KEY]

    predictor_matrix, target_array, example_id_strings = (
        neural_net.create_data(
            option_dict=generator_option_dict, for_inference=True,
            net_type_string=net_type_string,
            exclude_summit_greenland=exclude_summit_greenland
        )
    )
    print(SEPARATOR_STRING)

    exec_start_time_unix_sec = time.time()
    prediction_array = neural_net.apply_model(
        model_object=model_object, predictor_matrix=predictor_matrix,
        num_examples_per_batch=NUM_EXAMPLES_PER_BATCH,
        net_type_string=net_type_string, verbose=True
    )

    print(SEPARATOR_STRING)
    print('Time to apply neural net = {0:.4f} seconds'.format(
        time.time() - exec_start_time_unix_sec
    ))

    vector_target_matrix = target_array[0]
    vector_prediction_matrix = prediction_array[0]

    if len(target_array) == 2:
        scalar_target_matrix = target_array[1]
        scalar_prediction_matrix = prediction_array[1]
    else:
        scalar_target_matrix = None
        scalar_prediction_matrix = None

    target_example_dict = _targets_numpy_to_dict(
        scalar_target_matrix=scalar_target_matrix,
        vector_target_matrix=vector_target_matrix,
        model_metadata_dict=metadata_dict
    )
    prediction_example_dict = _targets_numpy_to_dict(
        scalar_target_matrix=scalar_prediction_matrix,
        vector_target_matrix=vector_prediction_matrix,
        model_metadata_dict=metadata_dict
    )

    normalization_file_name = (
        generator_option_dict[neural_net.NORMALIZATION_FILE_KEY]
    )
    print((
        'Reading training examples (for normalization) from: "{0:s}"...'
    ).format(normalization_file_name))

    training_example_dict = example_io.read_file(normalization_file_name)
    training_example_dict = example_utils.subset_by_height(
        example_dict=training_example_dict,
        heights_m_agl=generator_option_dict[neural_net.HEIGHTS_KEY]
    )

    num_examples = len(example_id_strings)
    num_heights = len(prediction_example_dict[example_utils.HEIGHTS_KEY])

    # Add empty predictor arrays, since denormalization expects full example
    # dictionaries.
    this_dict = {
        example_utils.VECTOR_PREDICTOR_NAMES_KEY: [],
        example_utils.VECTOR_PREDICTOR_VALS_KEY:
            numpy.full((num_examples, num_heights, 0), 0.),
        example_utils.SCALAR_PREDICTOR_NAMES_KEY: [],
        example_utils.SCALAR_PREDICTOR_VALS_KEY:
            numpy.full((num_examples, 0), 0.)
    }
    target_example_dict.update(this_dict)
    prediction_example_dict.update(this_dict)

    if vector_target_norm_type_string is not None:
        print('Denormalizing predicted vectors...')

        prediction_example_dict = normalization.denormalize_data(
            new_example_dict=prediction_example_dict,
            training_example_dict=training_example_dict,
            normalization_type_string=vector_target_norm_type_string,
            min_normalized_value=generator_option_dict[
                neural_net.VECTOR_TARGET_MIN_VALUE_KEY],
            max_normalized_value=generator_option_dict[
                neural_net.VECTOR_TARGET_MAX_VALUE_KEY],
            separate_heights=True, apply_to_predictors=False,
            apply_to_vector_targets=True, apply_to_scalar_targets=False
        )

    if scalar_target_norm_type_string is not None:
        print('Denormalizing predicted scalars...')

        prediction_example_dict = normalization.denormalize_data(
            new_example_dict=prediction_example_dict,
            training_example_dict=training_example_dict,
            normalization_type_string=scalar_target_norm_type_string,
            min_normalized_value=generator_option_dict[
                neural_net.SCALAR_TARGET_MIN_VALUE_KEY],
            max_normalized_value=generator_option_dict[
                neural_net.SCALAR_TARGET_MAX_VALUE_KEY],
            separate_heights=True, apply_to_predictors=False,
            apply_to_vector_targets=False, apply_to_scalar_targets=True
        )

    # If heating rate was omitted as a direct target variable, derive it here
    # from the predicted fluxes.
    add_heating_rate = generator_option_dict[neural_net.OMIT_HEATING_RATE_KEY]

    if add_heating_rate:
        pressure_matrix_pascals = _get_unnormalized_pressure(
            model_metadata_dict=metadata_dict,
            example_id_strings=example_id_strings
        )
        prediction_example_dict = _get_predicted_heating_rates(
            prediction_example_dict=prediction_example_dict,
            pressure_matrix_pascals=pressure_matrix_pascals,
            model_metadata_dict=metadata_dict
        )

    vector_target_names = (
        generator_option_dict[neural_net.VECTOR_TARGET_NAMES_KEY]
    )

    if example_utils.SHORTWAVE_HEATING_RATE_NAME in vector_target_names:
        # Zero out predicted heating rates above ZERO_HEATING_HEIGHT_M_AGL.
        heating_rate_index = vector_target_names.index(
            example_utils.SHORTWAVE_HEATING_RATE_NAME
        )
        heights_m_agl = generator_option_dict[neural_net.HEIGHTS_KEY]
        height_indices = numpy.where(
            heights_m_agl >= ZERO_HEATING_HEIGHT_M_AGL
        )[0]

        vector_target_matrix = (
            prediction_example_dict[example_utils.VECTOR_TARGET_VALS_KEY]
        )
        vector_target_matrix[..., heating_rate_index][..., height_indices] = 0.
        prediction_example_dict[example_utils.VECTOR_TARGET_VALS_KEY] = (
            vector_target_matrix
        )

    all_heights_m_agl = generator_option_dict[neural_net.HEIGHTS_KEY]
    desired_heights_m_agl = (
        all_heights_m_agl[all_heights_m_agl < MAX_HEIGHT_M_AGL]
    )

    target_example_dict = example_utils.subset_by_height(
        example_dict=target_example_dict, heights_m_agl=desired_heights_m_agl
    )
    prediction_example_dict = example_utils.subset_by_height(
        example_dict=prediction_example_dict,
        heights_m_agl=desired_heights_m_agl
    )

    print('Writing target (actual) and predicted values to: "{0:s}"...'.format(
        output_file_name
    ))
    prediction_io.write_file(
        netcdf_file_name=output_file_name,
        scalar_target_matrix=
        target_example_dict[example_utils.SCALAR_TARGET_VALS_KEY],
        vector_target_matrix=
        target_example_dict[example_utils.VECTOR_TARGET_VALS_KEY],
        scalar_prediction_matrix=
        prediction_example_dict[example_utils.SCALAR_TARGET_VALS_KEY],
        vector_prediction_matrix=
        prediction_example_dict[example_utils.VECTOR_TARGET_VALS_KEY],
        heights_m_agl=desired_heights_m_agl,
        example_id_strings=example_id_strings,
        model_file_name=model_file_name
    )
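# Hedged sketch (hypothetical path): how the prediction file written above can
# be read back for analysis.  `prediction_io.read_file` and these dictionary
# keys are used elsewhere in this repository; the shape comments are
# assumptions based on how the matrices are indexed in other scripts.  This
# function is never called; it is here for illustration only.
def _demo_read_predictions():
    """Illustrates reading the prediction file written by `_run`."""

    prediction_dict = prediction_io.read_file('predictions.nc')

    # Vector fields appear to be (num_examples, num_heights, num_targets);
    # scalar fields appear to be (num_examples, num_targets).
    vector_prediction_matrix = (
        prediction_dict[prediction_io.VECTOR_PREDICTIONS_KEY]
    )
    vector_target_matrix = prediction_dict[prediction_io.VECTOR_TARGETS_KEY]
    print(vector_prediction_matrix.shape, vector_target_matrix.shape)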
def _run(evaluation_file_names, line_styles, line_colour_strings,
         set_descriptions_verbose, confidence_level, use_log_scale,
         plot_by_height, output_dir_name):
    """Plots model evaluation.

    This is effectively the main method.

    :param evaluation_file_names: See documentation at top of file.
    :param line_styles: Same.
    :param line_colour_strings: Same.
    :param set_descriptions_verbose: Same.
    :param confidence_level: Same.
    :param use_log_scale: Same.
    :param plot_by_height: Same.
    :param output_dir_name: Same.
    """

    # Check input args.
    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name
    )

    if confidence_level < 0:
        confidence_level = None
    if confidence_level is not None:
        error_checking.assert_is_geq(confidence_level, 0.9)
        error_checking.assert_is_less_than(confidence_level, 1.)

    num_evaluation_sets = len(evaluation_file_names)
    expected_dim = numpy.array([num_evaluation_sets], dtype=int)

    error_checking.assert_is_string_list(line_styles)
    error_checking.assert_is_numpy_array(
        numpy.array(line_styles), exact_dimensions=expected_dim
    )

    error_checking.assert_is_string_list(set_descriptions_verbose)
    error_checking.assert_is_numpy_array(
        numpy.array(set_descriptions_verbose), exact_dimensions=expected_dim
    )

    set_descriptions_verbose = [
        s.replace('_', ' ') for s in set_descriptions_verbose
    ]
    set_descriptions_abbrev = [
        s.lower().replace(' ', '-') for s in set_descriptions_verbose
    ]

    error_checking.assert_is_string_list(line_colour_strings)
    error_checking.assert_is_numpy_array(
        numpy.array(line_colour_strings), exact_dimensions=expected_dim
    )
    line_colours = [
        numpy.fromstring(s, dtype=float, sep='_') / 255
        for s in line_colour_strings
    ]

    for i in range(num_evaluation_sets):
        error_checking.assert_is_numpy_array(
            line_colours[i], exact_dimensions=numpy.array([3], dtype=int)
        )
        error_checking.assert_is_geq_numpy_array(line_colours[i], 0.)
        error_checking.assert_is_leq_numpy_array(line_colours[i], 1.)

    # Read files.
    evaluation_tables_xarray = [xarray.Dataset()] * num_evaluation_sets
    prediction_dicts = [dict()] * num_evaluation_sets

    for i in range(num_evaluation_sets):
        print('Reading data from: "{0:s}"...'.format(evaluation_file_names[i]))
        evaluation_tables_xarray[i] = evaluation.read_file(
            evaluation_file_names[i]
        )

        this_prediction_file_name = (
            evaluation_tables_xarray[i].attrs[evaluation.PREDICTION_FILE_KEY]
        )
        print('Reading data from: "{0:s}"...'.format(
            this_prediction_file_name
        ))
        prediction_dicts[i] = prediction_io.read_file(
            this_prediction_file_name
        )

    model_file_name = (
        evaluation_tables_xarray[0].attrs[evaluation.MODEL_FILE_KEY]
    )
    model_metafile_name = neural_net.find_metafile(
        model_dir_name=os.path.split(model_file_name)[0],
        raise_error_if_missing=True
    )

    print('Reading metadata from: "{0:s}"...'.format(model_metafile_name))
    model_metadata_dict = neural_net.read_metafile(model_metafile_name)

    generator_option_dict = model_metadata_dict[
        neural_net.TRAINING_OPTIONS_KEY]
    scalar_target_names = (
        generator_option_dict[neural_net.SCALAR_TARGET_NAMES_KEY]
    )
    vector_target_names = (
        generator_option_dict[neural_net.VECTOR_TARGET_NAMES_KEY]
    )
    heights_m_agl = generator_option_dict[neural_net.HEIGHTS_KEY]

    # Auxiliary target fields are not present in every evaluation file.
    try:
        t = evaluation_tables_xarray[0]
        aux_target_names = t.coords[evaluation.AUX_TARGET_FIELD_DIM].values
    except:
        aux_target_names = []

    num_scalar_targets = len(scalar_target_names)
    num_vector_targets = len(vector_target_names)
    num_heights = len(heights_m_agl)
    num_aux_targets = len(aux_target_names)

    example_dict = {
        example_utils.SCALAR_TARGET_NAMES_KEY: scalar_target_names,
        example_utils.VECTOR_TARGET_NAMES_KEY: vector_target_names,
        example_utils.HEIGHTS_KEY: heights_m_agl,
        example_utils.SCALAR_PREDICTOR_NAMES_KEY:
            generator_option_dict[neural_net.SCALAR_PREDICTOR_NAMES_KEY],
        example_utils.VECTOR_PREDICTOR_NAMES_KEY:
            generator_option_dict[neural_net.VECTOR_PREDICTOR_NAMES_KEY]
    }

    normalization_file_name = (
        generator_option_dict[neural_net.NORMALIZATION_FILE_KEY]
    )
    print((
        'Reading training examples (for climatology) from: "{0:s}"...'
    ).format(normalization_file_name))

    training_example_dict = example_io.read_file(normalization_file_name)
    training_example_dict = example_utils.subset_by_height(
        example_dict=training_example_dict, heights_m_agl=heights_m_agl
    )
    mean_training_example_dict = normalization.create_mean_example(
        new_example_dict=example_dict,
        training_example_dict=training_example_dict
    )

    print(SEPARATOR_STRING)

    # Do actual stuff.
    _plot_error_distributions(
        prediction_dicts=prediction_dicts,
        model_metadata_dict=model_metadata_dict,
        aux_target_names=aux_target_names,
        set_descriptions_abbrev=set_descriptions_abbrev,
        set_descriptions_verbose=set_descriptions_verbose,
        output_dir_name=output_dir_name
    )
    print(SEPARATOR_STRING)

    _plot_reliability_by_height(
        evaluation_tables_xarray=evaluation_tables_xarray,
        vector_target_names=vector_target_names,
        heights_m_agl=heights_m_agl,
        set_descriptions_abbrev=set_descriptions_abbrev,
        set_descriptions_verbose=set_descriptions_verbose,
        output_dir_name=output_dir_name
    )
    print(SEPARATOR_STRING)

    for k in range(num_vector_targets):
        for this_score_name in list(SCORE_NAME_TO_PROFILE_KEY.keys()):
            _plot_score_profile(
                evaluation_tables_xarray=evaluation_tables_xarray,
                line_styles=line_styles, line_colours=line_colours,
                set_descriptions_verbose=set_descriptions_verbose,
                confidence_level=confidence_level,
                target_name=vector_target_names[k],
                score_name=this_score_name,
                use_log_scale=use_log_scale, output_dir_name=output_dir_name
            )

    print(SEPARATOR_STRING)

    for k in range(num_scalar_targets):
        _plot_attributes_diagram(
            evaluation_tables_xarray=evaluation_tables_xarray,
            line_styles=line_styles, line_colours=line_colours,
            set_descriptions_abbrev=set_descriptions_abbrev,
            set_descriptions_verbose=set_descriptions_verbose,
            confidence_level=confidence_level,
            mean_training_example_dict=mean_training_example_dict,
            target_name=scalar_target_names[k],
            output_dir_name=output_dir_name
        )

    for k in range(num_aux_targets):
        _plot_attributes_diagram(
            evaluation_tables_xarray=evaluation_tables_xarray,
            line_styles=line_styles, line_colours=line_colours,
            set_descriptions_abbrev=set_descriptions_abbrev,
            set_descriptions_verbose=set_descriptions_verbose,
            confidence_level=confidence_level,
            mean_training_example_dict=mean_training_example_dict,
            target_name=aux_target_names[k],
            output_dir_name=output_dir_name
        )

    if not plot_by_height:
        return

    print(SEPARATOR_STRING)

    for k in range(num_vector_targets):
        for j in range(num_heights):
            _plot_attributes_diagram(
                evaluation_tables_xarray=evaluation_tables_xarray,
                line_styles=line_styles, line_colours=line_colours,
                set_descriptions_abbrev=set_descriptions_abbrev,
                set_descriptions_verbose=set_descriptions_verbose,
                confidence_level=confidence_level,
                mean_training_example_dict=mean_training_example_dict,
                height_m_agl=heights_m_agl[j],
                target_name=vector_target_names[k],
                output_dir_name=output_dir_name
            )

        if k != num_vector_targets - 1:
            print(SEPARATOR_STRING)
def _run(example_file_name, num_examples, choose_max_heating_rate,
         max_noise_k_day01, pressure_cutoffs_pa, pressure_spacings_pa,
         first_interp_method_name, second_interp_method_name, interp_fluxes,
         output_dir_name):
    """Runs interpolation experiment.

    This is effectively the main method.

    :param example_file_name: See documentation at top of file.
    :param num_examples: Same.
    :param choose_max_heating_rate: Same.
    :param max_noise_k_day01: Same.
    :param pressure_cutoffs_pa: Same.
    :param pressure_spacings_pa: Same.
    :param first_interp_method_name: Same.
    :param second_interp_method_name: Same.
    :param interp_fluxes: Same.
    :param output_dir_name: Same.
    """

    if interp_fluxes:
        max_noise_k_day01 = 0.

    error_checking.assert_is_greater(num_examples, 0)
    error_checking.assert_is_geq(max_noise_k_day01, 0.)
    error_checking.assert_is_geq_numpy_array(pressure_cutoffs_pa, 0.)
    error_checking.assert_is_greater_numpy_array(
        numpy.diff(pressure_cutoffs_pa), 0.
    )
    error_checking.assert_is_greater_numpy_array(pressure_spacings_pa, 0.)

    num_spacings = len(pressure_spacings_pa)
    expected_dim = numpy.array([num_spacings + 1], dtype=int)
    error_checking.assert_is_numpy_array(
        pressure_cutoffs_pa, exact_dimensions=expected_dim
    )

    high_res_pressures_pa = numpy.array([], dtype=float)

    for i in range(num_spacings):
        this_num_pressures = int(numpy.ceil(
            1 +
            (pressure_cutoffs_pa[i + 1] - pressure_cutoffs_pa[i]) /
            pressure_spacings_pa[i]
        ))
        these_pressures_pa = numpy.linspace(
            pressure_cutoffs_pa[i], pressure_cutoffs_pa[i + 1],
            num=this_num_pressures, dtype=float
        )

        if i != num_spacings - 1:
            these_pressures_pa = these_pressures_pa[:-1]

        high_res_pressures_pa = numpy.concatenate(
            (high_res_pressures_pa, these_pressures_pa)
        )

    print('Number of levels in high-resolution grid = {0:d}'.format(
        len(high_res_pressures_pa)
    ))

    if high_res_pressures_pa[0] < TOLERANCE:
        high_res_pressures_pa[0] = 0.5 * high_res_pressures_pa[1]

    high_res_pressures_pa = high_res_pressures_pa[::-1]
    high_res_heights_m_asl = standard_atmo.pressure_to_height(
        high_res_pressures_pa
    )

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name
    )

    print('Reading data from: "{0:s}"...'.format(example_file_name))
    example_dict = example_io.read_file(example_file_name)

    heating_rate_matrix_k_day01 = example_utils.get_field_from_dict(
        example_dict=example_dict,
        field_name=example_utils.SHORTWAVE_HEATING_RATE_NAME
    )

    if choose_max_heating_rate:
        hr_criterion_by_example = numpy.max(
            heating_rate_matrix_k_day01, axis=1
        )
    else:
        abs_diff_matrix = numpy.absolute(
            numpy.diff(heating_rate_matrix_k_day01[:, :-1], axis=1)
        )
        hr_criterion_by_example = numpy.max(abs_diff_matrix, axis=1)

    good_indices = numpy.argsort(-1 * hr_criterion_by_example)
    good_indices = good_indices[:num_examples]

    example_dict = example_utils.subset_by_index(
        example_dict=example_dict, desired_indices=good_indices
    )
    num_examples = len(good_indices)

    max_differences_k_day01 = numpy.full(num_examples, numpy.nan)

    for i in range(num_examples):
        max_differences_k_day01[i] = _run_experiment_one_example(
            example_dict=example_dict, example_index=i,
            max_noise_k_day01=max_noise_k_day01,
            high_res_pressures_pa=high_res_pressures_pa,
            high_res_heights_m_asl=high_res_heights_m_asl,
            first_interp_method_name=first_interp_method_name,
            second_interp_method_name=second_interp_method_name,
            interp_fluxes=interp_fluxes, output_dir_name=output_dir_name
        )

    print('Average max difference = {0:.4f} K day^-1'.format(
        numpy.mean(max_differences_k_day01)
    ))
    print('Median max difference = {0:.4f} K day^-1'.format(
        numpy.median(max_differences_k_day01)
    ))
    print('Max max difference = {0:.4f} K day^-1'.format(
        numpy.max(max_differences_k_day01)
    ))
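# Worked example of the grid-construction loop in `_run`, with assumed toy
# inputs.  Never called; for illustration only.
def _demo_high_res_grid():
    """Builds a toy high-resolution pressure grid.

    With cutoffs [0, 1000, 5000] Pa and spacings [100, 500] Pa, the loop yields
    10 levels in [0, 900] Pa plus 9 levels in [1000, 5000] Pa (19 total); the
    zero-pressure level is then replaced and the grid reversed to descending
    order.
    """

    pressure_cutoffs_pa = numpy.array([0., 1000., 5000.])
    pressure_spacings_pa = numpy.array([100., 500.])
    num_spacings = len(pressure_spacings_pa)

    high_res_pressures_pa = numpy.array([], dtype=float)

    for i in range(num_spacings):
        this_num_pressures = int(numpy.ceil(
            1 +
            (pressure_cutoffs_pa[i + 1] - pressure_cutoffs_pa[i]) /
            pressure_spacings_pa[i]
        ))
        these_pressures_pa = numpy.linspace(
            pressure_cutoffs_pa[i], pressure_cutoffs_pa[i + 1],
            num=this_num_pressures, dtype=float
        )

        # Drop the last level of each segment except the final one, so cutoff
        # pressures are not duplicated.
        if i != num_spacings - 1:
            these_pressures_pa = these_pressures_pa[:-1]

        high_res_pressures_pa = numpy.concatenate(
            (high_res_pressures_pa, these_pressures_pa)
        )

    # Avoid a level at exactly 0 Pa.
    if high_res_pressures_pa[0] < TOLERANCE:
        high_res_pressures_pa[0] = 0.5 * high_res_pressures_pa[1]

    return high_res_pressures_pa[::-1]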
def _run(model_file_name, example_file_name, num_examples, example_dir_name,
         example_id_file_name, layer_name, neuron_indices, ideal_activation,
         num_iterations, learning_rate, l2_weight, output_file_name):
    """Runs backwards optimization.

    This is effectively the main method.

    :param model_file_name: See documentation at top of file.
    :param example_file_name: Same.
    :param num_examples: Same.
    :param example_dir_name: Same.
    :param example_id_file_name: Same.
    :param layer_name: Same.
    :param neuron_indices: Same.
    :param ideal_activation: Same.
    :param num_iterations: Same.
    :param learning_rate: Same.
    :param l2_weight: Same.
    :param output_file_name: Same.
    """

    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = neural_net.read_model(model_file_name)

    metafile_name = neural_net.find_metafile(
        model_dir_name=os.path.split(model_file_name)[0],
        raise_error_if_missing=True
    )

    print('Reading metadata from: "{0:s}"...'.format(metafile_name))
    metadata_dict = neural_net.read_metafile(metafile_name)

    predictor_matrix, _, example_id_strings = (
        misc_utils.get_examples_for_inference(
            model_metadata_dict=metadata_dict,
            example_file_name=example_file_name,
            num_examples=num_examples, example_dir_name=example_dir_name,
            example_id_file_name=example_id_file_name
        )
    )
    print(SEPARATOR_STRING)

    generator_option_dict = metadata_dict[neural_net.TRAINING_OPTIONS_KEY]
    normalization_file_name = (
        generator_option_dict[neural_net.NORMALIZATION_FILE_KEY]
    )

    print((
        'Reading training examples (for normalization) from: "{0:s}"...'
    ).format(normalization_file_name))

    training_example_dict = example_io.read_file(normalization_file_name)
    training_example_dict = example_utils.subset_by_height(
        example_dict=training_example_dict,
        heights_m_agl=generator_option_dict[neural_net.HEIGHTS_KEY]
    )

    num_examples = len(example_id_strings)
    bwo_dict = None

    for i in range(num_examples):
        this_bwo_dict = bwo.optimize_input_for_neuron(
            model_object=model_object,
            init_function_or_matrix=predictor_matrix[i, ...],
            layer_name=layer_name, neuron_indices=neuron_indices,
            ideal_activation=ideal_activation, num_iterations=num_iterations,
            learning_rate=learning_rate, l2_weight=l2_weight
        )

        if i == num_examples - 1:
            print(SEPARATOR_STRING)
        else:
            print(MINOR_SEPARATOR_STRING)

        if bwo_dict is None:
            these_dim = numpy.array(
                (num_examples,) +
                this_bwo_dict[bwo.INITIAL_PREDICTORS_KEY].shape[1:],
                dtype=int
            )
            bwo_dict = {
                bwo.INITIAL_PREDICTORS_KEY: numpy.full(these_dim, numpy.nan),
                bwo.FINAL_PREDICTORS_KEY: numpy.full(these_dim, numpy.nan),
                bwo.INITIAL_ACTIVATIONS_KEY:
                    numpy.full(num_examples, numpy.nan),
                bwo.FINAL_ACTIVATIONS_KEY: numpy.full(num_examples, numpy.nan)
            }

        bwo_dict[bwo.INITIAL_PREDICTORS_KEY][i, ...] = (
            this_bwo_dict[bwo.INITIAL_PREDICTORS_KEY][0, ...]
        )
        bwo_dict[bwo.FINAL_PREDICTORS_KEY][i, ...] = (
            this_bwo_dict[bwo.FINAL_PREDICTORS_KEY][0, ...]
        )
        bwo_dict[bwo.INITIAL_ACTIVATIONS_KEY][i] = (
            this_bwo_dict[bwo.INITIAL_ACTIVATION_KEY]
        )
        bwo_dict[bwo.FINAL_ACTIVATIONS_KEY][i] = (
            this_bwo_dict[bwo.FINAL_ACTIVATION_KEY]
        )

    if example_file_name == '':
        example_file_name = example_io.find_many_files(
            directory_name=example_dir_name, first_time_unix_sec=0,
            last_time_unix_sec=int(1e12),
            raise_error_if_any_missing=False, raise_error_if_all_missing=True
        )[0]

    first_example_dict = example_io.read_file(example_file_name)
    first_example_dict = example_utils.subset_by_height(
        example_dict=first_example_dict,
        heights_m_agl=generator_option_dict[neural_net.HEIGHTS_KEY]
    )

    net_type_string = metadata_dict[neural_net.NET_TYPE_KEY]

    init_example_dict = copy.deepcopy(first_example_dict)
    this_example_dict = neural_net.predictors_numpy_to_dict(
        predictor_matrix=bwo_dict[bwo.INITIAL_PREDICTORS_KEY],
        example_dict=init_example_dict, net_type_string=net_type_string
    )
    init_example_dict.update(this_example_dict)

    if generator_option_dict[neural_net.PREDICTOR_NORM_TYPE_KEY] is not None:
        init_example_dict = normalization.denormalize_data(
            new_example_dict=init_example_dict,
            training_example_dict=training_example_dict,
            normalization_type_string=
            generator_option_dict[neural_net.PREDICTOR_NORM_TYPE_KEY],
            min_normalized_value=
            generator_option_dict[neural_net.PREDICTOR_MIN_NORM_VALUE_KEY],
            max_normalized_value=
            generator_option_dict[neural_net.PREDICTOR_MAX_NORM_VALUE_KEY],
            separate_heights=True, apply_to_predictors=True,
            apply_to_vector_targets=False, apply_to_scalar_targets=False
        )

    init_scalar_predictor_matrix = (
        init_example_dict[example_utils.SCALAR_PREDICTOR_VALS_KEY]
    )
    init_vector_predictor_matrix = (
        init_example_dict[example_utils.VECTOR_PREDICTOR_VALS_KEY]
    )

    final_example_dict = copy.deepcopy(first_example_dict)
    this_example_dict = neural_net.predictors_numpy_to_dict(
        predictor_matrix=bwo_dict[bwo.FINAL_PREDICTORS_KEY],
        example_dict=final_example_dict, net_type_string=net_type_string
    )
    final_example_dict.update(this_example_dict)

    if generator_option_dict[neural_net.PREDICTOR_NORM_TYPE_KEY] is not None:
        final_example_dict = normalization.denormalize_data(
            new_example_dict=final_example_dict,
            training_example_dict=training_example_dict,
            normalization_type_string=
            generator_option_dict[neural_net.PREDICTOR_NORM_TYPE_KEY],
            min_normalized_value=
            generator_option_dict[neural_net.PREDICTOR_MIN_NORM_VALUE_KEY],
            max_normalized_value=
            generator_option_dict[neural_net.PREDICTOR_MAX_NORM_VALUE_KEY],
            separate_heights=True, apply_to_predictors=True,
            apply_to_vector_targets=False, apply_to_scalar_targets=False
        )

    final_scalar_predictor_matrix = (
        final_example_dict[example_utils.SCALAR_PREDICTOR_VALS_KEY]
    )
    final_vector_predictor_matrix = (
        final_example_dict[example_utils.VECTOR_PREDICTOR_VALS_KEY]
    )

    print('Writing results to file: "{0:s}"...'.format(output_file_name))
    bwo.write_file(
        netcdf_file_name=output_file_name,
        init_scalar_predictor_matrix=init_scalar_predictor_matrix,
        final_scalar_predictor_matrix=final_scalar_predictor_matrix,
        init_vector_predictor_matrix=init_vector_predictor_matrix,
        final_vector_predictor_matrix=final_vector_predictor_matrix,
        initial_activations=bwo_dict[bwo.INITIAL_ACTIVATIONS_KEY],
        final_activations=bwo_dict[bwo.FINAL_ACTIVATIONS_KEY],
        example_id_strings=example_id_strings,
        model_file_name=model_file_name, layer_name=layer_name,
        neuron_indices=neuron_indices, ideal_activation=ideal_activation,
        num_iterations=num_iterations, learning_rate=learning_rate,
        l2_weight=l2_weight
    )
def _run(tropical_example_dir_name, non_tropical_example_dir_name,
         output_file_name):
    """Plots all sites with data.

    This is effectively the main method.

    :param tropical_example_dir_name: See documentation at top of file.
    :param non_tropical_example_dir_name: Same.
    :param output_file_name: Same.
    """

    first_time_unix_sec = (
        time_conversion.first_and_last_times_in_year(FIRST_YEAR)[0]
    )
    last_time_unix_sec = (
        time_conversion.first_and_last_times_in_year(LAST_YEAR)[-1]
    )

    tropical_file_names = example_io.find_many_files(
        directory_name=tropical_example_dir_name,
        first_time_unix_sec=first_time_unix_sec,
        last_time_unix_sec=last_time_unix_sec,
        raise_error_if_all_missing=True, raise_error_if_any_missing=False
    )
    non_tropical_file_names = example_io.find_many_files(
        directory_name=non_tropical_example_dir_name,
        first_time_unix_sec=first_time_unix_sec,
        last_time_unix_sec=last_time_unix_sec,
        raise_error_if_all_missing=True, raise_error_if_any_missing=False
    )

    latitudes_deg_n = numpy.array([])
    longitudes_deg_e = numpy.array([])

    for this_file_name in tropical_file_names + non_tropical_file_names:
        print('Reading data from: "{0:s}"...'.format(this_file_name))
        this_example_dict = example_io.read_file(this_file_name)

        these_latitudes_deg_n = example_utils.get_field_from_dict(
            example_dict=this_example_dict,
            field_name=example_utils.LATITUDE_NAME
        )
        these_longitudes_deg_e = example_utils.get_field_from_dict(
            example_dict=this_example_dict,
            field_name=example_utils.LONGITUDE_NAME
        )

        latitudes_deg_n = numpy.concatenate(
            (latitudes_deg_n, these_latitudes_deg_n)
        )
        longitudes_deg_e = numpy.concatenate(
            (longitudes_deg_e, these_longitudes_deg_e)
        )

    # Keep one (rounded) coordinate pair per site.
    coord_matrix = numpy.transpose(
        numpy.vstack((latitudes_deg_n, longitudes_deg_e))
    )
    coord_matrix = number_rounding.round_to_nearest(
        coord_matrix, LATLNG_TOLERANCE_DEG
    )
    coord_matrix = numpy.unique(coord_matrix, axis=0)
    latitudes_deg_n = coord_matrix[:, 0]
    longitudes_deg_e = coord_matrix[:, 1]

    figure_object, axes_object, basemap_object = (
        plotting_utils.create_equidist_cylindrical_map(
            min_latitude_deg=MIN_PLOT_LATITUDE_DEG_N,
            max_latitude_deg=MAX_PLOT_LATITUDE_DEG_N,
            min_longitude_deg=MIN_PLOT_LONGITUDE_DEG_E,
            max_longitude_deg=MAX_PLOT_LONGITUDE_DEG_E,
            resolution_string='l'
        )
    )

    plotting_utils.plot_coastlines(
        basemap_object=basemap_object, axes_object=axes_object,
        line_colour=BORDER_COLOUR, line_width=BORDER_WIDTH
    )
    plotting_utils.plot_countries(
        basemap_object=basemap_object, axes_object=axes_object,
        line_colour=BORDER_COLOUR, line_width=BORDER_WIDTH
    )
    plotting_utils.plot_parallels(
        basemap_object=basemap_object, axes_object=axes_object,
        num_parallels=NUM_PARALLELS, line_colour=GRID_LINE_COLOUR,
        line_width=GRID_LINE_WIDTH, font_size=FONT_SIZE
    )
    plotting_utils.plot_meridians(
        basemap_object=basemap_object, axes_object=axes_object,
        num_meridians=NUM_MERIDIANS, line_colour=GRID_LINE_COLOUR,
        line_width=GRID_LINE_WIDTH, font_size=FONT_SIZE
    )

    # Plot Arctic sites (latitude >= 66.5 deg N).
    arctic_indices = numpy.where(latitudes_deg_n >= 66.5)[0]
    print('Number of Arctic sites = {0:d}'.format(len(arctic_indices)))

    arctic_x_coords, arctic_y_coords = basemap_object(
        longitudes_deg_e[arctic_indices], latitudes_deg_n[arctic_indices]
    )
    axes_object.plot(
        arctic_x_coords, arctic_y_coords, linestyle='None',
        marker=MARKER_TYPE, markersize=MARKER_SIZE, markeredgewidth=0,
        markerfacecolor=ARCTIC_COLOUR, markeredgecolor=ARCTIC_COLOUR
    )

    # Plot mid-latitude sites (30-66.5 deg N).
    mid_latitude_indices = numpy.where(
        numpy.logical_and(latitudes_deg_n >= 30., latitudes_deg_n < 66.5)
    )[0]
    print('Number of mid-latitude sites = {0:d}'.format(
        len(mid_latitude_indices)
    ))

    mid_latitude_x_coords, mid_latitude_y_coords = basemap_object(
        longitudes_deg_e[mid_latitude_indices],
        latitudes_deg_n[mid_latitude_indices]
    )
    axes_object.plot(
        mid_latitude_x_coords, mid_latitude_y_coords, linestyle='None',
        marker=MARKER_TYPE, markersize=MARKER_SIZE, markeredgewidth=0,
        markerfacecolor=MID_LATITUDE_COLOUR,
        markeredgecolor=MID_LATITUDE_COLOUR
    )

    # Plot tropical sites (latitude below 30 deg N).
    tropical_indices = numpy.where(latitudes_deg_n < 30.)[0]
    print('Number of tropical sites = {0:d}'.format(len(tropical_indices)))

    tropical_x_coords, tropical_y_coords = basemap_object(
        longitudes_deg_e[tropical_indices], latitudes_deg_n[tropical_indices]
    )
    axes_object.plot(
        tropical_x_coords, tropical_y_coords, linestyle='None',
        marker=MARKER_TYPE, markersize=MARKER_SIZE, markeredgewidth=0,
        markerfacecolor=TROPICAL_COLOUR, markeredgecolor=TROPICAL_COLOUR
    )

    file_system_utils.mkdir_recursive_if_necessary(file_name=output_file_name)

    print('Saving figure to: "{0:s}"...'.format(output_file_name))
    figure_object.savefig(
        output_file_name, dpi=FIGURE_RESOLUTION_DPI, pad_inches=0,
        bbox_inches='tight'
    )
    pyplot.close(figure_object)
def get_raw_examples(example_file_name, num_examples, example_dir_name,
                     example_id_file_name):
    """Returns raw examples.

    The difference between `get_raw_examples` and `get_examples_for_inference`
    is that `get_raw_examples` returns examples in their raw form, *not*
    pre-processed to be fed through a model for inference.

    :param example_file_name: See doc for `get_examples_for_inference`.
    :param num_examples: Same.
    :param example_dir_name: Same.
    :param example_id_file_name: Same.
    :return: example_dict: See doc for `example_io.read_file`.
    """

    error_checking.assert_is_string(example_file_name)
    use_specific_ids = example_file_name == ''

    if use_specific_ids:
        error_checking.assert_is_string(example_id_file_name)

        print('Reading desired example IDs from: "{0:s}"...'.format(
            example_id_file_name
        ))
        example_id_strings = read_example_ids_from_netcdf(example_id_file_name)

        valid_times_unix_sec = example_utils.parse_example_ids(
            example_id_strings
        )[example_utils.VALID_TIMES_KEY]

        example_file_names = example_io.find_many_files(
            directory_name=example_dir_name,
            first_time_unix_sec=numpy.min(valid_times_unix_sec),
            last_time_unix_sec=numpy.max(valid_times_unix_sec)
        )

        num_files = len(example_file_names)
        example_dicts = [dict()] * num_files

        for i in range(num_files):
            print('Reading data from: "{0:s}"...'.format(
                example_file_names[i]
            ))
            example_dicts[i] = example_io.read_file(example_file_names[i])

        example_dict = example_utils.concat_examples(example_dicts)

        good_indices = example_utils.find_examples(
            all_id_strings=example_dict[example_utils.EXAMPLE_IDS_KEY],
            desired_id_strings=example_id_strings, allow_missing=False
        )
        example_dict = example_utils.subset_by_index(
            example_dict=example_dict, desired_indices=good_indices
        )
    else:
        error_checking.assert_is_string(example_dir_name)
        error_checking.assert_is_integer(num_examples)
        error_checking.assert_is_greater(num_examples, 0)

        print('Reading data from: "{0:s}"...'.format(example_file_name))
        example_dict = example_io.read_file(example_file_name)

        num_examples_total = len(example_dict[example_utils.VALID_TIMES_KEY])
        desired_indices = numpy.linspace(
            0, num_examples_total - 1, num=num_examples_total, dtype=int
        )

        if num_examples < num_examples_total:
            desired_indices = numpy.random.choice(
                desired_indices, size=num_examples, replace=False
            )

        example_dict = example_utils.subset_by_index(
            example_dict=example_dict, desired_indices=desired_indices
        )

    return example_dict
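# Hedged usage sketch (hypothetical paths): `get_raw_examples` has two calling
# modes -- read the specific examples listed in an ID file (when
# `example_file_name` is the empty string), or randomly subsample one example
# file.  This function is never called; it is here for illustration only.
def _demo_get_raw_examples():
    """Illustrates the two calling modes of `get_raw_examples`."""

    # Mode 1: specific example IDs (hypothetical paths).
    example_dict_from_ids = get_raw_examples(
        example_file_name='', num_examples=None,
        example_dir_name='/data/examples',
        example_id_file_name='/data/example_ids.nc'
    )

    # Mode 2: random subsample of 100 examples from one file (hypothetical
    # path).
    example_dict_from_file = get_raw_examples(
        example_file_name='/data/examples/learning_examples_2019.nc',
        num_examples=100, example_dir_name='', example_id_file_name=''
    )

    return example_dict_from_ids, example_dict_from_file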
def _run(example_dir_name, first_year, last_year, min_percentile_level,
         max_percentile_level, output_file_name):
    """Finds normalization parameters for radiative-transfer data.

    This is effectively the main method.

    :param example_dir_name: See documentation at top of file.
    :param first_year: Same.
    :param last_year: Same.
    :param min_percentile_level: Same.
    :param max_percentile_level: Same.
    :param output_file_name: Same.
    """

    error_checking.assert_is_geq(last_year, first_year)
    years = numpy.linspace(
        first_year, last_year, num=last_year - first_year + 1, dtype=int
    )

    num_years = len(years)
    example_file_names = [None] * num_years

    for i in range(num_years):
        example_file_names[i] = example_io.find_file(
            example_dir_name=example_dir_name, year=years[i],
            raise_error_if_missing=True
        )

    this_example_dict = example_io.read_file(example_file_names[0])
    heights_m_agl = numpy.round(
        this_example_dict[example_io.HEIGHTS_KEY]
    ).astype(int)

    orig_parameter_dict = {
        normalization_params.NUM_VALUES_KEY: 0,
        normalization_params.MEAN_VALUE_KEY: 0.,
        normalization_params.MEAN_OF_SQUARES_KEY: 0.
    }
    field_names = example_io.PREDICTOR_NAMES + example_io.TARGET_NAMES

    z_score_dict_with_height = {}
    z_score_dict_no_height = {}
    frequency_dict_with_height = {}
    frequency_dict_no_height = {}

    for this_field_name in field_names:
        z_score_dict_no_height[this_field_name] = copy.deepcopy(
            orig_parameter_dict
        )
        frequency_dict_no_height[this_field_name] = dict()

        for this_height_m_agl in heights_m_agl:
            z_score_dict_with_height[this_field_name, this_height_m_agl] = (
                copy.deepcopy(orig_parameter_dict)
            )
            frequency_dict_with_height[this_field_name, this_height_m_agl] = (
                dict()
            )

    for i in range(num_years):
        print('Reading data from: "{0:s}"...'.format(example_file_names[i]))
        this_example_dict = example_io.read_file(example_file_names[i])

        for this_field_name in field_names:
            print('Updating normalization params for "{0:s}"...'.format(
                this_field_name
            ))

            this_data_matrix = example_io.get_field_from_dict(
                example_dict=this_example_dict, field_name=this_field_name,
                height_m_agl=None
            )
            this_data_matrix = normalization.convert_to_log_if_necessary(
                physical_values=this_data_matrix, field_name=this_field_name
            )

            z_score_dict_no_height[this_field_name] = (
                normalization_params.update_z_score_params(
                    z_score_param_dict=z_score_dict_no_height[this_field_name],
                    new_data_matrix=this_data_matrix
                )
            )
            frequency_dict_no_height[this_field_name] = (
                normalization_params.update_frequency_dict(
                    frequency_dict=frequency_dict_no_height[this_field_name],
                    new_data_matrix=this_data_matrix,
                    rounding_base=
                    FIELD_TO_ROUNDING_BASE_SCALAR[this_field_name]
                )
            )

            for this_height_m_agl in heights_m_agl:
                # TODO(thunderhoser): Could probably speed up code by skipping
                # this loop for scalar fields.
                if this_field_name in SCALAR_FIELD_NAMES:
                    z_score_dict_with_height[
                        this_field_name, this_height_m_agl
                    ] = copy.deepcopy(z_score_dict_no_height[this_field_name])

                    frequency_dict_with_height[
                        this_field_name, this_height_m_agl
                    ] = copy.deepcopy(
                        frequency_dict_no_height[this_field_name]
                    )

                    continue

                print((
                    'Updating normalization params for "{0:s}" at {1:d} m '
                    'AGL...'
                ).format(this_field_name, this_height_m_agl))

                this_data_matrix = example_io.get_field_from_dict(
                    example_dict=this_example_dict,
                    field_name=this_field_name,
                    height_m_agl=this_height_m_agl
                )
                this_data_matrix = normalization.convert_to_log_if_necessary(
                    physical_values=this_data_matrix,
                    field_name=this_field_name
                )

                this_dict = z_score_dict_with_height[
                    this_field_name, this_height_m_agl
                ]
                this_dict = normalization_params.update_z_score_params(
                    z_score_param_dict=this_dict,
                    new_data_matrix=this_data_matrix
                )
                z_score_dict_with_height[
                    this_field_name, this_height_m_agl
                ] = this_dict

                this_dict = frequency_dict_with_height[
                    this_field_name, this_height_m_agl
                ]
                this_dict = normalization_params.update_frequency_dict(
                    frequency_dict=this_dict,
                    new_data_matrix=this_data_matrix,
                    rounding_base=
                    FIELD_TO_ROUNDING_BASE_VECTOR[this_field_name]
                )
                frequency_dict_with_height[
                    this_field_name, this_height_m_agl
                ] = this_dict

    norm_table_no_height = normalization_params.finalize_params(
        z_score_dict_dict=z_score_dict_no_height,
        frequency_dict_dict=frequency_dict_no_height,
        min_percentile_level=min_percentile_level,
        max_percentile_level=max_percentile_level
    )

    print((
        'Overall normalization params (not separated by height):\n{0:s}\n\n'
    ).format(str(norm_table_no_height)))

    norm_table_with_height = normalization_params.finalize_params(
        z_score_dict_dict=z_score_dict_with_height,
        frequency_dict_dict=frequency_dict_with_height,
        min_percentile_level=min_percentile_level,
        max_percentile_level=max_percentile_level
    )

    print('Normalization params separated by height:\n{0:s}\n\n'.format(
        str(norm_table_with_height)
    ))

    normalization_params.write_file(
        pickle_file_name=output_file_name,
        norm_table_no_height=norm_table_no_height,
        norm_table_with_height=norm_table_with_height
    )
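# Hedged sketch of the incremental z-score bookkeeping implied by the keys
# used above (NUM_VALUES_KEY, MEAN_VALUE_KEY, MEAN_OF_SQUARES_KEY); the real
# `normalization_params.update_z_score_params` may differ in its details, and
# the string keys below are hypothetical stand-ins.  Never called; for
# illustration only.
def _demo_update_running_moments(param_dict, new_data_matrix):
    """Updates running mean and mean-of-squares with one batch of data."""

    new_values = new_data_matrix[numpy.isfinite(new_data_matrix)]
    old_count = param_dict['num_values']
    new_count = old_count + new_values.size

    param_dict['mean_value'] = (
        (old_count * param_dict['mean_value'] + numpy.sum(new_values)) /
        new_count
    )
    param_dict['mean_of_squares'] = (
        (old_count * param_dict['mean_of_squares'] +
         numpy.sum(new_values ** 2)) /
        new_count
    )
    param_dict['num_values'] = new_count

    # At finalization the standard deviation follows from
    # E[x^2] - (E[x])^2.
    return param_dict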
def _run(input_prediction_file_name, average_over_height, scale_by_climo,
         num_examples_per_set, output_dir_name):
    """Finds best and worst heating-rate predictions.

    This is effectively the main method.

    :param input_prediction_file_name: See documentation at top of file.
    :param average_over_height: Same.
    :param scale_by_climo: Same.
    :param num_examples_per_set: Same.
    :param output_dir_name: Same.
    """

    # TODO(thunderhoser): Maybe allow specific height again (e.g., 15 km).

    error_checking.assert_is_greater(num_examples_per_set, 0)
    scale_by_climo = scale_by_climo and not average_over_height

    print('Reading data from: "{0:s}"...'.format(input_prediction_file_name))
    prediction_dict = prediction_io.read_file(input_prediction_file_name)

    model_file_name = prediction_dict[prediction_io.MODEL_FILE_KEY]
    model_metafile_name = neural_net.find_metafile(
        model_dir_name=os.path.split(model_file_name)[0]
    )

    print('Reading model metadata from: "{0:s}"...'.format(
        model_metafile_name
    ))
    model_metadata_dict = neural_net.read_metafile(model_metafile_name)

    generator_option_dict = model_metadata_dict[
        neural_net.TRAINING_OPTIONS_KEY]
    vector_target_names = (
        generator_option_dict[neural_net.VECTOR_TARGET_NAMES_KEY]
    )
    hr_index = vector_target_names.index(
        example_utils.SHORTWAVE_HEATING_RATE_NAME
    )

    target_matrix_k_day01 = (
        prediction_dict[prediction_io.VECTOR_TARGETS_KEY][..., hr_index]
    )
    prediction_matrix_k_day01 = (
        prediction_dict[prediction_io.VECTOR_PREDICTIONS_KEY][..., hr_index]
    )

    bias_matrix = prediction_matrix_k_day01 - target_matrix_k_day01
    absolute_error_matrix = numpy.absolute(bias_matrix)

    if average_over_height:
        bias_matrix = numpy.mean(bias_matrix, axis=1, keepdims=True)
        absolute_error_matrix = numpy.mean(
            absolute_error_matrix, axis=1, keepdims=True
        )

    if scale_by_climo:
        normalization_file_name = (
            generator_option_dict[neural_net.NORMALIZATION_FILE_KEY]
        )

        print((
            'Reading training examples (for climatology) from: "{0:s}"...'
        ).format(normalization_file_name))

        training_example_dict = example_io.read_file(normalization_file_name)
        training_example_dict = example_utils.subset_by_field(
            example_dict=training_example_dict,
            field_names=[example_utils.SHORTWAVE_HEATING_RATE_NAME]
        )
        training_example_dict = example_utils.subset_by_height(
            example_dict=training_example_dict,
            heights_m_agl=generator_option_dict[neural_net.HEIGHTS_KEY]
        )

        dummy_example_dict = {
            example_utils.SCALAR_PREDICTOR_NAMES_KEY: [],
            example_utils.VECTOR_PREDICTOR_NAMES_KEY: [],
            example_utils.SCALAR_TARGET_NAMES_KEY: [],
            example_utils.VECTOR_TARGET_NAMES_KEY:
                [example_utils.SHORTWAVE_HEATING_RATE_NAME],
            example_utils.HEIGHTS_KEY:
                generator_option_dict[neural_net.HEIGHTS_KEY]
        }

        mean_training_example_dict = normalization.create_mean_example(
            new_example_dict=dummy_example_dict,
            training_example_dict=training_example_dict
        )
        climo_matrix_k_day01 = mean_training_example_dict[
            example_utils.VECTOR_TARGET_VALS_KEY
        ][..., 0]

        bias_matrix = bias_matrix / climo_matrix_k_day01
        absolute_error_matrix = absolute_error_matrix / climo_matrix_k_day01

    print(SEPARATOR_STRING)
    high_bias_indices, low_bias_indices, low_abs_error_indices = (
        misc_utils.find_best_and_worst_predictions(
            bias_matrix=bias_matrix,
            absolute_error_matrix=absolute_error_matrix,
            num_examples_per_set=num_examples_per_set
        )
    )
    print(SEPARATOR_STRING)

    high_bias_prediction_dict = prediction_io.subset_by_index(
        prediction_dict=copy.deepcopy(prediction_dict),
        desired_indices=high_bias_indices
    )
    high_bias_file_name = (
        '{0:s}/predictions_high-bias.nc'.format(output_dir_name)
    )

    print('Writing examples with greatest positive bias to: "{0:s}"...'.format(
        high_bias_file_name
    ))
    prediction_io.write_file(
        netcdf_file_name=high_bias_file_name,
        scalar_target_matrix=
        high_bias_prediction_dict[prediction_io.SCALAR_TARGETS_KEY],
        vector_target_matrix=
        high_bias_prediction_dict[prediction_io.VECTOR_TARGETS_KEY],
        scalar_prediction_matrix=
        high_bias_prediction_dict[prediction_io.SCALAR_PREDICTIONS_KEY],
        vector_prediction_matrix=
        high_bias_prediction_dict[prediction_io.VECTOR_PREDICTIONS_KEY],
        heights_m_agl=high_bias_prediction_dict[prediction_io.HEIGHTS_KEY],
        example_id_strings=
        high_bias_prediction_dict[prediction_io.EXAMPLE_IDS_KEY],
        model_file_name=
        high_bias_prediction_dict[prediction_io.MODEL_FILE_KEY]
    )

    low_bias_prediction_dict = prediction_io.subset_by_index(
        prediction_dict=copy.deepcopy(prediction_dict),
        desired_indices=low_bias_indices
    )
    low_bias_file_name = (
        '{0:s}/predictions_low-bias.nc'.format(output_dir_name)
    )

    print('Writing examples with greatest negative bias to: "{0:s}"...'.format(
        low_bias_file_name
    ))
    prediction_io.write_file(
        netcdf_file_name=low_bias_file_name,
        scalar_target_matrix=
        low_bias_prediction_dict[prediction_io.SCALAR_TARGETS_KEY],
        vector_target_matrix=
        low_bias_prediction_dict[prediction_io.VECTOR_TARGETS_KEY],
        scalar_prediction_matrix=
        low_bias_prediction_dict[prediction_io.SCALAR_PREDICTIONS_KEY],
        vector_prediction_matrix=
        low_bias_prediction_dict[prediction_io.VECTOR_PREDICTIONS_KEY],
        heights_m_agl=low_bias_prediction_dict[prediction_io.HEIGHTS_KEY],
        example_id_strings=
        low_bias_prediction_dict[prediction_io.EXAMPLE_IDS_KEY],
        model_file_name=low_bias_prediction_dict[prediction_io.MODEL_FILE_KEY]
    )

    low_abs_error_prediction_dict = prediction_io.subset_by_index(
        prediction_dict=copy.deepcopy(prediction_dict),
        desired_indices=low_abs_error_indices
    )
    low_abs_error_file_name = (
        '{0:s}/predictions_low-absolute-error.nc'.format(output_dir_name)
    )

    print('Writing examples with smallest absolute error to: "{0:s}"...'.format(
        low_abs_error_file_name
    ))
    prediction_io.write_file(
        netcdf_file_name=low_abs_error_file_name,
        scalar_target_matrix=
        low_abs_error_prediction_dict[prediction_io.SCALAR_TARGETS_KEY],
        vector_target_matrix=
        low_abs_error_prediction_dict[prediction_io.VECTOR_TARGETS_KEY],
        scalar_prediction_matrix=
        low_abs_error_prediction_dict[prediction_io.SCALAR_PREDICTIONS_KEY],
        vector_prediction_matrix=
        low_abs_error_prediction_dict[prediction_io.VECTOR_PREDICTIONS_KEY],
        heights_m_agl=
        low_abs_error_prediction_dict[prediction_io.HEIGHTS_KEY],
        example_id_strings=
        low_abs_error_prediction_dict[prediction_io.EXAMPLE_IDS_KEY],
        model_file_name=
        low_abs_error_prediction_dict[prediction_io.MODEL_FILE_KEY]
    )

    if scale_by_climo:
        return

    if average_over_height:
        mean_targets_k_day01 = numpy.mean(target_matrix_k_day01, axis=1)
        sort_indices = numpy.argsort(-1 * mean_targets_k_day01)
    else:
        max_targets_k_day01 = numpy.max(target_matrix_k_day01, axis=1)
        sort_indices = numpy.argsort(-1 * max_targets_k_day01)

    large_hr_indices = sort_indices[:num_examples_per_set]
    large_hr_prediction_dict = prediction_io.subset_by_index(
        prediction_dict=copy.deepcopy(prediction_dict),
        desired_indices=large_hr_indices
    )
    large_hr_file_name = (
        '{0:s}/predictions_large-heating-rate.nc'.format(output_dir_name)
    )

    print('Writing examples with greatest heating rate to: "{0:s}"...'.format(
        large_hr_file_name
    ))
    prediction_io.write_file(
        netcdf_file_name=large_hr_file_name,
        scalar_target_matrix=
        large_hr_prediction_dict[prediction_io.SCALAR_TARGETS_KEY],
        vector_target_matrix=
        large_hr_prediction_dict[prediction_io.VECTOR_TARGETS_KEY],
        scalar_prediction_matrix=
        large_hr_prediction_dict[prediction_io.SCALAR_PREDICTIONS_KEY],
        vector_prediction_matrix=
        large_hr_prediction_dict[prediction_io.VECTOR_PREDICTIONS_KEY],
        heights_m_agl=large_hr_prediction_dict[prediction_io.HEIGHTS_KEY],
        example_id_strings=
        large_hr_prediction_dict[prediction_io.EXAMPLE_IDS_KEY],
        model_file_name=large_hr_prediction_dict[prediction_io.MODEL_FILE_KEY]
    )

    if not average_over_height:
        return

    mean_targets_k_day01 = numpy.mean(target_matrix_k_day01, axis=1)
    sort_indices = numpy.argsort(mean_targets_k_day01)
    small_hr_indices = sort_indices[:num_examples_per_set]

    small_hr_prediction_dict = prediction_io.subset_by_index(
        prediction_dict=copy.deepcopy(prediction_dict),
        desired_indices=small_hr_indices
    )
    small_hr_file_name = (
        '{0:s}/predictions_small-heating-rate.nc'.format(output_dir_name)
    )

    print('Writing examples with smallest heating rate to: "{0:s}"...'.format(
        small_hr_file_name
    ))
    prediction_io.write_file(
        netcdf_file_name=small_hr_file_name,
        scalar_target_matrix=
        small_hr_prediction_dict[prediction_io.SCALAR_TARGETS_KEY],
        vector_target_matrix=
        small_hr_prediction_dict[prediction_io.VECTOR_TARGETS_KEY],
        scalar_prediction_matrix=
        small_hr_prediction_dict[prediction_io.SCALAR_PREDICTIONS_KEY],
        vector_prediction_matrix=
        small_hr_prediction_dict[prediction_io.VECTOR_PREDICTIONS_KEY],
        heights_m_agl=small_hr_prediction_dict[prediction_io.HEIGHTS_KEY],
        example_id_strings=
        small_hr_prediction_dict[prediction_io.EXAMPLE_IDS_KEY],
        model_file_name=small_hr_prediction_dict[prediction_io.MODEL_FILE_KEY]
    )
def _run(tropical_example_dir_name, non_tropical_example_dir_name, year,
         assorted1_example_dir_name, assorted2_example_dir_name):
    """Splits examples into Assorted1 and Assorted2 sites.

    This is effectively the main method.

    :param tropical_example_dir_name: See documentation at top of file.
    :param non_tropical_example_dir_name: Same.
    :param year: Same.
    :param assorted1_example_dir_name: Same.
    :param assorted2_example_dir_name: Same.
    """

    tropical_example_file_name = example_io.find_file(
        directory_name=tropical_example_dir_name, year=year,
        raise_error_if_missing=True
    )
    non_tropical_example_file_name = example_io.find_file(
        directory_name=non_tropical_example_dir_name, year=year,
        raise_error_if_missing=True
    )

    print('Reading data from: "{0:s}"...'.format(tropical_example_file_name))
    tropical_example_dict = example_io.read_file(tropical_example_file_name)

    print('Reading data from: "{0:s}"...'.format(
        non_tropical_example_file_name
    ))
    non_tropical_example_dict = example_io.read_file(
        non_tropical_example_file_name
    )

    example_dict = example_utils.concat_examples([
        tropical_example_dict, non_tropical_example_dict
    ])
    del tropical_example_dict, non_tropical_example_dict

    example_metadata_dict = example_utils.parse_example_ids(
        example_dict[example_utils.EXAMPLE_IDS_KEY]
    )
    example_latitudes_deg_n = (
        example_metadata_dict[example_utils.LATITUDES_KEY]
    )
    example_longitudes_deg_e = lng_conversion.convert_lng_positive_in_west(
        example_metadata_dict[example_utils.LONGITUDES_KEY]
    )

    # Flag every example whose coordinates lie within the distance tolerance
    # of an Assorted2 site.
    example_coord_matrix = numpy.transpose(numpy.vstack((
        example_latitudes_deg_n, example_longitudes_deg_e
    )))
    assorted2_coord_matrix = numpy.transpose(numpy.vstack((
        ASSORTED2_LATITUDES_DEG_N, ASSORTED2_LONGITUDES_DEG_E
    )))

    distance_matrix_deg2 = euclidean_distances(
        X=example_coord_matrix, Y=assorted2_coord_matrix, squared=True
    )
    assorted2_flags = numpy.any(
        distance_matrix_deg2 <= TOLERANCE_DEG2, axis=1
    )

    assorted2_example_dict = example_utils.subset_by_index(
        example_dict=copy.deepcopy(example_dict),
        desired_indices=numpy.where(assorted2_flags)[0]
    )
    assorted2_example_file_name = example_io.find_file(
        directory_name=assorted2_example_dir_name, year=year,
        raise_error_if_missing=False
    )

    print('Writing {0:d} examples in set Assorted2 to: "{1:s}"...'.format(
        len(assorted2_example_dict[example_utils.VALID_TIMES_KEY]),
        assorted2_example_file_name
    ))
    example_io.write_file(
        example_dict=assorted2_example_dict,
        netcdf_file_name=assorted2_example_file_name
    )

    assorted1_example_dict = example_utils.subset_by_index(
        example_dict=example_dict,
        desired_indices=numpy.where(numpy.invert(assorted2_flags))[0]
    )
    assorted1_example_file_name = example_io.find_file(
        directory_name=assorted1_example_dir_name, year=year,
        raise_error_if_missing=False
    )

    print('Writing {0:d} examples in set Assorted1 to: "{1:s}"...'.format(
        len(assorted1_example_dict[example_utils.VALID_TIMES_KEY]),
        assorted1_example_file_name
    ))
    example_io.write_file(
        example_dict=assorted1_example_dict,
        netcdf_file_name=assorted1_example_file_name
    )
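# Toy illustration of the distance-based site matching used in `_run`, with
# assumed coordinates and an assumed squared-distance tolerance.  Never
# called; for illustration only.
def _demo_site_matching():
    """Flags examples within the coordinate tolerance of an Assorted2 site."""

    example_coord_matrix = numpy.array([
        [10., 100.],
        [45., 280.],
        [71., 156.]
    ])
    assorted2_coord_matrix = numpy.array([[45.01, 280.02]])
    toy_tolerance_deg2 = 0.001

    distance_matrix_deg2 = euclidean_distances(
        X=example_coord_matrix, Y=assorted2_coord_matrix, squared=True
    )
    assorted2_flags = numpy.any(
        distance_matrix_deg2 <= toy_tolerance_deg2, axis=1
    )

    # Only the second example (45 deg N, 280 deg E) lies within the tolerance,
    # so assorted2_flags is [False, True, False].
    return assorted2_flags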