def test_find_many_files(self):
    """Ensures correct output from find_many_files."""

    these_file_names = example_io.find_many_files(
        directory_name=EXAMPLE_DIR_NAME,
        first_time_unix_sec=FIRST_FILE_TIME_UNIX_SEC,
        last_time_unix_sec=LAST_FILE_TIME_UNIX_SEC,
        raise_error_if_any_missing=False,
        raise_error_if_all_missing=False,
        test_mode=True
    )

    self.assertTrue(these_file_names == EXAMPLE_FILE_NAMES)
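# A hedged usage sketch (not from the original source): calling
# `example_io.find_many_files` outside of test mode.  The directory path and
# year below are illustrative assumptions; `time_conversion` is the same
# helper module used by the scripts further down.
#
#     first_time_unix_sec = (
#         time_conversion.first_and_last_times_in_year(2019)[0]
#     )
#     last_time_unix_sec = (
#         time_conversion.first_and_last_times_in_year(2019)[-1]
#     )
#
#     example_file_names = example_io.find_many_files(
#         directory_name='/data/examples',        # illustrative path
#         first_time_unix_sec=first_time_unix_sec,
#         last_time_unix_sec=last_time_unix_sec,
#         raise_error_if_any_missing=False,  # tolerate gaps in the record
#         raise_error_if_all_missing=True    # but fail if nothing is found
#     )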
def _run(tropical_example_dir_name, non_tropical_example_dir_name,
         num_histogram_bins, output_dir_name):
    """Plots distribution of each target variable.

    This is effectively the main method.

    :param tropical_example_dir_name: See documentation at top of file.
    :param non_tropical_example_dir_name: Same.
    :param num_histogram_bins: Same.
    :param output_dir_name: Same.
    """

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    first_time_unix_sec = (
        time_conversion.first_and_last_times_in_year(FIRST_YEAR)[0]
    )
    last_time_unix_sec = (
        time_conversion.first_and_last_times_in_year(LAST_YEAR)[-1]
    )

    example_file_names = example_io.find_many_files(
        directory_name=tropical_example_dir_name,
        first_time_unix_sec=first_time_unix_sec,
        last_time_unix_sec=last_time_unix_sec,
        raise_error_if_all_missing=True, raise_error_if_any_missing=True
    )
    example_file_names += example_io.find_many_files(
        directory_name=non_tropical_example_dir_name,
        first_time_unix_sec=first_time_unix_sec,
        last_time_unix_sec=last_time_unix_sec,
        raise_error_if_all_missing=True, raise_error_if_any_missing=True
    )

    example_dicts = []

    for this_file_name in example_file_names:
        print('Reading data from: "{0:s}"...'.format(this_file_name))
        this_example_dict = example_io.read_file(this_file_name)
        this_example_dict = example_utils.subset_by_field(
            example_dict=this_example_dict, field_names=TARGET_NAMES_IN_FILE
        )
        example_dicts.append(this_example_dict)

    example_dict = example_utils.concat_examples(example_dicts)
    del example_dicts

    letter_label = None
    panel_file_names = []

    for this_target_name in TARGET_NAMES:
        if this_target_name in TARGET_NAMES_IN_FILE:
            these_target_values = example_utils.get_field_from_dict(
                example_dict=example_dict, field_name=this_target_name
            )
        else:
            # This target is not stored in the file directly, so compute it
            # from fluxes that are: surface downwelling flux minus
            # top-of-atmosphere upwelling flux.
            down_fluxes_w_m02 = example_utils.get_field_from_dict(
                example_dict=example_dict,
                field_name=example_utils.SHORTWAVE_SURFACE_DOWN_FLUX_NAME
            )
            up_fluxes_w_m02 = example_utils.get_field_from_dict(
                example_dict=example_dict,
                field_name=example_utils.SHORTWAVE_TOA_UP_FLUX_NAME
            )
            these_target_values = down_fluxes_w_m02 - up_fluxes_w_m02

        these_target_values = numpy.ravel(these_target_values)

        # Advance the panel letter ('a', 'b', 'c', ...).
        if letter_label is None:
            letter_label = 'a'
        else:
            letter_label = chr(ord(letter_label) + 1)

        this_file_name = _plot_histogram_one_target(
            target_values=these_target_values, target_name=this_target_name,
            num_bins=num_histogram_bins, letter_label=letter_label,
            output_dir_name=output_dir_name
        )
        panel_file_names.append(this_file_name)

    concat_file_name = '{0:s}/target_distributions.jpg'.format(output_dir_name)
    print('Concatenating panels to: "{0:s}"...'.format(concat_file_name))

    imagemagick_utils.concatenate_images(
        input_file_names=panel_file_names, output_file_name=concat_file_name,
        num_panel_rows=2, num_panel_columns=2, border_width_pixels=25
    )
    imagemagick_utils.trim_whitespace(
        input_file_name=concat_file_name, output_file_name=concat_file_name
    )
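# `_plot_histogram_one_target` is defined elsewhere in this file.  Below is a
# minimal sketch of what such a helper might look like, assuming matplotlib
# and the one-panel-per-file convention used above.  The figure size, DPI,
# and label placement are assumptions, not the repository's implementation.
import os.path

from matplotlib import pyplot


def _plot_histogram_one_target_sketch(
        target_values, target_name, num_bins, letter_label, output_dir_name):
    """Plots histogram for one target variable (illustrative sketch only).

    :param target_values: 1-D numpy array of target values.
    :param target_name: Name of target variable (used in labels and file
        name).
    :param num_bins: Number of histogram bins.
    :param letter_label: Panel letter (e.g., "a").
    :param output_dir_name: Path to output directory.
    :return: output_file_name: Path to image file created by this method.
    """

    figure_object, axes_object = pyplot.subplots(1, 1, figsize=(10, 10))
    axes_object.hist(target_values, bins=num_bins)
    axes_object.set_xlabel(target_name)
    axes_object.set_ylabel('Frequency')
    axes_object.set_title('({0:s})'.format(letter_label), loc='left')

    output_file_name = os.path.join(
        output_dir_name, '{0:s}_histogram.jpg'.format(target_name)
    )
    figure_object.savefig(output_file_name, dpi=300, bbox_inches='tight')
    pyplot.close(figure_object)

    return output_file_name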
def _run(tropical_example_dir_name, non_tropical_example_dir_name,
         output_file_name):
    """Plots all sites with data.

    This is effectively the main method.

    :param tropical_example_dir_name: See documentation at top of file.
    :param non_tropical_example_dir_name: Same.
    :param output_file_name: Same.
    """

    first_time_unix_sec = (
        time_conversion.first_and_last_times_in_year(FIRST_YEAR)[0]
    )
    last_time_unix_sec = (
        time_conversion.first_and_last_times_in_year(LAST_YEAR)[-1]
    )

    tropical_file_names = example_io.find_many_files(
        directory_name=tropical_example_dir_name,
        first_time_unix_sec=first_time_unix_sec,
        last_time_unix_sec=last_time_unix_sec,
        raise_error_if_all_missing=True, raise_error_if_any_missing=False
    )
    non_tropical_file_names = example_io.find_many_files(
        directory_name=non_tropical_example_dir_name,
        first_time_unix_sec=first_time_unix_sec,
        last_time_unix_sec=last_time_unix_sec,
        raise_error_if_all_missing=True, raise_error_if_any_missing=False
    )

    latitudes_deg_n = numpy.array([])
    longitudes_deg_e = numpy.array([])

    # The tropical and non-tropical files are read identically, so one loop
    # handles both lists.
    for this_file_name in tropical_file_names + non_tropical_file_names:
        print('Reading data from: "{0:s}"...'.format(this_file_name))
        this_example_dict = example_io.read_file(this_file_name)

        these_latitudes_deg_n = example_utils.get_field_from_dict(
            example_dict=this_example_dict,
            field_name=example_utils.LATITUDE_NAME
        )
        these_longitudes_deg_e = example_utils.get_field_from_dict(
            example_dict=this_example_dict,
            field_name=example_utils.LONGITUDE_NAME
        )

        latitudes_deg_n = numpy.concatenate(
            (latitudes_deg_n, these_latitudes_deg_n)
        )
        longitudes_deg_e = numpy.concatenate(
            (longitudes_deg_e, these_longitudes_deg_e)
        )

    # Round coordinates to the given tolerance, then remove duplicates, so
    # that each site is plotted only once.
    coord_matrix = numpy.transpose(
        numpy.vstack((latitudes_deg_n, longitudes_deg_e))
    )
    coord_matrix = number_rounding.round_to_nearest(
        coord_matrix, LATLNG_TOLERANCE_DEG
    )
    coord_matrix = numpy.unique(coord_matrix, axis=0)
    latitudes_deg_n = coord_matrix[:, 0]
    longitudes_deg_e = coord_matrix[:, 1]

    figure_object, axes_object, basemap_object = (
        plotting_utils.create_equidist_cylindrical_map(
            min_latitude_deg=MIN_PLOT_LATITUDE_DEG_N,
            max_latitude_deg=MAX_PLOT_LATITUDE_DEG_N,
            min_longitude_deg=MIN_PLOT_LONGITUDE_DEG_E,
            max_longitude_deg=MAX_PLOT_LONGITUDE_DEG_E,
            resolution_string='l'
        )
    )

    plotting_utils.plot_coastlines(
        basemap_object=basemap_object, axes_object=axes_object,
        line_colour=BORDER_COLOUR, line_width=BORDER_WIDTH
    )
    plotting_utils.plot_countries(
        basemap_object=basemap_object, axes_object=axes_object,
        line_colour=BORDER_COLOUR, line_width=BORDER_WIDTH
    )
    plotting_utils.plot_parallels(
        basemap_object=basemap_object, axes_object=axes_object,
        num_parallels=NUM_PARALLELS, line_colour=GRID_LINE_COLOUR,
        line_width=GRID_LINE_WIDTH, font_size=FONT_SIZE
    )
    plotting_utils.plot_meridians(
        basemap_object=basemap_object, axes_object=axes_object,
        num_meridians=NUM_MERIDIANS, line_colour=GRID_LINE_COLOUR,
        line_width=GRID_LINE_WIDTH, font_size=FONT_SIZE
    )

    # Plot Arctic sites (latitude >= 66.5 deg N).
    arctic_indices = numpy.where(latitudes_deg_n >= 66.5)[0]
    print('Number of Arctic sites: {0:d}'.format(len(arctic_indices)))

    arctic_x_coords, arctic_y_coords = basemap_object(
        longitudes_deg_e[arctic_indices], latitudes_deg_n[arctic_indices]
    )
    axes_object.plot(
        arctic_x_coords, arctic_y_coords, linestyle='None',
        marker=MARKER_TYPE, markersize=MARKER_SIZE, markeredgewidth=0,
        markerfacecolor=ARCTIC_COLOUR, markeredgecolor=ARCTIC_COLOUR
    )

    # Plot mid-latitude sites (30-66.5 deg N).
    mid_latitude_indices = numpy.where(numpy.logical_and(
        latitudes_deg_n >= 30., latitudes_deg_n < 66.5
    ))[0]
    print('Number of mid-latitude sites: {0:d}'.format(
        len(mid_latitude_indices)
    ))

    mid_latitude_x_coords, mid_latitude_y_coords = basemap_object(
        longitudes_deg_e[mid_latitude_indices],
        latitudes_deg_n[mid_latitude_indices]
    )
    axes_object.plot(
        mid_latitude_x_coords, mid_latitude_y_coords, linestyle='None',
        marker=MARKER_TYPE, markersize=MARKER_SIZE, markeredgewidth=0,
        markerfacecolor=MID_LATITUDE_COLOUR,
        markeredgecolor=MID_LATITUDE_COLOUR
    )

    # Plot tropical sites (latitude < 30 deg N).
    tropical_indices = numpy.where(latitudes_deg_n < 30.)[0]
    print('Number of tropical sites: {0:d}'.format(len(tropical_indices)))

    tropical_x_coords, tropical_y_coords = basemap_object(
        longitudes_deg_e[tropical_indices], latitudes_deg_n[tropical_indices]
    )
    axes_object.plot(
        tropical_x_coords, tropical_y_coords, linestyle='None',
        marker=MARKER_TYPE, markersize=MARKER_SIZE, markeredgewidth=0,
        markerfacecolor=TROPICAL_COLOUR, markeredgecolor=TROPICAL_COLOUR
    )

    file_system_utils.mkdir_recursive_if_necessary(file_name=output_file_name)
    print('Saving figure to: "{0:s}"...'.format(output_file_name))
    figure_object.savefig(
        output_file_name, dpi=FIGURE_RESOLUTION_DPI, pad_inches=0,
        bbox_inches='tight'
    )
    pyplot.close(figure_object)
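# The round-then-unique step above is what collapses repeated observations at
# the same site into a single plotted point.  A self-contained sketch of the
# same idea, with an inlined stand-in for `number_rounding.round_to_nearest`
# and an illustrative tolerance of 0.01 deg (not the repository's value):
import numpy


def _round_to_nearest_sketch(values, rounding_base):
    """Rounds values to the nearest multiple of `rounding_base` (assumed
    behaviour of `number_rounding.round_to_nearest`)."""
    return rounding_base * numpy.round(values / rounding_base)


latitudes_deg_n = numpy.array([10.001, 10.002, 45.5])
longitudes_deg_e = numpy.array([20.001, 20.002, 70.1])

coord_matrix = numpy.transpose(
    numpy.vstack((latitudes_deg_n, longitudes_deg_e))
)
coord_matrix = _round_to_nearest_sketch(coord_matrix, 0.01)
coord_matrix = numpy.unique(coord_matrix, axis=0)

print(coord_matrix.shape[0])  # 2: the two near-duplicate sites collapsed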
def _run(model_file_name, example_file_name, num_examples, example_dir_name,
         example_id_file_name, layer_name, neuron_indices, ideal_activation,
         num_iterations, learning_rate, l2_weight, output_file_name):
    """Runs backwards optimization.

    This is effectively the main method.

    :param model_file_name: See documentation at top of file.
    :param example_file_name: Same.
    :param num_examples: Same.
    :param example_dir_name: Same.
    :param example_id_file_name: Same.
    :param layer_name: Same.
    :param neuron_indices: Same.
    :param ideal_activation: Same.
    :param num_iterations: Same.
    :param learning_rate: Same.
    :param l2_weight: Same.
    :param output_file_name: Same.
    """

    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = neural_net.read_model(model_file_name)

    metafile_name = neural_net.find_metafile(
        model_dir_name=os.path.split(model_file_name)[0],
        raise_error_if_missing=True
    )

    print('Reading metadata from: "{0:s}"...'.format(metafile_name))
    metadata_dict = neural_net.read_metafile(metafile_name)

    predictor_matrix, _, example_id_strings = (
        misc_utils.get_examples_for_inference(
            model_metadata_dict=metadata_dict,
            example_file_name=example_file_name,
            num_examples=num_examples, example_dir_name=example_dir_name,
            example_id_file_name=example_id_file_name
        )
    )
    print(SEPARATOR_STRING)

    generator_option_dict = metadata_dict[neural_net.TRAINING_OPTIONS_KEY]
    normalization_file_name = (
        generator_option_dict[neural_net.NORMALIZATION_FILE_KEY]
    )

    print((
        'Reading training examples (for normalization) from: "{0:s}"...'
    ).format(normalization_file_name))

    training_example_dict = example_io.read_file(normalization_file_name)
    training_example_dict = example_utils.subset_by_height(
        example_dict=training_example_dict,
        heights_m_agl=generator_option_dict[neural_net.HEIGHTS_KEY]
    )

    num_examples = len(example_id_strings)
    bwo_dict = None

    for i in range(num_examples):
        this_bwo_dict = bwo.optimize_input_for_neuron(
            model_object=model_object,
            init_function_or_matrix=predictor_matrix[i, ...],
            layer_name=layer_name, neuron_indices=neuron_indices,
            ideal_activation=ideal_activation, num_iterations=num_iterations,
            learning_rate=learning_rate, l2_weight=l2_weight
        )

        if i == num_examples - 1:
            print(SEPARATOR_STRING)
        else:
            print(MINOR_SEPARATOR_STRING)

        if bwo_dict is None:
            # On the first iteration, allocate arrays to hold results for all
            # examples.
            these_dim = numpy.array(
                (num_examples,) +
                this_bwo_dict[bwo.INITIAL_PREDICTORS_KEY].shape[1:],
                dtype=int
            )
            bwo_dict = {
                bwo.INITIAL_PREDICTORS_KEY: numpy.full(these_dim, numpy.nan),
                bwo.FINAL_PREDICTORS_KEY: numpy.full(these_dim, numpy.nan),
                bwo.INITIAL_ACTIVATIONS_KEY:
                    numpy.full(num_examples, numpy.nan),
                bwo.FINAL_ACTIVATIONS_KEY: numpy.full(num_examples, numpy.nan)
            }

        bwo_dict[bwo.INITIAL_PREDICTORS_KEY][i, ...] = (
            this_bwo_dict[bwo.INITIAL_PREDICTORS_KEY][0, ...]
        )
        bwo_dict[bwo.FINAL_PREDICTORS_KEY][i, ...] = (
            this_bwo_dict[bwo.FINAL_PREDICTORS_KEY][0, ...]
        )
        bwo_dict[bwo.INITIAL_ACTIVATIONS_KEY][i] = (
            this_bwo_dict[bwo.INITIAL_ACTIVATION_KEY]
        )
        bwo_dict[bwo.FINAL_ACTIVATIONS_KEY][i] = (
            this_bwo_dict[bwo.FINAL_ACTIVATION_KEY]
        )

    if example_file_name == '':
        example_file_name = example_io.find_many_files(
            directory_name=example_dir_name, first_time_unix_sec=0,
            last_time_unix_sec=int(1e12),
            raise_error_if_any_missing=False, raise_error_if_all_missing=True
        )[0]

    first_example_dict = example_io.read_file(example_file_name)
    first_example_dict = example_utils.subset_by_height(
        example_dict=first_example_dict,
        heights_m_agl=generator_option_dict[neural_net.HEIGHTS_KEY]
    )

    net_type_string = metadata_dict[neural_net.NET_TYPE_KEY]

    # Denormalize the initial (pre-optimization) predictors.
    init_example_dict = copy.deepcopy(first_example_dict)
    this_example_dict = neural_net.predictors_numpy_to_dict(
        predictor_matrix=bwo_dict[bwo.INITIAL_PREDICTORS_KEY],
        example_dict=init_example_dict, net_type_string=net_type_string
    )
    init_example_dict.update(this_example_dict)

    if generator_option_dict[neural_net.PREDICTOR_NORM_TYPE_KEY] is not None:
        init_example_dict = normalization.denormalize_data(
            new_example_dict=init_example_dict,
            training_example_dict=training_example_dict,
            normalization_type_string=
            generator_option_dict[neural_net.PREDICTOR_NORM_TYPE_KEY],
            min_normalized_value=
            generator_option_dict[neural_net.PREDICTOR_MIN_NORM_VALUE_KEY],
            max_normalized_value=
            generator_option_dict[neural_net.PREDICTOR_MAX_NORM_VALUE_KEY],
            separate_heights=True, apply_to_predictors=True,
            apply_to_vector_targets=False, apply_to_scalar_targets=False
        )

    init_scalar_predictor_matrix = (
        init_example_dict[example_utils.SCALAR_PREDICTOR_VALS_KEY]
    )
    init_vector_predictor_matrix = (
        init_example_dict[example_utils.VECTOR_PREDICTOR_VALS_KEY]
    )

    # Denormalize the final (post-optimization) predictors.
    final_example_dict = copy.deepcopy(first_example_dict)
    this_example_dict = neural_net.predictors_numpy_to_dict(
        predictor_matrix=bwo_dict[bwo.FINAL_PREDICTORS_KEY],
        example_dict=final_example_dict, net_type_string=net_type_string
    )
    final_example_dict.update(this_example_dict)

    if generator_option_dict[neural_net.PREDICTOR_NORM_TYPE_KEY] is not None:
        final_example_dict = normalization.denormalize_data(
            new_example_dict=final_example_dict,
            training_example_dict=training_example_dict,
            normalization_type_string=
            generator_option_dict[neural_net.PREDICTOR_NORM_TYPE_KEY],
            min_normalized_value=
            generator_option_dict[neural_net.PREDICTOR_MIN_NORM_VALUE_KEY],
            max_normalized_value=
            generator_option_dict[neural_net.PREDICTOR_MAX_NORM_VALUE_KEY],
            separate_heights=True, apply_to_predictors=True,
            apply_to_vector_targets=False, apply_to_scalar_targets=False
        )

    final_scalar_predictor_matrix = (
        final_example_dict[example_utils.SCALAR_PREDICTOR_VALS_KEY]
    )
    final_vector_predictor_matrix = (
        final_example_dict[example_utils.VECTOR_PREDICTOR_VALS_KEY]
    )

    print('Writing results to file: "{0:s}"...'.format(output_file_name))
    bwo.write_file(
        netcdf_file_name=output_file_name,
        init_scalar_predictor_matrix=init_scalar_predictor_matrix,
        final_scalar_predictor_matrix=final_scalar_predictor_matrix,
        init_vector_predictor_matrix=init_vector_predictor_matrix,
        final_vector_predictor_matrix=final_vector_predictor_matrix,
        initial_activations=bwo_dict[bwo.INITIAL_ACTIVATIONS_KEY],
        final_activations=bwo_dict[bwo.FINAL_ACTIVATIONS_KEY],
        example_id_strings=example_id_strings,
        model_file_name=model_file_name, layer_name=layer_name,
        neuron_indices=neuron_indices, ideal_activation=ideal_activation,
        num_iterations=num_iterations, learning_rate=learning_rate,
        l2_weight=l2_weight
    )
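# For intuition, a minimal sketch of the gradient-descent loop that a routine
# like `bwo.optimize_input_for_neuron` performs, written here for
# TensorFlow 2.x.  The function name, signature, and loss definition are
# assumptions; the repository's actual implementation may differ (e.g., in
# how the L2 penalty is defined or how multi-input models are handled).
import numpy
import tensorflow


def _optimize_input_for_neuron_sketch(
        model_object, init_matrix, layer_name, neuron_indices,
        ideal_activation, num_iterations, learning_rate, l2_weight):
    """Nudges one input example toward an ideal neuron activation (sketch).

    :param model_object: Trained Keras model (single input assumed).
    :param init_matrix: numpy array with predictors for one example (no
        leading example axis).
    :param layer_name: Name of layer containing the target neuron.
    :param neuron_indices: Tuple of indices (after the example axis) locating
        the neuron in the layer's output.
    :param ideal_activation: Activation value to aim for.
    :param num_iterations: Number of gradient-descent steps.
    :param learning_rate: Step size.
    :param l2_weight: Weight on L2 penalty for total input perturbation.
    :return: final_matrix: Optimized version of `init_matrix`.
    """

    # Sub-model mapping the input to the target layer's activations.
    activation_model = tensorflow.keras.Model(
        inputs=model_object.input,
        outputs=model_object.get_layer(name=layer_name).output
    )

    init_tensor = tensorflow.constant(
        numpy.expand_dims(init_matrix, axis=0), dtype=tensorflow.float32
    )
    input_tensor = tensorflow.Variable(init_tensor)

    for _ in range(num_iterations):
        with tensorflow.GradientTape() as tape:
            activation = activation_model(input_tensor)[
                (0,) + tuple(neuron_indices)
            ]

            # Loss = squared distance from ideal activation, plus L2 penalty
            # on how far the input has drifted from its initial state.
            loss = (
                (activation - ideal_activation) ** 2 +
                l2_weight *
                tensorflow.reduce_sum((input_tensor - init_tensor) ** 2)
            )

        gradient = tape.gradient(loss, input_tensor)
        input_tensor.assign_sub(learning_rate * gradient)

    return input_tensor.numpy()[0, ...]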
def get_raw_examples(example_file_name, num_examples, example_dir_name,
                     example_id_file_name):
    """Returns raw examples.

    The difference between `get_raw_examples` and `get_examples_for_inference`
    is that `get_raw_examples` returns examples in their raw form, *not*
    pre-processed to be fed through a model for inference.

    :param example_file_name: See doc for `get_examples_for_inference`.
    :param num_examples: Same.
    :param example_dir_name: Same.
    :param example_id_file_name: Same.
    :return: example_dict: See doc for `example_io.read_file`.
    """

    error_checking.assert_is_string(example_file_name)
    use_specific_ids = example_file_name == ''

    if use_specific_ids:
        error_checking.assert_is_string(example_id_file_name)

        print('Reading desired example IDs from: "{0:s}"...'.format(
            example_id_file_name
        ))
        example_id_strings = read_example_ids_from_netcdf(example_id_file_name)

        valid_times_unix_sec = example_utils.parse_example_ids(
            example_id_strings
        )[example_utils.VALID_TIMES_KEY]

        example_file_names = example_io.find_many_files(
            directory_name=example_dir_name,
            first_time_unix_sec=numpy.min(valid_times_unix_sec),
            last_time_unix_sec=numpy.max(valid_times_unix_sec)
        )

        num_files = len(example_file_names)
        example_dicts = [dict() for _ in range(num_files)]

        for i in range(num_files):
            print('Reading data from: "{0:s}"...'.format(
                example_file_names[i]
            ))
            example_dicts[i] = example_io.read_file(example_file_names[i])

        example_dict = example_utils.concat_examples(example_dicts)

        good_indices = example_utils.find_examples(
            all_id_strings=example_dict[example_utils.EXAMPLE_IDS_KEY],
            desired_id_strings=example_id_strings, allow_missing=False
        )
        example_dict = example_utils.subset_by_index(
            example_dict=example_dict, desired_indices=good_indices
        )
    else:
        error_checking.assert_is_string(example_dir_name)
        error_checking.assert_is_integer(num_examples)
        error_checking.assert_is_greater(num_examples, 0)

        print('Reading data from: "{0:s}"...'.format(example_file_name))
        example_dict = example_io.read_file(example_file_name)

        num_examples_total = len(example_dict[example_utils.VALID_TIMES_KEY])
        desired_indices = numpy.linspace(
            0, num_examples_total - 1, num=num_examples_total, dtype=int
        )

        if num_examples < num_examples_total:
            desired_indices = numpy.random.choice(
                desired_indices, size=num_examples, replace=False
            )

        example_dict = example_utils.subset_by_index(
            example_dict=example_dict, desired_indices=desired_indices
        )

    return example_dict
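# A hedged usage sketch of `get_raw_examples` in both modes.  All paths are
# illustrative assumptions, not files from the repository.
#
#     # Mode 1: read a random subset of examples from one known file.
#     example_dict = get_raw_examples(
#         example_file_name='/data/examples/learning_examples.nc',
#         num_examples=100, example_dir_name='', example_id_file_name=''
#     )
#
#     # Mode 2 (example_file_name == ''): read specific examples, identified
#     # by ID, from whichever files in the directory cover their valid times.
#     example_dict = get_raw_examples(
#         example_file_name='', num_examples=0,
#         example_dir_name='/data/examples',
#         example_id_file_name='/data/example_ids.nc'
#     )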