def get_connected_input_layers(model_object, target_layer_name):
    """Gets input layers connected to target layer.

    :param model_object: Instance of `keras.models.Model` or
        `keras.models.Sequential`.
    :param target_layer_name: Name of target layer.
    :return: input_layer_objects: 1-D list of input layers (instances of
        `keras.layers.Input`) connected to target layer.
    """

    error_checking.assert_is_string(target_layer_name)

    prev_layer_objects = [model_object.get_layer(name=target_layer_name)]
    input_layer_objects = []

    while len(prev_layer_objects) > 0:
        new_prev_layer_objects = []

        for this_layer_object in prev_layer_objects:
            these_prev_node_objects = this_layer_object._inbound_nodes
            these_prev_layer_objects = []

            for this_node_object in these_prev_node_objects:
                these_prev_layer_objects += this_node_object.inbound_layers

            if len(these_prev_layer_objects) == 0:
                input_layer_objects.append(this_layer_object)
            else:
                new_prev_layer_objects += these_prev_layer_objects

        prev_layer_objects = list(new_prev_layer_objects)

    return input_layer_objects
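
# Usage sketch (illustrative only): builds a tiny functional model and finds
# the Input layer feeding "conv_layer".  The architecture and function name
# below are assumptions for demonstration, and the traversal above relies on
# private Keras attributes (`_inbound_nodes`, `inbound_layers`), so this
# assumes a Keras version where `Node.inbound_layers` is a list.
def _demo_get_connected_input_layers():
    import keras

    input_layer_object = keras.layers.Input(shape=(32, 32, 3), name='input')
    conv_layer_object = keras.layers.Conv2D(
        filters=8, kernel_size=(3, 3), name='conv_layer'
    )(input_layer_object)

    model_object = keras.models.Model(
        inputs=input_layer_object, outputs=conv_layer_object)

    # Expected: a one-element list containing the Input layer.
    these_input_layer_objects = get_connected_input_layers(
        model_object=model_object, target_layer_name='conv_layer')
    print(these_input_layer_objects)
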
def check_metadata(component_type_string, target_class=None, layer_name=None,
                   ideal_activation=None, neuron_indices=None,
                   channel_index=None):
    """Error-checks metadata for saliency calculations.

    :param component_type_string: Component type (must be accepted by
        `model_interpretation.check_component_type`).
    :param target_class: See doc for `get_saliency_maps_for_class_activation`.
    :param layer_name: See doc for `get_saliency_maps_for_neuron_activation` or
        `get_saliency_maps_for_channel_activation`.
    :param ideal_activation: Same.
    :param neuron_indices: See doc for
        `get_saliency_maps_for_neuron_activation`.
    :param channel_index: See doc for
        `get_saliency_maps_for_channel_activation`.
    :return: metadata_dict: Dictionary with the following keys.
    metadata_dict['component_type_string']: See input doc.
    metadata_dict['target_class']: Same.
    metadata_dict['layer_name']: Same.
    metadata_dict['ideal_activation']: Same.
    metadata_dict['neuron_indices']: Same.
    metadata_dict['channel_index']: Same.
    """

    model_interpretation.check_component_type(component_type_string)

    if (component_type_string ==
            model_interpretation.CLASS_COMPONENT_TYPE_STRING):
        error_checking.assert_is_integer(target_class)
        error_checking.assert_is_geq(target_class, 0)

    if component_type_string in [
            model_interpretation.NEURON_COMPONENT_TYPE_STRING,
            model_interpretation.CHANNEL_COMPONENT_TYPE_STRING
    ]:
        error_checking.assert_is_string(layer_name)
        if ideal_activation is not None:
            error_checking.assert_is_greater(ideal_activation, 0.)

    if (component_type_string ==
            model_interpretation.NEURON_COMPONENT_TYPE_STRING):
        error_checking.assert_is_integer_numpy_array(neuron_indices)
        error_checking.assert_is_geq_numpy_array(neuron_indices, 0)
        error_checking.assert_is_numpy_array(neuron_indices, num_dimensions=1)

    if (component_type_string ==
            model_interpretation.CHANNEL_COMPONENT_TYPE_STRING):
        error_checking.assert_is_integer(channel_index)
        error_checking.assert_is_geq(channel_index, 0)

    return {
        COMPONENT_TYPE_KEY: component_type_string,
        TARGET_CLASS_KEY: target_class,
        LAYER_NAME_KEY: layer_name,
        IDEAL_ACTIVATION_KEY: ideal_activation,
        NEURON_INDICES_KEY: neuron_indices,
        CHANNEL_INDEX_KEY: channel_index
    }
def find_match_file(top_directory_name, valid_time_unix_sec,
                    raise_error_if_missing=False):
    """Finds match file.

    A "match file" matches storm objects in one dataset (e.g., MYRORSS or
    GridRad) to those in another dataset, at one time step.

    :param top_directory_name: Name of top-level directory.
    :param valid_time_unix_sec: Valid time.
    :param raise_error_if_missing: See doc for `find_file`.
    :return: match_file_name: Path to match file.  If file is missing and
        `raise_error_if_missing = False`, this will be the *expected* path.
    :raises: ValueError: if file is missing and
        `raise_error_if_missing = True`.
    """

    error_checking.assert_is_string(top_directory_name)
    error_checking.assert_is_boolean(raise_error_if_missing)

    spc_date_string = time_conversion.time_to_spc_date_string(
        valid_time_unix_sec)

    match_file_name = '{0:s}/{1:s}/{2:s}/storm-matches_{3:s}.p'.format(
        top_directory_name, spc_date_string[:4], spc_date_string,
        time_conversion.unix_sec_to_string(
            valid_time_unix_sec, FILE_NAME_TIME_FORMAT)
    )

    if raise_error_if_missing and not os.path.isfile(match_file_name):
        error_string = 'Cannot find file.  Expected at: "{0:s}"'.format(
            match_file_name)
        raise ValueError(error_string)

    return match_file_name
def field_name_orig_to_new(field_name_orig, data_source):
    """Converts field name from original to new format.

    "Original format" = in original data source (examples: MYRORSS, MRMS,
    GridRad).

    "New format" = GewitterGefahr format, which is Pythonic and includes units
    at the end.

    :param field_name_orig: Name of radar field in original format.
    :param data_source: Data source (string).
    :return: field_name: Name of radar field in new format.
    """

    error_checking.assert_is_string(field_name_orig)
    check_data_source(data_source)

    if data_source == MYRORSS_SOURCE_ID:
        all_orig_field_names = RADAR_FIELD_NAMES_MYRORSS_PADDED
    elif data_source == MRMS_SOURCE_ID:
        all_orig_field_names = RADAR_FIELD_NAMES_MRMS_PADDED
    elif data_source == GRIDRAD_SOURCE_ID:
        all_orig_field_names = RADAR_FIELD_NAMES_GRIDRAD_PADDED

    found_flags = [s == field_name_orig for s in all_orig_field_names]
    return RADAR_FIELD_NAMES[numpy.where(found_flags)[0][0]]
def find_metafile(model_file_name, raise_error_if_missing=True):
    """Finds metafile for CNN.

    :param model_file_name: Path to model itself (see doc for `read_model`).
    :param raise_error_if_missing: Boolean flag.  If file is missing and
        `raise_error_if_missing = True`, this method will error out.
    :return: metafile_name: Path to metafile.  If file is missing and
        `raise_error_if_missing = False`, this will be the expected path.
    :raises: ValueError: if file is missing and
        `raise_error_if_missing = True`.
    """

    error_checking.assert_is_string(model_file_name)
    error_checking.assert_is_boolean(raise_error_if_missing)

    metafile_name = '{0:s}/model_metadata.p'.format(
        os.path.split(model_file_name)[0]
    )

    if not os.path.isfile(metafile_name):
        metafile_name = metafile_name.replace(
            '/glade/work/ryanlage', '/condo/swatwork/ralager'
        )

    if not os.path.isfile(metafile_name) and raise_error_if_missing:
        error_string = 'Cannot find file.  Expected at: "{0:s}"'.format(
            metafile_name)
        raise ValueError(error_string)

    return metafile_name
def unzip_tar(tar_file_name, target_directory_name=None,
              file_and_dir_names_to_unzip=None):
    """Unzips tar file.

    :param tar_file_name: Path to input file.
    :param target_directory_name: Path to output directory.
    :param file_and_dir_names_to_unzip: List of files and directories to
        extract from the tar file.  Each list element should be a relative path
        inside the tar file.  After unzipping, the same relative path will
        exist inside `target_directory_name`.
    :raises: ValueError: if the Unix command fails.
    """

    error_checking.assert_is_string(tar_file_name)
    error_checking.assert_is_string_list(file_and_dir_names_to_unzip)
    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=target_directory_name)

    unix_command_string = 'tar -C "{0:s}" -xvf "{1:s}"'.format(
        target_directory_name, tar_file_name)

    for this_relative_path in file_and_dir_names_to_unzip:
        unix_command_string += ' "' + this_relative_path + '"'

    exit_code = os.system(unix_command_string)
    if exit_code != 0:
        raise ValueError(
            '\nUnix command failed (log messages shown above should explain '
            'why).')
def find_local_raw_5minute_file(station_id=None, month_unix_sec=None,
                                top_directory_name=None,
                                raise_error_if_missing=True):
    """Finds raw 5-minute file on local machine.

    This file should contain 5-minute METARs for one station-month.

    :param station_id: String ID for station.
    :param month_unix_sec: Month in Unix format.
    :param top_directory_name: Top-level directory for raw 5-minute files.
    :param raise_error_if_missing: Boolean flag.  If True and file is missing,
        this method will raise an error.
    :return: raw_5minute_file_name: File path.  If raise_error_if_missing =
        False and file is missing, this will be the *expected* path.
    :raises: ValueError: if raise_error_if_missing = True and file is missing.
    """

    error_checking.assert_is_string(station_id)
    error_checking.assert_is_string(top_directory_name)
    error_checking.assert_is_boolean(raise_error_if_missing)

    pathless_file_name = _get_pathless_raw_5minute_file_name(
        station_id, month_unix_sec)
    raw_5minute_file_name = '{0:s}/{1:s}/{2:s}'.format(
        top_directory_name, station_id, pathless_file_name)

    if raise_error_if_missing and not os.path.isfile(raw_5minute_file_name):
        raise ValueError(
            'Cannot find raw 5-minute file.  Expected at location: ' +
            raw_5minute_file_name)

    return raw_5minute_file_name
def find_file(unix_time_sec, top_directory_name, raise_error_if_missing=True):
    """Finds GridRad file on local machine.

    Each GridRad file contains all fields at all heights for one valid time.

    :param unix_time_sec: Valid time.
    :param top_directory_name: Name of top-level directory with GridRad.
    :param raise_error_if_missing: Boolean flag.  If file is missing and
        raise_error_if_missing = True, will raise error.  If file is missing
        and raise_error_if_missing = False, will return *expected* path to
        file.
    :return: gridrad_file_name: Path to GridRad file.
    :raises: ValueError: if raise_error_if_missing = True and file is missing.
    """

    error_checking.assert_is_string(top_directory_name)
    spc_date_string = time_conversion.time_to_spc_date_string(unix_time_sec)

    gridrad_file_name = '{0:s}/{1:s}/{2:s}/{3:s}'.format(
        top_directory_name, spc_date_string[:4], spc_date_string,
        _get_pathless_file_name(unix_time_sec))

    if raise_error_if_missing and not os.path.isfile(gridrad_file_name):
        error_string = 'Cannot find GridRad file.  Expected at: "{0:s}"'.format(
            gridrad_file_name)
        raise ValueError(error_string)

    return gridrad_file_name
def find_grid_metafile(prediction_dir_name, raise_error_if_missing=True):
    """Finds file with metadata for grid.

    This file is needed only if prediction files are split by space (one per
    grid cell).

    :param prediction_dir_name: Name of directory with prediction files.  The
        metafile is expected here.
    :param raise_error_if_missing: Boolean flag.  If file is missing and
        `raise_error_if_missing == True`, will throw error.  If file is missing
        and `raise_error_if_missing == False`, will return *expected* file
        path.
    :return: grid_metafile_name: File path.
    :raises: ValueError: if file is missing and
        `raise_error_if_missing == True`.
    """

    error_checking.assert_is_string(prediction_dir_name)
    grid_metafile_name = '{0:s}/grid_metadata.nc'.format(prediction_dir_name)

    if raise_error_if_missing and not os.path.isfile(grid_metafile_name):
        error_string = 'Cannot find file.  Expected at: "{0:s}"'.format(
            grid_metafile_name)
        raise ValueError(error_string)

    return grid_metafile_name
def run_guided_gradcam(orig_model_object, list_of_input_matrices,
                       target_layer_name, class_activation_matrix,
                       new_model_object=None):
    """Runs guided Grad-CAM.

    M = number of rows in grid
    N = number of columns in grid
    C = number of channels

    :param orig_model_object: Original model (trained instance of
        `keras.models.Model` or `keras.models.Sequential`).
    :param list_of_input_matrices: See doc for `run_gradcam`.
    :param target_layer_name: Same.
    :param class_activation_matrix: Same.
    :param new_model_object: New model (created by
        `_change_backprop_function`), to be used for guided backprop.
    :return: ggradcam_output_matrix: M-by-N-by-C numpy array of output values.
    :return: new_model_object: See input doc.
    """

    # Check input args.
    error_checking.assert_is_string(target_layer_name)
    error_checking.assert_is_list(list_of_input_matrices)
    error_checking.assert_is_numpy_array_without_nan(class_activation_matrix)

    for q in range(len(list_of_input_matrices)):
        error_checking.assert_is_numpy_array(list_of_input_matrices[q])

        if list_of_input_matrices[q].shape[0] != 1:
            list_of_input_matrices[q] = numpy.expand_dims(
                list_of_input_matrices[q], axis=0)

    # Do the dirty work.
    if new_model_object is None:
        _register_guided_backprop()
        new_model_object = _change_backprop_function(
            model_object=orig_model_object)

    input_index = _find_relevant_input_matrix(
        list_of_input_matrices=list_of_input_matrices,
        num_spatial_dim=len(class_activation_matrix.shape))

    saliency_function = _make_saliency_function(
        model_object=new_model_object, layer_name=target_layer_name,
        input_index=input_index)

    saliency_matrix = saliency_function(list_of_input_matrices + [0])[0]
    print('Minimum saliency = {0:.4e} ... max saliency = {1:.4e}'.format(
        numpy.min(saliency_matrix), numpy.max(saliency_matrix)
    ))

    ggradcam_output_matrix = (
        saliency_matrix * class_activation_matrix[..., numpy.newaxis]
    )
    ggradcam_output_matrix = ggradcam_output_matrix[0, ...]

    # ggradcam_output_matrix = _normalize_guided_gradcam_output(
    #     ggradcam_output_matrix[0, ...])

    return ggradcam_output_matrix, new_model_object
def check_metadata(activation_layer_name, vector_output_layer_name,
                   output_neuron_indices, ideal_activation):
    """Checks metadata for errors.

    :param activation_layer_name: Name of activation layer.
    :param vector_output_layer_name: Name of layer that outputs predictions for
        vector target variables.
    :param output_neuron_indices: length-2 numpy array with indices of output
        neuron (height index, channel index).  Class activation will be
        computed with respect to the output of this neuron.
    :param ideal_activation: Ideal neuron activation, used to define loss
        function.  The loss function will be
        (output_neuron_activation - ideal_activation)**2.
    """

    error_checking.assert_is_string(activation_layer_name)
    error_checking.assert_is_string(vector_output_layer_name)

    error_checking.assert_is_integer_numpy_array(output_neuron_indices)
    error_checking.assert_is_geq_numpy_array(output_neuron_indices, 0)
    error_checking.assert_is_numpy_array(
        output_neuron_indices,
        exact_dimensions=numpy.array([2], dtype=int)
    )

    error_checking.assert_is_not_nan(ideal_activation)
def write_file(
        pickle_file_name, activation_matrix, storm_ids, storm_times_unix_sec,
        model_file_name, component_type_string, target_class=None,
        layer_name=None, neuron_index_matrix=None, channel_indices=None):
    """Writes activations to Pickle file.

    E = number of examples (storm objects)
    C = number of model components (classes, neurons, or channels) for which
        activations were computed

    :param pickle_file_name: Path to output file.
    :param activation_matrix: E-by-C numpy array of activations, where
        activation_matrix[i, j] = activation of the [j]th model component for
        the [i]th example.
    :param storm_ids: length-E list of storm IDs.
    :param storm_times_unix_sec: length-E numpy array of storm times.
    :param model_file_name: Path to file with trained model.
    :param component_type_string: See doc for `check_metadata`.
    :param target_class: Same.
    :param layer_name: Same.
    :param neuron_index_matrix: Same.
    :param channel_indices: Same.
    """

    num_components = check_metadata(
        component_type_string=component_type_string, target_class=target_class,
        layer_name=layer_name, neuron_index_matrix=neuron_index_matrix,
        channel_indices=channel_indices)

    error_checking.assert_is_string(model_file_name)

    error_checking.assert_is_string_list(storm_ids)
    error_checking.assert_is_numpy_array(
        numpy.array(storm_ids), num_dimensions=1)

    num_examples = len(storm_ids)

    error_checking.assert_is_integer_numpy_array(storm_times_unix_sec)
    error_checking.assert_is_numpy_array(
        storm_times_unix_sec, exact_dimensions=numpy.array([num_examples]))

    error_checking.assert_is_numpy_array_without_nan(activation_matrix)
    error_checking.assert_is_numpy_array(
        activation_matrix,
        exact_dimensions=numpy.array([num_examples, num_components]))

    metadata_dict = {
        STORM_IDS_KEY: storm_ids,
        STORM_TIMES_KEY: storm_times_unix_sec,
        MODEL_FILE_NAME_KEY: model_file_name,
        COMPONENT_TYPE_KEY: component_type_string,
        TARGET_CLASS_KEY: target_class,
        LAYER_NAME_KEY: layer_name,
        NEURON_INDICES_KEY: neuron_index_matrix,
        CHANNEL_INDICES_KEY: channel_indices,
    }

    file_system_utils.mkdir_recursive_if_necessary(file_name=pickle_file_name)

    pickle_file_handle = open(pickle_file_name, 'wb')
    pickle.dump(activation_matrix, pickle_file_handle)
    pickle.dump(metadata_dict, pickle_file_handle)
    pickle_file_handle.close()
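
# Usage sketch (illustrative only): reads back a file written by `write_file`.
# The two `pickle.load` calls mirror the two `pickle.dump` calls above.  This
# demo function is an assumption for illustration; the library's official
# reader (if any) may differ.
def _demo_read_activations(pickle_file_name):
    pickle_file_handle = open(pickle_file_name, 'rb')

    # First object dumped is the activation matrix, second is metadata.
    activation_matrix = pickle.load(pickle_file_handle)
    metadata_dict = pickle.load(pickle_file_handle)
    pickle_file_handle.close()

    return activation_matrix, metadata_dict
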
def find_prediction_file(top_directory_name, spc_date_string,
                         raise_error_if_missing=False):
    """Finds file with upconvnet predictions (reconstructed radar images).

    :param top_directory_name: Name of top-level directory with upconvnet
        predictions.
    :param spc_date_string: SPC date (format "yyyymmdd").
    :param raise_error_if_missing: Boolean flag.  If file is missing and
        `raise_error_if_missing = True`, this method will error out.
    :return: prediction_file_name: Path to prediction file.  If file is missing
        and `raise_error_if_missing = False`, this will be the expected path.
    :raises: ValueError: if file is missing and
        `raise_error_if_missing = True`.
    """

    error_checking.assert_is_string(top_directory_name)
    error_checking.assert_is_boolean(raise_error_if_missing)
    time_conversion.spc_date_string_to_unix_sec(spc_date_string)

    prediction_file_name = '{0:s}/{1:s}/{2:s}_{3:s}.p'.format(
        top_directory_name, spc_date_string[:4], PATHLESS_FILE_NAME_PREFIX,
        spc_date_string
    )

    if raise_error_if_missing and not os.path.isfile(prediction_file_name):
        error_string = 'Cannot find file.  Expected at: "{0:s}"'.format(
            prediction_file_name)
        raise ValueError(error_string)

    return prediction_file_name
def find_local_raw_file(year, directory_name=None,
                        raise_error_if_missing=True):
    """Finds raw file on local machine.

    This file should contain all storm reports for one year.

    :param year: [integer] Will look for file from this year.
    :param directory_name: Name of directory with Storm Events files.
    :param raise_error_if_missing: Boolean flag.  If True and file is missing,
        this method will raise an error.
    :return: raw_file_name: File path.  If raise_error_if_missing = False and
        file is missing, this will be the *expected* path.
    :raises: ValueError: if raise_error_if_missing = True and file is missing.
    """

    error_checking.assert_is_integer(year)
    error_checking.assert_is_string(directory_name)
    error_checking.assert_is_boolean(raise_error_if_missing)

    raw_file_name = '{0:s}/{1:s}{2:s}{3:s}'.format(
        directory_name, PATHLESS_RAW_FILE_PREFIX, _year_number_to_string(year),
        RAW_FILE_EXTENSION)

    if raise_error_if_missing and not os.path.isfile(raw_file_name):
        raise ValueError(
            'Cannot find raw file.  Expected at location: ' + raw_file_name)

    return raw_file_name
def find_model_file(base_model_file_name, raise_error_if_missing=True):
    """Finds file containing isotonic-regression model(s).

    This file should be written by `write_model_for_each_class`.

    :param base_model_file_name: Path to file containing base model (e.g.,
        CNN).
    :param raise_error_if_missing: Boolean flag.  If isotonic-regression file
        is missing and `raise_error_if_missing = True`, this method will error
        out.
    :return: isotonic_file_name: Path to isotonic-regression file.  If file is
        missing and `raise_error_if_missing = False`, this will be the
        *expected* path.
    :raises: ValueError: if isotonic-regression file is missing and
        `raise_error_if_missing = True`.
    """

    error_checking.assert_is_string(base_model_file_name)
    error_checking.assert_is_boolean(raise_error_if_missing)

    isotonic_file_name = '{0:s}/isotonic_regression_models.p'.format(
        os.path.split(base_model_file_name)[0])

    if not os.path.isfile(isotonic_file_name) and raise_error_if_missing:
        error_string = 'Cannot find file.  Expected at: "{0:s}"'.format(
            isotonic_file_name)
        raise ValueError(error_string)

    return isotonic_file_name
def _check_convolution_options(
        num_kernel_rows, num_rows_per_stride, padding_type_string,
        num_filters, num_kernel_dimensions, num_kernel_columns=None,
        num_columns_per_stride=None, num_kernel_heights=None,
        num_heights_per_stride=None):
    """Checks input args for 1-D, 2-D, or 3-D convolution layer.

    :param num_kernel_rows: Number of rows in kernel.
    :param num_rows_per_stride: Number of rows per stride (number of rows moved
        by the kernel at once).
    :param padding_type_string: Padding type (must be in
        `VALID_PADDING_TYPE_STRINGS`).
    :param num_filters: Number of output filters (channels).
    :param num_kernel_dimensions: Number of dimensions in kernel.
    :param num_kernel_columns: [used only if num_kernel_dimensions > 1]
        Number of columns in kernel.
    :param num_columns_per_stride: [used only if num_kernel_dimensions > 1]
        Number of columns per stride.
    :param num_kernel_heights: [used only if num_kernel_dimensions = 3]
        Number of heights in kernel.
    :param num_heights_per_stride: [used only if num_kernel_dimensions = 3]
        Number of heights per stride.
    :raises: ValueError: if
        `padding_type_string not in VALID_PADDING_TYPE_STRINGS`.
    """

    error_checking.assert_is_integer(num_kernel_rows)
    error_checking.assert_is_geq(num_kernel_rows, 3)
    error_checking.assert_is_integer(num_rows_per_stride)
    error_checking.assert_is_geq(num_rows_per_stride, 1)
    error_checking.assert_is_leq(num_rows_per_stride, num_kernel_rows)

    error_checking.assert_is_string(padding_type_string)
    if padding_type_string not in VALID_PADDING_TYPE_STRINGS:
        error_string = (
            '\n{0:s}\nValid padding types (listed above) do not include '
            '"{1:s}".'
        ).format(str(VALID_PADDING_TYPE_STRINGS), padding_type_string)

        raise ValueError(error_string)

    error_checking.assert_is_integer(num_filters)
    error_checking.assert_is_geq(num_filters, 1)

    error_checking.assert_is_integer(num_kernel_dimensions)
    error_checking.assert_is_geq(num_kernel_dimensions, 1)
    error_checking.assert_is_leq(num_kernel_dimensions, 3)

    if num_kernel_dimensions >= 2:
        error_checking.assert_is_integer(num_kernel_columns)
        error_checking.assert_is_geq(num_kernel_columns, 3)
        error_checking.assert_is_integer(num_columns_per_stride)
        error_checking.assert_is_geq(num_columns_per_stride, 1)
        error_checking.assert_is_leq(
            num_columns_per_stride, num_kernel_columns)

    if num_kernel_dimensions == 3:
        error_checking.assert_is_integer(num_kernel_heights)
        error_checking.assert_is_geq(num_kernel_heights, 3)
        error_checking.assert_is_integer(num_heights_per_stride)
        error_checking.assert_is_geq(num_heights_per_stride, 1)
        error_checking.assert_is_leq(
            num_heights_per_stride, num_kernel_heights)
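
# Usage sketch (illustrative only): arguments for a typical 2-D convolution
# layer with a 3-by-3 kernel, stride of 1, and 32 output filters.  The padding
# string is assumed to be the first entry of VALID_PADDING_TYPE_STRINGS; the
# demo function name is hypothetical.
def _demo_check_convolution_options():
    _check_convolution_options(
        num_kernel_rows=3, num_rows_per_stride=1,
        padding_type_string=VALID_PADDING_TYPE_STRINGS[0], num_filters=32,
        num_kernel_dimensions=2, num_kernel_columns=3,
        num_columns_per_stride=1)
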
def find_file(directory_name, year, raise_error_if_missing=True):
    """Finds NetCDF file with RRTM data.

    :param directory_name: Name of directory where file is expected.
    :param year: Year (integer).
    :param raise_error_if_missing: Boolean flag.  If file is missing and
        `raise_error_if_missing == True`, will throw error.  If file is missing
        and `raise_error_if_missing == False`, will return *expected* file
        path.
    :return: rrtm_file_name: File path.
    :raises: ValueError: if file is missing and
        `raise_error_if_missing == True`.
    """

    error_checking.assert_is_string(directory_name)
    error_checking.assert_is_integer(year)
    error_checking.assert_is_boolean(raise_error_if_missing)

    rrtm_file_name = '{0:s}/rrtm_output_{1:04d}.nc'.format(
        directory_name, year)

    if raise_error_if_missing and not os.path.isfile(rrtm_file_name):
        error_string = 'Cannot find file.  Expected at: "{0:s}"'.format(
            rrtm_file_name)
        raise ValueError(error_string)

    return rrtm_file_name
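
# Usage sketch (illustrative only): the directory name below is a placeholder.
# For year = 2017, this returns a path ending in "rrtm_output_2017.nc",
# without requiring the file to exist.
def _demo_find_rrtm_file():
    this_file_name = find_file(
        directory_name='/path/to/rrtm_data', year=2017,
        raise_error_if_missing=False)
    print(this_file_name)
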
def read_target_values(netcdf_file_name, target_name):
    """Reads target values from NetCDF file.

    N = number of storm objects

    :param netcdf_file_name: Path to input file.
    :param target_name: Name of target variable.
    :return: storm_label_dict: Dictionary with the following keys.
    storm_label_dict['storm_ids']: length-N list of storm IDs.
    storm_label_dict['valid_times_unix_sec']: length-N numpy array of valid
        times.
    storm_label_dict['target_values']: length-N numpy array with values of
        `target_name`.
    """

    error_checking.assert_is_string(target_name)
    netcdf_dataset = netcdf_io.open_netcdf(
        netcdf_file_name=netcdf_file_name, raise_error_if_fails=True)

    storm_ids = netCDF4.chartostring(
        netcdf_dataset.variables[STORM_IDS_KEY][:])
    valid_times_unix_sec = numpy.array(
        netcdf_dataset.variables[VALID_TIMES_KEY][:], dtype=int)
    target_values = numpy.array(
        netcdf_dataset.variables[target_name][:], dtype=int)

    netcdf_dataset.close()

    return {
        STORM_IDS_KEY: [str(s) for s in storm_ids],
        VALID_TIMES_KEY: valid_times_unix_sec,
        TARGET_VALUES_KEY: target_values
    }
def label_axes(axes_object, label_string, font_size=DEFAULT_LABEL_FONT_SIZE,
               font_colour=DEFAULT_LABEL_FONT_COLOUR,
               x_coord_normalized=DEFAULT_LABEL_X_NORMALIZED,
               y_coord_normalized=DEFAULT_LABEL_Y_NORMALIZED):
    """Adds text label to axes.

    :param axes_object: Axes (instance of
        `matplotlib.axes._subplots.AxesSubplot`).
    :param label_string: Label.
    :param font_size: Font size.
    :param font_colour: Font colour.
    :param x_coord_normalized: Normalized x-coordinate (from 0...1, where 1 is
        the right side).
    :param y_coord_normalized: Normalized y-coordinate (from 0...1, where 1 is
        the top).
    """

    error_checking.assert_is_string(label_string)

    # error_checking.assert_is_geq(x_coord_normalized, 0.)
    # error_checking.assert_is_leq(x_coord_normalized, 1.)
    # error_checking.assert_is_geq(y_coord_normalized, 0.)
    # error_checking.assert_is_leq(y_coord_normalized, 1.)

    axes_object.text(
        x_coord_normalized, y_coord_normalized, label_string,
        fontsize=font_size, color=colour_from_numpy_to_tuple(font_colour),
        horizontalalignment='right', verticalalignment='bottom',
        transform=axes_object.transAxes)
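
# Usage sketch (illustrative only): adds the panel label "(a)" to a new set of
# matplotlib axes, using the module defaults for font size, colour, and
# position.  The demo function name is hypothetical.
def _demo_label_axes():
    import matplotlib.pyplot as pyplot

    _, this_axes_object = pyplot.subplots(1, 1)
    label_axes(axes_object=this_axes_object, label_string='(a)')
    pyplot.show()
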
def download_raw_file(unix_time_sec, secondary_source,
                      top_local_directory_name, protocol, user_name=None,
                      password=None, raise_error_if_fails=True):
    """Downloads raw file from either FTP or HTTP server.

    :param unix_time_sec: Valid time.
    :param secondary_source: String ID for secondary data source.
    :param top_local_directory_name: Name of top-level directory with raw MADIS
        files on local machine.
    :param protocol: Protocol (either "http" or "ftp").
    :param user_name: User name on FTP or HTTP server.  To login anonymously,
        leave this as None.
    :param password: Password on FTP or HTTP server.  To login anonymously,
        leave this as None.
    :param raise_error_if_fails: Boolean flag.  If True and download fails,
        this method will raise an error.
    :return: local_gzip_file_name: Local path to file that was just downloaded.
        If download failed but raise_error_if_fails = False, this will be None.
    :raises: ValueError: if protocol is neither "ftp" nor "http".
    """

    error_checking.assert_is_string(protocol)
    if protocol not in ['ftp', 'http']:
        error_string = (
            'Protocol should be either "ftp" or "http", not "{0:s}"'
        ).format(protocol)
        raise ValueError(error_string)

    raw_wind_io.check_data_sources(
        raw_wind_io.MADIS_DATA_SOURCE, secondary_source)

    online_file_name = _get_online_file_name(
        unix_time_sec=unix_time_sec, secondary_source=secondary_source,
        protocol=protocol)

    local_gzip_file_name = find_local_raw_file(
        unix_time_sec=unix_time_sec, secondary_source=secondary_source,
        top_directory_name=top_local_directory_name,
        raise_error_if_missing=False)

    if protocol == 'ftp':
        return downloads.download_file_via_ftp(
            server_name=FTP_SERVER_NAME, user_name=user_name,
            password=password, ftp_file_name=online_file_name,
            local_file_name=local_gzip_file_name,
            raise_error_if_fails=raise_error_if_fails)

    return downloads.download_files_via_http(
        online_file_names=[online_file_name],
        local_file_names=[local_gzip_file_name],
        user_name=user_name, password=password, host_name=HTTP_HOST_NAME,
        raise_error_if_fails=raise_error_if_fails)[0]
def merge_winds_and_station_metadata(wind_table, station_metadata_table,
                                     station_id):
    """Merges wind data with metadata for observing stations.

    :param wind_table: pandas DataFrame created by
        `read_1minute_winds_from_raw_file`,
        `read_5minute_winds_from_raw_file`, or
        `raw_wind_io.sustained_and_gust_to_uv_max`.
    :param station_metadata_table: pandas DataFrame created by
        `read_station_metadata_from_raw_file`.
    :param station_id: String ID for station in wind_table.
    :return: wind_table: Same as input, but with the following extra columns.
    wind_table.station_id: String ID for station.
    wind_table.station_name: Verbose name for station.
    wind_table.latitude_deg: Latitude (deg N).
    wind_table.longitude_deg: Longitude (deg E).
    wind_table.elevation_m_asl: Elevation (metres above sea level).
    """

    error_checking.assert_is_string(station_id)

    station_id_list = [station_id] * len(wind_table.index)
    argument_dict = {raw_wind_io.STATION_ID_COLUMN: station_id_list}
    wind_table = wind_table.assign(**argument_dict)

    return wind_table.merge(
        station_metadata_table[METADATA_COLUMNS_TO_MERGE],
        on=raw_wind_io.STATION_ID_COLUMN, how='inner')
def mkdir_recursive_if_necessary(directory_name=None, file_name=None):
    """Creates directory if necessary (i.e., doesn't already exist).

    This method checks for the argument `directory_name` first.  If
    `directory_name` is None, this method checks for `file_name` and extracts
    the directory.

    :param directory_name: Path to local directory.
    :param file_name: Path to local file.
    """

    if directory_name is None:
        error_checking.assert_is_string(file_name)
        directory_name = os.path.dirname(file_name)
    else:
        error_checking.assert_is_string(directory_name)

    if directory_name == '':
        return

    try:
        os.makedirs(directory_name)
    except OSError as this_error:
        if this_error.errno == errno.EEXIST and os.path.isdir(directory_name):
            pass
        else:
            raise
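
# Usage sketch (illustrative only): both calls create "/tmp/demo_output" if it
# does not already exist -- the first from a directory name, the second from a
# file path inside that directory.  The paths and demo function name are
# placeholders.
def _demo_mkdir_recursive_if_necessary():
    mkdir_recursive_if_necessary(directory_name='/tmp/demo_output')
    mkdir_recursive_if_necessary(file_name='/tmp/demo_output/results.csv')
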
def find_file(valid_time_unix_sec, top_directory_name,
              raise_error_if_missing=True):
    """Finds file (text file in WPC format) on local machine.

    This file should contain positions of cyclones, anticyclones, fronts, etc.
    for a single valid time.

    :param valid_time_unix_sec: Valid time.
    :param top_directory_name: Name of top-level directory with WPC bulletins.
    :param raise_error_if_missing: Boolean flag.  If file is missing and
        raise_error_if_missing = True, this method will error out.  If file is
        missing and raise_error_if_missing = False, this method will return the
        *expected* path to the file.
    :return: bulletin_file_name: Path to file.  If file is missing and
        raise_error_if_missing = False, this is the *expected* path.
    :raises: ValueError: if file is missing and raise_error_if_missing = True.
    """

    error_checking.assert_is_string(top_directory_name)
    error_checking.assert_is_boolean(raise_error_if_missing)

    valid_time_string = time_conversion.unix_sec_to_string(
        valid_time_unix_sec, TIME_FORMAT_IN_FILE_NAME)

    bulletin_file_name = '{0:s}/{1:s}/{2:s}_{3:s}'.format(
        top_directory_name, valid_time_string[:4], PATHLESS_FILE_NAME_PREFIX,
        valid_time_string)

    if raise_error_if_missing and not os.path.isfile(bulletin_file_name):
        error_string = (
            'Cannot find file.  Expected at location: "{0:s}"'
        ).format(bulletin_file_name)
        raise ValueError(error_string)

    return bulletin_file_name
def check_metadata(layer_name, neuron_indices, ideal_activation,
                   num_iterations, learning_rate, l2_weight):
    """Checks metadata for errors.

    :param layer_name: Name of layer with relevant neuron.
    :param neuron_indices: 1-D numpy array with indices of relevant neuron.
        Must have length D - 1, where D = number of dimensions in layer output.
        The first dimension is the batch dimension, which always has length
        `None` in Keras.
    :param ideal_activation: Ideal neuron activation, used to define loss
        function.  The loss function will be
        (neuron_activation - ideal_activation)**2.
    :param num_iterations: Number of iterations for gradient descent.
    :param learning_rate: Learning rate for gradient descent.
    :param l2_weight: L2 weight (penalty for difference between initial and
        final predictor matrix) in loss function.
    """

    error_checking.assert_is_string(layer_name)

    error_checking.assert_is_integer_numpy_array(neuron_indices)
    error_checking.assert_is_geq_numpy_array(neuron_indices, 0)
    error_checking.assert_is_numpy_array(neuron_indices, num_dimensions=1)

    error_checking.assert_is_not_nan(ideal_activation)

    error_checking.assert_is_integer(num_iterations)
    error_checking.assert_is_greater(num_iterations, 0)

    error_checking.assert_is_greater(learning_rate, 0.)
    error_checking.assert_is_less_than(learning_rate, 1.)

    error_checking.assert_is_geq(l2_weight, 0.)
def check_component_metadata(
        component_type_string, target_class=None, layer_name=None,
        neuron_indices=None, channel_index=None):
    """Checks metadata for model component.

    :param component_type_string: Component type (must be accepted by
        `check_component_type`).
    :param target_class: [used only if component_type_string = "class"]
        Target class.  Integer from 0...(K - 1), where K = number of classes.
    :param layer_name: [used only if component_type_string = "neuron" or
        "channel"] Name of layer containing neuron or channel.
    :param neuron_indices: [used only if component_type_string = "neuron"]
        1-D numpy array with indices of neuron.
    :param channel_index: [used only if component_type_string = "channel"]
        Index of channel.
    """

    check_component_type(component_type_string)

    if component_type_string == CLASS_COMPONENT_TYPE_STRING:
        error_checking.assert_is_integer(target_class)
        error_checking.assert_is_geq(target_class, 0)

    if component_type_string in [
            NEURON_COMPONENT_TYPE_STRING, CHANNEL_COMPONENT_TYPE_STRING
    ]:
        error_checking.assert_is_string(layer_name)

    if component_type_string == NEURON_COMPONENT_TYPE_STRING:
        error_checking.assert_is_integer_numpy_array(neuron_indices)
        error_checking.assert_is_geq_numpy_array(neuron_indices, 0)
        error_checking.assert_is_numpy_array(neuron_indices, num_dimensions=1)

    if component_type_string == CHANNEL_COMPONENT_TYPE_STRING:
        error_checking.assert_is_integer(channel_index)
        error_checking.assert_is_geq(channel_index, 0)
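
# Usage sketch (illustrative only): metadata for one neuron in a layer named
# "dense_3" (the layer name is arbitrary).  NEURON_COMPONENT_TYPE_STRING is
# assumed to be the module constant accepted by `check_component_type`.
def _demo_check_component_metadata():
    check_component_metadata(
        component_type_string=NEURON_COMPONENT_TYPE_STRING,
        layer_name='dense_3',
        neuron_indices=numpy.array([0], dtype=int))
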
def _check_basemap_args(min_latitude_deg, max_latitude_deg, min_longitude_deg,
                        max_longitude_deg, resolution_string):
    """Error-checks input args for creating basemap.

    Latitudes must be in deg N, and longitudes must be in deg E.

    Both output values are in deg E, with positive values (180-360) in the
    western hemisphere.  The inputs may be positive or negative in the western
    hemisphere.

    :param min_latitude_deg: Minimum latitude in map (bottom-left corner).
    :param max_latitude_deg: Max latitude in map (top-right corner).
    :param min_longitude_deg: Minimum longitude in map (bottom-left corner).
    :param max_longitude_deg: Max longitude in map (top-right corner).
    :param resolution_string: Resolution of boundaries (political borders,
        lakes, rivers, etc.) in basemap.  Options are "c" for crude, "l" for
        low, "i" for intermediate, "h" for high, and "f" for full.
    :return: min_longitude_deg: Minimum longitude (deg E, positive in western
        hemisphere).
    :return: max_longitude_deg: Max longitude (deg E, positive in western
        hemisphere).
    """

    error_checking.assert_is_valid_latitude(min_latitude_deg)
    error_checking.assert_is_valid_latitude(max_latitude_deg)
    error_checking.assert_is_greater(max_latitude_deg, min_latitude_deg)

    min_longitude_deg = lng_conversion.convert_lng_positive_in_west(
        min_longitude_deg)
    max_longitude_deg = lng_conversion.convert_lng_positive_in_west(
        max_longitude_deg)
    error_checking.assert_is_greater(max_longitude_deg, min_longitude_deg)

    error_checking.assert_is_string(resolution_string)

    return min_longitude_deg, max_longitude_deg
def find_file(year, directory_name, raise_error_if_missing=True):
    """Finds Storm Events file.

    This file should contain all storm reports for one year.

    :param year: Year (integer).
    :param directory_name: Name of directory with Storm Events files.
    :param raise_error_if_missing: Boolean flag.  If file is missing and
        raise_error_if_missing = True, this method will error out.
    :return: storm_event_file_name: Path to Storm Events file.  If file is
        missing and raise_error_if_missing = False, this will be the *expected*
        path.
    :raises: ValueError: if file is missing and raise_error_if_missing = True.
    """

    error_checking.assert_is_integer(year)
    error_checking.assert_is_string(directory_name)
    error_checking.assert_is_boolean(raise_error_if_missing)

    storm_event_file_name = '{0:s}/{1:s}{2:s}{3:s}'.format(
        directory_name, PATHLESS_FILE_PREFIX, _year_number_to_string(year),
        FILE_EXTENSION)

    if raise_error_if_missing and not os.path.isfile(storm_event_file_name):
        error_string = (
            'Cannot find Storm Events file.  Expected at: {0:s}'
        ).format(storm_event_file_name)
        raise ValueError(error_string)

    return storm_event_file_name
def find_processed_file(directory_name, year, raise_error_if_missing=True):
    """Finds processed file with tornado reports.

    See `write_processed_file` for the definition of a "processed file".

    :param directory_name: Name of directory.
    :param year: Year (integer).
    :param raise_error_if_missing: Boolean flag.  If file is missing and
        raise_error_if_missing = True, this method will error out.
    :return: processed_file_name: Path to file.  If file is missing and
        raise_error_if_missing = False, this will be the *expected* path.
    :raises: ValueError: if file is missing and raise_error_if_missing = True.
    """

    error_checking.assert_is_string(directory_name)
    error_checking.assert_is_integer(year)
    error_checking.assert_is_boolean(raise_error_if_missing)

    processed_file_name = '{0:s}/tornado_reports_{1:04d}.csv'.format(
        directory_name, year)

    if raise_error_if_missing and not os.path.isfile(processed_file_name):
        error_string = (
            'Cannot find processed file with tornado reports.  Expected at: '
            '{0:s}'
        ).format(processed_file_name)
        raise ValueError(error_string)

    return processed_file_name
def check_field_name(field_name, require_standard=False):
    """Ensures that name of model field is recognized.

    :param field_name: Field name in GewitterGefahr format (not the original
        NetCDF format).
    :param require_standard: Boolean flag.  If True, `field_name` must be in
        `STANDARD_FIELD_NAMES`.  If False, `field_name` must be in
        `FIELD_NAMES`.
    :raises: ValueError: if field name is unrecognized.
    """

    error_checking.assert_is_string(field_name)
    error_checking.assert_is_boolean(require_standard)

    if require_standard:
        valid_field_names = STANDARD_FIELD_NAMES
    else:
        valid_field_names = FIELD_NAMES

    if field_name not in valid_field_names:
        error_string = (
            '\n\n{0:s}\n\nValid field names (listed above) do not include '
            '"{1:s}".'
        ).format(str(valid_field_names), field_name)
        raise ValueError(error_string)
def _check_input_data_for_learning(
        input_table, feature_names, target_name=None):
    """Checks input data (to machine-learning model) for errors.

    :param input_table: pandas DataFrame, where each row is one example (data
        point).
    :param feature_names: 1-D list with names of features (predictor
        variables).  Each feature must be a column of input_table.
    :param target_name: Name of target variable (predictand).  Must be a column
        of input_table.  All values must be 0 or 1.
    """

    error_checking.assert_is_string_list(feature_names)
    error_checking.assert_is_numpy_array(
        numpy.array(feature_names), num_dimensions=1)

    if target_name is None:
        error_checking.assert_columns_in_dataframe(input_table, feature_names)
        return

    error_checking.assert_is_string(target_name)
    error_checking.assert_columns_in_dataframe(
        input_table, feature_names + [target_name])

    target_values = input_table[target_name].values
    error_checking.assert_is_integer_numpy_array(target_values)
    error_checking.assert_is_geq_numpy_array(target_values, 0)
    error_checking.assert_is_leq_numpy_array(target_values, 1)
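
# Usage sketch (illustrative only): a three-example DataFrame with two features
# and a binary target.  The column names are arbitrary placeholders, not part
# of the library.
def _demo_check_input_data_for_learning():
    import pandas

    this_input_table = pandas.DataFrame({
        'reflectivity_dbz': [35.0, 50.0, 20.0],
        'shear_s01': [0.001, 0.004, 0.002],
        'tornado_flag': [0, 1, 0]
    })

    # Passes silently if all checks succeed; raises otherwise.
    _check_input_data_for_learning(
        input_table=this_input_table,
        feature_names=['reflectivity_dbz', 'shear_s01'],
        target_name='tornado_flag')
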