예제 #1
0
def _run(upconvnet_file_name, top_example_dir_name, first_spc_date_string,
         last_spc_date_string, num_examples_per_date, downsampling_keys,
         downsampling_values, top_output_dir_name):
    """Applies trained upconvnet to each example file in a range of SPC dates.

    This is effectively the main method.  It reads the upconvnet and the CNN
    named in the upconvnet's metadata, overrides some of the CNN's training
    options in place, finds example files and calls
    `_apply_upconvnet_one_file` once per file.

    :param upconvnet_file_name: See documentation at top of file.
    :param top_example_dir_name: Same.
    :param first_spc_date_string: Same.
    :param last_spc_date_string: Same.
    :param num_examples_per_date: Same.
    :param downsampling_keys: Same.
    :param downsampling_values: Same.
    :param top_output_dir_name: Same.
    """

    # Read the upconvnet and its metadata.
    print('Reading upconvnet from: "{0:s}"...'.format(upconvnet_file_name))
    upconvnet_model_object = cnn.read_model(upconvnet_file_name)
    upconvnet_metafile_name = cnn.find_metafile(upconvnet_file_name)

    progress_message = 'Reading upconvnet metadata from: "{0:s}"...'.format(
        upconvnet_metafile_name)
    print(progress_message)

    upconvnet_metadata_dict = upconvnet.read_model_metadata(
        upconvnet_metafile_name)

    # The metadata record which CNN the upconvnet was trained against.
    cnn_file_name = upconvnet_metadata_dict[upconvnet.CNN_FILE_KEY]

    print('Reading CNN from: "{0:s}"...'.format(cnn_file_name))
    cnn_model_object = cnn.read_model(cnn_file_name)
    cnn_metafile_name = cnn.find_metafile(cnn_file_name)

    print('Reading CNN metadata from: "{0:s}"...'.format(cnn_metafile_name))
    cnn_metadata_dict = cnn.read_model_metadata(cnn_metafile_name)
    option_dict = cnn_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY]

    # Fewer than two keys means "no downsampling".
    sampling_fraction_dict = (
        dict(zip(downsampling_keys, downsampling_values))
        if len(downsampling_keys) > 1
        else None
    )

    # `option_dict` lives inside `cnn_metadata_dict`, so these overrides are
    # seen by `_apply_upconvnet_one_file` through the metadata passed below.
    option_dict.update({
        trainval_io.SAMPLING_FRACTIONS_KEY: sampling_fraction_dict,
        trainval_io.NUM_EXAMPLES_PER_BATCH_KEY: NUM_EXAMPLES_PER_BATCH,
        trainval_io.FIRST_STORM_TIME_KEY: EARLY_TIME_UNIX_SEC,
        trainval_io.LAST_STORM_TIME_KEY: LATE_TIME_UNIX_SEC
    })

    # Find unshuffled example files; missing files are tolerated.
    example_file_names = input_examples.find_many_example_files(
        top_directory_name=top_example_dir_name,
        shuffled=False,
        first_spc_date_string=first_spc_date_string,
        last_spc_date_string=last_spc_date_string,
        raise_error_if_any_missing=False)

    # Apply the upconvnet to one file at a time.
    for one_file_name in example_file_names:
        feature_layer_name = upconvnet_metadata_dict[
            upconvnet.CNN_FEATURE_LAYER_KEY]

        _apply_upconvnet_one_file(
            example_file_name=one_file_name,
            num_examples=num_examples_per_date,
            upconvnet_model_object=upconvnet_model_object,
            cnn_model_object=cnn_model_object,
            cnn_metadata_dict=cnn_metadata_dict,
            cnn_feature_layer_name=feature_layer_name,
            upconvnet_file_name=upconvnet_file_name,
            top_output_dir_name=top_output_dir_name)

        print(SEPARATOR_STRING)
예제 #2
0
def _run(input_cnn_file_name, input_upconvnet_file_name,
         cnn_feature_layer_name, top_training_dir_name,
         first_training_time_string, last_training_time_string,
         top_validation_dir_name, first_validation_time_string,
         last_validation_time_string, num_examples_per_batch, num_epochs,
         num_training_batches_per_epoch, num_validation_batches_per_epoch,
         output_model_file_name):
    """Trains upconvnet.

    This is effectively the main method.  It converts the time strings to Unix
    seconds, finds shuffled training and validation files, reads the trained
    CNN and its metadata, reads the upconvnet architecture (then clones and
    compiles it), writes upconvnet metadata next to the output model file and
    finally trains the upconvnet.

    All progress messages use single-argument `print(...)`, which behaves the
    same under Python 2 and Python 3.

    :param input_cnn_file_name: See documentation at top of file.
    :param input_upconvnet_file_name: Same.
    :param cnn_feature_layer_name: Same.
    :param top_training_dir_name: Same.
    :param first_training_time_string: Same.
    :param last_training_time_string: Same.
    :param top_validation_dir_name: Same.
    :param first_validation_time_string: Same.
    :param last_validation_time_string: Same.
    :param num_examples_per_batch: Same.
    :param num_epochs: Same.
    :param num_training_batches_per_epoch: Same.
    :param num_validation_batches_per_epoch: Same.
    :param output_model_file_name: Same.
    """

    # Convert time limits from strings to Unix seconds.
    first_training_time_unix_sec = time_conversion.string_to_unix_sec(
        first_training_time_string, TIME_FORMAT)
    last_training_time_unix_sec = time_conversion.string_to_unix_sec(
        last_training_time_string, TIME_FORMAT)

    first_validation_time_unix_sec = time_conversion.string_to_unix_sec(
        first_validation_time_string, TIME_FORMAT)
    last_validation_time_unix_sec = time_conversion.string_to_unix_sec(
        last_validation_time_string, TIME_FORMAT)

    # Find training and validation files (shuffled, selected by batch number).
    training_file_names = input_examples.find_many_example_files(
        top_directory_name=top_training_dir_name,
        shuffled=True,
        first_batch_number=FIRST_BATCH_NUMBER,
        last_batch_number=LAST_BATCH_NUMBER,
        raise_error_if_any_missing=False)

    validation_file_names = input_examples.find_many_example_files(
        top_directory_name=top_validation_dir_name,
        shuffled=True,
        first_batch_number=FIRST_BATCH_NUMBER,
        last_batch_number=LAST_BATCH_NUMBER,
        raise_error_if_any_missing=False)

    # Read trained CNN.
    print('Reading trained CNN from: "{0:s}"...'.format(input_cnn_file_name))
    cnn_model_object = cnn.read_model(input_cnn_file_name)
    cnn_model_object.summary()
    print(SEPARATOR_STRING)

    # os.path.join keeps the path relative when the model file name has no
    # directory part; plain '{dir}/model_metadata.p' formatting would yield
    # '/model_metadata.p' (filesystem root) in that case.
    cnn_metafile_name = os.path.join(
        os.path.split(input_cnn_file_name)[0], 'model_metadata.p')

    print('Reading CNN metadata from: "{0:s}"...'.format(cnn_metafile_name))
    cnn_metadata_dict = cnn.read_model_metadata(cnn_metafile_name)

    # Read upconvnet architecture and clone it before compiling.
    print('Reading upconvnet architecture from: "{0:s}"...'.format(
        input_upconvnet_file_name))
    upconvnet_model_object = cnn.read_model(input_upconvnet_file_name)
    upconvnet_model_object = keras.models.clone_model(upconvnet_model_object)

    # TODO(thunderhoser): This is a HACK.
    upconvnet_model_object.compile(loss=keras.losses.mean_squared_error,
                                   optimizer=keras.optimizers.Adam())

    print(SEPARATOR_STRING)
    upconvnet_model_object.summary()
    print(SEPARATOR_STRING)

    # Metadata go in the same directory as the output model file.
    upconvnet_metafile_name = os.path.join(
        os.path.split(output_model_file_name)[0], 'model_metadata.p')

    print('Writing upconvnet metadata to: "{0:s}"...'.format(
        upconvnet_metafile_name))

    upconvnet.write_model_metadata(
        cnn_file_name=input_cnn_file_name,
        cnn_feature_layer_name=cnn_feature_layer_name,
        num_epochs=num_epochs,
        num_examples_per_batch=num_examples_per_batch,
        num_training_batches_per_epoch=num_training_batches_per_epoch,
        training_example_file_names=training_file_names,
        first_training_time_unix_sec=first_training_time_unix_sec,
        last_training_time_unix_sec=last_training_time_unix_sec,
        num_validation_batches_per_epoch=num_validation_batches_per_epoch,
        validation_example_file_names=validation_file_names,
        first_validation_time_unix_sec=first_validation_time_unix_sec,
        last_validation_time_unix_sec=last_validation_time_unix_sec,
        pickle_file_name=upconvnet_metafile_name)

    print(SEPARATOR_STRING)

    # Train the upconvnet.
    upconvnet.train_upconvnet(
        upconvnet_model_object=upconvnet_model_object,
        output_model_file_name=output_model_file_name,
        cnn_model_object=cnn_model_object,
        cnn_feature_layer_name=cnn_feature_layer_name,
        cnn_metadata_dict=cnn_metadata_dict,
        num_epochs=num_epochs,
        num_examples_per_batch=num_examples_per_batch,
        num_training_batches_per_epoch=num_training_batches_per_epoch,
        training_example_file_names=training_file_names,
        first_training_time_unix_sec=first_training_time_unix_sec,
        last_training_time_unix_sec=last_training_time_unix_sec,
        num_validation_batches_per_epoch=num_validation_batches_per_epoch,
        validation_example_file_names=validation_file_names,
        first_validation_time_unix_sec=first_validation_time_unix_sec,
        last_validation_time_unix_sec=last_validation_time_unix_sec)
예제 #3
0
def _run(input_cnn_file_name, input_upconvnet_file_name,
         cnn_feature_layer_name, top_training_dir_name,
         first_training_time_string, last_training_time_string,
         top_validation_dir_name, first_validation_time_string,
         last_validation_time_string, num_examples_per_batch, num_epochs,
         num_training_batches_per_epoch, num_validation_batches_per_epoch,
         output_dir_name):
    """Trains upconvnet and saves results to an output directory.

    This is effectively the main method.  It creates the output directory,
    finds training and validation files, reads the trained CNN and the
    upconvnet architecture, writes upconvnet metadata and then trains.

    :param input_cnn_file_name: See documentation at top of file.
    :param input_upconvnet_file_name: Same.
    :param cnn_feature_layer_name: Same.
    :param top_training_dir_name: Same.
    :param first_training_time_string: Same.
    :param last_training_time_string: Same.
    :param top_validation_dir_name: Same.
    :param first_validation_time_string: Same.
    :param last_validation_time_string: Same.
    :param num_examples_per_batch: Same.
    :param num_epochs: Same.
    :param num_training_batches_per_epoch: Same.
    :param num_validation_batches_per_epoch: Same.
    :param output_dir_name: Same.
    """

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    # Convert the four time limits from strings to Unix seconds.
    first_training_time_unix_sec = time_conversion.string_to_unix_sec(
        first_training_time_string, TIME_FORMAT)
    last_training_time_unix_sec = time_conversion.string_to_unix_sec(
        last_training_time_string, TIME_FORMAT)
    first_validation_time_unix_sec = time_conversion.string_to_unix_sec(
        first_validation_time_string, TIME_FORMAT)
    last_validation_time_unix_sec = time_conversion.string_to_unix_sec(
        last_validation_time_string, TIME_FORMAT)

    # Locate shuffled example files for training, then for validation.
    training_file_names = input_examples.find_many_example_files(
        top_directory_name=top_training_dir_name,
        shuffled=True,
        first_batch_number=FIRST_BATCH_NUMBER,
        last_batch_number=LAST_BATCH_NUMBER,
        raise_error_if_any_missing=False)

    validation_file_names = input_examples.find_many_example_files(
        top_directory_name=top_validation_dir_name,
        shuffled=True,
        first_batch_number=FIRST_BATCH_NUMBER,
        last_batch_number=LAST_BATCH_NUMBER,
        raise_error_if_any_missing=False)

    # Load the trained CNN and its metadata (the metafile must exist).
    print('Reading trained CNN from: "{0:s}"...'.format(input_cnn_file_name))
    cnn_model_object = cnn.read_model(input_cnn_file_name)
    cnn_model_object.summary()
    print(SEPARATOR_STRING)

    cnn_metafile_name = cnn.find_metafile(
        model_file_name=input_cnn_file_name, raise_error_if_missing=True)

    print('Reading CNN metadata from: "{0:s}"...'.format(cnn_metafile_name))
    cnn_metadata_dict = cnn.read_model_metadata(cnn_metafile_name)

    # Load the upconvnet architecture.  The model is compiled as read; it is
    # not cloned first.
    architecture_message = (
        'Reading upconvnet architecture from: "{0:s}"...'
    ).format(input_upconvnet_file_name)
    print(architecture_message)

    upconvnet_model_object = cnn.read_model(input_upconvnet_file_name)

    # TODO(thunderhoser): This is a HACK.
    upconvnet_model_object.compile(
        loss=keras.losses.mean_squared_error,
        optimizer=keras.optimizers.Adam()
    )

    upconvnet_model_object.summary()
    print(SEPARATOR_STRING)

    # A placeholder model name inside the output directory is handed to
    # `cnn.find_metafile` to obtain the metafile path; the metafile need not
    # exist yet.
    placeholder_model_file_name = '{0:s}/foo.h5'.format(output_dir_name)
    upconvnet_metafile_name = cnn.find_metafile(
        model_file_name=placeholder_model_file_name,
        raise_error_if_missing=False)

    print('Writing upconvnet metadata to: "{0:s}"...'.format(
        upconvnet_metafile_name))

    # Keyword arguments shared by the metadata writer and the trainer.
    shared_kwargs = dict(
        cnn_feature_layer_name=cnn_feature_layer_name,
        num_epochs=num_epochs,
        num_examples_per_batch=num_examples_per_batch,
        num_training_batches_per_epoch=num_training_batches_per_epoch,
        training_example_file_names=training_file_names,
        first_training_time_unix_sec=first_training_time_unix_sec,
        last_training_time_unix_sec=last_training_time_unix_sec,
        num_validation_batches_per_epoch=num_validation_batches_per_epoch,
        validation_example_file_names=validation_file_names,
        first_validation_time_unix_sec=first_validation_time_unix_sec,
        last_validation_time_unix_sec=last_validation_time_unix_sec
    )

    upconvnet.write_model_metadata(
        cnn_file_name=input_cnn_file_name,
        pickle_file_name=upconvnet_metafile_name,
        **shared_kwargs)

    print(SEPARATOR_STRING)

    upconvnet.train_upconvnet(
        upconvnet_model_object=upconvnet_model_object,
        output_dir_name=output_dir_name,
        cnn_model_object=cnn_model_object,
        cnn_metadata_dict=cnn_metadata_dict,
        **shared_kwargs)
def _train_one_upconvnet(gpu_queue, argument_dict):
    """Trains one upconvolutional network.

    A GPU index is taken from `gpu_queue`, training runs with only that GPU
    visible, and the index is put back on the queue when the function exits,
    whether training succeeded or not.  The TensorFlow session is also closed
    on every exit path.

    :param gpu_queue: GPU queue (instance of `multiprocessing.Manager.Queue`).
    :param argument_dict: Dictionary of CNN arguments, where each key is an
        input arg to the script train_upconvnet.py.
    :raises: Exception: any exception raised while training is printed (with
        traceback) and then re-raised.
    """

    # Imported here rather than at module level, as in the original code.
    import keras
    from keras import backend as K
    import tensorflow
    from gewittergefahr.deep_learning import cnn
    from gewittergefahr.deep_learning import upconvnet
    from gewittergefahr.scripts import train_upconvnet

    gpu_index = -1
    session_object = None

    try:
        # Deal with GPU business.
        gpu_index = int(gpu_queue.get())
        os.environ['CUDA_VISIBLE_DEVICES'] = '{0:d}'.format(gpu_index)

        session_object = tensorflow.Session(
            config=tensorflow.ConfigProto(
                intra_op_parallelism_threads=7, inter_op_parallelism_threads=7,
                allow_soft_placement=False, log_device_placement=False,
                gpu_options=tensorflow.GPUOptions(allow_growth=True)
            )
        )

        K.set_session(session_object)

        # Write metadata.
        upconvnet_metadata_dict = _write_metadata_one_model(argument_dict)

        # Read trained CNN.
        cnn_file_name = argument_dict[train_upconvnet.CNN_FILE_ARG_NAME]
        cnn_metafile_name = cnn.find_metafile(cnn_file_name)

        print('Reading CNN metadata from: "{0:s}"...'.format(cnn_metafile_name))
        cnn_metadata_dict = cnn.read_model_metadata(cnn_metafile_name)

        upconvnet_template_file_name = argument_dict[
            train_upconvnet.UPCONVNET_FILE_ARG_NAME]

        # NOTE(review): presumably '/gpu:0' is the one GPU left visible by
        # CUDA_VISIBLE_DEVICES above -- confirm.
        with tensorflow.device('/gpu:0'):
            print('Reading trained CNN from: "{0:s}"...'.format(cnn_file_name))
            cnn_model_object = cnn.read_model(cnn_file_name)

            print('Reading upconvnet architecture from: "{0:s}"...'.format(
                upconvnet_template_file_name
            ))
            upconvnet_model_object = cnn.read_model(
                upconvnet_template_file_name)

        upconvnet_model_object.compile(
            loss=keras.losses.mean_squared_error,
            optimizer=keras.optimizers.Adam()
        )

        print(SEPARATOR_STRING)
        upconvnet_model_object.summary()
        print(SEPARATOR_STRING)

        # Train upconvnet.
        print('Training upconvnet on GPU {0:d}...'.format(gpu_index))
        print(SEPARATOR_STRING)

        upconvnet.train_upconvnet(
            upconvnet_model_object=upconvnet_model_object,
            output_dir_name=argument_dict[train_upconvnet.OUTPUT_DIR_ARG_NAME],
            cnn_model_object=cnn_model_object,
            cnn_metadata_dict=cnn_metadata_dict,
            cnn_feature_layer_name=
            upconvnet_metadata_dict[upconvnet.CNN_FEATURE_LAYER_KEY],
            num_epochs=upconvnet_metadata_dict[upconvnet.NUM_EPOCHS_KEY],
            num_examples_per_batch=
            upconvnet_metadata_dict[upconvnet.NUM_EXAMPLES_PER_BATCH_KEY],
            num_training_batches_per_epoch=
            upconvnet_metadata_dict[upconvnet.NUM_TRAINING_BATCHES_KEY],
            training_example_file_names=
            upconvnet_metadata_dict[upconvnet.TRAINING_FILES_KEY],
            first_training_time_unix_sec=
            upconvnet_metadata_dict[upconvnet.FIRST_TRAINING_TIME_KEY],
            last_training_time_unix_sec=
            upconvnet_metadata_dict[upconvnet.LAST_TRAINING_TIME_KEY],
            num_validation_batches_per_epoch=
            upconvnet_metadata_dict[upconvnet.NUM_VALIDATION_BATCHES_KEY],
            validation_example_file_names=
            upconvnet_metadata_dict[upconvnet.VALIDATION_FILES_KEY],
            first_validation_time_unix_sec=
            upconvnet_metadata_dict[upconvnet.FIRST_VALIDATION_TIME_KEY],
            last_validation_time_unix_sec=
            upconvnet_metadata_dict[upconvnet.LAST_VALIDATION_TIME_KEY]
        )

    except Exception:
        # Print the traceback here, then let the original exception propagate
        # with its traceback intact (bare `raise`).
        print(traceback.format_exc())
        raise

    finally:
        # Cleanup runs on every exit path, including KeyboardInterrupt and
        # SystemExit, which `except Exception` does not catch.
        try:
            if session_object is not None:
                session_object.close()
        finally:
            # Return the GPU even if closing the session fails; gpu_index
            # stays -1 if no GPU was ever taken from the queue.
            if gpu_index >= 0:
                gpu_queue.put(gpu_index)
예제 #5
0
def _run(model_file_name, example_file_name, first_time_string,
         last_time_string, top_output_dir_name):
    """Applies CNN to one example file.

    This is effectively the main method.  It reads the model and its metadata,
    draws one batch from the appropriate testing generator, applies the CNN to
    that batch and writes ungridded predictions.  If the generator yields
    nothing, a prediction file with zero examples is written instead.

    :param model_file_name: See documentation at top of file.
    :param example_file_name: Same.
    :param first_time_string: Same.
    :param last_time_string: Same.
    :param top_output_dir_name: Same.
    """

    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = cnn.read_model(model_file_name)

    # os.path.join keeps the path relative when `model_file_name` has no
    # directory part; plain '{dir}/model_metadata.p' formatting would yield
    # '/model_metadata.p' (filesystem root) in that case.
    model_directory_name = os.path.split(model_file_name)[0]
    model_metafile_name = os.path.join(
        model_directory_name, 'model_metadata.p')

    print('Reading metadata from: "{0:s}"...'.format(model_metafile_name))
    model_metadata_dict = cnn.read_model_metadata(model_metafile_name)
    training_option_dict = model_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY]

    first_time_unix_sec = time_conversion.string_to_unix_sec(
        first_time_string, INPUT_TIME_FORMAT)
    last_time_unix_sec = time_conversion.string_to_unix_sec(
        last_time_string, INPUT_TIME_FORMAT)

    # Override training options: no downsampling, read only the one example
    # file, keep only storm objects in the requested time window.
    training_option_dict[trainval_io.SAMPLING_FRACTIONS_KEY] = None
    training_option_dict[trainval_io.EXAMPLE_FILES_KEY] = [example_file_name]
    training_option_dict[
        trainval_io.FIRST_STORM_TIME_KEY] = first_time_unix_sec
    training_option_dict[trainval_io.LAST_STORM_TIME_KEY] = last_time_unix_sec

    # Pick the generator that matches the model type.
    if model_metadata_dict[cnn.LAYER_OPERATIONS_KEY] is not None:
        generator_object = testing_io.gridrad_generator_2d_reduced(
            option_dict=training_option_dict,
            list_of_operation_dicts=model_metadata_dict[
                cnn.LAYER_OPERATIONS_KEY],
            num_examples_total=LARGE_INTEGER)

    elif model_metadata_dict[cnn.CONV_2D3D_KEY]:
        generator_object = testing_io.myrorss_generator_2d3d(
            option_dict=training_option_dict, num_examples_total=LARGE_INTEGER)
    else:
        generator_object = testing_io.generator_2d_or_3d(
            option_dict=training_option_dict, num_examples_total=LARGE_INTEGER)

    include_soundings = (
        training_option_dict[trainval_io.SOUNDING_FIELDS_KEY] is not None
    )

    # Only the first batch is used; an exhausted generator means there are no
    # examples to predict on.
    try:
        storm_object_dict = next(generator_object)
    except StopIteration:
        storm_object_dict = None

    print(SEPARATOR_STRING)

    if storm_object_dict is not None:
        observed_labels = storm_object_dict[testing_io.TARGET_ARRAY_KEY]
        list_of_predictor_matrices = storm_object_dict[
            testing_io.INPUT_MATRICES_KEY]

        # When soundings are included, they are the last predictor matrix.
        sounding_matrix = (
            list_of_predictor_matrices[-1] if include_soundings else None
        )

        # The dedicated 2D/3D routine is used only for 2D/3D models whose
        # reflectivity is not upsampled; everything else goes through
        # `apply_2d_or_3d_cnn`.
        use_2d3d_routine = (
            model_metadata_dict[cnn.CONV_2D3D_KEY] and
            not training_option_dict[trainval_io.UPSAMPLE_REFLECTIVITY_KEY]
        )

        if use_2d3d_routine:
            class_probability_matrix = cnn.apply_2d3d_cnn(
                model_object=model_object,
                reflectivity_matrix_dbz=list_of_predictor_matrices[0],
                azimuthal_shear_matrix_s01=list_of_predictor_matrices[1],
                sounding_matrix=sounding_matrix,
                verbose=True)
        else:
            class_probability_matrix = cnn.apply_2d_or_3d_cnn(
                model_object=model_object,
                radar_image_matrix=list_of_predictor_matrices[0],
                sounding_matrix=sounding_matrix,
                verbose=True)

        print(SEPARATOR_STRING)
        num_examples = class_probability_matrix.shape[0]

        # Print deciles of the values in column 1 of the probability matrix.
        for k in range(0, 101, 10):
            print(
                '{0:d}th percentile of {1:d} forecast probs = {2:.4f}'.format(
                    k, num_examples,
                    numpy.percentile(class_probability_matrix[:, 1], k)))

        print('\n')

    # Rebuild the target name from the parameters of the training target.
    target_param_dict = target_val_utils.target_name_to_params(
        training_option_dict[trainval_io.TARGET_NAME_KEY])

    event_type_string = target_param_dict[target_val_utils.EVENT_TYPE_KEY]
    if event_type_string == linkage.TORNADO_EVENT_STRING:
        genesis_only = False
    elif event_type_string == linkage.TORNADOGENESIS_EVENT_STRING:
        genesis_only = True
    else:
        genesis_only = None

    # NOTE(review): max link distance is hard-coded to 10000 m instead of
    # being read from `target_param_dict` like the other parameters -- confirm
    # this is intended.
    target_name = target_val_utils.target_params_to_name(
        min_lead_time_sec=target_param_dict[
            target_val_utils.MIN_LEAD_TIME_KEY],
        max_lead_time_sec=target_param_dict[
            target_val_utils.MAX_LEAD_TIME_KEY],
        min_link_distance_metres=target_param_dict[
            target_val_utils.MIN_LINKAGE_DISTANCE_KEY],
        max_link_distance_metres=10000.,
        genesis_only=genesis_only)

    output_file_name = prediction_io.find_file(
        top_prediction_dir_name=top_output_dir_name,
        first_init_time_unix_sec=first_time_unix_sec,
        last_init_time_unix_sec=last_time_unix_sec,
        gridded=False,
        raise_error_if_missing=False)

    print('Writing "{0:s}" predictions to: "{1:s}"...'.format(
        target_name, output_file_name))

    if storm_object_dict is None:
        # No examples: write an empty file whose probability matrix has zero
        # rows and at least two columns.
        num_output_neurons = (
            model_object.layers[-1].output.get_shape().as_list()[-1])

        num_classes = max([num_output_neurons, 2])
        class_probability_matrix = numpy.full((0, num_classes), numpy.nan)

        prediction_io.write_ungridded_predictions(
            netcdf_file_name=output_file_name,
            class_probability_matrix=class_probability_matrix,
            storm_ids=[],
            storm_times_unix_sec=numpy.array([], dtype=int),
            target_name=target_name,
            observed_labels=numpy.array([], dtype=int))

        return

    prediction_io.write_ungridded_predictions(
        netcdf_file_name=output_file_name,
        class_probability_matrix=class_probability_matrix,
        storm_ids=storm_object_dict[testing_io.FULL_IDS_KEY],
        storm_times_unix_sec=storm_object_dict[testing_io.STORM_TIMES_KEY],
        target_name=target_name,
        observed_labels=observed_labels)
예제 #6
0
def _run(model_file_name, top_example_dir_name, first_spc_date_string,
         last_spc_date_string, num_examples, do_backwards_test,
         separate_radar_heights, downsampling_keys, downsampling_values,
         num_bootstrap_reps, output_file_name):
    """Runs permutation test for predictor importance.

    This is effectively the main method.  It reads the model and metadata,
    gathers examples from a testing generator, prints Pearson correlations
    between predictors, runs the forward or backwards permutation test and
    writes the results.

    :param model_file_name: See documentation at top of file.
    :param top_example_dir_name: Same.
    :param first_spc_date_string: Same.
    :param last_spc_date_string: Same.
    :param num_examples: Same.
    :param do_backwards_test: Same.
    :param separate_radar_heights: Same.
    :param downsampling_keys: Same.
    :param downsampling_values: Same.
    :param num_bootstrap_reps: Same.
    :param output_file_name: Same.
    """

    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = cnn.read_model(model_file_name)

    metafile_name = cnn.find_metafile(model_file_name=model_file_name)
    print('Reading metadata from: "{0:s}"...'.format(metafile_name))
    cnn_metadata_dict = cnn.read_model_metadata(metafile_name)

    # Fewer than two keys means "no downsampling".
    sampling_fraction_dict = (
        dict(zip(downsampling_keys, downsampling_values))
        if len(downsampling_keys) > 1
        else None
    )

    option_dict = cnn_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY]
    option_dict[trainval_io.SAMPLING_FRACTIONS_KEY] = sampling_fraction_dict

    example_file_names = input_examples.find_many_example_files(
        top_directory_name=top_example_dir_name,
        shuffled=False,
        first_spc_date_string=first_spc_date_string,
        last_spc_date_string=last_spc_date_string,
        raise_error_if_any_missing=False)

    # Point the generator at the files and time window requested here.
    option_dict[trainval_io.EXAMPLE_FILES_KEY] = example_file_names
    option_dict[trainval_io.FIRST_STORM_TIME_KEY] = (
        time_conversion.get_start_of_spc_date(first_spc_date_string)
    )
    option_dict[trainval_io.LAST_STORM_TIME_KEY] = (
        time_conversion.get_end_of_spc_date(last_spc_date_string)
    )
    option_dict[trainval_io.NUM_EXAMPLES_PER_BATCH_KEY] = (
        NUM_EXAMPLES_PER_BATCH
    )

    # Pick the generator that matches the model type.
    layer_operation_dicts = cnn_metadata_dict[cnn.LAYER_OPERATIONS_KEY]

    if layer_operation_dicts is not None:
        generator_object = testing_io.gridrad_generator_2d_reduced(
            option_dict=option_dict,
            desired_num_examples=num_examples,
            list_of_operation_dicts=layer_operation_dicts)
    elif cnn_metadata_dict[cnn.CONV_2D3D_KEY]:
        generator_object = testing_io.myrorss_generator_2d3d(
            option_dict=option_dict, desired_num_examples=num_examples)
    else:
        generator_object = testing_io.generator_2d_or_3d(
            option_dict=option_dict, desired_num_examples=num_examples)

    # Accumulators for everything the generator yields.
    full_storm_id_strings = []
    storm_times_unix_sec = numpy.array([], dtype=int)
    target_values = numpy.array([], dtype=int)
    predictor_matrices = None

    print(SEPARATOR_STRING)

    # Draw at most one batch per example file; stop early if the generator
    # is exhausted.
    for _ in example_file_names:
        try:
            batch_dict = next(generator_object)
        except StopIteration:
            break

        print(SEPARATOR_STRING)

        full_storm_id_strings.extend(batch_dict[testing_io.FULL_IDS_KEY])
        storm_times_unix_sec = numpy.concatenate((
            storm_times_unix_sec, batch_dict[testing_io.STORM_TIMES_KEY]
        ))

        # Targets with more than one dimension are reduced to the index of
        # the largest entry along axis 1.
        batch_target_values = batch_dict[testing_io.TARGET_ARRAY_KEY]
        if len(batch_target_values.shape) > 1:
            batch_target_values = numpy.argmax(batch_target_values, axis=1)

        target_values = numpy.concatenate(
            (target_values, batch_target_values))

        batch_predictor_matrices = batch_dict[testing_io.INPUT_MATRICES_KEY]

        if predictor_matrices is None:
            predictor_matrices = copy.deepcopy(batch_predictor_matrices)
            continue

        for k, accumulated_matrix in enumerate(predictor_matrices):
            predictor_matrices[k] = numpy.concatenate(
                (accumulated_matrix, batch_predictor_matrices[k]))

    print(SEPARATOR_STRING)
    correlation_matrix, predictor_names = correlation.get_pearson_correlations(
        predictor_matrices=predictor_matrices,
        cnn_metadata_dict=cnn_metadata_dict,
        separate_radar_heights=separate_radar_heights)
    print(SEPARATOR_STRING)

    # Print each pair (i, j) with j >= i, including each predictor with
    # itself.
    num_predictors = len(predictor_names)
    message_template = (
        'Pearson correlation between "{0:s}" and "{1:s}" = {2:.3f}'
    )

    for i in range(num_predictors):
        for j in range(i, num_predictors):
            print(message_template.format(
                predictor_names[i], predictor_names[j],
                correlation_matrix[i, j]
            ))

    print(SEPARATOR_STRING)

    # Both tests take the same arguments; only the function differs.
    if do_backwards_test:
        test_function = permutation.run_backwards_test
    else:
        test_function = permutation.run_forward_test

    result_dict = test_function(
        model_object=model_object,
        predictor_matrices=predictor_matrices,
        target_values=target_values,
        cnn_metadata_dict=cnn_metadata_dict,
        cost_function=permutation_utils.negative_auc_function,
        separate_radar_heights=separate_radar_heights,
        num_bootstrap_reps=num_bootstrap_reps)

    print(SEPARATOR_STRING)

    # Attach the model file name and the examples used to the results.
    result_dict[permutation_utils.MODEL_FILE_KEY] = model_file_name
    result_dict[permutation_utils.TARGET_VALUES_KEY] = target_values
    result_dict[permutation_utils.FULL_IDS_KEY] = full_storm_id_strings
    result_dict[permutation_utils.STORM_TIMES_KEY] = storm_times_unix_sec

    print('Writing results to: "{0:s}"...'.format(output_file_name))
    permutation_utils.write_results(
        result_dict=result_dict, pickle_file_name=output_file_name)
def _run(model_file_name, top_example_dir_name, storm_metafile_name,
         num_examples, do_backwards_test, separate_radar_heights,
         num_bootstrap_reps, output_file_name):
    """Runs the permutation test on a chosen set of storm objects.

    This is effectively the main method.  It reads the model and its metadata,
    reads (and optionally subsamples) the storm objects, prints the Pearson
    correlation for every pair of predictors, runs the forward or backwards
    permutation test with negative AUC as the cost function, and writes the
    results to a file.

    :param model_file_name: Path to the trained model (read by
        `cnn.read_model`).
    :param top_example_dir_name: Top-level example directory, handed to
        `testing_io.read_predictors_specific_examples`.
    :param storm_metafile_name: Path to the file with storm IDs and times
        (read by `tracking_io.read_ids_and_times`).
    :param num_examples: Number of storm objects to use.  If this is positive
        and smaller than the number of storm objects in the file, that many
        are drawn at random (seed = `RANDOM_SEED`); otherwise all are used.
    :param do_backwards_test: If truthy, runs the backwards test; otherwise
        runs the forward test.
    :param separate_radar_heights: Passed through to the correlation and
        permutation routines.
    :param num_bootstrap_reps: Passed through to the permutation routine.
    :param output_file_name: Path to the output (pickle) file.
    """

    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = cnn.read_model(model_file_name)
    metafile_name = cnn.find_metafile(model_file_name=model_file_name)

    print('Reading metadata from: "{0:s}"...'.format(metafile_name))
    cnn_metadata_dict = cnn.read_model_metadata(metafile_name)
    training_option_dict = cnn_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY]

    print(
        'Reading storm metadata from: "{0:s}"...'.format(storm_metafile_name))
    full_storm_id_strings, storm_times_unix_sec = (
        tracking_io.read_ids_and_times(storm_metafile_name))
    print(SEPARATOR_STRING)

    num_storm_objects = len(full_storm_id_strings)

    if 0 < num_examples < num_storm_objects:
        # Fixed seed, so the same subset is drawn on every run.
        numpy.random.seed(RANDOM_SEED)
        selected_indices = numpy.random.permutation(
            num_storm_objects)[:num_examples]

        full_storm_id_strings = [
            full_storm_id_strings[k] for k in selected_indices
        ]
        storm_times_unix_sec = storm_times_unix_sec[selected_indices]

    example_dict = testing_io.read_predictors_specific_examples(
        top_example_dir_name=top_example_dir_name,
        desired_full_id_strings=full_storm_id_strings,
        desired_times_unix_sec=storm_times_unix_sec,
        option_dict=training_option_dict,
        layer_operation_dicts=cnn_metadata_dict[cnn.LAYER_OPERATIONS_KEY])
    print(SEPARATOR_STRING)

    predictor_matrices = example_dict[testing_io.INPUT_MATRICES_KEY]
    target_values = example_dict[testing_io.TARGET_ARRAY_KEY]

    correlation_matrix, predictor_names = correlation.get_pearson_correlations(
        predictor_matrices=predictor_matrices,
        cnn_metadata_dict=cnn_metadata_dict,
        separate_radar_heights=separate_radar_heights)
    print(SEPARATOR_STRING)

    # Print the upper triangle (diagonal included) of the correlation matrix.
    num_predictors = len(predictor_names)

    for i in range(num_predictors):
        for j in range(i, num_predictors):
            this_message = (
                'Pearson correlation between "{0:s}" and "{1:s}" = {2:.3f}'
            ).format(
                predictor_names[i], predictor_names[j],
                correlation_matrix[i, j])

            print(this_message)

    print(SEPARATOR_STRING)

    # Both tests take the same arguments, so pick the function, then call once.
    if do_backwards_test:
        permutation_function = permutation.run_backwards_test
    else:
        permutation_function = permutation.run_forward_test

    result_dict = permutation_function(
        model_object=model_object,
        predictor_matrices=predictor_matrices,
        target_values=target_values,
        cnn_metadata_dict=cnn_metadata_dict,
        cost_function=permutation_utils.negative_auc_function,
        separate_radar_heights=separate_radar_heights,
        num_bootstrap_reps=num_bootstrap_reps)

    print(SEPARATOR_STRING)

    # Attach bookkeeping info before writing.
    result_dict[permutation_utils.MODEL_FILE_KEY] = model_file_name
    result_dict[permutation_utils.TARGET_VALUES_KEY] = target_values
    result_dict[permutation_utils.FULL_IDS_KEY] = full_storm_id_strings
    result_dict[permutation_utils.STORM_TIMES_KEY] = storm_times_unix_sec

    print('Writing results to: "{0:s}"...'.format(output_file_name))
    permutation_utils.write_results(
        result_dict=result_dict, pickle_file_name=output_file_name)
예제 #8
0
def _run(model_file_name, top_example_dir_name, first_spc_date_string,
         last_spc_date_string, num_examples, num_bootstrap_reps,
         confidence_level, class_fraction_keys, class_fraction_values,
         output_dir_name):
    """Evaluates predictions from a binary-classification CNN.

    This is effectively the main method.  It reads the model and metadata,
    builds the example generator that matches the model's input layout, applies
    the model batch by batch, and passes the accumulated probabilities and
    labels to `model_eval_helper.run_evaluation`.

    :param model_file_name: Path to the trained model (read by
        `cnn.read_model`).  The metadata file is expected at
        "model_metadata.p" in the same directory.
    :param top_example_dir_name: Top-level directory searched for example
        files.
    :param first_spc_date_string: First SPC date in the evaluation period.
    :param last_spc_date_string: Last SPC date in the evaluation period.
    :param num_examples: Total number of examples, passed to the generator as
        `num_examples_total`.
    :param num_bootstrap_reps: Passed through to the evaluation routine.
    :param confidence_level: Passed through to the evaluation routine.
    :param class_fraction_keys: Keys of the class-to-sampling-fraction
        dictionary.  If there are fewer than two keys, no sampling fractions
        are used.
    :param class_fraction_values: Values paired with `class_fraction_keys`.
    :param output_dir_name: Output directory, passed to the evaluation routine.
    :raises: ValueError: if the model has more than two output neurons.
    """

    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = cnn.read_model(model_file_name)

    output_dimensions = model_object.layers[-1].output.get_shape().as_list()
    num_output_neurons = output_dimensions[-1]

    if num_output_neurons > 2:
        raise ValueError((
            'The model has {0:d} output neurons, which suggests {0:d}-class '
            'classification.  This script handles only binary classification.'
        ).format(num_output_neurons))

    input_tensors = model_object.input
    if not isinstance(input_tensors, list):
        input_tensors = [input_tensors]

    # A single input tensor with exactly one dimension between the first and
    # last axes is treated as a soundings-only model.
    soundings_only = False

    if len(input_tensors) == 1:
        spatial_dimensions = numpy.array(
            input_tensors[0].get_shape().as_list()[1:-1], dtype=int)
        soundings_only = len(spatial_dimensions) == 1

    model_metafile_name = '{0:s}/model_metadata.p'.format(
        os.path.split(model_file_name)[0])

    print('Reading metadata from: "{0:s}"...'.format(model_metafile_name))
    model_metadata_dict = cnn.read_model_metadata(model_metafile_name)
    training_option_dict = model_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY]

    use_sampling_fractions = len(class_fraction_keys) > 1
    training_option_dict[trainval_io.SAMPLING_FRACTIONS_KEY] = (
        dict(zip(class_fraction_keys, class_fraction_values))
        if use_sampling_fractions else None
    )

    example_file_names = input_examples.find_many_example_files(
        top_directory_name=top_example_dir_name,
        shuffled=False,
        first_spc_date_string=first_spc_date_string,
        last_spc_date_string=last_spc_date_string,
        raise_error_if_any_missing=False)

    # Override the training-time file list and time window with the
    # evaluation period.
    training_option_dict[trainval_io.EXAMPLE_FILES_KEY] = example_file_names
    training_option_dict[trainval_io.FIRST_STORM_TIME_KEY] = (
        time_conversion.get_start_of_spc_date(first_spc_date_string))
    training_option_dict[trainval_io.LAST_STORM_TIME_KEY] = (
        time_conversion.get_end_of_spc_date(last_spc_date_string))

    if soundings_only:
        example_generator = testing_io.sounding_generator(
            option_dict=training_option_dict, num_examples_total=num_examples)
    elif model_metadata_dict[cnn.LAYER_OPERATIONS_KEY] is not None:
        example_generator = testing_io.gridrad_generator_2d_reduced(
            option_dict=training_option_dict,
            list_of_operation_dicts=model_metadata_dict[
                cnn.LAYER_OPERATIONS_KEY],
            num_examples_total=num_examples)
    elif model_metadata_dict[cnn.CONV_2D3D_KEY]:
        example_generator = testing_io.myrorss_generator_2d3d(
            option_dict=training_option_dict, num_examples_total=num_examples)
    else:
        example_generator = testing_io.generator_2d_or_3d(
            option_dict=training_option_dict, num_examples_total=num_examples)

    include_soundings = (
        training_option_dict[trainval_io.SOUNDING_FIELDS_KEY] is not None
    )

    forecast_probabilities = numpy.array([])
    observed_labels = numpy.array([], dtype=int)

    # Draw at most one batch per example file; stop early if the generator
    # runs out.
    for _ in range(len(example_file_names)):
        try:
            this_example_dict = next(example_generator)
        except StopIteration:
            break

        print(SEPARATOR_STRING)

        observed_labels = numpy.concatenate((
            observed_labels, this_example_dict[testing_io.TARGET_ARRAY_KEY]
        ))

        if soundings_only:
            these_predictor_matrices = [
                this_example_dict[testing_io.SOUNDING_MATRIX_KEY]
            ]
        else:
            these_predictor_matrices = this_example_dict[
                testing_io.INPUT_MATRICES_KEY]

        # When soundings are used, they are the last predictor matrix.
        this_sounding_matrix = (
            these_predictor_matrices[-1] if include_soundings else None
        )

        if soundings_only:
            this_probability_matrix = cnn.apply_cnn_soundings_only(
                model_object=model_object,
                sounding_matrix=this_sounding_matrix,
                verbose=True)
        elif (model_metadata_dict[cnn.CONV_2D3D_KEY] and not
              training_option_dict[trainval_io.UPSAMPLE_REFLECTIVITY_KEY]):
            # Hybrid 2-D/3-D model with reflectivity and azimuthal shear kept
            # as separate inputs.
            this_probability_matrix = cnn.apply_2d3d_cnn(
                model_object=model_object,
                reflectivity_matrix_dbz=these_predictor_matrices[0],
                azimuthal_shear_matrix_s01=these_predictor_matrices[1],
                sounding_matrix=this_sounding_matrix,
                verbose=True)
        else:
            this_probability_matrix = cnn.apply_2d_or_3d_cnn(
                model_object=model_object,
                radar_image_matrix=these_predictor_matrices[0],
                sounding_matrix=this_sounding_matrix,
                verbose=True)

        print(SEPARATOR_STRING)

        # Keep only the last column of the probability matrix.
        forecast_probabilities = numpy.concatenate((
            forecast_probabilities, this_probability_matrix[:, -1]
        ))

    model_eval_helper.run_evaluation(
        forecast_probabilities=forecast_probabilities,
        observed_labels=observed_labels,
        num_bootstrap_reps=num_bootstrap_reps,
        confidence_level=confidence_level,
        output_dir_name=output_dir_name)
예제 #9
0
def _run(model_file_name, target_class, target_layer_name,
         top_example_dir_name, storm_metafile_name, num_examples,
         output_file_name):
    """Runs Grad-CAM and guided Grad-CAM on selected storm objects.

    This is effectively the main method.  Class-activation maps are computed
    one example at a time and stacked along the first axis; the predictors are
    then denormalized and everything is written to one output file.

    :param model_file_name: Path to the trained model (read by
        `cnn.read_model`).  The metadata file is expected at
        "model_metadata.p" in the same directory.
    :param target_class: Target class, passed to `gradcam.run_gradcam`.
    :param target_layer_name: Name of the target layer, passed to both Grad-CAM
        routines.
    :param top_example_dir_name: Top-level example directory, handed to
        `testing_io.read_specific_examples`.
    :param storm_metafile_name: Path to the file with storm IDs and times
        (read by `tracking_io.read_ids_and_times`).
    :param num_examples: Number of examples to use.  If this is positive and
        smaller than the number of storm objects in the file, only the first
        `num_examples` are kept; otherwise all are used.
    :param output_file_name: Path to the output (pickle) file.
    """

    file_system_utils.mkdir_recursive_if_necessary(file_name=output_file_name)

    # Read model and metadata.
    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = cnn.read_model(model_file_name)

    model_directory_name = os.path.split(model_file_name)[0]
    model_metafile_name = '{0:s}/model_metadata.p'.format(model_directory_name)

    print(
        'Reading model metadata from: "{0:s}"...'.format(model_metafile_name))
    model_metadata_dict = cnn.read_model_metadata(model_metafile_name)
    training_option_dict = model_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY]
    training_option_dict[trainval_io.REFLECTIVITY_MASK_KEY] = None

    print(
        'Reading storm metadata from: "{0:s}"...'.format(storm_metafile_name))
    full_id_strings, storm_times_unix_sec = tracking_io.read_ids_and_times(
        storm_metafile_name)

    print(SEPARATOR_STRING)

    if 0 < num_examples < len(full_id_strings):
        full_id_strings = full_id_strings[:num_examples]
        storm_times_unix_sec = storm_times_unix_sec[:num_examples]

    list_of_input_matrices, sounding_pressure_matrix_pascals = (
        testing_io.read_specific_examples(
            top_example_dir_name=top_example_dir_name,
            desired_full_id_strings=full_id_strings,
            desired_times_unix_sec=storm_times_unix_sec,
            option_dict=training_option_dict,
            list_of_layer_operation_dicts=model_metadata_dict[
                cnn.LAYER_OPERATIONS_KEY]))
    print(SEPARATOR_STRING)

    all_cam_matrices = None
    all_guided_cam_matrices = None
    new_model_object = None

    num_examples = len(full_id_strings)

    for i in range(num_examples):
        print('Running Grad-CAM for example {0:d} of {1:d}...'.format(
            i + 1, num_examples))

        # Indexing with [i] (a list) keeps the leading example axis.
        example_input_matrices = [m[[i], ...] for m in list_of_input_matrices]

        example_cam_matrices = gradcam.run_gradcam(
            model_object=model_object,
            list_of_input_matrices=example_input_matrices,
            target_class=target_class,
            target_layer_name=target_layer_name)

        print('Running guided Grad-CAM for example {0:d} of {1:d}...'.format(
            i + 1, num_examples))

        # The model returned here is fed back in on the next iteration.
        example_guided_cam_matrices, new_model_object = (
            gradcam.run_guided_gradcam(
                orig_model_object=model_object,
                list_of_input_matrices=example_input_matrices,
                target_layer_name=target_layer_name,
                list_of_cam_matrices=example_cam_matrices,
                new_model_object=new_model_object))

        if all_cam_matrices is None:
            all_cam_matrices = copy.deepcopy(example_cam_matrices)
            all_guided_cam_matrices = copy.deepcopy(
                example_guided_cam_matrices)
            continue

        for j in range(len(example_cam_matrices)):
            # Entries that are None in the accumulated list are left alone.
            if all_cam_matrices[j] is not None:
                all_cam_matrices[j] = numpy.concatenate(
                    (all_cam_matrices[j], example_cam_matrices[j]), axis=0)

                all_guided_cam_matrices[j] = numpy.concatenate(
                    (all_guided_cam_matrices[j],
                     example_guided_cam_matrices[j]),
                    axis=0)

    print(SEPARATOR_STRING)

    if training_option_dict[trainval_io.UPSAMPLE_REFLECTIVITY_KEY]:
        # Add a trailing channel axis to the first activation map and repeat
        # it once per input channel, so the map can be split into
        # reflectivity and shear parts like the input matrix.
        expanded_cam_matrix = numpy.expand_dims(all_cam_matrices[0], axis=-1)
        num_channels = list_of_input_matrices[0].shape[-1]

        all_cam_matrices[0] = numpy.repeat(
            a=expanded_cam_matrix, repeats=num_channels, axis=-1)

        all_cam_matrices = trainval_io.separate_shear_and_reflectivity(
            list_of_input_matrices=all_cam_matrices,
            training_option_dict=training_option_dict)

        # Drop the channel axis again, keeping the first channel only.
        for k in range(2):
            all_cam_matrices[k] = all_cam_matrices[k][..., 0]

    all_guided_cam_matrices = trainval_io.separate_shear_and_reflectivity(
        list_of_input_matrices=all_guided_cam_matrices,
        training_option_dict=training_option_dict)

    print('Denormalizing predictors...')
    separated_input_matrices = trainval_io.separate_shear_and_reflectivity(
        list_of_input_matrices=list_of_input_matrices,
        training_option_dict=training_option_dict)

    denorm_input_matrices = model_interpretation.denormalize_data(
        list_of_input_matrices=separated_input_matrices,
        model_metadata_dict=model_metadata_dict)

    print('Writing class-activation maps to file: "{0:s}"...'.format(
        output_file_name))

    gradcam.write_standard_file(
        pickle_file_name=output_file_name,
        list_of_input_matrices=denorm_input_matrices,
        list_of_cam_matrices=all_cam_matrices,
        list_of_guided_cam_matrices=all_guided_cam_matrices,
        model_file_name=model_file_name,
        full_id_strings=full_id_strings,
        storm_times_unix_sec=storm_times_unix_sec,
        target_class=target_class,
        target_layer_name=target_layer_name,
        sounding_pressure_matrix_pascals=sounding_pressure_matrix_pascals)
예제 #10
0
def _run(model_file_name, target_class, target_layer_name,
         top_example_dir_name, storm_metafile_name, num_examples,
         output_file_name):
    """Runs Grad-CAM (gradient-weighted class-activation maps).

    This is effectively the main method.  Class-activation maps and guided
    Grad-CAM output are computed one example at a time and stacked along a new
    leading axis; the predictors are then denormalized and everything is
    written to one output file.

    All messages are printed with the single-argument call form `print(...)`,
    which behaves the same under Python 2 and Python 3 (the former `print`
    statements were a syntax error under Python 3).

    :param model_file_name: Path to the trained model (read by
        `cnn.read_model`).  The metadata file is expected at
        "model_metadata.p" in the same directory.
    :param target_class: Target class, passed to `gradcam.run_gradcam`.
    :param target_layer_name: Name of the target layer, passed to both Grad-CAM
        routines.
    :param top_example_dir_name: Top-level example directory, handed to
        `testing_io.read_specific_examples`.
    :param storm_metafile_name: Path to the file with storm IDs and times
        (read by `tracking_io.read_ids_and_times`).
    :param num_examples: Number of examples to use.  If this is positive and
        smaller than the number of storm objects in the file, only the first
        `num_examples` are kept; otherwise all are used.
    :param output_file_name: Path to the output (pickle) file.
    """

    file_system_utils.mkdir_recursive_if_necessary(file_name=output_file_name)

    # Read model and metadata.
    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = cnn.read_model(model_file_name)
    model_metafile_name = '{0:s}/model_metadata.p'.format(
        os.path.split(model_file_name)[0])

    print(
        'Reading model metadata from: "{0:s}"...'.format(model_metafile_name))
    model_metadata_dict = cnn.read_model_metadata(model_metafile_name)
    training_option_dict = model_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY]
    training_option_dict[trainval_io.REFLECTIVITY_MASK_KEY] = None

    print(
        'Reading storm metadata from: "{0:s}"...'.format(storm_metafile_name))
    storm_ids, storm_times_unix_sec = tracking_io.read_ids_and_times(
        storm_metafile_name)
    print(SEPARATOR_STRING)

    if 0 < num_examples < len(storm_ids):
        storm_ids = storm_ids[:num_examples]
        storm_times_unix_sec = storm_times_unix_sec[:num_examples]

    list_of_input_matrices, sounding_pressure_matrix_pascals = (
        testing_io.read_specific_examples(
            top_example_dir_name=top_example_dir_name,
            desired_storm_ids=storm_ids,
            desired_times_unix_sec=storm_times_unix_sec,
            option_dict=training_option_dict,
            list_of_layer_operation_dicts=model_metadata_dict[
                cnn.LAYER_OPERATIONS_KEY]))
    print(SEPARATOR_STRING)

    class_activation_matrix = None
    ggradcam_output_matrix = None
    new_model_object = None

    num_examples = len(storm_ids)

    for i in range(num_examples):
        print('Running Grad-CAM for example {0:d} of {1:d}...'.format(
            i + 1, num_examples))

        # Indexing with [i] (a list) keeps the leading example axis.
        these_input_matrices = [a[[i], ...] for a in list_of_input_matrices]
        this_class_activation_matrix = gradcam.run_gradcam(
            model_object=model_object,
            list_of_input_matrices=these_input_matrices,
            target_class=target_class,
            target_layer_name=target_layer_name)

        print('Running guided Grad-CAM for example {0:d} of {1:d}...'.format(
            i + 1, num_examples))

        # The model returned here is fed back in on the next iteration.
        this_ggradcam_output_matrix, new_model_object = (
            gradcam.run_guided_gradcam(
                orig_model_object=model_object,
                list_of_input_matrices=these_input_matrices,
                target_layer_name=target_layer_name,
                class_activation_matrix=this_class_activation_matrix,
                new_model_object=new_model_object))

        # Add a leading axis so per-example results can be stacked.
        this_class_activation_matrix = numpy.expand_dims(
            this_class_activation_matrix, axis=0)
        this_ggradcam_output_matrix = numpy.expand_dims(
            this_ggradcam_output_matrix, axis=0)

        if class_activation_matrix is None:
            # Adding 0. creates a new array rather than aliasing the input.
            class_activation_matrix = this_class_activation_matrix + 0.
            ggradcam_output_matrix = this_ggradcam_output_matrix + 0.
        else:
            class_activation_matrix = numpy.concatenate(
                (class_activation_matrix, this_class_activation_matrix),
                axis=0)
            ggradcam_output_matrix = numpy.concatenate(
                (ggradcam_output_matrix, this_ggradcam_output_matrix), axis=0)

    print(SEPARATOR_STRING)

    print('Denormalizing predictors...')
    list_of_input_matrices = model_interpretation.denormalize_data(
        list_of_input_matrices=list_of_input_matrices,
        model_metadata_dict=model_metadata_dict)

    print('Writing class-activation maps to file: "{0:s}"...'.format(
        output_file_name))
    gradcam.write_standard_file(
        pickle_file_name=output_file_name,
        list_of_input_matrices=list_of_input_matrices,
        class_activation_matrix=class_activation_matrix,
        ggradcam_output_matrix=ggradcam_output_matrix,
        model_file_name=model_file_name,
        storm_ids=storm_ids,
        storm_times_unix_sec=storm_times_unix_sec,
        target_class=target_class,
        target_layer_name=target_layer_name,
        sounding_pressure_matrix_pascals=sounding_pressure_matrix_pascals)
예제 #11
0
def _run(model_file_name, init_function_name, storm_metafile_name,
         num_examples, top_example_dir_name, component_type_string,
         target_class, layer_name, neuron_indices, channel_index,
         num_iterations, ideal_activation, learning_rate, output_file_name):
    """Runs backwards optimization on a trained CNN.

    This is effectively the main method.  The starting point for optimization
    is either a set of real storm examples read from file (when no initializer
    is named) or a single synthetic example produced by an initializer
    function.  Optimized matrices are stacked along the first axis,
    denormalized, and written to one output file.

    All messages are printed with the single-argument call form `print(...)`,
    with formatting done inside the call.  The former
    `print(template).format(...)` pattern was valid only as a Python 2 `print`
    statement; under Python 3 it prints the raw template and then fails,
    because `print` returns None.

    :param model_file_name: Path to the trained model (read by
        `cnn.read_model`).  The metadata file is expected at
        "model_metadata.p" in the same directory.
    :param init_function_name: Name of the initializer, passed to
        `_create_initializer`.  If this is '' or 'None', real storm examples
        are used as the starting point instead.
    :param storm_metafile_name: Path to the file with storm IDs and times.
        Read only when no initializer is named.
    :param num_examples: Number of storm examples to use.  If this is positive
        and smaller than the number of storm objects in the file, only the
        first `num_examples` are kept.  Ignored (reset to 1) when an
        initializer is named.
    :param top_example_dir_name: Top-level example directory, handed to
        `testing_io.read_specific_examples`.
    :param component_type_string: Type of model component to optimize for
        (validated by `model_interpretation.check_component_type`).
    :param target_class: Target class.  Used only for the class component
        type.
    :param layer_name: Name of the layer with the neuron or channel.  Used for
        the neuron and channel component types.
    :param neuron_indices: Indices of the neuron.  Used only for the neuron
        component type.
    :param channel_index: Index of the channel.  Used only for the channel
        component type.
    :param num_iterations: Number of iterations, passed to the optimizer.
    :param ideal_activation: Ideal activation for the neuron or channel.
        Values <= 0 are replaced with None.
    :param learning_rate: Learning rate, passed to the optimizer.
    :param output_file_name: Path to the output (pickle) file.
    """

    model_interpretation.check_component_type(component_type_string)

    if ideal_activation <= 0:
        ideal_activation = None
    if init_function_name in ['', 'None']:
        init_function_name = None

    model_metafile_name = '{0:s}/model_metadata.p'.format(
        os.path.split(model_file_name)[0])

    print(
        'Reading model metadata from: "{0:s}"...'.format(model_metafile_name))
    model_metadata_dict = cnn.read_model_metadata(model_metafile_name)

    if init_function_name is None:
        print('Reading storm metadata from: "{0:s}"...'.format(
            storm_metafile_name))

        storm_ids, storm_times_unix_sec = tracking_io.read_ids_and_times(
            storm_metafile_name)

        if 0 < num_examples < len(storm_ids):
            storm_ids = storm_ids[:num_examples]
            storm_times_unix_sec = storm_times_unix_sec[:num_examples]

        # Only the first returned item (the input matrices) is needed here.
        list_of_init_matrices = testing_io.read_specific_examples(
            desired_storm_ids=storm_ids,
            desired_times_unix_sec=storm_times_unix_sec,
            option_dict=model_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY],
            top_example_dir_name=top_example_dir_name,
            list_of_layer_operation_dicts=model_metadata_dict[
                cnn.LAYER_OPERATIONS_KEY])[0]

        num_examples = list_of_init_matrices[0].shape[0]
        print(SEPARATOR_STRING)

    else:
        storm_ids = None
        storm_times_unix_sec = None
        num_examples = 1

        init_function = _create_initializer(
            init_function_name=init_function_name,
            model_metadata_dict=model_metadata_dict)

    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = cnn.read_model(model_file_name)

    list_of_optimized_matrices = None

    for i in range(num_examples):
        if init_function_name is None:
            # Indexing with [i] (a list) keeps the leading example axis.
            this_init_arg = [a[[i], ...] for a in list_of_init_matrices]
        else:
            this_init_arg = init_function

        if component_type_string == CLASS_COMPONENT_TYPE_STRING:
            print((
                '\nOptimizing {0:d}th of {1:d} images for target class {2:d}...'
            ).format(i + 1, num_examples, target_class))

            these_optimized_matrices = backwards_opt.optimize_input_for_class(
                model_object=model_object,
                target_class=target_class,
                init_function_or_matrices=this_init_arg,
                num_iterations=num_iterations,
                learning_rate=learning_rate)

        elif component_type_string == NEURON_COMPONENT_TYPE_STRING:
            print((
                '\nOptimizing {0:d}th of {1:d} images for neuron {2:s} in layer'
                ' "{3:s}"...'
            ).format(i + 1, num_examples, str(neuron_indices), layer_name))

            these_optimized_matrices = backwards_opt.optimize_input_for_neuron(
                model_object=model_object,
                layer_name=layer_name,
                neuron_indices=neuron_indices,
                init_function_or_matrices=this_init_arg,
                num_iterations=num_iterations,
                learning_rate=learning_rate,
                ideal_activation=ideal_activation)

        else:
            print((
                '\nOptimizing {0:d}th of {1:d} images for channel {2:d} in '
                'layer "{3:s}"...'
            ).format(i + 1, num_examples, channel_index, layer_name))

            these_optimized_matrices = backwards_opt.optimize_input_for_channel(
                model_object=model_object,
                layer_name=layer_name,
                channel_index=channel_index,
                init_function_or_matrices=this_init_arg,
                stat_function_for_neuron_activations=K.max,
                num_iterations=num_iterations,
                learning_rate=learning_rate,
                ideal_activation=ideal_activation)

        if list_of_optimized_matrices is None:
            num_matrices = len(these_optimized_matrices)
            list_of_optimized_matrices = [None] * num_matrices

        for k in range(len(list_of_optimized_matrices)):
            if list_of_optimized_matrices[k] is None:
                # Adding 0. creates a new array rather than aliasing the
                # optimizer's output.
                list_of_optimized_matrices[
                    k] = these_optimized_matrices[k] + 0.
            else:
                list_of_optimized_matrices[k] = numpy.concatenate(
                    (list_of_optimized_matrices[k],
                     these_optimized_matrices[k]),
                    axis=0)

    print(SEPARATOR_STRING)

    print('Denormalizing optimized examples...')
    list_of_optimized_matrices = model_interpretation.denormalize_data(
        list_of_input_matrices=list_of_optimized_matrices,
        model_metadata_dict=model_metadata_dict)

    if init_function_name is None:
        print('Denormalizing input examples...')
        list_of_init_matrices = model_interpretation.denormalize_data(
            list_of_input_matrices=list_of_init_matrices,
            model_metadata_dict=model_metadata_dict)

        this_init_arg = list_of_init_matrices
    else:
        this_init_arg = init_function_name + ''

    print('Writing results to: "{0:s}"...'.format(output_file_name))
    backwards_opt.write_standard_file(
        pickle_file_name=output_file_name,
        list_of_optimized_matrices=list_of_optimized_matrices,
        model_file_name=model_file_name,
        init_function_name_or_matrices=this_init_arg,
        num_iterations=num_iterations,
        learning_rate=learning_rate,
        component_type_string=component_type_string,
        target_class=target_class,
        layer_name=layer_name,
        neuron_indices=neuron_indices,
        channel_index=channel_index,
        ideal_activation=ideal_activation,
        storm_ids=storm_ids,
        storm_times_unix_sec=storm_times_unix_sec)
def _run(cnn_file_name, upconvnet_file_name, top_example_dir_name,
         baseline_storm_metafile_name, trial_storm_metafile_name,
         num_baseline_examples, num_trial_examples, num_novel_examples,
         cnn_feature_layer_name, percent_variance_to_keep, output_file_name):
    """Runs novelty detection.

    This is effectively the main method.  It reads the CNN and upconvnet,
    reads predictors for the baseline and trial storm objects, runs novelty
    detection, denormalizes its inputs and outputs, and writes the results to
    one output file.

    :param cnn_file_name: Path to the trained CNN (read by `cnn.read_model`).
        The metadata file is expected at "model_metadata.p" in the same
        directory.
    :param upconvnet_file_name: Path to the trained upconvnet (also read by
        `cnn.read_model`).
    :param top_example_dir_name: Top-level example directory, handed to
        `testing_io.read_predictors_specific_examples`.
    :param baseline_storm_metafile_name: Path to the file with storm IDs and
        times for the baseline set.
    :param trial_storm_metafile_name: Path to the file with storm IDs and times
        for the trial set.
    :param num_baseline_examples: Number of baseline examples, passed to
        `_filter_examples`.
    :param num_trial_examples: Number of trial examples, passed to
        `_filter_examples`.
    :param num_novel_examples: Number of novel examples to find.  May be
        changed by `_filter_examples`.
    :param cnn_feature_layer_name: Name of the CNN layer whose output is used
        as the feature vector, passed to the novelty-detection routine.
    :param percent_variance_to_keep: Passed to the novelty-detection routine.
    :param output_file_name: Path to the output (pickle) file.
    :raises: ValueError: if dimensions of first CNN input matrix != dimensions
        of upconvnet output (presumably raised by `_check_dimensions`).
    """

    print('Reading trained CNN from: "{0:s}"...'.format(cnn_file_name))
    cnn_model_object = cnn.read_model(cnn_file_name)

    print('Reading trained upconvnet from: "{0:s}"...'.format(
        upconvnet_file_name))
    upconvnet_model_object = cnn.read_model(upconvnet_file_name)
    _check_dimensions(cnn_model_object=cnn_model_object,
                      upconvnet_model_object=upconvnet_model_object)

    print('Reading metadata for baseline examples from: "{0:s}"...'.format(
        baseline_storm_metafile_name))
    baseline_full_id_strings, baseline_times_unix_sec = (
        tracking_io.read_ids_and_times(baseline_storm_metafile_name))

    print('Reading metadata for trial examples from: "{0:s}"...'.format(
        trial_storm_metafile_name))
    trial_full_id_strings, trial_times_unix_sec = (
        tracking_io.read_ids_and_times(trial_storm_metafile_name))

    this_dict = _filter_examples(
        trial_full_id_strings=trial_full_id_strings,
        trial_times_unix_sec=trial_times_unix_sec,
        num_trial_examples=num_trial_examples,
        baseline_full_id_strings=baseline_full_id_strings,
        baseline_times_unix_sec=baseline_times_unix_sec,
        num_baseline_examples=num_baseline_examples,
        num_novel_examples=num_novel_examples)

    trial_full_id_strings = this_dict[TRIAL_STORM_IDS_KEY]
    trial_times_unix_sec = this_dict[TRIAL_STORM_TIMES_KEY]
    baseline_full_id_strings = this_dict[BASELINE_STORM_IDS_KEY]
    baseline_times_unix_sec = this_dict[BASELINE_STORM_TIMES_KEY]
    num_novel_examples = this_dict[NUM_NOVEL_EXAMPLES_KEY]

    cnn_metafile_name = '{0:s}/model_metadata.p'.format(
        os.path.split(cnn_file_name)[0])

    print('Reading CNN metadata from: "{0:s}"...'.format(cnn_metafile_name))
    cnn_metadata_dict = cnn.read_model_metadata(cnn_metafile_name)
    print(SEPARATOR_STRING)

    baseline_predictor_matrices = testing_io.read_predictors_specific_examples(
        top_example_dir_name=top_example_dir_name,
        desired_full_id_strings=baseline_full_id_strings,
        desired_times_unix_sec=baseline_times_unix_sec,
        option_dict=cnn_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY],
        layer_operation_dicts=cnn_metadata_dict[cnn.LAYER_OPERATIONS_KEY])[
            testing_io.INPUT_MATRICES_KEY]

    print(SEPARATOR_STRING)

    trial_predictor_matrices = testing_io.read_predictors_specific_examples(
        top_example_dir_name=top_example_dir_name,
        desired_full_id_strings=trial_full_id_strings,
        desired_times_unix_sec=trial_times_unix_sec,
        option_dict=cnn_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY],
        layer_operation_dicts=cnn_metadata_dict[cnn.LAYER_OPERATIONS_KEY])[
            testing_io.INPUT_MATRICES_KEY]

    print(SEPARATOR_STRING)

    novelty_dict = novelty_detection.do_novelty_detection(
        baseline_predictor_matrices=baseline_predictor_matrices,
        trial_predictor_matrices=trial_predictor_matrices,
        cnn_model_object=cnn_model_object,
        cnn_feature_layer_name=cnn_feature_layer_name,
        upconvnet_model_object=upconvnet_model_object,
        num_novel_examples=num_novel_examples,
        multipass=False,
        percent_variance_to_keep=percent_variance_to_keep)

    print(SEPARATOR_STRING)
    print('Denormalizing inputs and outputs of novelty detection...')

    # Only the first predictor matrix is denormalized below, so sounding
    # fields are switched off in the metadata first.
    cnn_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY][
        trainval_io.SOUNDING_FIELDS_KEY] = None

    # Slice with [:1] rather than indexing with [[0]]: a Python list cannot be
    # indexed with a list (TypeError), whereas a one-element slice works for
    # both lists and arrays.
    novelty_dict[novelty_detection.BASELINE_MATRIX_KEY] = (
        model_interpretation.denormalize_data(
            list_of_input_matrices=baseline_predictor_matrices[:1],
            model_metadata_dict=cnn_metadata_dict))

    novelty_dict[novelty_detection.TRIAL_MATRIX_KEY] = (
        model_interpretation.denormalize_data(
            list_of_input_matrices=trial_predictor_matrices[:1],
            model_metadata_dict=cnn_metadata_dict))

    # Replace each normalized upconvnet output with its denormalized version.
    novelty_dict[novelty_detection.UPCONV_MATRIX_KEY] = (
        model_interpretation.denormalize_data(
            list_of_input_matrices=[
                novelty_dict[novelty_detection.UPCONV_NORM_MATRIX_KEY]
            ],
            model_metadata_dict=cnn_metadata_dict))[0]
    novelty_dict.pop(novelty_detection.UPCONV_NORM_MATRIX_KEY)

    novelty_dict[novelty_detection.UPCONV_SVD_MATRIX_KEY] = (
        model_interpretation.denormalize_data(
            list_of_input_matrices=[
                novelty_dict[novelty_detection.UPCONV_NORM_SVD_MATRIX_KEY]
            ],
            model_metadata_dict=cnn_metadata_dict))[0]
    novelty_dict.pop(novelty_detection.UPCONV_NORM_SVD_MATRIX_KEY)

    novelty_dict = novelty_detection.add_metadata(
        novelty_dict=novelty_dict,
        baseline_full_id_strings=baseline_full_id_strings,
        baseline_times_unix_sec=baseline_times_unix_sec,
        trial_full_id_strings=trial_full_id_strings,
        trial_times_unix_sec=trial_times_unix_sec,
        cnn_file_name=cnn_file_name,
        upconvnet_file_name=upconvnet_file_name)

    print('Writing results to: "{0:s}"...'.format(output_file_name))
    novelty_detection.write_standard_file(novelty_dict=novelty_dict,
                                          pickle_file_name=output_file_name)
def _run(model_file_name, init_function_name, storm_metafile_name,
         num_examples, top_example_dir_name, component_type_string,
         target_class, layer_name, neuron_indices, channel_index,
         num_iterations, ideal_activation, learning_rate, l2_weight,
         radar_constraint_weight, minmax_constraint_weight, output_file_name):
    """Runs backwards optimization on a trained CNN.

    This is effectively the main method.

    If no initialization function is named (`init_function_name` is '' or
    'None'), the storm objects listed in `storm_metafile_name` are read and
    optimized one example at a time.  Otherwise a single example, started from
    the named initialization function, is optimized.  Both the starting
    examples and the optimized examples are denormalized before being written
    to `output_file_name`.

    :param model_file_name: See documentation at top of file.
    :param init_function_name: Same.
    :param storm_metafile_name: Same.
    :param num_examples: Same.
    :param top_example_dir_name: Same.
    :param component_type_string: Same.
    :param target_class: Same.
    :param layer_name: Same.
    :param neuron_indices: Same.
    :param channel_index: Same.
    :param num_iterations: Same.
    :param ideal_activation: Same.
    :param learning_rate: Same.
    :param l2_weight: Same.
    :param radar_constraint_weight: Same.
    :param minmax_constraint_weight: Same.
    :param output_file_name: Same.
    """

    # Non-positive numbers and empty-ish names are the "not used" sentinels.
    l2_weight = None if l2_weight <= 0 else l2_weight
    radar_constraint_weight = (
        None if radar_constraint_weight <= 0 else radar_constraint_weight)
    minmax_constraint_weight = (
        None if minmax_constraint_weight <= 0 else minmax_constraint_weight)
    ideal_activation = None if ideal_activation <= 0 else ideal_activation

    if init_function_name in ['', 'None']:
        init_function_name = None

    # Without an initialization function, stored examples are the start point.
    start_from_stored_examples = init_function_name is None

    model_interpretation.check_component_type(component_type_string)

    # The metafile is expected in the same directory as the model file.
    model_directory_name = os.path.split(model_file_name)[0]
    model_metafile_name = '{0:s}/model_metadata.p'.format(
        model_directory_name)

    print(
        'Reading model metadata from: "{0:s}"...'.format(model_metafile_name))
    model_metadata_dict = cnn.read_model_metadata(model_metafile_name)

    if start_from_stored_examples:
        init_function = None

        print('Reading storm metadata from: "{0:s}"...'.format(
            storm_metafile_name))

        full_storm_id_strings, storm_times_unix_sec = (
            tracking_io.read_ids_and_times(storm_metafile_name))

        # Keep only the first `num_examples` storm objects, if requested.
        if 0 < num_examples < len(full_storm_id_strings):
            full_storm_id_strings = full_storm_id_strings[:num_examples]
            storm_times_unix_sec = storm_times_unix_sec[:num_examples]

        example_dict = testing_io.read_predictors_specific_examples(
            top_example_dir_name=top_example_dir_name,
            desired_full_id_strings=full_storm_id_strings,
            desired_times_unix_sec=storm_times_unix_sec,
            option_dict=model_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY],
            layer_operation_dicts=model_metadata_dict[
                cnn.LAYER_OPERATIONS_KEY])
        print(SEPARATOR_STRING)

        input_matrices = example_dict[testing_io.INPUT_MATRICES_KEY]
        sounding_pressure_matrix_pa = example_dict[
            testing_io.SOUNDING_PRESSURES_KEY]

        # Use the number of examples actually read.
        num_examples = input_matrices[0].shape[0]
    else:
        input_matrices = None
        full_storm_id_strings = None
        storm_times_unix_sec = None
        sounding_pressure_matrix_pa = None

        num_examples = 1
        init_function = _create_initializer(
            init_function_name=init_function_name,
            model_metadata_dict=model_metadata_dict)

    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = cnn.read_model(model_file_name)

    def _grow_along_first_axis(stacked_matrices, new_matrices):
        """Appends one iteration's matrices to the running stacks.

        :param stacked_matrices: List of matrices accumulated so far (None
            before the first iteration).
        :param new_matrices: List of matrices from the current iteration.
        :return: stacked_matrices: Updated list, joined along the first axis.
        """

        if stacked_matrices is None:
            # `+ 0.` produces new objects (presumably so the stored results
            # are not aliases of the optimizer's output).
            return [this_matrix + 0. for this_matrix in new_matrices]

        return [
            numpy.concatenate((old_matrix, new_matrices[k]), axis=0)
            for k, old_matrix in enumerate(stacked_matrices)
        ]

    output_matrices = None
    initial_activations = numpy.full(num_examples, numpy.nan)
    final_activations = numpy.full(num_examples, numpy.nan)

    for example_index in range(num_examples):
        if start_from_stored_examples:
            # Slice with a list so that the example axis is retained.
            this_init_arg = [
                this_matrix[[example_index], ...]
                for this_matrix in input_matrices
            ]
        else:
            this_init_arg = init_function

        # Arguments accepted by all three optimization methods.
        shared_kwargs = {
            'model_object': model_object,
            'init_function_or_matrices': this_init_arg,
            'num_iterations': num_iterations,
            'learning_rate': learning_rate,
            'l2_weight': l2_weight,
            'radar_constraint_weight': radar_constraint_weight,
            'minmax_constraint_weight': minmax_constraint_weight,
            'model_metadata_dict': model_metadata_dict
        }

        if component_type_string == CLASS_COMPONENT_TYPE_STRING:
            print((
                '\nOptimizing {0:d}th of {1:d} images for target class {2:d}...'
            ).format(example_index + 1, num_examples, target_class))

            this_result_dict = backwards_opt.optimize_input_for_class(
                target_class=target_class, **shared_kwargs)

        elif component_type_string == NEURON_COMPONENT_TYPE_STRING:
            print((
                '\nOptimizing {0:d}th of {1:d} images for neuron {2:s} in layer'
                ' "{3:s}"...'
            ).format(
                example_index + 1, num_examples, str(neuron_indices),
                layer_name
            ))

            this_result_dict = backwards_opt.optimize_input_for_neuron(
                layer_name=layer_name,
                neuron_indices=neuron_indices,
                ideal_activation=ideal_activation,
                **shared_kwargs)

        else:
            print((
                '\nOptimizing {0:d}th of {1:d} images for channel {2:d} in '
                'layer "{3:s}"...'
            ).format(
                example_index + 1, num_examples, channel_index, layer_name
            ))

            this_result_dict = backwards_opt.optimize_input_for_channel(
                layer_name=layer_name,
                channel_index=channel_index,
                stat_function_for_neuron_activations=K.max,
                ideal_activation=ideal_activation,
                **shared_kwargs)

        initial_activations[example_index] = this_result_dict[
            backwards_opt.INITIAL_ACTIVATION_KEY]
        final_activations[example_index] = this_result_dict[
            backwards_opt.FINAL_ACTIVATION_KEY]

        output_matrices = _grow_along_first_axis(
            output_matrices,
            this_result_dict[backwards_opt.NORM_OUTPUT_MATRICES_KEY])

        # Stored examples are already in `input_matrices`; only the
        # function-initialized start point must be taken from the result.
        if not start_from_stored_examples:
            input_matrices = _grow_along_first_axis(
                input_matrices,
                this_result_dict[backwards_opt.NORM_INPUT_MATRICES_KEY])

    print(SEPARATOR_STRING)
    training_option_dict = model_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY]

    print('Denormalizing input examples...')
    separated_input_matrices = trainval_io.separate_shear_and_reflectivity(
        list_of_input_matrices=input_matrices,
        training_option_dict=training_option_dict)

    denorm_input_matrices = model_interpretation.denormalize_data(
        list_of_input_matrices=separated_input_matrices,
        model_metadata_dict=model_metadata_dict)

    print('Denormalizing optimized examples...')
    separated_output_matrices = trainval_io.separate_shear_and_reflectivity(
        list_of_input_matrices=output_matrices,
        training_option_dict=training_option_dict)

    denorm_output_matrices = model_interpretation.denormalize_data(
        list_of_input_matrices=separated_output_matrices,
        model_metadata_dict=model_metadata_dict)

    print('Writing results to: "{0:s}"...'.format(output_file_name))
    bwo_metadata_dict = backwards_opt.check_metadata(
        component_type_string=component_type_string,
        num_iterations=num_iterations,
        learning_rate=learning_rate,
        target_class=target_class,
        layer_name=layer_name,
        ideal_activation=ideal_activation,
        neuron_indices=neuron_indices,
        channel_index=channel_index,
        l2_weight=l2_weight,
        radar_constraint_weight=radar_constraint_weight,
        minmax_constraint_weight=minmax_constraint_weight)

    backwards_opt.write_standard_file(
        pickle_file_name=output_file_name,
        denorm_input_matrices=denorm_input_matrices,
        denorm_output_matrices=denorm_output_matrices,
        initial_activations=initial_activations,
        final_activations=final_activations,
        model_file_name=model_file_name,
        metadata_dict=bwo_metadata_dict,
        full_storm_id_strings=full_storm_id_strings,
        storm_times_unix_sec=storm_times_unix_sec,
        sounding_pressure_matrix_pa=sounding_pressure_matrix_pa)
예제 #14
0
def _run(cnn_file_name, upconvnet_file_name, top_example_dir_name,
         baseline_storm_metafile_name, trial_storm_metafile_name,
         num_baseline_examples, num_trial_examples, num_novel_examples,
         cnn_feature_layer_name, percent_svd_variance_to_keep,
         output_file_name):
    """Runs novelty detection.

    This is effectively the main method.

    Storm objects found in both sets are removed from the baseline set before
    the examples are read.  Results of novelty detection are tagged with
    metadata, denormalized and written to `output_file_name`.

    NOTE: The check that the first CNN input matrix and the upconvnet output
    have equal dimensions is currently disabled, so this method itself raises
    no error for such a mismatch.

    :param cnn_file_name: See documentation at top of file.
    :param upconvnet_file_name: Same.
    :param top_example_dir_name: Same.
    :param baseline_storm_metafile_name: Same.
    :param trial_storm_metafile_name: Same.
    :param num_baseline_examples: Same.
    :param num_trial_examples: Same.
    :param num_novel_examples: Same.
    :param cnn_feature_layer_name: Same.
    :param percent_svd_variance_to_keep: Same.
    :param output_file_name: Same.
    """

    def _keep_first_examples(full_id_strings, times_unix_sec, max_examples):
        """Truncates storm-object metadata to the first `max_examples` items.

        Nothing is removed unless 0 < max_examples < number of storm objects.

        :param full_id_strings: Storm IDs.
        :param times_unix_sec: Storm times, parallel to `full_id_strings`.
        :param max_examples: Max number of storm objects to keep.
        :return: full_id_strings: Possibly shortened version of input.
        :return: times_unix_sec: Possibly shortened version of input.
        """

        if 0 < max_examples < len(full_id_strings):
            return (
                full_id_strings[:max_examples], times_unix_sec[:max_examples]
            )

        return full_id_strings, times_unix_sec

    def _read_input_matrices(full_id_strings, times_unix_sec):
        """Reads predictor matrices for the given storm objects.

        :param full_id_strings: Storm IDs.
        :param times_unix_sec: Storm times, parallel to `full_id_strings`.
        :return: list_of_input_matrices: First item returned by
            `testing_io.read_specific_examples`.
        """

        return testing_io.read_specific_examples(
            top_example_dir_name=top_example_dir_name,
            desired_full_id_strings=full_id_strings,
            desired_times_unix_sec=times_unix_sec,
            option_dict=cnn_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY],
            list_of_layer_operation_dicts=cnn_metadata_dict[
                cnn.LAYER_OPERATIONS_KEY]
        )[0]

    print('Reading trained CNN from: "{0:s}"...'.format(cnn_file_name))
    cnn_model_object = cnn.read_model(cnn_file_name)

    # The metafile is expected in the same directory as the CNN file.
    cnn_directory_name = os.path.split(cnn_file_name)[0]
    cnn_metafile_name = '{0:s}/model_metadata.p'.format(cnn_directory_name)

    print('Reading trained upconvnet from: "{0:s}"...'.format(
        upconvnet_file_name))
    upconvnet_model_object = cnn.read_model(upconvnet_file_name)

    cnn_input_tensors = cnn_model_object.input
    first_cnn_input_tensor = (
        cnn_input_tensors[0] if isinstance(cnn_input_tensors, list)
        else cnn_input_tensors
    )

    # Currently unused: the comparison of these dimensions against the
    # upconvnet's output dimensions has been disabled.
    cnn_input_dimensions = numpy.array(
        first_cnn_input_tensor.get_shape().as_list()[1:], dtype=int
    )

    print('Reading CNN metadata from: "{0:s}"...'.format(cnn_metafile_name))
    cnn_metadata_dict = cnn.read_model_metadata(cnn_metafile_name)

    print('Reading metadata for baseline examples from: "{0:s}"...'.format(
        baseline_storm_metafile_name))
    baseline_full_id_strings, baseline_times_unix_sec = (
        tracking_io.read_ids_and_times(baseline_storm_metafile_name)
    )

    print('Reading metadata for trial examples from: "{0:s}"...'.format(
        trial_storm_metafile_name))
    trial_full_id_strings, trial_times_unix_sec = (
        tracking_io.read_ids_and_times(trial_storm_metafile_name)
    )

    baseline_full_id_strings, baseline_times_unix_sec = _keep_first_examples(
        baseline_full_id_strings, baseline_times_unix_sec,
        num_baseline_examples)

    trial_full_id_strings, trial_times_unix_sec = _keep_first_examples(
        trial_full_id_strings, trial_times_unix_sec, num_trial_examples)

    num_trial_examples = len(trial_full_id_strings)

    # A non-positive request means "all trial examples"; in any case the
    # number of novel examples cannot exceed the number of trial examples.
    if num_novel_examples <= 0:
        num_novel_examples = num_trial_examples

    num_novel_examples = min(num_novel_examples, num_trial_examples)
    print('Number of novel examples to find: {0:d}'.format(num_novel_examples))

    # Baseline and trial sets must not share storm objects.
    bad_baseline_indices = tracking_utils.find_storm_objects(
        all_id_strings=baseline_full_id_strings,
        all_times_unix_sec=baseline_times_unix_sec,
        id_strings_to_keep=trial_full_id_strings,
        times_to_keep_unix_sec=trial_times_unix_sec, allow_missing=True)

    print('Removing {0:d} trial examples from baseline set...'.format(
        len(bad_baseline_indices)
    ))

    baseline_times_unix_sec = numpy.delete(
        baseline_times_unix_sec, bad_baseline_indices
    )
    baseline_full_id_strings = numpy.delete(
        numpy.array(baseline_full_id_strings), bad_baseline_indices
    ).tolist()

    print(SEPARATOR_STRING)
    list_of_baseline_input_matrices = _read_input_matrices(
        baseline_full_id_strings, baseline_times_unix_sec)

    print(SEPARATOR_STRING)
    list_of_trial_input_matrices = _read_input_matrices(
        trial_full_id_strings, trial_times_unix_sec)

    print(SEPARATOR_STRING)
    novelty_dict = novelty_detection.do_novelty_detection(
        list_of_baseline_input_matrices=list_of_baseline_input_matrices,
        list_of_trial_input_matrices=list_of_trial_input_matrices,
        cnn_model_object=cnn_model_object,
        cnn_feature_layer_name=cnn_feature_layer_name,
        upconvnet_model_object=upconvnet_model_object,
        num_novel_examples=num_novel_examples, multipass=False,
        percent_svd_variance_to_keep=percent_svd_variance_to_keep)

    print(SEPARATOR_STRING)

    print('Adding metadata to novelty-detection results...')
    novelty_dict = novelty_detection.add_metadata(
        novelty_dict=novelty_dict,
        baseline_full_id_strings=baseline_full_id_strings,
        baseline_storm_times_unix_sec=baseline_times_unix_sec,
        trial_full_id_strings=trial_full_id_strings,
        trial_storm_times_unix_sec=trial_times_unix_sec,
        cnn_file_name=cnn_file_name, upconvnet_file_name=upconvnet_file_name)

    print('Denormalizing inputs and outputs of novelty detection...')

    # These entries are already lists of matrices.
    for this_key in (
            novelty_detection.BASELINE_INPUTS_KEY,
            novelty_detection.TRIAL_INPUTS_KEY
    ):
        novelty_dict[this_key] = model_interpretation.denormalize_data(
            list_of_input_matrices=novelty_dict[this_key],
            model_metadata_dict=cnn_metadata_dict)

    # Sounding fields are cleared in the metadata before the upconvnet images
    # are denormalized.
    cnn_metadata_dict[
        cnn.TRAINING_OPTION_DICT_KEY][trainval_io.SOUNDING_FIELDS_KEY] = None

    # Each of these entries is a single matrix, so it is wrapped in a list
    # for denormalization and unwrapped afterwards.
    for this_key in (
            novelty_detection.NOVEL_IMAGES_UPCONV_KEY,
            novelty_detection.NOVEL_IMAGES_UPCONV_SVD_KEY
    ):
        novelty_dict[this_key] = model_interpretation.denormalize_data(
            list_of_input_matrices=[novelty_dict[this_key]],
            model_metadata_dict=cnn_metadata_dict
        )[0]

    print('Writing results to: "{0:s}"...'.format(output_file_name))
    novelty_detection.write_standard_file(novelty_dict=novelty_dict,
                                          pickle_file_name=output_file_name)
예제 #15
0
def _run(input_model_file_name, radar_field_name_by_channel,
         layer_op_name_by_channel, min_height_by_channel_m_agl,
         max_height_by_channel_m_agl, sounding_field_names,
         normalization_type_string, normalization_param_file_name,
         min_normalized_value, max_normalized_value, target_name,
         shuffle_target, downsampling_classes, downsampling_fractions,
         monitor_string, weight_loss_function, x_translations_pixels,
         y_translations_pixels, ccw_rotation_angles_deg,
         noise_standard_deviation, num_noisings, flip_in_x, flip_in_y,
         top_training_dir_name, first_training_time_string,
         last_training_time_string, num_examples_per_train_batch,
         top_validation_dir_name, first_validation_time_string,
         last_validation_time_string, num_examples_per_validn_batch,
         num_epochs, num_training_batches_per_epoch,
         num_validation_batches_per_epoch, output_dir_name):
    """Trains CNN with 2-D GridRad images.

    This is effectively the main method.

    Steps, in order: convert time strings to Unix seconds; replace "unused"
    argument values with None; build one layer-operation dictionary per
    channel; find training and validation files; read and re-compile the
    model; write metadata to `output_dir_name`; train the model.

    :param input_model_file_name: See documentation at top of file.
    :param radar_field_name_by_channel: Same.
    :param layer_op_name_by_channel: Same.
    :param min_height_by_channel_m_agl: Same.
    :param max_height_by_channel_m_agl: Same.
    :param sounding_field_names: Same.
    :param normalization_type_string: Same.
    :param normalization_param_file_name: Same.
    :param min_normalized_value: Same.
    :param max_normalized_value: Same.
    :param target_name: Same.
    :param shuffle_target: Same.
    :param downsampling_classes: Same.
    :param downsampling_fractions: Same.
    :param monitor_string: Same.
    :param weight_loss_function: Same.
    :param x_translations_pixels: Same.
    :param y_translations_pixels: Same.
    :param ccw_rotation_angles_deg: Same.
    :param noise_standard_deviation: Same.
    :param num_noisings: Same.
    :param flip_in_x: Same.
    :param flip_in_y: Same.
    :param top_training_dir_name: Same.
    :param first_training_time_string: Same.
    :param last_training_time_string: Same.
    :param num_examples_per_train_batch: Same.
    :param top_validation_dir_name: Same.
    :param first_validation_time_string: Same.
    :param last_validation_time_string: Same.
    :param num_examples_per_validn_batch: Same.
    :param num_epochs: Same.
    :param num_training_batches_per_epoch: Same.
    :param num_validation_batches_per_epoch: Same.
    :param output_dir_name: Same.
    """

    # Process input args.
    first_training_time_unix_sec = time_conversion.string_to_unix_sec(
        first_training_time_string, TIME_FORMAT)
    last_training_time_unix_sec = time_conversion.string_to_unix_sec(
        last_training_time_string, TIME_FORMAT)

    if top_validation_dir_name in ['', 'None']:
        # No validation data: disable validation entirely.
        top_validation_dir_name = None
        num_validation_batches_per_epoch = 0
        first_validation_time_unix_sec = 0
        last_validation_time_unix_sec = 0
    else:
        first_validation_time_unix_sec = time_conversion.string_to_unix_sec(
            first_validation_time_string, TIME_FORMAT)
        last_validation_time_unix_sec = time_conversion.string_to_unix_sec(
            last_validation_time_string, TIME_FORMAT)

    if sounding_field_names[0] in ['', 'None']:
        sounding_field_names = None

    if len(downsampling_classes) > 1:
        class_to_sampling_fraction_dict = dict(
            list(zip(downsampling_classes, downsampling_fractions)))
    else:
        class_to_sampling_fraction_dict = None

    # Translation augmentation is switched off only when the lone (x, y) pair
    # is exactly (0, 0).  Each component is tested separately, because testing
    # the sum would also switch it off for real shifts such as (3, -3).
    if (len(x_translations_pixels) == 1
            and len(y_translations_pixels) == 1
            and x_translations_pixels[0] == 0
            and y_translations_pixels[0] == 0):
        x_translations_pixels = None
        y_translations_pixels = None

    if len(ccw_rotation_angles_deg) == 1 and ccw_rotation_angles_deg[0] == 0:
        ccw_rotation_angles_deg = None

    if num_noisings <= 0:
        num_noisings = 0
        noise_standard_deviation = None

    # Every per-channel argument must have one entry per radar channel.
    num_channels = len(radar_field_name_by_channel)
    expected_dimensions = numpy.array([num_channels], dtype=int)

    error_checking.assert_is_numpy_array(numpy.array(layer_op_name_by_channel),
                                         exact_dimensions=expected_dimensions)

    error_checking.assert_is_numpy_array(min_height_by_channel_m_agl,
                                         exact_dimensions=expected_dimensions)
    error_checking.assert_is_numpy_array(max_height_by_channel_m_agl,
                                         exact_dimensions=expected_dimensions)

    list_of_layer_operation_dicts = [
        {
            input_examples.RADAR_FIELD_KEY: radar_field_name_by_channel[m],
            input_examples.OPERATION_NAME_KEY: layer_op_name_by_channel[m],
            input_examples.MIN_HEIGHT_KEY: min_height_by_channel_m_agl[m],
            input_examples.MAX_HEIGHT_KEY: max_height_by_channel_m_agl[m]
        }
        for m in range(num_channels)
    ]

    # Set output locations.
    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    output_model_file_name = '{0:s}/model.h5'.format(output_dir_name)
    history_file_name = '{0:s}/model_history.csv'.format(output_dir_name)
    tensorboard_dir_name = '{0:s}/tensorboard'.format(output_dir_name)
    model_metafile_name = '{0:s}/model_metadata.p'.format(output_dir_name)

    # Find training and validation files.
    training_file_names = input_examples.find_many_example_files(
        top_directory_name=top_training_dir_name,
        shuffled=True,
        first_batch_number=FIRST_BATCH_NUMBER,
        last_batch_number=LAST_BATCH_NUMBER,
        raise_error_if_any_missing=False)

    if top_validation_dir_name is None:
        validation_file_names = []
    else:
        validation_file_names = input_examples.find_many_example_files(
            top_directory_name=top_validation_dir_name,
            shuffled=True,
            first_batch_number=FIRST_BATCH_NUMBER,
            last_batch_number=LAST_BATCH_NUMBER,
            raise_error_if_any_missing=False)

    # Read architecture.
    print(
        'Reading architecture from: "{0:s}"...'.format(input_model_file_name))
    model_object = cnn.read_model(input_model_file_name)
    # model_object = clone_model(model_object)

    # TODO(thunderhoser): This is a HACK.
    model_object.compile(loss=keras.losses.binary_crossentropy,
                         optimizer=keras.optimizers.Adam(),
                         metrics=cnn_setup.DEFAULT_METRIC_FUNCTION_LIST)

    print(SEPARATOR_STRING)
    model_object.summary()
    print(SEPARATOR_STRING)

    # NOTE(review): This prints weights of a layer with a hard-coded name, so
    # it looks like a debugging aid and presumably fails for models without a
    # layer called "radar_conv2d_2" -- confirm whether it is still wanted.
    print(K.eval(model_object.get_layer(name='radar_conv2d_2').weights[0]))
    print(SEPARATOR_STRING)

    # Write metadata.
    metadata_dict = {
        cnn.NUM_EPOCHS_KEY: num_epochs,
        cnn.NUM_TRAINING_BATCHES_KEY: num_training_batches_per_epoch,
        cnn.NUM_VALIDATION_BATCHES_KEY: num_validation_batches_per_epoch,
        cnn.MONITOR_STRING_KEY: monitor_string,
        cnn.WEIGHT_LOSS_FUNCTION_KEY: weight_loss_function,
        cnn.CONV_2D3D_KEY: False,
        cnn.VALIDATION_FILES_KEY: validation_file_names,
        cnn.FIRST_VALIDN_TIME_KEY: first_validation_time_unix_sec,
        cnn.LAST_VALIDN_TIME_KEY: last_validation_time_unix_sec,
        cnn.NUM_EX_PER_VALIDN_BATCH_KEY: num_examples_per_validn_batch
    }

    # Grid size is taken from the model's first input tensor.
    input_tensor = model_object.input
    if isinstance(input_tensor, list):
        input_tensor = input_tensor[0]

    num_grid_rows = input_tensor.get_shape().as_list()[1]
    num_grid_columns = input_tensor.get_shape().as_list()[2]

    training_option_dict = {
        trainval_io.EXAMPLE_FILES_KEY: training_file_names,
        trainval_io.TARGET_NAME_KEY: target_name,
        trainval_io.SHUFFLE_TARGET_KEY: shuffle_target,
        trainval_io.FIRST_STORM_TIME_KEY: first_training_time_unix_sec,
        trainval_io.LAST_STORM_TIME_KEY: last_training_time_unix_sec,
        trainval_io.NUM_EXAMPLES_PER_BATCH_KEY: num_examples_per_train_batch,
        trainval_io.SOUNDING_FIELDS_KEY: sounding_field_names,
        trainval_io.SOUNDING_HEIGHTS_KEY: SOUNDING_HEIGHTS_M_AGL,
        trainval_io.NUM_ROWS_KEY: num_grid_rows,
        trainval_io.NUM_COLUMNS_KEY: num_grid_columns,
        trainval_io.NORMALIZATION_TYPE_KEY: normalization_type_string,
        trainval_io.NORMALIZATION_FILE_KEY: normalization_param_file_name,
        trainval_io.MIN_NORMALIZED_VALUE_KEY: min_normalized_value,
        trainval_io.MAX_NORMALIZED_VALUE_KEY: max_normalized_value,
        trainval_io.BINARIZE_TARGET_KEY: False,
        trainval_io.SAMPLING_FRACTIONS_KEY: class_to_sampling_fraction_dict,
        trainval_io.LOOP_ONCE_KEY: False,
        trainval_io.X_TRANSLATIONS_KEY: x_translations_pixels,
        trainval_io.Y_TRANSLATIONS_KEY: y_translations_pixels,
        trainval_io.ROTATION_ANGLES_KEY: ccw_rotation_angles_deg,
        trainval_io.NOISE_STDEV_KEY: noise_standard_deviation,
        trainval_io.NUM_NOISINGS_KEY: num_noisings,
        trainval_io.FLIP_X_KEY: flip_in_x,
        trainval_io.FLIP_Y_KEY: flip_in_y
    }

    print('Writing metadata to: "{0:s}"...'.format(model_metafile_name))
    cnn.write_model_metadata(
        pickle_file_name=model_metafile_name,
        metadata_dict=metadata_dict,
        training_option_dict=training_option_dict,
        list_of_layer_operation_dicts=list_of_layer_operation_dicts)

    cnn.train_cnn_gridrad_2d_reduced(
        model_object=model_object,
        model_file_name=output_model_file_name,
        history_file_name=history_file_name,
        tensorboard_dir_name=tensorboard_dir_name,
        num_epochs=num_epochs,
        num_training_batches_per_epoch=num_training_batches_per_epoch,
        training_option_dict=training_option_dict,
        list_of_layer_operation_dicts=list_of_layer_operation_dicts,
        monitor_string=monitor_string,
        weight_loss_function=weight_loss_function,
        num_validation_batches_per_epoch=num_validation_batches_per_epoch,
        validation_file_names=validation_file_names,
        first_validn_time_unix_sec=first_validation_time_unix_sec,
        last_validn_time_unix_sec=last_validation_time_unix_sec,
        num_examples_per_validn_batch=num_examples_per_validn_batch)
def _run(input_model_file_name, radar_field_names, sounding_field_names,
         normalization_type_string, normalization_param_file_name,
         min_normalized_value, max_normalized_value, downsampling_keys,
         downsampling_fractions, monitor_string, weight_loss_function,
         refl_masking_threshold_dbz, x_translations_pixels,
         y_translations_pixels, ccw_rotation_angles_deg,
         noise_standard_deviation, num_noisings, flip_in_x, flip_in_y,
         top_training_dir_name, first_training_time_string,
         last_training_time_string, top_validation_dir_name,
         first_validation_time_string, last_validation_time_string,
         num_examples_per_batch, num_epochs, num_training_batches_per_epoch,
         num_validation_batches_per_epoch, output_dir_name):
    """Trains CNN with native (3-D) GridRad images.

    This is effectively the main method.

    Steps, in order: convert time strings to Unix seconds; replace "unused"
    argument values with None; find training and validation files; read,
    clone and re-compile the model; write metadata to `output_dir_name`;
    train the model.  The target name is read from the first training file.

    :param input_model_file_name: See documentation at top of file.
    :param radar_field_names: Same.
    :param sounding_field_names: Same.
    :param normalization_type_string: Same.
    :param normalization_param_file_name: Same.
    :param min_normalized_value: Same.
    :param max_normalized_value: Same.
    :param downsampling_keys: Same.
    :param downsampling_fractions: Same.
    :param monitor_string: Same.
    :param weight_loss_function: Same.
    :param refl_masking_threshold_dbz: Same.
    :param x_translations_pixels: Same.
    :param y_translations_pixels: Same.
    :param ccw_rotation_angles_deg: Same.
    :param noise_standard_deviation: Same.
    :param num_noisings: Same.
    :param flip_in_x: Same.
    :param flip_in_y: Same.
    :param top_training_dir_name: Same.
    :param first_training_time_string: Same.
    :param last_training_time_string: Same.
    :param top_validation_dir_name: Same.
    :param first_validation_time_string: Same.
    :param last_validation_time_string: Same.
    :param num_examples_per_batch: Same.
    :param num_epochs: Same.
    :param num_training_batches_per_epoch: Same.
    :param num_validation_batches_per_epoch: Same.
    :param output_dir_name: Same.
    """

    # Process input args.
    first_training_time_unix_sec = time_conversion.string_to_unix_sec(
        first_training_time_string, TIME_FORMAT)
    last_training_time_unix_sec = time_conversion.string_to_unix_sec(
        last_training_time_string, TIME_FORMAT)

    first_validation_time_unix_sec = time_conversion.string_to_unix_sec(
        first_validation_time_string, TIME_FORMAT)
    last_validation_time_unix_sec = time_conversion.string_to_unix_sec(
        last_validation_time_string, TIME_FORMAT)

    if sounding_field_names[0] in ['', 'None']:
        sounding_field_names = None

    if len(downsampling_keys) > 1:
        class_to_sampling_fraction_dict = dict(
            zip(downsampling_keys, downsampling_fractions))
    else:
        class_to_sampling_fraction_dict = None

    # Translation augmentation is switched off only when the lone (x, y) pair
    # is exactly (0, 0).  Each component is tested separately, because testing
    # the sum would also switch it off for real shifts such as (3, -3).
    if (len(x_translations_pixels) == 1
            and len(y_translations_pixels) == 1
            and x_translations_pixels[0] == 0
            and y_translations_pixels[0] == 0):
        x_translations_pixels = None
        y_translations_pixels = None

    if len(ccw_rotation_angles_deg) == 1 and ccw_rotation_angles_deg[0] == 0:
        ccw_rotation_angles_deg = None

    if num_noisings <= 0:
        num_noisings = 0
        noise_standard_deviation = None

    # Set output locations.
    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    output_model_file_name = '{0:s}/model.h5'.format(output_dir_name)
    history_file_name = '{0:s}/model_history.csv'.format(output_dir_name)
    tensorboard_dir_name = '{0:s}/tensorboard'.format(output_dir_name)
    model_metafile_name = '{0:s}/model_metadata.p'.format(output_dir_name)

    # Find training and validation files.
    training_file_names = input_examples.find_many_example_files(
        top_directory_name=top_training_dir_name,
        shuffled=True,
        first_batch_number=FIRST_BATCH_NUMBER,
        last_batch_number=LAST_BATCH_NUMBER,
        raise_error_if_any_missing=False)

    validation_file_names = input_examples.find_many_example_files(
        top_directory_name=top_validation_dir_name,
        shuffled=True,
        first_batch_number=FIRST_BATCH_NUMBER,
        last_batch_number=LAST_BATCH_NUMBER,
        raise_error_if_any_missing=False)

    # Read architecture.  `print` is called as a function with a single
    # argument, which works under both Python 2 and Python 3.
    print(
        'Reading architecture from: "{0:s}"...'.format(input_model_file_name))
    model_object = cnn.read_model(input_model_file_name)
    model_object = keras.models.clone_model(model_object)

    # TODO(thunderhoser): This is a HACK.
    model_object.compile(loss=keras.losses.binary_crossentropy,
                         optimizer=keras.optimizers.Adam(),
                         metrics=cnn_setup.DEFAULT_METRIC_FUNCTION_LIST)

    print(SEPARATOR_STRING)
    model_object.summary()
    print(SEPARATOR_STRING)

    # Write metadata.  The target name comes from the first training file.
    this_example_dict = input_examples.read_example_file(
        netcdf_file_name=training_file_names[0], metadata_only=True)
    target_name = this_example_dict[input_examples.TARGET_NAME_KEY]

    metadata_dict = {
        cnn.TARGET_NAME_KEY: target_name,
        cnn.NUM_EPOCHS_KEY: num_epochs,
        cnn.NUM_TRAINING_BATCHES_KEY: num_training_batches_per_epoch,
        cnn.NUM_VALIDATION_BATCHES_KEY: num_validation_batches_per_epoch,
        cnn.MONITOR_STRING_KEY: monitor_string,
        cnn.WEIGHT_LOSS_FUNCTION_KEY: weight_loss_function,
        cnn.USE_2D3D_CONVOLUTION_KEY: False,
        cnn.VALIDATION_FILES_KEY: validation_file_names,
        cnn.FIRST_VALIDN_TIME_KEY: first_validation_time_unix_sec,
        cnn.LAST_VALIDN_TIME_KEY: last_validation_time_unix_sec
    }

    # Grid size is taken from the model's first input tensor.
    input_tensor = model_object.input
    if isinstance(input_tensor, list):
        input_tensor = input_tensor[0]

    num_grid_rows = input_tensor.get_shape().as_list()[1]
    num_grid_columns = input_tensor.get_shape().as_list()[2]

    training_option_dict = {
        trainval_io.EXAMPLE_FILES_KEY: training_file_names,
        trainval_io.FIRST_STORM_TIME_KEY: first_training_time_unix_sec,
        trainval_io.LAST_STORM_TIME_KEY: last_training_time_unix_sec,
        trainval_io.NUM_EXAMPLES_PER_BATCH_KEY: num_examples_per_batch,
        trainval_io.RADAR_FIELDS_KEY: radar_field_names,
        trainval_io.RADAR_HEIGHTS_KEY: RADAR_HEIGHTS_M_AGL,
        trainval_io.SOUNDING_FIELDS_KEY: sounding_field_names,
        trainval_io.SOUNDING_HEIGHTS_KEY: SOUNDING_HEIGHTS_M_AGL,
        trainval_io.NUM_ROWS_KEY: num_grid_rows,
        trainval_io.NUM_COLUMNS_KEY: num_grid_columns,
        trainval_io.NORMALIZATION_TYPE_KEY: normalization_type_string,
        trainval_io.NORMALIZATION_FILE_KEY: normalization_param_file_name,
        trainval_io.MIN_NORMALIZED_VALUE_KEY: min_normalized_value,
        trainval_io.MAX_NORMALIZED_VALUE_KEY: max_normalized_value,
        trainval_io.BINARIZE_TARGET_KEY: False,
        trainval_io.SAMPLING_FRACTIONS_KEY: class_to_sampling_fraction_dict,
        trainval_io.LOOP_ONCE_KEY: False,
        trainval_io.REFLECTIVITY_MASK_KEY: refl_masking_threshold_dbz,
        trainval_io.X_TRANSLATIONS_KEY: x_translations_pixels,
        trainval_io.Y_TRANSLATIONS_KEY: y_translations_pixels,
        trainval_io.ROTATION_ANGLES_KEY: ccw_rotation_angles_deg,
        trainval_io.NOISE_STDEV_KEY: noise_standard_deviation,
        trainval_io.NUM_NOISINGS_KEY: num_noisings,
        trainval_io.FLIP_X_KEY: flip_in_x,
        trainval_io.FLIP_Y_KEY: flip_in_y
    }

    print('Writing metadata to: "{0:s}"...'.format(model_metafile_name))
    cnn.write_model_metadata(pickle_file_name=model_metafile_name,
                             metadata_dict=metadata_dict,
                             training_option_dict=training_option_dict)

    cnn.train_cnn_2d_or_3d(
        model_object=model_object,
        model_file_name=output_model_file_name,
        history_file_name=history_file_name,
        tensorboard_dir_name=tensorboard_dir_name,
        num_epochs=num_epochs,
        num_training_batches_per_epoch=num_training_batches_per_epoch,
        training_option_dict=training_option_dict,
        monitor_string=monitor_string,
        weight_loss_function=weight_loss_function,
        num_validation_batches_per_epoch=num_validation_batches_per_epoch,
        validation_file_names=validation_file_names,
        first_validn_time_unix_sec=first_validation_time_unix_sec,
        last_validn_time_unix_sec=last_validation_time_unix_sec)
예제 #17
0
def _run(model_file_name, top_example_dir_name, storm_metafile_name,
         output_dir_name):
    """Applies trained CNN to specific examples and writes the predictions.

    This is effectively the main method.

    :param model_file_name: See documentation at top of file.
    :param top_example_dir_name: Same.
    :param storm_metafile_name: Same.
    :param output_dir_name: Same.
    :raises: ValueError: if the model has more than two output neurons (which
        suggests multi-class classification).
    """

    print('Reading CNN from: "{0:s}"...'.format(model_file_name))
    model_object = cnn.read_model(model_file_name)

    output_dimensions = model_object.layers[-1].output.get_shape().as_list()
    num_output_neurons = output_dimensions[-1]

    if num_output_neurons > 2:
        raise ValueError((
            'The model has {0:d} output neurons, which suggests {0:d}-class '
            'classification.  This script handles only binary classification.'
        ).format(num_output_neurons))

    # Wrap a lone input tensor in a list, so that both cases (one input tensor
    # or several) can be handled the same way.
    input_tensors = model_object.input
    if not isinstance(input_tensors, list):
        input_tensors = [input_tensors]

    # A model with one input tensor, having exactly one dimension between the
    # first and last axes, is treated as a soundings-only model.
    soundings_only = False

    if len(input_tensors) == 1:
        spatial_dimensions = numpy.array(
            input_tensors[0].get_shape().as_list()[1:-1], dtype=int
        )
        soundings_only = len(spatial_dimensions) == 1

    cnn_metafile_name = cnn.find_metafile(
        model_file_name=model_file_name, raise_error_if_missing=True)

    print('Reading CNN metadata from: "{0:s}"...'.format(cnn_metafile_name))
    cnn_metadata_dict = cnn.read_model_metadata(cnn_metafile_name)

    print('Reading storm metadata from: "{0:s}"...'.format(storm_metafile_name))
    desired_full_id_strings, desired_times_unix_sec = (
        tracking_io.read_ids_and_times(storm_metafile_name)
    )

    # Find one example file for each SPC date containing a desired time.
    unique_spc_date_strings = list(set(
        time_conversion.time_to_spc_date_string(t)
        for t in desired_times_unix_sec
    ))

    example_file_names = []

    for this_spc_date_string in unique_spc_date_strings:
        example_file_names.append(input_examples.find_example_file(
            top_directory_name=top_example_dir_name, shuffled=False,
            spc_date_string=this_spc_date_string, raise_error_if_missing=True
        ))

    first_spc_date_string = time_conversion.time_to_spc_date_string(
        numpy.min(desired_times_unix_sec)
    )
    last_spc_date_string = time_conversion.time_to_spc_date_string(
        numpy.max(desired_times_unix_sec)
    )

    # Reuse the training options, overriding the files, time period, and batch
    # size.
    training_option_dict = cnn_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY]
    training_option_dict.update({
        trainval_io.EXAMPLE_FILES_KEY: example_file_names,
        trainval_io.FIRST_STORM_TIME_KEY:
            time_conversion.get_start_of_spc_date(first_spc_date_string),
        trainval_io.LAST_STORM_TIME_KEY:
            time_conversion.get_end_of_spc_date(last_spc_date_string),
        trainval_io.NUM_EXAMPLES_PER_BATCH_KEY: NUM_EXAMPLES_PER_BATCH
    })

    # Every generator takes these three arguments.
    generator_kwargs = {
        'option_dict': training_option_dict,
        'desired_full_id_strings': desired_full_id_strings,
        'desired_times_unix_sec': desired_times_unix_sec
    }

    if soundings_only:
        generator_object = testing_io.sounding_generator(**generator_kwargs)
    elif cnn_metadata_dict[cnn.LAYER_OPERATIONS_KEY] is not None:
        generator_object = testing_io.gridrad_generator_2d_reduced(
            list_of_operation_dicts=cnn_metadata_dict[
                cnn.LAYER_OPERATIONS_KEY],
            **generator_kwargs
        )
    elif cnn_metadata_dict[cnn.CONV_2D3D_KEY]:
        generator_object = testing_io.myrorss_generator_2d3d(
            **generator_kwargs)
    else:
        generator_object = testing_io.generator_2d_or_3d(**generator_kwargs)

    include_soundings = (
        training_option_dict[trainval_io.SOUNDING_FIELDS_KEY] is not None
    )

    # Accumulators (one entry or row per storm object).
    full_storm_id_strings = []
    storm_times_unix_sec = numpy.array([], dtype=int)
    observed_labels = numpy.array([], dtype=int)
    class_probability_matrix = None

    for this_storm_object_dict in generator_object:
        print(SEPARATOR_STRING)

        full_storm_id_strings.extend(
            this_storm_object_dict[testing_io.FULL_IDS_KEY]
        )
        storm_times_unix_sec = numpy.concatenate((
            storm_times_unix_sec,
            this_storm_object_dict[testing_io.STORM_TIMES_KEY]
        ))
        observed_labels = numpy.concatenate((
            observed_labels,
            this_storm_object_dict[testing_io.TARGET_ARRAY_KEY]
        ))

        if soundings_only:
            these_predictor_matrices = [
                this_storm_object_dict[testing_io.SOUNDING_MATRIX_KEY]
            ]
        else:
            these_predictor_matrices = this_storm_object_dict[
                testing_io.INPUT_MATRICES_KEY]

        # If the model uses soundings, they are the last predictor matrix.
        this_sounding_matrix = (
            these_predictor_matrices[-1] if include_soundings else None
        )

        if soundings_only:
            this_probability_matrix = cnn.apply_cnn_soundings_only(
                model_object=model_object,
                sounding_matrix=this_sounding_matrix, verbose=True)
        elif (cnn_metadata_dict[cnn.CONV_2D3D_KEY] and not
              training_option_dict[trainval_io.UPSAMPLE_REFLECTIVITY_KEY]):
            this_probability_matrix = cnn.apply_2d3d_cnn(
                model_object=model_object,
                reflectivity_matrix_dbz=these_predictor_matrices[0],
                azimuthal_shear_matrix_s01=these_predictor_matrices[1],
                sounding_matrix=this_sounding_matrix, verbose=True)
        else:
            # Either a plain 2-D/3-D model or a 2D3D model with upsampled
            # reflectivity; both take a single radar matrix.
            this_probability_matrix = cnn.apply_2d_or_3d_cnn(
                model_object=model_object,
                radar_image_matrix=these_predictor_matrices[0],
                sounding_matrix=this_sounding_matrix, verbose=True)

        print(SEPARATOR_STRING)

        if class_probability_matrix is None:
            # Adding zero creates a new array, rather than an alias.
            class_probability_matrix = this_probability_matrix + 0.
            continue

        class_probability_matrix = numpy.concatenate(
            (class_probability_matrix, this_probability_matrix), axis=0
        )

    output_file_name = prediction_io.find_ungridded_file(
        directory_name=output_dir_name, raise_error_if_missing=False)

    print('Writing results to: "{0:s}"...'.format(output_file_name))

    prediction_io.write_ungridded_predictions(
        netcdf_file_name=output_file_name,
        class_probability_matrix=class_probability_matrix,
        observed_labels=observed_labels, storm_ids=full_storm_id_strings,
        storm_times_unix_sec=storm_times_unix_sec,
        target_name=training_option_dict[trainval_io.TARGET_NAME_KEY],
        model_file_name=model_file_name
    )
def _run(upconvnet_file_name, storm_metafile_name, num_examples,
         top_example_dir_name, top_output_dir_name):
    """Plots upconvnet reconstructions of many examples (storm objects).

    This is effectively the main method.

    Actual radar images are plotted in "<top_output_dir_name>/actual_images"
    and reconstructed images in "<top_output_dir_name>/reconstructed_images".

    :param upconvnet_file_name: See documentation at top of file.
    :param storm_metafile_name: Same.
    :param num_examples: Same.
    :param top_example_dir_name: Same.
    :param top_output_dir_name: Same.
    """

    # NOTE: `print` is always called with one parenthesized argument, so the
    # same text is printed under both Python 2 and Python 3.
    print('Reading trained upconvnet from: "{0:s}"...'.format(
        upconvnet_file_name))
    upconvnet_model_object = cnn.read_model(upconvnet_file_name)

    # The metafile is looked for in the same directory as the model file.
    upconvnet_metafile_name = '{0:s}/model_metadata.p'.format(
        os.path.split(upconvnet_file_name)[0]
    )

    print('Reading upconvnet metadata from: "{0:s}"...'.format(
        upconvnet_metafile_name))
    upconvnet_metadata_dict = upconvnet.read_model_metadata(
        upconvnet_metafile_name)
    cnn_file_name = upconvnet_metadata_dict[upconvnet.CNN_FILE_KEY]

    print('Reading trained CNN from: "{0:s}"...'.format(cnn_file_name))
    cnn_model_object = cnn.read_model(cnn_file_name)
    cnn_metafile_name = '{0:s}/model_metadata.p'.format(
        os.path.split(cnn_file_name)[0]
    )

    print('Reading CNN metadata from: "{0:s}"...'.format(cnn_metafile_name))
    cnn_metadata_dict = cnn.read_model_metadata(cnn_metafile_name)
    training_option_dict = cnn_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY]

    print('Reading storm IDs and times from: "{0:s}"...'.format(
        storm_metafile_name))
    storm_ids, storm_times_unix_sec = tracking_io.read_ids_and_times(
        storm_metafile_name)

    # Keep only the first `num_examples` storm objects.  A non-positive value
    # (or one not smaller than the number available) keeps all of them.
    if 0 < num_examples < len(storm_ids):
        storm_ids = storm_ids[:num_examples]
        storm_times_unix_sec = storm_times_unix_sec[:num_examples]

    print(SEPARATOR_STRING)
    list_of_predictor_matrices = testing_io.read_specific_examples(
        desired_storm_ids=storm_ids,
        desired_times_unix_sec=storm_times_unix_sec,
        option_dict=training_option_dict,
        top_example_dir_name=top_example_dir_name,
        list_of_layer_operation_dicts=cnn_metadata_dict[
            cnn.LAYER_OPERATIONS_KEY]
    )[0]
    print(SEPARATOR_STRING)

    # The radar matrix comes first; if the CNN uses soundings, the sounding
    # matrix comes last.
    actual_radar_matrix = list_of_predictor_matrices[0]
    have_soundings = training_option_dict[trainval_io.SOUNDING_FIELDS_KEY]

    if have_soundings:
        sounding_matrix = list_of_predictor_matrices[-1]
    else:
        sounding_matrix = None

    # Run the CNN up to the feature layer, then feed the features to the
    # upconvnet to reconstruct the radar images.
    feature_matrix = cnn.apply_2d_or_3d_cnn(
        model_object=cnn_model_object, radar_image_matrix=actual_radar_matrix,
        sounding_matrix=sounding_matrix, verbose=True, return_features=True,
        feature_layer_name=upconvnet_metadata_dict[
            upconvnet.CNN_FEATURE_LAYER_KEY]
    )
    print('\n')

    reconstructed_radar_matrix = upconvnet.apply_upconvnet(
        model_object=upconvnet_model_object, feature_matrix=feature_matrix,
        verbose=True)
    print('\n')

    print('Denormalizing actual and reconstructed radar images...')

    # Only radar matrices are denormalized and plotted below, so the sounding
    # fields are removed from the metadata.  This also modifies
    # `training_option_dict`, which is the same object.
    cnn_metadata_dict[
        cnn.TRAINING_OPTION_DICT_KEY][trainval_io.SOUNDING_FIELDS_KEY] = None

    actual_radar_matrix = model_interpretation.denormalize_data(
        list_of_input_matrices=[actual_radar_matrix],
        model_metadata_dict=cnn_metadata_dict
    )[0]

    reconstructed_radar_matrix = model_interpretation.denormalize_data(
        list_of_input_matrices=[reconstructed_radar_matrix],
        model_metadata_dict=cnn_metadata_dict
    )[0]

    print(SEPARATOR_STRING)

    actual_output_dir_name = '{0:s}/actual_images'.format(top_output_dir_name)
    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=actual_output_dir_name)

    # TODO(thunderhoser): Calling a method in another script is hacky.  If this
    # method is going to be reused, should be in a module.
    plot_input_examples.plot_examples(
        list_of_predictor_matrices=[actual_radar_matrix], storm_ids=storm_ids,
        storm_times_unix_sec=storm_times_unix_sec,
        model_metadata_dict=cnn_metadata_dict,
        output_dir_name=actual_output_dir_name)
    print(SEPARATOR_STRING)

    reconstructed_output_dir_name = '{0:s}/reconstructed_images'.format(
        top_output_dir_name)
    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=reconstructed_output_dir_name)

    plot_input_examples.plot_examples(
        list_of_predictor_matrices=[reconstructed_radar_matrix],
        storm_ids=storm_ids, storm_times_unix_sec=storm_times_unix_sec,
        model_metadata_dict=cnn_metadata_dict,
        output_dir_name=reconstructed_output_dir_name)
예제 #19
0
def _run(input_model_file_name, sounding_field_names, normalization_type_string,
         normalization_param_file_name, min_normalized_value,
         max_normalized_value, target_name, downsampling_classes,
         downsampling_fractions, monitor_string, weight_loss_function,
         top_training_dir_name, first_training_time_string,
         last_training_time_string, num_examples_per_train_batch,
         top_validation_dir_name, first_validation_time_string,
         last_validation_time_string, num_examples_per_validn_batch, num_epochs,
         num_training_batches_per_epoch, num_validation_batches_per_epoch,
         output_dir_name):
    """Trains CNN with soundings only.

    This is effectively the main method.

    The trained model, training history, TensorBoard logs, and model metadata
    are all written under `output_dir_name`.

    :param input_model_file_name: See documentation at top of file.
    :param sounding_field_names: Same.
    :param normalization_type_string: Same.
    :param normalization_param_file_name: Same.
    :param min_normalized_value: Same.
    :param max_normalized_value: Same.
    :param target_name: Same.
    :param downsampling_classes: Same.
    :param downsampling_fractions: Same.
    :param monitor_string: Same.
    :param weight_loss_function: Same.
    :param top_training_dir_name: Same.
    :param first_training_time_string: Same.
    :param last_training_time_string: Same.
    :param num_examples_per_train_batch: Same.
    :param top_validation_dir_name: Same.
    :param first_validation_time_string: Same.
    :param last_validation_time_string: Same.
    :param num_examples_per_validn_batch: Same.
    :param num_epochs: Same.
    :param num_training_batches_per_epoch: Same.
    :param num_validation_batches_per_epoch: Same.
    :param output_dir_name: Same.
    """

    # Convert time strings to Unix seconds.
    (
        first_training_time_unix_sec, last_training_time_unix_sec,
        first_validation_time_unix_sec, last_validation_time_unix_sec
    ) = [
        time_conversion.string_to_unix_sec(this_string, TIME_FORMAT)
        for this_string in (
            first_training_time_string, last_training_time_string,
            first_validation_time_string, last_validation_time_string
        )
    ]

    # Downsampling is used only if more than one class is given.
    downsampling_dict = None

    if len(downsampling_classes) > 1:
        downsampling_dict = dict(
            zip(downsampling_classes, downsampling_fractions)
        )

    # Create output directory and decide where each output goes.
    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    model_metafile_name = '{0:s}/model_metadata.p'.format(output_dir_name)
    tensorboard_dir_name = '{0:s}/tensorboard'.format(output_dir_name)
    history_file_name = '{0:s}/model_history.csv'.format(output_dir_name)
    output_model_file_name = '{0:s}/model.h5'.format(output_dir_name)

    # Find training and validation files (same search options for both).
    file_finding_kwargs = {
        'shuffled': True,
        'first_batch_number': FIRST_BATCH_NUMBER,
        'last_batch_number': LAST_BATCH_NUMBER,
        'raise_error_if_any_missing': False
    }

    training_file_names = input_examples.find_many_example_files(
        top_directory_name=top_training_dir_name, **file_finding_kwargs)

    validation_file_names = input_examples.find_many_example_files(
        top_directory_name=top_validation_dir_name, **file_finding_kwargs)

    # Read architecture.
    print('Reading architecture from: "{0:s}"...'.format(input_model_file_name))
    model_object = cnn.read_model(input_model_file_name)

    # TODO(thunderhoser): This is a HACK.
    model_object.compile(
        loss=keras.losses.binary_crossentropy,
        optimizer=keras.optimizers.Adam(),
        metrics=cnn_setup.DEFAULT_METRIC_FUNCTION_LIST
    )

    print(SEPARATOR_STRING)
    model_object.summary()
    print(SEPARATOR_STRING)

    # Assemble and write metadata.
    training_option_dict = {
        trainval_io.EXAMPLE_FILES_KEY: training_file_names,
        trainval_io.NUM_EXAMPLES_PER_BATCH_KEY: num_examples_per_train_batch,
        trainval_io.FIRST_STORM_TIME_KEY: first_training_time_unix_sec,
        trainval_io.LAST_STORM_TIME_KEY: last_training_time_unix_sec,
        trainval_io.SOUNDING_FIELDS_KEY: sounding_field_names,
        trainval_io.SOUNDING_HEIGHTS_KEY: SOUNDING_HEIGHTS_M_AGL,
        trainval_io.NORMALIZATION_TYPE_KEY: normalization_type_string,
        trainval_io.NORMALIZATION_FILE_KEY: normalization_param_file_name,
        trainval_io.MIN_NORMALIZED_VALUE_KEY: min_normalized_value,
        trainval_io.MAX_NORMALIZED_VALUE_KEY: max_normalized_value,
        trainval_io.TARGET_NAME_KEY: target_name,
        trainval_io.BINARIZE_TARGET_KEY: False,
        trainval_io.SAMPLING_FRACTIONS_KEY: downsampling_dict,
        trainval_io.LOOP_ONCE_KEY: False
    }

    metadata_dict = {
        cnn.NUM_EPOCHS_KEY: num_epochs,
        cnn.NUM_TRAINING_BATCHES_KEY: num_training_batches_per_epoch,
        cnn.NUM_VALIDATION_BATCHES_KEY: num_validation_batches_per_epoch,
        cnn.MONITOR_STRING_KEY: monitor_string,
        cnn.WEIGHT_LOSS_FUNCTION_KEY: weight_loss_function,
        cnn.CONV_2D3D_KEY: False,
        cnn.VALIDATION_FILES_KEY: validation_file_names,
        cnn.FIRST_VALIDN_TIME_KEY: first_validation_time_unix_sec,
        cnn.LAST_VALIDN_TIME_KEY: last_validation_time_unix_sec,
        cnn.NUM_EX_PER_VALIDN_BATCH_KEY: num_examples_per_validn_batch
    }

    print('Writing metadata to: "{0:s}"...'.format(model_metafile_name))
    cnn.write_model_metadata(
        pickle_file_name=model_metafile_name,
        metadata_dict=metadata_dict,
        training_option_dict=training_option_dict
    )

    # Train the model.
    cnn.train_cnn_with_soundings(
        model_object=model_object,
        model_file_name=output_model_file_name,
        history_file_name=history_file_name,
        tensorboard_dir_name=tensorboard_dir_name,
        num_epochs=num_epochs,
        num_training_batches_per_epoch=num_training_batches_per_epoch,
        training_option_dict=training_option_dict,
        monitor_string=monitor_string,
        weight_loss_function=weight_loss_function,
        num_validation_batches_per_epoch=num_validation_batches_per_epoch,
        validation_file_names=validation_file_names,
        first_validn_time_unix_sec=first_validation_time_unix_sec,
        last_validn_time_unix_sec=last_validation_time_unix_sec,
        num_examples_per_validn_batch=num_examples_per_validn_batch
    )
예제 #20
0
def _run(model_file_name, target_class, target_layer_name,
         top_example_dir_name, storm_metafile_name, num_examples,
         randomize_weights, cascading_random, output_file_name):
    """Runs Grad-CAM (gradient-weighted class-activation maps).

    This is effectively the main method.

    If `randomize_weights` is False, Grad-CAM is run once with the model as
    read from file.  Otherwise, it is run once per conv or dense layer, with
    weights reset in that layer, and each result goes to its own output file.

    :param model_file_name: See documentation at top of file.
    :param target_class: Same.
    :param target_layer_name: Same.
    :param top_example_dir_name: Same.
    :param storm_metafile_name: Same.
    :param num_examples: Same.
    :param randomize_weights: Same.
    :param cascading_random: Same.
    :param output_file_name: Same.
    """

    file_system_utils.mkdir_recursive_if_necessary(file_name=output_file_name)

    # Read model and metadata.
    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = cnn.read_model(model_file_name)

    model_dir_name = os.path.split(model_file_name)[0]
    model_metafile_name = '{0:s}/model_metadata.p'.format(model_dir_name)

    print('Reading model metadata from: "{0:s}"...'.format(
        model_metafile_name
    ))
    model_metadata_dict = cnn.read_model_metadata(model_metafile_name)
    training_option_dict = model_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY]
    training_option_dict[trainval_io.REFLECTIVITY_MASK_KEY] = None

    # Break output path into pieces, used to name files for randomized runs.
    output_dir_name, pathless_output_file_name = os.path.split(
        output_file_name)
    extensionless_output_file_name, output_file_extension = (
        os.path.splitext(pathless_output_file_name)
    )

    conv_dense_layer_names = []
    num_sets = 1

    if randomize_weights:
        conv_dense_layer_names = _find_conv_and_dense_layers(model_object)
        conv_dense_layer_names.reverse()
        num_sets = len(conv_dense_layer_names)

    print('Reading storm metadata from: "{0:s}"...'.format(
        storm_metafile_name
    ))
    full_storm_id_strings, storm_times_unix_sec = (
        tracking_io.read_ids_and_times(storm_metafile_name)
    )

    print(SEPARATOR_STRING)

    # Keep only the first `num_examples` storm objects, if applicable.
    if 0 < num_examples < len(full_storm_id_strings):
        full_storm_id_strings = full_storm_id_strings[:num_examples]
        storm_times_unix_sec = storm_times_unix_sec[:num_examples]

    example_dict = testing_io.read_predictors_specific_examples(
        top_example_dir_name=top_example_dir_name,
        desired_full_id_strings=full_storm_id_strings,
        desired_times_unix_sec=storm_times_unix_sec,
        option_dict=training_option_dict,
        layer_operation_dicts=model_metadata_dict[cnn.LAYER_OPERATIONS_KEY]
    )
    print(SEPARATOR_STRING)

    predictor_matrices = example_dict[testing_io.INPUT_MATRICES_KEY]
    sounding_pressure_matrix_pa = example_dict[
        testing_io.SOUNDING_PRESSURES_KEY]

    # Denormalization is done on a deep copy, so that the normalized
    # predictors can still be fed to the model below.
    print('Denormalizing model inputs...')
    denorm_predictor_matrices = model_interpretation.denormalize_data(
        list_of_input_matrices=trainval_io.separate_shear_and_reflectivity(
            list_of_input_matrices=copy.deepcopy(predictor_matrices),
            training_option_dict=training_option_dict
        ),
        model_metadata_dict=model_metadata_dict
    )
    print(SEPARATOR_STRING)

    for k in range(num_sets):
        if not randomize_weights:
            this_model_object = model_object
            this_output_file_name = output_file_name

        elif cascading_random:
            # Weights are reset in the original model, so layers reset in
            # earlier iterations stay reset.
            _reset_weights_in_layer(
                model_object=model_object,
                layer_name=conv_dense_layer_names[k]
            )
            this_model_object = model_object

            this_output_file_name = (
                '{0:s}/{1:s}_cascading-random_{2:s}{3:s}'
            ).format(
                output_dir_name, extensionless_output_file_name,
                conv_dense_layer_names[k].replace('_', '-'),
                output_file_extension
            )

        else:
            # Weights are reset in a copy of the model, so only the [k]th
            # layer differs from the original.
            this_model_object = keras.models.Model.from_config(
                model_object.get_config()
            )
            this_model_object.set_weights(model_object.get_weights())

            _reset_weights_in_layer(
                model_object=this_model_object,
                layer_name=conv_dense_layer_names[k]
            )

            this_output_file_name = '{0:s}/{1:s}_random_{2:s}{3:s}'.format(
                output_dir_name, extensionless_output_file_name,
                conv_dense_layer_names[k].replace('_', '-'),
                output_file_extension
            )

        these_cam_matrices, these_guided_cam_matrices = (
            _run_gradcam_one_weight_set(
                model_object=this_model_object,
                target_class=target_class,
                target_layer_name=target_layer_name,
                predictor_matrices=predictor_matrices,
                training_option_dict=training_option_dict
            )
        )

        print('Writing results to file: "{0:s}"...'.format(
            this_output_file_name
        ))
        gradcam.write_standard_file(
            pickle_file_name=this_output_file_name,
            denorm_predictor_matrices=denorm_predictor_matrices,
            cam_matrices=these_cam_matrices,
            guided_cam_matrices=these_guided_cam_matrices,
            full_storm_id_strings=full_storm_id_strings,
            storm_times_unix_sec=storm_times_unix_sec,
            model_file_name=model_file_name,
            target_class=target_class,
            target_layer_name=target_layer_name,
            sounding_pressure_matrix_pa=sounding_pressure_matrix_pa
        )

        print(SEPARATOR_STRING)
예제 #21
0
def _run(model_file_name, top_example_dir_name, first_spc_date_string,
         last_spc_date_string, num_examples, class_fraction_keys,
         class_fraction_values, num_bootstrap_iters,
         bootstrap_confidence_level, output_file_name):
    """Runs permutation test for predictor importance.

    This is effectively the main method.

    :param model_file_name: See documentation at top of file.
    :param top_example_dir_name: Same.
    :param first_spc_date_string: Same.
    :param last_spc_date_string: Same.
    :param num_examples: Same.
    :param class_fraction_keys: Same.
    :param class_fraction_values: Same.
    :param num_bootstrap_iters: Same.
    :param bootstrap_confidence_level: Same.
    :param output_file_name: Same.
    """

    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = cnn.read_model(model_file_name)

    metadata_file_name = '{0:s}/model_metadata.p'.format(
        os.path.split(model_file_name)[0]
    )

    print('Reading metadata from: "{0:s}"...'.format(metadata_file_name))
    model_metadata_dict = cnn.read_model_metadata(metadata_file_name)
    training_option_dict = model_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY]

    # Class-conditional sampling is used only if more than one class is given.
    class_to_sampling_fraction_dict = None

    if len(class_fraction_keys) > 1:
        class_to_sampling_fraction_dict = dict(
            zip(class_fraction_keys, class_fraction_values)
        )

    training_option_dict[trainval_io.SAMPLING_FRACTIONS_KEY] = (
        class_to_sampling_fraction_dict
    )

    example_file_names = input_examples.find_many_example_files(
        top_directory_name=top_example_dir_name, shuffled=False,
        first_spc_date_string=first_spc_date_string,
        last_spc_date_string=last_spc_date_string,
        raise_error_if_any_missing=False
    )

    training_option_dict[trainval_io.EXAMPLE_FILES_KEY] = example_file_names
    training_option_dict[trainval_io.FIRST_STORM_TIME_KEY] = (
        time_conversion.get_start_of_spc_date(first_spc_date_string)
    )
    training_option_dict[trainval_io.LAST_STORM_TIME_KEY] = (
        time_conversion.get_end_of_spc_date(last_spc_date_string)
    )

    # Every generator takes these two arguments.
    generator_kwargs = {
        'option_dict': training_option_dict,
        'num_examples_total': num_examples
    }

    if model_metadata_dict[cnn.LAYER_OPERATIONS_KEY] is not None:
        generator_object = testing_io.gridrad_generator_2d_reduced(
            list_of_operation_dicts=model_metadata_dict[
                cnn.LAYER_OPERATIONS_KEY],
            **generator_kwargs
        )
    elif model_metadata_dict[cnn.CONV_2D3D_KEY]:
        generator_object = testing_io.myrorss_generator_2d3d(
            **generator_kwargs)
    else:
        generator_object = testing_io.generator_2d_or_3d(**generator_kwargs)

    # Accumulators (one entry per storm object).
    full_id_strings = []
    storm_times_unix_sec = numpy.array([], dtype=int)
    target_values = numpy.array([], dtype=int)
    list_of_predictor_matrices = None

    print(SEPARATOR_STRING)

    # Read at most one batch per example file, stopping early if the generator
    # runs out.
    max_num_batches = len(example_file_names)
    num_batches_read = 0

    while num_batches_read < max_num_batches:
        try:
            this_storm_object_dict = next(generator_object)
            print(SEPARATOR_STRING)
        except StopIteration:
            break

        num_batches_read += 1

        full_id_strings.extend(
            this_storm_object_dict[testing_io.FULL_IDS_KEY]
        )
        storm_times_unix_sec = numpy.concatenate((
            storm_times_unix_sec,
            this_storm_object_dict[testing_io.STORM_TIMES_KEY]
        ))

        # If targets have more than one dimension, reduce each row to the
        # index of its maximum.
        these_target_values = this_storm_object_dict[
            testing_io.TARGET_ARRAY_KEY]

        if len(these_target_values.shape) > 1:
            these_target_values = numpy.argmax(these_target_values, axis=1)

        target_values = numpy.concatenate((target_values, these_target_values))

        these_predictor_matrices = this_storm_object_dict[
            testing_io.INPUT_MATRICES_KEY]

        if list_of_predictor_matrices is None:
            list_of_predictor_matrices = copy.deepcopy(
                these_predictor_matrices)
            continue

        for k, this_matrix in enumerate(list_of_predictor_matrices):
            list_of_predictor_matrices[k] = numpy.concatenate(
                (this_matrix, these_predictor_matrices[k])
            )

    predictor_names_by_matrix = _create_predictor_names(
        model_metadata_dict=model_metadata_dict,
        list_of_predictor_matrices=list_of_predictor_matrices)

    for i, these_predictor_names in enumerate(predictor_names_by_matrix):
        print('Predictors in {0:d}th matrix:\n{1:s}\n'.format(
            i + 1, str(these_predictor_names)
        ))

    print(SEPARATOR_STRING)

    list_of_layer_operation_dicts = model_metadata_dict[
        cnn.LAYER_OPERATIONS_KEY]

    # Correlations are reported only for models with layer operations.
    if list_of_layer_operation_dicts is not None:
        correlation_matrix, predictor_names = _get_pearson_correlations(
            list_of_predictor_matrices=list_of_predictor_matrices,
            predictor_names_by_matrix=predictor_names_by_matrix,
            sounding_heights_m_agl=training_option_dict[
                trainval_io.SOUNDING_HEIGHTS_KEY]
        )

        num_predictors = len(predictor_names)

        # Print only the upper triangle (including the diagonal).
        for i in range(num_predictors):
            for j in range(i, num_predictors):
                this_message_string = (
                    'Pearson correlation between "{0:s}" and "{1:s}" = {2:.4f}'
                ).format(
                    predictor_names[i], predictor_names[j],
                    correlation_matrix[i, j]
                )
                print(this_message_string)

            print('\n')

    # Choose the prediction function that matches the model type.
    if model_metadata_dict[cnn.CONV_2D3D_KEY]:
        prediction_function = permutation.prediction_function_2d3d_cnn
    else:
        # Number of dimensions in the first predictor matrix, minus two.
        num_radar_dimensions = len(list_of_predictor_matrices[0].shape) - 2

        prediction_function = (
            permutation.prediction_function_2d_cnn
            if num_radar_dimensions == 2
            else permutation.prediction_function_3d_cnn
        )

    print(SEPARATOR_STRING)
    result_dict = permutation.run_permutation_test(
        model_object=model_object,
        list_of_input_matrices=list_of_predictor_matrices,
        predictor_names_by_matrix=predictor_names_by_matrix,
        target_values=target_values,
        prediction_function=prediction_function,
        cost_function=permutation.negative_auc_function,
        num_bootstrap_iters=num_bootstrap_iters,
        bootstrap_confidence_level=bootstrap_confidence_level
    )
    print(SEPARATOR_STRING)

    # Attach identifying information to the results before writing them.
    result_dict[permutation.MODEL_FILE_KEY] = model_file_name
    result_dict[permutation.TARGET_VALUES_KEY] = target_values
    result_dict[permutation.FULL_IDS_KEY] = full_id_strings
    result_dict[permutation.STORM_TIMES_KEY] = storm_times_unix_sec

    print('Writing results to: "{0:s}"...'.format(output_file_name))
    permutation.write_results(
        result_dict=result_dict, pickle_file_name=output_file_name
    )
def _run(input_model_file_name, sounding_field_names,
         normalization_type_string, normalization_param_file_name,
         min_normalized_value, max_normalized_value, target_name,
         downsampling_classes, downsampling_fractions, monitor_string,
         weight_loss_function, x_translations_pixels, y_translations_pixels,
         ccw_rotation_angles_deg, noise_standard_deviation, num_noisings,
         flip_in_x, flip_in_y, top_training_dir_name,
         first_training_time_string, last_training_time_string,
         num_examples_per_train_batch, top_validation_dir_name,
         first_validation_time_string, last_validation_time_string,
         num_examples_per_validn_batch, num_epochs,
         num_training_batches_per_epoch, num_validation_batches_per_epoch,
         output_dir_name):
    """Trains CNN with 2-D and 3-D MYRORSS images.

    This is effectively the main method.  It converts the raw input args,
    finds training and validation files, reads and compiles the model
    architecture, writes model metadata, and then starts training.

    :param input_model_file_name: See documentation at top of file.
    :param sounding_field_names: Same.
    :param normalization_type_string: Same.
    :param normalization_param_file_name: Same.
    :param min_normalized_value: Same.
    :param max_normalized_value: Same.
    :param target_name: Same.
    :param downsampling_classes: Same.
    :param downsampling_fractions: Same.
    :param monitor_string: Same.
    :param weight_loss_function: Same.
    :param x_translations_pixels: Same.
    :param y_translations_pixels: Same.
    :param ccw_rotation_angles_deg: Same.
    :param noise_standard_deviation: Same.
    :param num_noisings: Same.
    :param flip_in_x: Same.
    :param flip_in_y: Same.
    :param top_training_dir_name: Same.
    :param first_training_time_string: Same.
    :param last_training_time_string: Same.
    :param num_examples_per_train_batch: Same.
    :param top_validation_dir_name: Same.
    :param first_validation_time_string: Same.
    :param last_validation_time_string: Same.
    :param num_examples_per_validn_batch: Same.
    :param num_epochs: Same.
    :param num_training_batches_per_epoch: Same.
    :param num_validation_batches_per_epoch: Same.
    :param output_dir_name: Same.
    """

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    # Process input args.
    first_training_time_unix_sec = time_conversion.string_to_unix_sec(
        first_training_time_string, TIME_FORMAT)
    last_training_time_unix_sec = time_conversion.string_to_unix_sec(
        last_training_time_string, TIME_FORMAT)

    first_validation_time_unix_sec = time_conversion.string_to_unix_sec(
        first_validation_time_string, TIME_FORMAT)
    last_validation_time_unix_sec = time_conversion.string_to_unix_sec(
        last_validation_time_string, TIME_FORMAT)

    # An empty string or the string "None" as first field name means "no
    # soundings".
    if sounding_field_names[0] in ['', 'None']:
        sounding_field_names = None

    # Downsampling is used only when more than one class is given.
    if len(downsampling_classes) > 1:
        downsampling_dict = dict(
            list(zip(downsampling_classes, downsampling_fractions)))
    else:
        downsampling_dict = None

    # Translations are disabled only when a single (x, y) pair was given and
    # *both* components are zero.  Each component is tested on its own:
    # testing `x + y == 0` would also switch off a genuine translation such as
    # (+3, -3) pixels, and would never be true for plain lists.
    no_translations = (
        len(x_translations_pixels) == 1
        and len(y_translations_pixels) == 1
        and x_translations_pixels[0] == 0
        and y_translations_pixels[0] == 0
    )

    if no_translations:
        x_translations_pixels = None
        y_translations_pixels = None

    if len(ccw_rotation_angles_deg) == 1 and ccw_rotation_angles_deg[0] == 0:
        ccw_rotation_angles_deg = None

    if num_noisings <= 0:
        num_noisings = 0
        noise_standard_deviation = None

    # Set output locations.
    output_model_file_name = '{0:s}/model.h5'.format(output_dir_name)
    history_file_name = '{0:s}/model_history.csv'.format(output_dir_name)
    tensorboard_dir_name = '{0:s}/tensorboard'.format(output_dir_name)
    model_metafile_name = '{0:s}/model_metadata.p'.format(output_dir_name)

    # Find training and validation files.
    training_file_names = input_examples.find_many_example_files(
        top_directory_name=top_training_dir_name,
        shuffled=True,
        first_batch_number=FIRST_BATCH_NUMBER,
        last_batch_number=LAST_BATCH_NUMBER,
        raise_error_if_any_missing=False)

    validation_file_names = input_examples.find_many_example_files(
        top_directory_name=top_validation_dir_name,
        shuffled=True,
        first_batch_number=FIRST_BATCH_NUMBER,
        last_batch_number=LAST_BATCH_NUMBER,
        raise_error_if_any_missing=False)

    # Read architecture.
    print(
        'Reading architecture from: "{0:s}"...'.format(input_model_file_name))
    model_object = cnn.read_model(input_model_file_name)

    # TODO(thunderhoser): This is a HACK.
    model_object.compile(loss=keras.losses.binary_crossentropy,
                         optimizer=keras.optimizers.Adam(),
                         metrics=cnn_setup.DEFAULT_METRIC_FUNCTION_LIST)

    print(SEPARATOR_STRING)
    model_object.summary()
    print(SEPARATOR_STRING)

    # Write metadata.
    metadata_dict = {
        cnn.NUM_EPOCHS_KEY: num_epochs,
        cnn.NUM_TRAINING_BATCHES_KEY: num_training_batches_per_epoch,
        cnn.NUM_VALIDATION_BATCHES_KEY: num_validation_batches_per_epoch,
        cnn.MONITOR_STRING_KEY: monitor_string,
        cnn.WEIGHT_LOSS_FUNCTION_KEY: weight_loss_function,
        cnn.CONV_2D3D_KEY: True,
        cnn.VALIDATION_FILES_KEY: validation_file_names,
        cnn.FIRST_VALIDN_TIME_KEY: first_validation_time_unix_sec,
        cnn.LAST_VALIDN_TIME_KEY: last_validation_time_unix_sec,
        cnn.NUM_EX_PER_VALIDN_BATCH_KEY: num_examples_per_validn_batch
    }

    # `model_object.input` is a list only for multi-input models; normalize to
    # a list so that the tensors can be counted and indexed uniformly.
    if isinstance(model_object.input, list):
        list_of_input_tensors = model_object.input
    else:
        list_of_input_tensors = [model_object.input]

    # Exactly two input tensors is taken to mean that reflectivity is
    # upsampled.  In that case the grid size stored in the training options is
    # half the size of the first input tensor.
    upsample_refl = len(list_of_input_tensors) == 2
    num_grid_rows = list_of_input_tensors[0].get_shape().as_list()[1]
    num_grid_columns = list_of_input_tensors[0].get_shape().as_list()[2]

    if upsample_refl:
        num_grid_rows = int(numpy.round(num_grid_rows / 2))
        num_grid_columns = int(numpy.round(num_grid_columns / 2))

    training_option_dict = {
        trainval_io.EXAMPLE_FILES_KEY: training_file_names,
        trainval_io.TARGET_NAME_KEY: target_name,
        trainval_io.FIRST_STORM_TIME_KEY: first_training_time_unix_sec,
        trainval_io.LAST_STORM_TIME_KEY: last_training_time_unix_sec,
        trainval_io.NUM_EXAMPLES_PER_BATCH_KEY: num_examples_per_train_batch,
        trainval_io.RADAR_FIELDS_KEY:
        input_examples.AZIMUTHAL_SHEAR_FIELD_NAMES,
        trainval_io.RADAR_HEIGHTS_KEY: REFLECTIVITY_HEIGHTS_M_AGL,
        trainval_io.SOUNDING_FIELDS_KEY: sounding_field_names,
        trainval_io.SOUNDING_HEIGHTS_KEY: SOUNDING_HEIGHTS_M_AGL,
        trainval_io.NUM_ROWS_KEY: num_grid_rows,
        trainval_io.NUM_COLUMNS_KEY: num_grid_columns,
        trainval_io.NORMALIZATION_TYPE_KEY: normalization_type_string,
        trainval_io.NORMALIZATION_FILE_KEY: normalization_param_file_name,
        trainval_io.MIN_NORMALIZED_VALUE_KEY: min_normalized_value,
        trainval_io.MAX_NORMALIZED_VALUE_KEY: max_normalized_value,
        trainval_io.BINARIZE_TARGET_KEY: False,
        trainval_io.SAMPLING_FRACTIONS_KEY: downsampling_dict,
        trainval_io.LOOP_ONCE_KEY: False,
        trainval_io.X_TRANSLATIONS_KEY: x_translations_pixels,
        trainval_io.Y_TRANSLATIONS_KEY: y_translations_pixels,
        trainval_io.ROTATION_ANGLES_KEY: ccw_rotation_angles_deg,
        trainval_io.NOISE_STDEV_KEY: noise_standard_deviation,
        trainval_io.NUM_NOISINGS_KEY: num_noisings,
        trainval_io.FLIP_X_KEY: flip_in_x,
        trainval_io.FLIP_Y_KEY: flip_in_y,
        trainval_io.UPSAMPLE_REFLECTIVITY_KEY: upsample_refl
    }

    print('Writing metadata to: "{0:s}"...'.format(model_metafile_name))
    cnn.write_model_metadata(pickle_file_name=model_metafile_name,
                             metadata_dict=metadata_dict,
                             training_option_dict=training_option_dict)

    cnn.train_cnn_2d3d_myrorss(
        model_object=model_object,
        model_file_name=output_model_file_name,
        history_file_name=history_file_name,
        tensorboard_dir_name=tensorboard_dir_name,
        num_epochs=num_epochs,
        num_training_batches_per_epoch=num_training_batches_per_epoch,
        training_option_dict=training_option_dict,
        monitor_string=monitor_string,
        weight_loss_function=weight_loss_function,
        num_validation_batches_per_epoch=num_validation_batches_per_epoch,
        validation_file_names=validation_file_names,
        first_validn_time_unix_sec=first_validation_time_unix_sec,
        last_validn_time_unix_sec=last_validation_time_unix_sec,
        num_examples_per_validn_batch=num_examples_per_validn_batch)
예제 #23
0
def _run(model_file_name, component_type_string, target_class, layer_name,
         ideal_activation, neuron_indices, channel_index, top_example_dir_name,
         storm_metafile_name, num_examples, randomize_weights,
         cascading_random, output_file_name):
    """Computes saliency map for each storm object and each model component.

    This is effectively the main method.

    If `randomize_weights` is True, one saliency file is written per
    conv/dense layer (visited in reversed order), each computed after
    resetting the weights in that layer.  With `cascading_random`, the resets
    accumulate in the one model; otherwise each reset is applied to a fresh
    copy of the model.  If `randomize_weights` is False, a single file is
    written to `output_file_name`.

    :param model_file_name: See documentation at top of file.
    :param component_type_string: Same.
    :param target_class: Same.
    :param layer_name: Same.
    :param ideal_activation: Same.
    :param neuron_indices: Same.
    :param channel_index: Same.
    :param top_example_dir_name: Same.
    :param storm_metafile_name: Same.
    :param num_examples: Same.
    :param randomize_weights: Same.
    :param cascading_random: Same.
    :param output_file_name: Same.
    """

    # Validate inputs and make sure the output location exists.
    file_system_utils.mkdir_recursive_if_necessary(file_name=output_file_name)
    model_interpretation.check_component_type(component_type_string)

    # Load the trained model and the metadata stored beside it.
    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = cnn.read_model(model_file_name)

    model_dir_name = os.path.split(model_file_name)[0]
    model_metafile_name = '{0:s}/model_metadata.p'.format(model_dir_name)

    print(
        'Reading model metadata from: "{0:s}"...'.format(model_metafile_name))
    model_metadata_dict = cnn.read_model_metadata(model_metafile_name)
    training_option_dict = model_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY]
    training_option_dict[trainval_io.REFLECTIVITY_MASK_KEY] = None

    # Pieces of the output path, used to build per-layer file names below.
    output_dir_name, pathless_output_file_name = os.path.split(
        output_file_name)
    extensionless_output_file_name, output_file_extension = os.path.splitext(
        pathless_output_file_name)

    # One pass without randomization; one pass per layer with it.
    conv_dense_layer_names = []
    num_sets = 1

    if randomize_weights:
        conv_dense_layer_names = _find_conv_and_dense_layers(model_object)
        conv_dense_layer_names.reverse()
        num_sets = len(conv_dense_layer_names)

    print(
        'Reading storm metadata from: "{0:s}"...'.format(storm_metafile_name))
    full_storm_id_strings, storm_times_unix_sec = (
        tracking_io.read_ids_and_times(storm_metafile_name))

    print(SEPARATOR_STRING)

    # A positive `num_examples` below the number available keeps only the
    # first `num_examples` storm objects.
    if 0 < num_examples < len(full_storm_id_strings):
        full_storm_id_strings = full_storm_id_strings[:num_examples]
        storm_times_unix_sec = storm_times_unix_sec[:num_examples]

    example_dict = testing_io.read_predictors_specific_examples(
        top_example_dir_name=top_example_dir_name,
        desired_full_id_strings=full_storm_id_strings,
        desired_times_unix_sec=storm_times_unix_sec,
        option_dict=training_option_dict,
        layer_operation_dicts=model_metadata_dict[cnn.LAYER_OPERATIONS_KEY])
    print(SEPARATOR_STRING)

    predictor_matrices = example_dict[testing_io.INPUT_MATRICES_KEY]
    sounding_pressure_matrix_pa = example_dict[
        testing_io.SOUNDING_PRESSURES_KEY]

    # Work on a deep copy so that the matrices fed to the model stay
    # untouched.
    denorm_predictor_matrices = trainval_io.separate_shear_and_reflectivity(
        list_of_input_matrices=copy.deepcopy(predictor_matrices),
        training_option_dict=training_option_dict)

    print('Denormalizing model inputs...')
    denorm_predictor_matrices = model_interpretation.denormalize_data(
        list_of_input_matrices=denorm_predictor_matrices,
        model_metadata_dict=model_metadata_dict)
    print(SEPARATOR_STRING)

    for set_index in range(num_sets):
        if not randomize_weights:
            this_model_object = model_object
            this_output_file_name = output_file_name

        elif cascading_random:
            # Reset weights in place, so earlier resets carry over.
            layer_to_reset = conv_dense_layer_names[set_index]
            _reset_weights_in_layer(model_object=model_object,
                                    layer_name=layer_to_reset)

            this_model_object = model_object

            this_output_file_name = (
                '{0:s}/{1:s}_cascading-random_{2:s}{3:s}'
            ).format(
                output_dir_name, extensionless_output_file_name,
                layer_to_reset.replace('_', '-'), output_file_extension)

        else:
            # Reset weights in a copy, leaving the original model intact.
            layer_to_reset = conv_dense_layer_names[set_index]

            this_model_object = keras.models.Model.from_config(
                model_object.get_config())
            this_model_object.set_weights(model_object.get_weights())

            _reset_weights_in_layer(model_object=this_model_object,
                                    layer_name=layer_to_reset)

            this_output_file_name = '{0:s}/{1:s}_random_{2:s}{3:s}'.format(
                output_dir_name, extensionless_output_file_name,
                layer_to_reset.replace('_', '-'), output_file_extension)

        if component_type_string == CLASS_COMPONENT_TYPE_STRING:
            print('Computing saliency maps for target class {0:d}...'.format(
                target_class))

            saliency_matrices = (
                saliency_maps.get_saliency_maps_for_class_activation(
                    model_object=this_model_object,
                    target_class=target_class,
                    list_of_input_matrices=predictor_matrices))

        elif component_type_string == NEURON_COMPONENT_TYPE_STRING:
            print(
                ('Computing saliency maps for neuron {0:s} in layer "{1:s}"...'
                 ).format(str(neuron_indices), layer_name))

            saliency_matrices = (
                saliency_maps.get_saliency_maps_for_neuron_activation(
                    model_object=this_model_object,
                    layer_name=layer_name,
                    neuron_indices=neuron_indices,
                    list_of_input_matrices=predictor_matrices,
                    ideal_activation=ideal_activation))

        else:
            print((
                'Computing saliency maps for channel {0:d} in layer "{1:s}"...'
            ).format(channel_index, layer_name))

            saliency_matrices = (
                saliency_maps.get_saliency_maps_for_channel_activation(
                    model_object=this_model_object,
                    layer_name=layer_name,
                    channel_index=channel_index,
                    list_of_input_matrices=predictor_matrices,
                    stat_function_for_neuron_activations=K.max,
                    ideal_activation=ideal_activation))

        saliency_matrices = trainval_io.separate_shear_and_reflectivity(
            list_of_input_matrices=saliency_matrices,
            training_option_dict=training_option_dict)

        print('Writing saliency maps to file: "{0:s}"...'.format(
            this_output_file_name))

        saliency_metadata_dict = saliency_maps.check_metadata(
            component_type_string=component_type_string,
            target_class=target_class,
            layer_name=layer_name,
            ideal_activation=ideal_activation,
            neuron_indices=neuron_indices,
            channel_index=channel_index)

        saliency_maps.write_standard_file(
            pickle_file_name=this_output_file_name,
            denorm_predictor_matrices=denorm_predictor_matrices,
            saliency_matrices=saliency_matrices,
            full_storm_id_strings=full_storm_id_strings,
            storm_times_unix_sec=storm_times_unix_sec,
            model_file_name=model_file_name,
            metadata_dict=saliency_metadata_dict,
            sounding_pressure_matrix_pa=sounding_pressure_matrix_pa)
예제 #24
0
def _train_one_cnn(gpu_queue, argument_dict):
    """Trains single CNN with 2-D GridRad data.

    A GPU index is taken from `gpu_queue` before training and is always put
    back afterwards, whether training succeeds or fails.  The TensorFlow
    session created for training is likewise always closed.

    :param gpu_queue: GPU queue (instance of `multiprocessing.Manager.Queue`).
    :param argument_dict: Dictionary of CNN arguments, where each key is an
        input arg to the script train_cnn_gridrad_2d_reduced.py.
    :raises: Exception: any exception raised during setup or training is
        re-raised after its traceback has been printed.
    """

    # NOTE(review): these imports are deliberately local to the function,
    # presumably so that each worker process loads Keras/TensorFlow itself --
    # confirm before moving them to module level.
    import keras
    from keras import backend as K
    import tensorflow
    from gewittergefahr.deep_learning import cnn
    from gewittergefahr.deep_learning import cnn_setup
    from gewittergefahr.scripts import deep_learning_helper as dl_helper

    # Sentinels that let the cleanup code know what was actually acquired.
    gpu_index = -1
    session_object = None

    try:
        # Deal with GPU business.
        gpu_index = int(gpu_queue.get())
        os.environ['CUDA_VISIBLE_DEVICES'] = '{0:d}'.format(gpu_index)

        session_object = tensorflow.Session(
            config=tensorflow.ConfigProto(intra_op_parallelism_threads=7,
                                          inter_op_parallelism_threads=7,
                                          allow_soft_placement=False,
                                          log_device_placement=False,
                                          gpu_options=tensorflow.GPUOptions(
                                              allow_growth=True)))

        K.set_session(session_object)

        # Read untrained model.
        untrained_model_file_name = argument_dict[
            dl_helper.INPUT_MODEL_FILE_ARG_NAME]

        # With CUDA_VISIBLE_DEVICES restricted to one device, '/gpu:0' should
        # refer to the GPU taken from the queue.
        with tensorflow.device('/gpu:0'):
            print('Reading untrained model from: "{0:s}"...'.format(
                untrained_model_file_name))
            model_object = cnn.read_model(untrained_model_file_name)

        model_object.compile(loss=keras.losses.binary_crossentropy,
                             optimizer=keras.optimizers.Adam(),
                             metrics=cnn_setup.DEFAULT_METRIC_FUNCTION_LIST)

        print(SEPARATOR_STRING)
        model_object.summary()
        print(SEPARATOR_STRING)

        # Write metadata.
        metadata_dict, training_option_dict = _write_metadata_one_cnn(
            model_object=model_object, argument_dict=argument_dict)

        print('Training CNN on GPU {0:d}...'.format(gpu_index))
        print(SEPARATOR_STRING)

        # Train CNN.
        output_dir_name = argument_dict[dl_helper.OUTPUT_DIR_ARG_NAME]
        output_model_file_name = '{0:s}/model.h5'.format(output_dir_name)
        history_file_name = '{0:s}/model_history.csv'.format(output_dir_name)
        tensorboard_dir_name = '{0:s}/tensorboard'.format(output_dir_name)

        cnn.train_cnn_gridrad_2d_reduced(
            model_object=model_object,
            model_file_name=output_model_file_name,
            history_file_name=history_file_name,
            tensorboard_dir_name=tensorboard_dir_name,
            num_epochs=metadata_dict[cnn.NUM_EPOCHS_KEY],
            num_training_batches_per_epoch=metadata_dict[
                cnn.NUM_TRAINING_BATCHES_KEY],
            training_option_dict=training_option_dict,
            list_of_layer_operation_dicts=metadata_dict[
                cnn.LAYER_OPERATIONS_KEY],
            monitor_string=metadata_dict[cnn.MONITOR_STRING_KEY],
            weight_loss_function=metadata_dict[cnn.WEIGHT_LOSS_FUNCTION_KEY],
            num_validation_batches_per_epoch=metadata_dict[
                cnn.NUM_VALIDATION_BATCHES_KEY],
            validation_file_names=metadata_dict[cnn.VALIDATION_FILES_KEY],
            first_validn_time_unix_sec=metadata_dict[
                cnn.FIRST_VALIDN_TIME_KEY],
            last_validn_time_unix_sec=metadata_dict[cnn.LAST_VALIDN_TIME_KEY],
            num_examples_per_validn_batch=metadata_dict[
                cnn.NUM_EX_PER_VALIDN_BATCH_KEY])

    except Exception:
        # Print the traceback here as well as re-raising.  NOTE(review):
        # presumably because this runs in a worker process whose traceback
        # could otherwise be lost -- confirm.
        print(traceback.format_exc())
        raise

    finally:
        # Release resources on every exit path.  The nested `finally`
        # guarantees that the GPU goes back to the queue even if closing the
        # session fails.
        try:
            if session_object is not None:
                session_object.close()
                del session_object
        finally:
            if gpu_index >= 0:
                gpu_queue.put(gpu_index)
예제 #25
0
def _run(model_file_name, component_type_string, target_class, layer_name,
         neuron_indices_flattened, channel_indices, top_example_dir_name,
         first_spc_date_string, last_spc_date_string, output_file_name):
    """Creates activation maps for one class, neuron, or channel of a CNN.

    This is effectively the main method.

    The activation matrix written to the output file has one row per example
    (storm object) and one column per model component (one column for a
    class; one per neuron or channel otherwise).

    :param model_file_name: See documentation at top of file.
    :param component_type_string: Same.
    :param target_class: Same.
    :param layer_name: Same.
    :param neuron_indices_flattened: Same.
    :param channel_indices: Same.
    :param top_example_dir_name: Same.
    :param first_spc_date_string: Same.
    :param last_spc_date_string: Same.
    :param output_file_name: Same.
    """

    def _append(matrix_so_far, new_matrix, axis):
        """Appends `new_matrix` to `matrix_so_far` along `axis`.

        If nothing has been accumulated yet, returns `new_matrix + 0.` (a new
        array) instead.
        """
        if matrix_so_far is None:
            return new_matrix + 0.

        return numpy.concatenate((matrix_so_far, new_matrix), axis=axis)

    # Validate inputs and make sure the output location exists.
    file_system_utils.mkdir_recursive_if_necessary(file_name=output_file_name)
    model_interpretation.check_component_type(component_type_string)

    if component_type_string == CHANNEL_COMPONENT_TYPE_STRING:
        error_checking.assert_is_geq_numpy_array(channel_indices, 0)

    neuron_index_matrix = None

    if component_type_string == NEURON_COMPONENT_TYPE_STRING:
        # Negative values in the flat array act as separators between
        # neurons: turn them into NaN, then split the array at the NaNs.
        neuron_indices_flattened = neuron_indices_flattened.astype(float)
        neuron_indices_flattened[neuron_indices_flattened < 0] = numpy.nan

        neuron_index_matrix = numpy.array(
            general_utils.split_array_by_nan(neuron_indices_flattened),
            dtype=int)

    # Load the trained model and the metadata stored beside it.
    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = cnn.read_model(model_file_name)

    model_dir_name = os.path.split(model_file_name)[0]
    metadata_file_name = '{0:s}/model_metadata.p'.format(model_dir_name)

    print('Reading metadata from: "{0:s}"...'.format(metadata_file_name))
    model_metadata_dict = cnn.read_model_metadata(metadata_file_name)
    training_option_dict = model_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY]

    # Point the training options at the requested examples and time period.
    example_file_names = input_examples.find_many_example_files(
        top_directory_name=top_example_dir_name,
        shuffled=False,
        first_spc_date_string=first_spc_date_string,
        last_spc_date_string=last_spc_date_string,
        raise_error_if_any_missing=False)

    training_option_dict[trainval_io.SAMPLING_FRACTIONS_KEY] = None
    training_option_dict[trainval_io.EXAMPLE_FILES_KEY] = example_file_names
    training_option_dict[trainval_io.FIRST_STORM_TIME_KEY] = (
        time_conversion.get_start_of_spc_date(first_spc_date_string))
    training_option_dict[trainval_io.LAST_STORM_TIME_KEY] = (
        time_conversion.get_end_of_spc_date(last_spc_date_string))

    # Choose the generator that matches the model type.
    layer_operation_dicts = model_metadata_dict[cnn.LAYER_OPERATIONS_KEY]

    if layer_operation_dicts is not None:
        generator_object = testing_io.gridrad_generator_2d_reduced(
            option_dict=training_option_dict,
            list_of_operation_dicts=layer_operation_dicts,
            num_examples_total=LARGE_INTEGER)

    elif model_metadata_dict[cnn.CONV_2D3D_KEY]:
        generator_object = testing_io.myrorss_generator_2d3d(
            option_dict=training_option_dict, num_examples_total=LARGE_INTEGER)
    else:
        generator_object = testing_io.generator_2d_or_3d(
            option_dict=training_option_dict, num_examples_total=LARGE_INTEGER)

    # Compute activation for each example (storm object) and model component.
    full_id_strings = []
    storm_times_unix_sec = numpy.array([], dtype=int)
    activation_matrix = None

    print(SEPARATOR_STRING)

    # Zipping with the file list draws at most one batch per example file and
    # stops early if the generator runs out.
    for _, batch_dict in zip(example_file_names, generator_object):
        batch_input_matrices = batch_dict[testing_io.INPUT_MATRICES_KEY]

        full_id_strings += batch_dict[testing_io.FULL_IDS_KEY]
        storm_times_unix_sec = numpy.concatenate(
            (storm_times_unix_sec, batch_dict[testing_io.STORM_TIMES_KEY]))

        if component_type_string == CLASS_COMPONENT_TYPE_STRING:
            print('Computing activations for target class {0:d}...'.format(
                target_class))

            batch_activation_matrix = (
                model_activation.get_class_activation_for_examples(
                    model_object=model_object,
                    target_class=target_class,
                    list_of_input_matrices=batch_input_matrices))

            # Single column: one activation per example.
            batch_activation_matrix = numpy.reshape(
                batch_activation_matrix, (len(batch_activation_matrix), 1))

        elif component_type_string == NEURON_COMPONENT_TYPE_STRING:
            batch_activation_matrix = None

            for row_index in range(neuron_index_matrix.shape[0]):
                these_neuron_indices = neuron_index_matrix[row_index, :]

                print((
                    'Computing activations for neuron {0:s} in layer "{1:s}"...'
                ).format(str(these_neuron_indices), layer_name))

                column_of_activations = (
                    model_activation.get_neuron_activation_for_examples(
                        model_object=model_object,
                        layer_name=layer_name,
                        neuron_indices=these_neuron_indices,
                        list_of_input_matrices=batch_input_matrices))

                column_of_activations = numpy.reshape(
                    column_of_activations, (len(column_of_activations), 1))

                batch_activation_matrix = _append(
                    batch_activation_matrix, column_of_activations, axis=1)

        else:
            batch_activation_matrix = None

            for this_channel_index in channel_indices:
                print(('Computing activations for channel {0:d} in layer '
                       '"{1:s}"...').format(this_channel_index, layer_name))

                column_of_activations = (
                    model_activation.get_channel_activation_for_examples(
                        model_object=model_object,
                        layer_name=layer_name,
                        channel_index=this_channel_index,
                        list_of_input_matrices=batch_input_matrices,
                        stat_function_for_neuron_activations=K.max))

                column_of_activations = numpy.reshape(
                    column_of_activations, (len(column_of_activations), 1))

                batch_activation_matrix = _append(
                    batch_activation_matrix, column_of_activations, axis=1)

        # Stack this batch's examples below those already processed.
        activation_matrix = _append(
            activation_matrix, batch_activation_matrix, axis=0)

        print(SEPARATOR_STRING)

    print('Writing activations to file: "{0:s}"...'.format(output_file_name))
    model_activation.write_file(pickle_file_name=output_file_name,
                                activation_matrix=activation_matrix,
                                full_id_strings=full_id_strings,
                                storm_times_unix_sec=storm_times_unix_sec,
                                model_file_name=model_file_name,
                                component_type_string=component_type_string,
                                target_class=target_class,
                                layer_name=layer_name,
                                neuron_index_matrix=neuron_index_matrix,
                                channel_indices=channel_indices)
예제 #26
0
def _run(model_file_name, layer_names, top_example_dir_name,
         storm_metafile_name, num_examples, top_output_dir_name):
    """Computes and plots CNN feature maps for each requested layer.

    This is effectively the main method.

    Feature maps are first computed for every layer in `layer_names`; then,
    for each layer, a subdirectory of `top_output_dir_name` named after the
    layer is created and the maps are handed to
    `_plot_feature_maps_one_layer`.

    :param model_file_name: See documentation at top of file.
    :param layer_names: Same.
    :param top_example_dir_name: Same.
    :param storm_metafile_name: Same.
    :param num_examples: Same.
    :param top_output_dir_name: Same.
    :raises: ValueError: if feature maps do not have 2 or 3 spatial dimensions
        (NOTE(review): presumably raised by `_plot_feature_maps_one_layer`;
        not raised directly in this function -- confirm).
    """

    # Load the trained model and the metadata stored beside it.
    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = cnn.read_model(model_file_name)

    model_dir_name = os.path.split(model_file_name)[0]
    model_metafile_name = '{0:s}/model_metadata.p'.format(model_dir_name)

    print(
        'Reading model metadata from: "{0:s}"...'.format(model_metafile_name))
    model_metadata_dict = cnn.read_model_metadata(model_metafile_name)
    training_option_dict = model_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY]
    training_option_dict[trainval_io.REFLECTIVITY_MASK_KEY] = None

    print(
        'Reading storm metadata from: "{0:s}"...'.format(storm_metafile_name))
    full_id_strings, storm_times_unix_sec = tracking_io.read_ids_and_times(
        storm_metafile_name)

    print(SEPARATOR_STRING)

    # A positive `num_examples` below the number available keeps only the
    # first `num_examples` storm objects.
    if 0 < num_examples < len(full_id_strings):
        full_id_strings = full_id_strings[:num_examples]
        storm_times_unix_sec = storm_times_unix_sec[:num_examples]

    # Only the first item returned (the predictor matrices) is needed here.
    list_of_predictor_matrices = testing_io.read_specific_examples(
        top_example_dir_name=top_example_dir_name,
        desired_full_id_strings=full_id_strings,
        desired_times_unix_sec=storm_times_unix_sec,
        option_dict=training_option_dict,
        list_of_layer_operation_dicts=model_metadata_dict[
            cnn.LAYER_OPERATIONS_KEY])[0]

    print(SEPARATOR_STRING)

    # When the model uses soundings, they are the last predictor matrix.
    include_soundings = (training_option_dict[trainval_io.SOUNDING_FIELDS_KEY]
                         is not None)
    sounding_matrix = (
        list_of_predictor_matrices[-1] if include_soundings else None)

    def _compute_features(feature_layer_name):
        """Returns feature maps produced by one layer for all examples."""

        # The dedicated 2D/3D routine is needed only for a 2D/3D model whose
        # reflectivity is *not* upsampled; every other case uses the generic
        # routine with the first predictor matrix as the radar input.
        needs_2d3d_routine = (
            model_metadata_dict[cnn.CONV_2D3D_KEY]
            and not training_option_dict[
                trainval_io.UPSAMPLE_REFLECTIVITY_KEY]
        )

        if needs_2d3d_routine:
            return cnn.apply_2d3d_cnn(
                model_object=model_object,
                reflectivity_matrix_dbz=list_of_predictor_matrices[0],
                azimuthal_shear_matrix_s01=list_of_predictor_matrices[1],
                sounding_matrix=sounding_matrix,
                return_features=True,
                feature_layer_name=feature_layer_name)

        return cnn.apply_2d_or_3d_cnn(
            model_object=model_object,
            radar_image_matrix=list_of_predictor_matrices[0],
            sounding_matrix=sounding_matrix,
            return_features=True,
            feature_layer_name=feature_layer_name)

    # Compute all feature maps first; plot afterwards.
    feature_matrix_by_layer = [
        _compute_features(this_layer_name) for this_layer_name in layer_names
    ]

    for this_layer_name, this_feature_matrix in zip(
            layer_names, feature_matrix_by_layer):

        this_output_dir_name = '{0:s}/{1:s}'.format(top_output_dir_name,
                                                    this_layer_name)

        file_system_utils.mkdir_recursive_if_necessary(
            directory_name=this_output_dir_name)

        _plot_feature_maps_one_layer(feature_matrix=this_feature_matrix,
                                     full_id_strings=full_id_strings,
                                     storm_times_unix_sec=storm_times_unix_sec,
                                     layer_name=this_layer_name,
                                     output_dir_name=this_output_dir_name)

        print(SEPARATOR_STRING)
def _run(model_file_name, component_type_string, target_class, layer_name,
         ideal_activation, neuron_indices, channel_index, top_example_dir_name,
         storm_metafile_name, num_examples, output_file_name):
    """Computes saliency map for each storm object and each model component.

    This is effectively the main method.

    All progress messages are written with `print(...)` calls on a single,
    fully formatted string, so the output is the same under Python 2 and
    Python 3.

    :param model_file_name: See documentation at top of file.  The model is
        read from this path; its metadata are read from `model_metadata.p` in
        the same directory.
    :param component_type_string: Same.  Selects which saliency routine is
        called (class, neuron, or -- for anything else -- channel).
    :param target_class: Same.  Used only for the class component type.
    :param layer_name: Same.  Used for the neuron and channel component types.
    :param ideal_activation: Same.  Used for the neuron and channel component
        types.
    :param neuron_indices: Same.  Used only for the neuron component type.
    :param channel_index: Same.  Used only for the channel component type.
    :param top_example_dir_name: Same.  Passed to the example reader.
    :param storm_metafile_name: Same.  Storm IDs and times are read from here.
    :param num_examples: Same.  If `0 < num_examples < number of storms`, only
        the first `num_examples` storm objects are used; otherwise all are.
    :param output_file_name: Same.  Saliency maps are written here.
    """

    # Check input args.
    # NOTE(review): presumably this creates the parent directory of the output
    # file if it does not exist yet -- confirm against file_system_utils.
    file_system_utils.mkdir_recursive_if_necessary(file_name=output_file_name)
    model_interpretation.check_component_type(component_type_string)

    # Read model and metadata.
    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = cnn.read_model(model_file_name)

    # Metadata are expected alongside the model file, under a fixed name.
    model_metafile_name = '{0:s}/model_metadata.p'.format(
        os.path.split(model_file_name)[0])

    print('Reading model metadata from: "{0:s}"...'.format(
        model_metafile_name))
    model_metadata_dict = cnn.read_model_metadata(model_metafile_name)
    training_option_dict = model_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY]

    # Override the reflectivity-mask option stored with the model before the
    # option dictionary is handed to the example reader.
    training_option_dict[trainval_io.REFLECTIVITY_MASK_KEY] = None

    print('Reading storm metadata from: "{0:s}"...'.format(
        storm_metafile_name))
    storm_ids, storm_times_unix_sec = tracking_io.read_ids_and_times(
        storm_metafile_name)
    print(SEPARATOR_STRING)

    # Keep only the first `num_examples` storm objects, when that is a proper
    # subset.  Non-positive or too-large values leave the full set untouched.
    if 0 < num_examples < len(storm_ids):
        storm_ids = storm_ids[:num_examples]
        storm_times_unix_sec = storm_times_unix_sec[:num_examples]

    list_of_input_matrices, sounding_pressure_matrix_pascals = (
        testing_io.read_specific_examples(
            top_example_dir_name=top_example_dir_name,
            desired_storm_ids=storm_ids,
            desired_times_unix_sec=storm_times_unix_sec,
            option_dict=training_option_dict,
            list_of_layer_operation_dicts=model_metadata_dict[
                cnn.LAYER_OPERATIONS_KEY]))
    print(SEPARATOR_STRING)

    # Dispatch on component type.  The type was validated above, so the final
    # `else` branch handles the channel component type.
    if component_type_string == CLASS_COMPONENT_TYPE_STRING:
        print('Computing saliency maps for target class {0:d}...'.format(
            target_class))

        list_of_saliency_matrices = (
            saliency_maps.get_saliency_maps_for_class_activation(
                model_object=model_object,
                target_class=target_class,
                list_of_input_matrices=list_of_input_matrices))

    elif component_type_string == NEURON_COMPONENT_TYPE_STRING:
        print(
            'Computing saliency maps for neuron {0:s} in layer "{1:s}"...'
            .format(str(neuron_indices), layer_name))

        list_of_saliency_matrices = (
            saliency_maps.get_saliency_maps_for_neuron_activation(
                model_object=model_object,
                layer_name=layer_name,
                neuron_indices=neuron_indices,
                list_of_input_matrices=list_of_input_matrices,
                ideal_activation=ideal_activation))

    else:
        print(
            'Computing saliency maps for channel {0:d} in layer "{1:s}"...'
            .format(channel_index, layer_name))

        list_of_saliency_matrices = (
            saliency_maps.get_saliency_maps_for_channel_activation(
                model_object=model_object,
                layer_name=layer_name,
                channel_index=channel_index,
                list_of_input_matrices=list_of_input_matrices,
                stat_function_for_neuron_activations=K.max,
                ideal_activation=ideal_activation))

    # Saliency was computed on the inputs as read; they are denormalized only
    # afterwards, so the denormalized versions are what get written to file.
    print('Denormalizing model inputs...')
    list_of_input_matrices = model_interpretation.denormalize_data(
        list_of_input_matrices=list_of_input_matrices,
        model_metadata_dict=model_metadata_dict)

    print('Writing saliency maps to file: "{0:s}"...'.format(output_file_name))

    saliency_metadata_dict = saliency_maps.check_metadata(
        component_type_string=component_type_string,
        target_class=target_class,
        layer_name=layer_name,
        ideal_activation=ideal_activation,
        neuron_indices=neuron_indices,
        channel_index=channel_index)

    saliency_maps.write_standard_file(
        pickle_file_name=output_file_name,
        list_of_input_matrices=list_of_input_matrices,
        list_of_saliency_matrices=list_of_saliency_matrices,
        storm_ids=storm_ids,
        storm_times_unix_sec=storm_times_unix_sec,
        model_file_name=model_file_name,
        saliency_metadata_dict=saliency_metadata_dict,
        sounding_pressure_matrix_pascals=sounding_pressure_matrix_pascals)