Example 1
def _average_saliency_maps(input_file_name, use_pmm, max_pmm_percentile_level,
                           output_file_name):
    """Averages many saliency maps.

    :param input_file_name: See documentation at top of file.
    :param use_pmm: Same.
    :param max_pmm_percentile_level: Same.
    :param output_file_name: Same.
    """

    print('Reading saliency maps from: "{0:s}"...'.format(input_file_name))
    saliency_dict = saliency.read_file(input_file_name)

    vector_saliency_matrix = saliency_dict[saliency.VECTOR_SALIENCY_KEY]
    scalar_saliency_matrix = saliency_dict[saliency.SCALAR_SALIENCY_KEY]

    if vector_saliency_matrix.size == 0:
        vector_saliency_matrix = vector_saliency_matrix[0, ...]
    elif use_pmm:
        vector_saliency_matrix = pmm.run_pmm_many_variables(
            input_matrix=vector_saliency_matrix,
            max_percentile_level=max_pmm_percentile_level)
    else:
        vector_saliency_matrix = numpy.mean(vector_saliency_matrix, axis=0)

    if scalar_saliency_matrix.size == 0:
        scalar_saliency_matrix = scalar_saliency_matrix[0, ...]
    elif use_pmm and len(scalar_saliency_matrix.shape) == 3:
        scalar_saliency_matrix = pmm.run_pmm_many_variables(
            input_matrix=scalar_saliency_matrix,
            max_percentile_level=max_pmm_percentile_level)
    else:
        scalar_saliency_matrix = numpy.mean(scalar_saliency_matrix, axis=0)

    vector_saliency_matrix = numpy.expand_dims(vector_saliency_matrix, axis=0)
    scalar_saliency_matrix = numpy.expand_dims(scalar_saliency_matrix, axis=0)

    if use_pmm:
        example_id_strings = [saliency.DUMMY_EXAMPLE_ID_PMM]
    else:
        example_id_strings = [saliency.DUMMY_EXAMPLE_ID_AVERAGE]

    print(
        'Writing average saliency map to: "{0:s}"...'.format(output_file_name))

    saliency.write_file(
        netcdf_file_name=output_file_name,
        scalar_saliency_matrix=scalar_saliency_matrix,
        vector_saliency_matrix=vector_saliency_matrix,
        example_id_strings=example_id_strings,
        model_file_name=saliency_dict[saliency.MODEL_FILE_KEY],
        layer_name=saliency_dict[saliency.LAYER_NAME_KEY],
        neuron_indices=saliency_dict[saliency.NEURON_INDICES_KEY],
        ideal_activation=saliency_dict[saliency.IDEAL_ACTIVATION_KEY],
        target_field_name=saliency_dict[saliency.TARGET_FIELD_KEY],
        target_height_m_agl=saliency_dict[saliency.TARGET_HEIGHT_KEY])
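
For readers unfamiliar with PMM, below is a minimal, self-contained numpy sketch of the probability-matched mean for one gridded field. The rank-matching step and the clipping at `max_percentile_level` describe the general technique; they are assumptions for illustration, not the project's exact `pmm.run_pmm_many_variables` implementation.

import numpy


def simple_pmm(field_matrix, max_percentile_level=99.):
    """Computes probability-matched mean (PMM) of one field.

    :param field_matrix: E-by-M-by-N numpy array (E examples of an M-by-N
        field).
    :param max_percentile_level: Pooled values above this percentile are
        clipped, damping the effect of outliers.
    :return: M-by-N numpy array (PMM composite).
    """

    # Rank each grid point in the arithmetic-mean field.
    mean_field_matrix = numpy.mean(field_matrix, axis=0)
    rank_matrix = numpy.argsort(numpy.argsort(numpy.ravel(mean_field_matrix)))

    # Pool all values over all examples and clip at the max percentile.
    pooled_values = numpy.ravel(field_matrix)
    max_value = numpy.percentile(pooled_values, max_percentile_level)
    pooled_values = numpy.minimum(pooled_values, max_value)

    # Replace each grid point's mean value with the pooled value at the
    # matching percentile, keeping the spatial pattern of the mean field but
    # the amplitude distribution of the raw data.
    percentile_levels = 100. * rank_matrix / (rank_matrix.size - 1)
    matched_values = numpy.percentile(pooled_values, percentile_levels)
    return numpy.reshape(matched_values, mean_field_matrix.shape)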
Example 2
def _composite_gradcam(input_file_name, max_percentile_level, output_file_name):
    """Composites predictors and resulting class-activation maps.

    :param input_file_name: Path to input file.  Will be read by
        `gradcam.read_file`.
    :param max_percentile_level: See documentation at top of file.
    :param output_file_name: Path to output file.  Will be written by
        `gradcam.write_pmm_file`.
    """

    print('Reading data from: "{0:s}"...'.format(input_file_name))
    gradcam_dict = gradcam.read_file(input_file_name)[0]

    predictor_matrices = gradcam_dict[gradcam.PREDICTOR_MATRICES_KEY]
    cam_matrices = gradcam_dict[gradcam.CAM_MATRICES_KEY]
    guided_cam_matrices = gradcam_dict[gradcam.GUIDED_CAM_MATRICES_KEY]
    sounding_pressure_matrix_pa = gradcam_dict[
        gradcam.SOUNDING_PRESSURES_KEY]

    print('Compositing predictor matrices...')
    mean_predictor_matrices, mean_sounding_pressures_pa = _composite_predictors(
        predictor_matrices=predictor_matrices,
        max_percentile_level=max_percentile_level,
        sounding_pressure_matrix_pa=sounding_pressure_matrix_pa)

    print('Compositing class-activation maps...')
    num_matrices = len(predictor_matrices)
    mean_cam_matrices = [None] * num_matrices
    mean_guided_cam_matrices = [None] * num_matrices

    for i in range(num_matrices):
        if cam_matrices[i] is None:
            continue

        mean_cam_matrices[i] = pmm.run_pmm_many_variables(
            input_matrix=numpy.expand_dims(cam_matrices[i], axis=-1),
            max_percentile_level=max_percentile_level
        )[..., 0]

        mean_guided_cam_matrices[i] = pmm.run_pmm_many_variables(
            input_matrix=guided_cam_matrices[i],
            max_percentile_level=max_percentile_level)

    print('Writing output to: "{0:s}"...'.format(output_file_name))
    gradcam.write_pmm_file(
        pickle_file_name=output_file_name,
        mean_denorm_predictor_matrices=mean_predictor_matrices,
        mean_cam_matrices=mean_cam_matrices,
        mean_guided_cam_matrices=mean_guided_cam_matrices,
        model_file_name=gradcam_dict[gradcam.MODEL_FILE_KEY],
        non_pmm_file_name=input_file_name,
        pmm_max_percentile_level=max_percentile_level,
        mean_sounding_pressures_pa=mean_sounding_pressures_pa)
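
The expand_dims/[..., 0] pattern above exists because class-activation maps carry no channel axis, while `pmm.run_pmm_many_variables` apparently expects a trailing variable axis. A toy illustration of the shapes involved (the sizes are hypothetical):

import numpy

cam_matrix = numpy.random.rand(100, 32, 32)               # E x M x N
cam_matrix_4d = numpy.expand_dims(cam_matrix, axis=-1)    # E x M x N x 1

# In the module context above, the composite would then be recovered as:
# mean_cam_matrix = pmm.run_pmm_many_variables(
#     input_matrix=cam_matrix_4d, max_percentile_level=99.
# )[..., 0]                                               # M x N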
Example 3
def _composite_novelty(
        input_file_name, max_percentile_level, output_file_name):
    """Composites inputs and outputs for novelty detection.

    :param input_file_name: Path to input file.  Will be read by
        `novelty_detection.read_file`.
    :param max_percentile_level: See documentation at top of file.
    :param output_file_name: Path to output file.  Will be written by
        `novelty_detection.write_pmm_file`.
    """

    print('Reading data from: "{0:s}"...'.format(input_file_name))
    novelty_dict = novelty_detection.read_file(input_file_name)[0]

    print('Compositing baseline radar images...')
    mean_radar_matrix_baseline = pmm.run_pmm_many_variables(
        input_matrix=novelty_dict[novelty_detection.BASELINE_MATRIX_KEY],
        max_percentile_level=max_percentile_level
    )

    print('Compositing novel radar images (in trial set)...')
    novel_indices = novelty_dict[novelty_detection.NOVEL_INDICES_KEY]
    radar_matrix_novel = novelty_dict[novelty_detection.TRIAL_MATRIX_KEY][
        novel_indices, ...]

    mean_radar_matrix_novel = pmm.run_pmm_many_variables(
        input_matrix=radar_matrix_novel,
        max_percentile_level=max_percentile_level)

    print('Compositing reconstructions of novel radar images...')
    mean_radar_matrix_upconv = pmm.run_pmm_many_variables(
        input_matrix=novelty_dict[novelty_detection.UPCONV_MATRIX_KEY],
        max_percentile_level=max_percentile_level
    )

    mean_radar_matrix_upconv_svd = pmm.run_pmm_many_variables(
        input_matrix=novelty_dict[novelty_detection.UPCONV_SVD_MATRIX_KEY],
        max_percentile_level=max_percentile_level
    )

    print('Writing output to: "{0:s}"...'.format(output_file_name))
    novelty_detection.write_pmm_file(
        pickle_file_name=output_file_name,
        mean_denorm_radar_matrix_baseline=mean_radar_matrix_baseline,
        mean_denorm_radar_matrix_novel=mean_radar_matrix_novel,
        mean_denorm_radar_matrix_upconv=mean_radar_matrix_upconv,
        mean_denorm_radar_matrix_upconv_svd=mean_radar_matrix_upconv_svd,
        cnn_file_name=novelty_dict[novelty_detection.CNN_FILE_KEY],
        non_pmm_file_name=input_file_name,
        pmm_max_percentile_level=max_percentile_level)
Example 4
def _composite_predictors(
        predictor_matrices, max_percentile_level,
        sounding_pressure_matrix_pa=None):
    """Runs PMM on predictors.

    T = number of input tensors to the model
    E = number of examples
    H_s = number of sounding heights

    :param predictor_matrices: length-T list of numpy arrays, each containing
        one type of predictor.
    :param max_percentile_level: See documentation at top of file.
    :param sounding_pressure_matrix_pa: numpy array (E x H_s) of sounding
        pressures.  This may be None, in which case the method will not bother
        trying to composite sounding pressures.
    :return: mean_predictor_matrices: length-T list of numpy arrays, where
        mean_predictor_matrices[i] is a composite over all examples in
        predictor_matrices[i].
    :return: mean_sounding_pressures_pa: numpy array (length H_s) of
        sounding pressures.  If `sounding_pressure_matrix_pa is None`, this
        is also None.
    """

    num_matrices = len(predictor_matrices)
    mean_predictor_matrices = [None] * num_matrices

    for i in range(num_matrices):
        mean_predictor_matrices[i] = pmm.run_pmm_many_variables(
            input_matrix=predictor_matrices[i],
            max_percentile_level=max_percentile_level)

    if sounding_pressure_matrix_pa is None:
        mean_sounding_pressures_pa = None
    else:
        this_input_matrix = numpy.expand_dims(
            sounding_pressure_matrix_pa, axis=-1)

        mean_sounding_pressures_pa = pmm.run_pmm_many_variables(
            input_matrix=this_input_matrix,
            max_percentile_level=max_percentile_level
        )[..., 0]

    return mean_predictor_matrices, mean_sounding_pressures_pa
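
A hypothetical call to `_composite_predictors`, meant to be run in the module context above. The shapes are assumptions for illustration (100 examples, 32-by-32 radar images with 4 channels, soundings on 49 height levels with 5 variables):

import numpy

predictor_matrices = [
    numpy.random.rand(100, 32, 32, 4),   # storm-centered radar images
    numpy.random.rand(100, 49, 5)        # soundings
]
sounding_pressure_matrix_pa = numpy.random.rand(100, 49) * 1e5

mean_predictor_matrices, mean_sounding_pressures_pa = _composite_predictors(
    predictor_matrices=predictor_matrices,
    max_percentile_level=99.,
    sounding_pressure_matrix_pa=sounding_pressure_matrix_pa)

# mean_predictor_matrices[0].shape == (32, 32, 4)
# mean_sounding_pressures_pa.shape == (49,)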
Example 5
    def test_run_pmm_many_variables(self):
        """Ensures correct output from run_pmm_many_variables."""

        this_mean_field_matrix = pmm.run_pmm_many_variables(
            input_matrix=INPUT_MATRIX_MANY_VARS,
            max_percentile_level=MAX_PERCENTILE_LEVEL)

        self.assertTrue(numpy.allclose(
            this_mean_field_matrix, MEAN_FIELD_MATRIX_MANY_VARS, atol=TOLERANCE
        ))
Example 6
    def test_run_pmm_many_variables_no_threshold(self):
        """Ensures correct output from run_pmm_many_variables.

        In this case there is no thresholding.
        """

        this_mean_field_matrix, this_threshold_count_matrix = (
            pmm.run_pmm_many_variables(
                input_matrix=INPUT_MATRIX_MANY_VARS,
                max_percentile_level=MAX_PERCENTILE_LEVEL))

        self.assertTrue(
            numpy.allclose(this_mean_field_matrix,
                           MEAN_FIELD_MATRIX_MANY_VARS,
                           atol=TOLERANCE))
        self.assertTrue(this_threshold_count_matrix is None)
Example 7
def _composite_saliency_maps(
        input_file_name, max_percentile_level, output_file_name):
    """Composites predictors and resulting saliency maps.

    :param input_file_name: Path to input file.  Will be read by
        `saliency_maps.read_file`.
    :param max_percentile_level: See documentation at top of file.
    :param output_file_name: Path to output file.  Will be written by
        `saliency_maps.write_pmm_file`.
    """

    print('Reading data from: "{0:s}"...'.format(input_file_name))
    saliency_dict = saliency_maps.read_file(input_file_name)[0]

    predictor_matrices = saliency_dict[saliency_maps.PREDICTOR_MATRICES_KEY]
    saliency_matrices = saliency_dict[saliency_maps.SALIENCY_MATRICES_KEY]
    sounding_pressure_matrix_pa = saliency_dict[
        saliency_maps.SOUNDING_PRESSURES_KEY]

    print('Compositing predictor matrices...')
    mean_predictor_matrices, mean_sounding_pressures_pa = _composite_predictors(
        predictor_matrices=predictor_matrices,
        max_percentile_level=max_percentile_level,
        sounding_pressure_matrix_pa=sounding_pressure_matrix_pa)

    print('Compositing saliency maps...')
    num_matrices = len(predictor_matrices)
    mean_saliency_matrices = [None] * num_matrices

    for i in range(num_matrices):
        mean_saliency_matrices[i] = pmm.run_pmm_many_variables(
            input_matrix=saliency_matrices[i],
            max_percentile_level=max_percentile_level)

    print('Writing output to: "{0:s}"...'.format(output_file_name))
    saliency_maps.write_pmm_file(
        pickle_file_name=output_file_name,
        mean_denorm_predictor_matrices=mean_predictor_matrices,
        mean_saliency_matrices=mean_saliency_matrices,
        model_file_name=saliency_dict[saliency_maps.MODEL_FILE_KEY],
        non_pmm_file_name=input_file_name,
        pmm_max_percentile_level=max_percentile_level,
        mean_sounding_pressures_pa=mean_sounding_pressures_pa)
Example 8
    def test_run_pmm_many_variables_max_threshold(self):
        """Ensures correct output from run_pmm_many_variables.

        In this case there is maximum-thresholding.
        """

        this_mean_field_matrix, this_threshold_count_matrix = (
            pmm.run_pmm_many_variables(
                input_matrix=INPUT_MATRIX_MANY_VARS,
                max_percentile_level=MAX_PERCENTILE_LEVEL,
                threshold_var_index=THRESHOLD_VAR_INDEX,
                threshold_value=THRESHOLD_VALUE,
                threshold_type_string=pmm.MAXIMUM_STRING))

        self.assertTrue(
            numpy.allclose(this_mean_field_matrix,
                           MEAN_FIELD_MATRIX_MANY_VARS,
                           atol=TOLERANCE))

        self.assertTrue(
            numpy.array_equal(this_threshold_count_matrix,
                              MAX_THRESHOLD_COUNT_MATRIX))
Example 9
def average_examples(
        example_dict,
        use_pmm,
        max_pmm_percentile_level=DEFAULT_MAX_PMM_PERCENTILE_LEVEL):
    """Averages predictor and target fields over many examples.

    H = number of heights
    P_s = number of scalar predictors
    P_v = number of vector predictors
    T_s = number of scalar targets
    T_v = number of vector targets

    :param example_dict: See doc for `example_io.read_file`.
    :param use_pmm: Boolean flag.  If True, will use probability-matched means
        for vector fields (vertical profiles).  If False, will use arithmetic
        means for vector fields.
    :param max_pmm_percentile_level: [used only if `use_pmm == True`]
        Max percentile level for probability-matched means.
    :return: mean_example_dict: Dictionary with the following keys.
    mean_example_dict['scalar_predictor_matrix']: numpy array (1 x P_s) with
        values of scalar predictors.
    mean_example_dict['scalar_predictor_names']: Same as input.
    mean_example_dict['vector_predictor_matrix']: numpy array (1 x H x P_v) with
        values of vector predictors.
    mean_example_dict['vector_predictor_names']: Same as input.
    mean_example_dict['scalar_target_matrix']: numpy array (1 x T_s) with values
        of scalar targets.
    mean_example_dict['scalar_target_names']: Same as input.
    mean_example_dict['vector_target_matrix']: numpy array (1 x H x T_v) with
        values of vector targets.
    mean_example_dict['vector_target_names']: Same as input.
    mean_example_dict['heights_m_agl']: length-H numpy array of heights (metres
        above ground level).
    """

    error_checking.assert_is_boolean(use_pmm)
    error_checking.assert_is_geq(max_pmm_percentile_level, 90.)
    error_checking.assert_is_leq(max_pmm_percentile_level, 100.)

    mean_scalar_predictor_matrix = numpy.mean(
        example_dict[SCALAR_PREDICTOR_VALS_KEY], axis=0)
    mean_scalar_predictor_matrix = numpy.expand_dims(
        mean_scalar_predictor_matrix, axis=0)

    mean_scalar_target_matrix = numpy.mean(
        example_dict[SCALAR_TARGET_VALS_KEY], axis=0)
    mean_scalar_target_matrix = numpy.expand_dims(mean_scalar_target_matrix,
                                                  axis=0)

    if use_pmm:
        mean_vector_predictor_matrix = pmm.run_pmm_many_variables(
            input_matrix=example_dict[VECTOR_PREDICTOR_VALS_KEY],
            max_percentile_level=max_pmm_percentile_level)
    else:
        mean_vector_predictor_matrix = numpy.mean(
            example_dict[VECTOR_PREDICTOR_VALS_KEY], axis=0)

    mean_vector_predictor_matrix = numpy.expand_dims(
        mean_vector_predictor_matrix, axis=0)

    if use_pmm:
        mean_vector_target_matrix = pmm.run_pmm_many_variables(
            input_matrix=example_dict[VECTOR_TARGET_VALS_KEY],
            max_percentile_level=max_pmm_percentile_level)
    else:
        mean_vector_target_matrix = numpy.mean(
            example_dict[VECTOR_TARGET_VALS_KEY], axis=0)

    mean_vector_target_matrix = numpy.expand_dims(mean_vector_target_matrix,
                                                  axis=0)

    return {
        SCALAR_PREDICTOR_NAMES_KEY: example_dict[SCALAR_PREDICTOR_NAMES_KEY],
        SCALAR_PREDICTOR_VALS_KEY: mean_scalar_predictor_matrix,
        SCALAR_TARGET_NAMES_KEY: example_dict[SCALAR_TARGET_NAMES_KEY],
        SCALAR_TARGET_VALS_KEY: mean_scalar_target_matrix,
        VECTOR_PREDICTOR_NAMES_KEY: example_dict[VECTOR_PREDICTOR_NAMES_KEY],
        VECTOR_PREDICTOR_VALS_KEY: mean_vector_predictor_matrix,
        VECTOR_TARGET_NAMES_KEY: example_dict[VECTOR_TARGET_NAMES_KEY],
        VECTOR_TARGET_VALS_KEY: mean_vector_target_matrix,
        HEIGHTS_KEY: example_dict[HEIGHTS_KEY]
    }
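
Hypothetical usage of `average_examples`. The input path is an assumption; `example_io.read_file` is the reader named in the docstring, and the key string is the one documented there.

example_dict = example_io.read_file('learning_examples.nc')

mean_example_dict = average_examples(
    example_dict=example_dict, use_pmm=True, max_pmm_percentile_level=99.)

# Vector (profile) fields are PMM composites; scalar fields are arithmetic
# means.  All matrices keep a leading example axis of length 1.
print(mean_example_dict['vector_predictor_matrix'].shape)  # (1, H, P_v)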
Example 10
def _run(input_saliency_file_name, input_gradcam_file_name,
         input_bwo_file_name, input_novelty_file_name, max_percentile_level,
         radar_channel_idx_for_thres, threshold_value, threshold_type_string,
         output_file_name):
    """Runs probability-matched means (PMM).

    This is effectively the main method.

    :param input_saliency_file_name: See documentation at top of file.
    :param input_gradcam_file_name: Same.
    :param input_bwo_file_name: Same.
    :param input_novelty_file_name: Same.
    :param max_percentile_level: Same.
    :param radar_channel_idx_for_thres: Same.
    :param threshold_value: Same.
    :param threshold_type_string: Same.
    :param output_file_name: Same.
    """

    if input_saliency_file_name not in NONE_STRINGS:
        input_gradcam_file_name = None
        input_bwo_file_name = None
        input_novelty_file_name = None
    elif input_gradcam_file_name not in NONE_STRINGS:
        input_saliency_file_name = None
        input_bwo_file_name = None
        input_novelty_file_name = None
    elif input_bwo_file_name not in NONE_STRINGS:
        input_saliency_file_name = None
        input_gradcam_file_name = None
        input_novelty_file_name = None
    else:
        input_saliency_file_name = None
        input_gradcam_file_name = None
        input_bwo_file_name = None

    if radar_channel_idx_for_thres < 0:
        radar_channel_idx_for_thres = None
        threshold_value = None
        threshold_type_string = None

    if input_saliency_file_name is not None:
        print('Reading data from: "{0:s}"...'.format(input_saliency_file_name))

        saliency_dict = saliency_maps.read_standard_file(
            input_saliency_file_name)
        list_of_input_matrices = saliency_dict[
            saliency_maps.INPUT_MATRICES_KEY]

    elif input_gradcam_file_name is not None:
        print('Reading data from: "{0:s}"...'.format(input_gradcam_file_name))

        gradcam_dict = gradcam.read_standard_file(input_gradcam_file_name)
        list_of_input_matrices = gradcam_dict[gradcam.INPUT_MATRICES_KEY]

    elif input_bwo_file_name is not None:
        print('Reading data from: "{0:s}"...'.format(input_bwo_file_name))

        bwo_dictionary = backwards_opt.read_standard_file(input_bwo_file_name)
        list_of_input_matrices = bwo_dictionary[
            backwards_opt.INIT_FUNCTION_KEY]

    else:
        print('Reading data from: "{0:s}"...'.format(input_novelty_file_name))
        novelty_dict = novelty_detection.read_standard_file(
            input_novelty_file_name)

        list_of_input_matrices = novelty_dict[
            novelty_detection.TRIAL_INPUTS_KEY]
        novel_indices = novelty_dict[novelty_detection.NOVEL_INDICES_KEY]

        list_of_input_matrices = [
            a[novel_indices, ...] for a in list_of_input_matrices
        ]

    print('Running PMM on denormalized predictor matrices...')

    num_input_matrices = len(list_of_input_matrices)
    list_of_mean_input_matrices = [None] * num_input_matrices
    pmm_metadata_dict = None
    threshold_count_matrix = None

    for i in range(num_input_matrices):
        if i == 0:
            list_of_mean_input_matrices[i], threshold_count_matrix = (
                pmm.run_pmm_many_variables(
                    input_matrix=list_of_input_matrices[i],
                    max_percentile_level=max_percentile_level,
                    threshold_var_index=radar_channel_idx_for_thres,
                    threshold_value=threshold_value,
                    threshold_type_string=threshold_type_string))

            pmm_metadata_dict = pmm.check_input_args(
                input_matrix=list_of_input_matrices[i],
                max_percentile_level=max_percentile_level,
                threshold_var_index=radar_channel_idx_for_thres,
                threshold_value=threshold_value,
                threshold_type_string=threshold_type_string)
        else:
            list_of_mean_input_matrices[i] = pmm.run_pmm_many_variables(
                input_matrix=list_of_input_matrices[i],
                max_percentile_level=max_percentile_level)[0]

    if input_saliency_file_name is not None:
        print('Running PMM on saliency matrices...')
        list_of_saliency_matrices = saliency_dict[
            saliency_maps.SALIENCY_MATRICES_KEY]

        num_input_matrices = len(list_of_input_matrices)
        list_of_mean_saliency_matrices = [None] * num_input_matrices

        for i in range(num_input_matrices):
            list_of_mean_saliency_matrices[i] = pmm.run_pmm_many_variables(
                input_matrix=list_of_saliency_matrices[i],
                max_percentile_level=max_percentile_level)[0]

        print('Writing output to: "{0:s}"...'.format(output_file_name))
        saliency_maps.write_pmm_file(
            pickle_file_name=output_file_name,
            list_of_mean_input_matrices=list_of_mean_input_matrices,
            list_of_mean_saliency_matrices=list_of_mean_saliency_matrices,
            threshold_count_matrix=threshold_count_matrix,
            model_file_name=saliency_dict[saliency_maps.MODEL_FILE_KEY],
            standard_saliency_file_name=input_saliency_file_name,
            pmm_metadata_dict=pmm_metadata_dict)

        return

    if input_gradcam_file_name is not None:
        print('Running PMM on class-activation matrices...')

        list_of_cam_matrices = gradcam_dict[gradcam.CAM_MATRICES_KEY]
        list_of_guided_cam_matrices = gradcam_dict[
            gradcam.GUIDED_CAM_MATRICES_KEY]

        num_input_matrices = len(list_of_input_matrices)
        list_of_mean_cam_matrices = [None] * num_input_matrices
        list_of_mean_guided_cam_matrices = [None] * num_input_matrices

        for i in range(num_input_matrices):
            if list_of_cam_matrices[i] is None:
                continue

            list_of_mean_cam_matrices[i] = pmm.run_pmm_many_variables(
                input_matrix=numpy.expand_dims(list_of_cam_matrices[i],
                                               axis=-1),
                max_percentile_level=max_percentile_level)[0]

            list_of_mean_cam_matrices[i] = list_of_mean_cam_matrices[i][..., 0]

            list_of_mean_guided_cam_matrices[i] = pmm.run_pmm_many_variables(
                input_matrix=list_of_guided_cam_matrices[i],
                max_percentile_level=max_percentile_level)[0]

        print('Writing output to: "{0:s}"...'.format(output_file_name))
        gradcam.write_pmm_file(
            pickle_file_name=output_file_name,
            list_of_mean_input_matrices=list_of_mean_input_matrices,
            list_of_mean_cam_matrices=list_of_mean_cam_matrices,
            list_of_mean_guided_cam_matrices=list_of_mean_guided_cam_matrices,
            model_file_name=gradcam_dict[gradcam.MODEL_FILE_KEY],
            standard_gradcam_file_name=input_gradcam_file_name,
            pmm_metadata_dict=pmm_metadata_dict)

        return

    if input_bwo_file_name is not None:
        print('Running PMM on backwards-optimization output...')
        list_of_optimized_matrices = bwo_dictionary[
            backwards_opt.OPTIMIZED_MATRICES_KEY]

        num_input_matrices = len(list_of_input_matrices)
        list_of_mean_optimized_matrices = [None] * num_input_matrices

        for i in range(num_input_matrices):
            list_of_mean_optimized_matrices[i] = pmm.run_pmm_many_variables(
                input_matrix=list_of_optimized_matrices[i],
                max_percentile_level=max_percentile_level)[0]

        mean_initial_activation = numpy.mean(
            bwo_dictionary[backwards_opt.INITIAL_ACTIVATIONS_KEY])
        mean_final_activation = numpy.mean(
            bwo_dictionary[backwards_opt.FINAL_ACTIVATIONS_KEY])

        print('Writing output to: "{0:s}"...'.format(output_file_name))
        backwards_opt.write_pmm_file(
            pickle_file_name=output_file_name,
            list_of_mean_input_matrices=list_of_mean_input_matrices,
            list_of_mean_optimized_matrices=list_of_mean_optimized_matrices,
            mean_initial_activation=mean_initial_activation,
            mean_final_activation=mean_final_activation,
            threshold_count_matrix=threshold_count_matrix,
            model_file_name=bwo_dictionary[backwards_opt.MODEL_FILE_KEY],
            standard_bwo_file_name=input_bwo_file_name,
            pmm_metadata_dict=pmm_metadata_dict)

        return

    print('Running PMM on novelty-detection output...')

    mean_novel_image_matrix_upconv = pmm.run_pmm_many_variables(
        input_matrix=novelty_dict[novelty_detection.NOVEL_IMAGES_UPCONV_KEY],
        max_percentile_level=max_percentile_level)[0]

    mean_novel_image_matrix_upconv_svd = pmm.run_pmm_many_variables(
        input_matrix=novelty_dict[
            novelty_detection.NOVEL_IMAGES_UPCONV_SVD_KEY],
        max_percentile_level=max_percentile_level)[0]

    print('Writing output to: "{0:s}"...'.format(output_file_name))
    novelty_detection.write_pmm_file(
        pickle_file_name=output_file_name,
        mean_novel_image_matrix=list_of_mean_input_matrices[0],
        mean_novel_image_matrix_upconv=mean_novel_image_matrix_upconv,
        mean_novel_image_matrix_upconv_svd=mean_novel_image_matrix_upconv_svd,
        threshold_count_matrix=threshold_count_matrix,
        standard_novelty_file_name=input_novelty_file_name,
        pmm_metadata_dict=pmm_metadata_dict)
Example 11
def run_monte_carlo_test(list_of_baseline_matrices, list_of_trial_matrices,
                         max_pmm_percentile_level, num_iterations,
                         confidence_level):
    """Runs Monte Carlo significance test.

    E = number of examples in each set
    T = number of matrices in each set

    :param list_of_baseline_matrices: length-T list of numpy arrays, where the
        first axis of each numpy array has length E.
    :param list_of_trial_matrices: See above.
    :param max_pmm_percentile_level: Max percentile for probability-matched
        means (PMM).  For more details, see documentation for
        `pmm.run_pmm_many_variables`.
    :param num_iterations: Number of Monte Carlo iterations.
    :param confidence_level: Confidence level for statistical significance.
    :return: monte_carlo_dict: Dictionary with the following keys.
    monte_carlo_dict['list_of_trial_pmm_matrices']: length-T list of numpy
        arrays, where list_of_trial_pmm_matrices[i] is the PMM composite over
        list_of_trial_matrices[i].  Thus, list_of_trial_pmm_matrices[i] has the
        same dimensions as list_of_trial_matrices[i], except without the first
        axis.
    monte_carlo_dict['list_of_min_matrices']: length-T list of numpy arrays,
        where list_of_min_matrices[i] has the same dimensions as
        list_of_baseline_matrices[i], except without the first axis.  Each
        matrix defines MIN thresholds for stat significance.  In other words, if
        list_of_trial_pmm_matrices[i][j] < list_of_min_matrices[i][j],
        list_of_trial_pmm_matrices[i][j] is significantly different from the
        baseline.
    monte_carlo_dict['list_of_max_matrices']: Same but for MAX thresholds.
    monte_carlo_dict['max_pmm_percentile_level']: Same as input.
    monte_carlo_dict['num_iterations']: Same as input.
    monte_carlo_dict['confidence_level']: Same as input.
    """

    num_examples_per_set = _check_input_args(
        list_of_baseline_matrices=list_of_baseline_matrices,
        list_of_trial_matrices=list_of_trial_matrices,
        num_iterations=num_iterations,
        confidence_level=confidence_level)

    example_indices = numpy.linspace(0,
                                     2 * num_examples_per_set - 1,
                                     num=2 * num_examples_per_set,
                                     dtype=int)

    num_matrices = len(list_of_trial_matrices)
    list_of_shuffled_pmm_matrices = [None] * num_matrices
    print(SEPARATOR_STRING)

    for i in range(num_iterations):
        if numpy.mod(i, 25) == 0:
            print('Have run {0:d} of {1:d} Monte Carlo iterations...'.format(
                i, num_iterations))

        these_indices = numpy.random.choice(example_indices,
                                            size=num_examples_per_set,
                                            replace=False)

        these_baseline_indices = these_indices[
            these_indices < num_examples_per_set]

        these_trial_indices = (
            these_indices[these_indices >= num_examples_per_set] -
            num_examples_per_set)

        for j in range(num_matrices):
            if list_of_trial_matrices[j] is None:
                continue

            this_shuffled_matrix = numpy.concatenate(
                (list_of_baseline_matrices[j][these_baseline_indices, ...],
                 list_of_trial_matrices[j][these_trial_indices, ...]))

            this_shuffled_pmm_matrix = pmm.run_pmm_many_variables(
                input_matrix=this_shuffled_matrix,
                max_percentile_level=max_pmm_percentile_level)[0]

            this_shuffled_pmm_matrix = numpy.expand_dims(
                this_shuffled_pmm_matrix, axis=0)

            if list_of_shuffled_pmm_matrices[j] is None:
                list_of_shuffled_pmm_matrices[j] = (
                    this_shuffled_pmm_matrix + 0.)
            else:
                list_of_shuffled_pmm_matrices[j] = numpy.concatenate(
                    (list_of_shuffled_pmm_matrices[j],
                     this_shuffled_pmm_matrix))

    print('Have run all {0:d} Monte Carlo iterations!'.format(num_iterations))
    print(SEPARATOR_STRING)

    list_of_min_matrices = [None] * num_matrices
    list_of_max_matrices = [None] * num_matrices
    list_of_trial_pmm_matrices = [None] * num_matrices

    for j in range(num_matrices):
        if list_of_trial_matrices[j] is None:
            continue

        list_of_min_matrices[j] = numpy.percentile(
            a=list_of_shuffled_pmm_matrices[j],
            q=50. * (1 - confidence_level),
            axis=0)

        list_of_max_matrices[j] = numpy.percentile(
            a=list_of_shuffled_pmm_matrices[j],
            q=50. * (1 + confidence_level),
            axis=0)

        list_of_trial_pmm_matrices[j] = pmm.run_pmm_many_variables(
            input_matrix=list_of_trial_matrices[j],
            max_percentile_level=max_pmm_percentile_level)[0]

        this_num_low_significant = numpy.sum(
            list_of_trial_pmm_matrices[j] < list_of_min_matrices[j])
        this_num_high_significant = numpy.sum(
            list_of_trial_pmm_matrices[j] > list_of_max_matrices[j])

        print((
            'Number of elements in {0:d}th matrix = {1:d} ... num significant '
            'on low end = {2:d} ... num significant on high end = {3:d}'
        ).format(j + 1, list_of_trial_pmm_matrices[j].size,
                 this_num_low_significant, this_num_high_significant))

    return {
        TRIAL_PMM_MATRICES_KEY: list_of_trial_pmm_matrices,
        MIN_MATRICES_KEY: list_of_min_matrices,
        MAX_MATRICES_KEY: list_of_max_matrices,
        MAX_PMM_PERCENTILE_KEY: max_pmm_percentile_level,
        NUM_ITERATIONS_KEY: num_iterations,
        CONFIDENCE_LEVEL_KEY: confidence_level
    }
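
A hedged sketch of consuming the dictionary returned above: a grid point in the trial composite is significantly different from the baseline when it falls outside the [min, max] envelope. The key names follow the docstring; the helper itself is hypothetical.

import numpy


def significance_mask(monte_carlo_dict, matrix_index):
    """Returns Boolean mask of significant grid points for one matrix."""

    trial_matrix = monte_carlo_dict[
        'list_of_trial_pmm_matrices'][matrix_index]
    min_matrix = monte_carlo_dict['list_of_min_matrices'][matrix_index]
    max_matrix = monte_carlo_dict['list_of_max_matrices'][matrix_index]

    return numpy.logical_or(
        trial_matrix < min_matrix, trial_matrix > max_matrix)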
Example 12
def _run(interpretation_type_string, baseline_file_name, trial_file_name,
         max_pmm_percentile_level, num_iterations, confidence_level,
         output_file_name):
    """Runs Monte Carlo significance test for interpretation output.

    This is effectively the main method.

    :param interpretation_type_string: See documentation at top of file.
    :param baseline_file_name: Same.
    :param trial_file_name: Same.
    :param max_pmm_percentile_level: Same.
    :param num_iterations: Same.
    :param confidence_level: Same.
    :param output_file_name: Same.
    :raises: ValueError: if
        `interpretation_type_string not in VALID_INTERPRETATION_TYPE_STRINGS`.
    """

    if interpretation_type_string not in VALID_INTERPRETATION_TYPE_STRINGS:
        error_string = (
            '\n{0:s}\nValid interpretation types (listed above) do not include '
            '"{1:s}".'
        ).format(
            str(VALID_INTERPRETATION_TYPE_STRINGS), interpretation_type_string
        )

        raise ValueError(error_string)

    print('Reading baseline set from: "{0:s}"...'.format(baseline_file_name))

    if interpretation_type_string == SALIENCY_STRING:
        baseline_dict = saliency_maps.read_standard_file(baseline_file_name)
    elif interpretation_type_string == GRADCAM_STRING:
        baseline_dict = gradcam.read_standard_file(baseline_file_name)
    else:
        baseline_dict = backwards_opt.read_standard_file(baseline_file_name)

    print('Reading trial set from: "{0:s}"...'.format(trial_file_name))
    monte_carlo_dict = None
    cam_monte_carlo_dict = None
    guided_cam_monte_carlo_dict = None

    if interpretation_type_string == SALIENCY_STRING:
        trial_dict = saliency_maps.read_standard_file(trial_file_name)

        monte_carlo_dict = monte_carlo.run_monte_carlo_test(
            list_of_baseline_matrices=baseline_dict[
                saliency_maps.SALIENCY_MATRICES_KEY],
            list_of_trial_matrices=trial_dict[
                saliency_maps.SALIENCY_MATRICES_KEY],
            max_pmm_percentile_level=max_pmm_percentile_level,
            num_iterations=num_iterations, confidence_level=confidence_level)

        monte_carlo_dict[monte_carlo.BASELINE_FILE_KEY] = baseline_file_name
        list_of_input_matrices = trial_dict[saliency_maps.INPUT_MATRICES_KEY]

    elif interpretation_type_string == GRADCAM_STRING:
        trial_dict = gradcam.read_standard_file(trial_file_name)

        cam_monte_carlo_dict = monte_carlo.run_monte_carlo_test(
            list_of_baseline_matrices=baseline_dict[gradcam.CAM_MATRICES_KEY],
            list_of_trial_matrices=trial_dict[gradcam.CAM_MATRICES_KEY],
            max_pmm_percentile_level=max_pmm_percentile_level,
            num_iterations=num_iterations, confidence_level=confidence_level)

        guided_cam_monte_carlo_dict = monte_carlo.run_monte_carlo_test(
            list_of_baseline_matrices=baseline_dict[
                gradcam.GUIDED_CAM_MATRICES_KEY],
            list_of_trial_matrices=trial_dict[
                gradcam.GUIDED_CAM_MATRICES_KEY],
            max_pmm_percentile_level=max_pmm_percentile_level,
            num_iterations=num_iterations, confidence_level=confidence_level)

        cam_monte_carlo_dict[
            monte_carlo.BASELINE_FILE_KEY] = baseline_file_name
        guided_cam_monte_carlo_dict[
            monte_carlo.BASELINE_FILE_KEY] = baseline_file_name
        list_of_input_matrices = trial_dict[gradcam.INPUT_MATRICES_KEY]

    else:
        trial_dict = backwards_opt.read_standard_file(trial_file_name)

        monte_carlo_dict = monte_carlo.run_monte_carlo_test(
            list_of_baseline_matrices=baseline_dict[
                backwards_opt.OPTIMIZED_MATRICES_KEY],
            list_of_trial_matrices=trial_dict[
                backwards_opt.OPTIMIZED_MATRICES_KEY],
            max_pmm_percentile_level=max_pmm_percentile_level,
            num_iterations=num_iterations, confidence_level=confidence_level)

        monte_carlo_dict[monte_carlo.BASELINE_FILE_KEY] = baseline_file_name
        list_of_input_matrices = trial_dict[backwards_opt.INIT_FUNCTION_KEY]

    print(SEPARATOR_STRING)

    num_matrices = len(list_of_input_matrices)
    list_of_mean_input_matrices = [None] * num_matrices

    for i in range(num_matrices):
        list_of_mean_input_matrices[i] = pmm.run_pmm_many_variables(
            input_matrix=list_of_input_matrices[i],
            max_percentile_level=max_pmm_percentile_level
        )[0]

    pmm_metadata_dict = pmm.check_input_args(
        input_matrix=list_of_input_matrices[0],
        max_percentile_level=max_pmm_percentile_level,
        threshold_var_index=None, threshold_value=None,
        threshold_type_string=None)

    print('Writing results to: "{0:s}"...'.format(output_file_name))

    if interpretation_type_string == SALIENCY_STRING:
        saliency_maps.write_pmm_file(
            pickle_file_name=output_file_name,
            list_of_mean_input_matrices=list_of_mean_input_matrices,
            list_of_mean_saliency_matrices=copy.deepcopy(
                monte_carlo_dict[monte_carlo.TRIAL_PMM_MATRICES_KEY]
            ),
            threshold_count_matrix=None,
            model_file_name=trial_dict[saliency_maps.MODEL_FILE_KEY],
            standard_saliency_file_name=trial_file_name,
            pmm_metadata_dict=pmm_metadata_dict,
            monte_carlo_dict=monte_carlo_dict)

    elif interpretation_type_string == GRADCAM_STRING:
        gradcam.write_pmm_file(
            pickle_file_name=output_file_name,
            list_of_mean_input_matrices=list_of_mean_input_matrices,
            list_of_mean_cam_matrices=copy.deepcopy(
                cam_monte_carlo_dict[monte_carlo.TRIAL_PMM_MATRICES_KEY]
            ),
            list_of_mean_guided_cam_matrices=copy.deepcopy(
                guided_cam_monte_carlo_dict[monte_carlo.TRIAL_PMM_MATRICES_KEY]
            ),
            model_file_name=trial_dict[gradcam.MODEL_FILE_KEY],
            standard_gradcam_file_name=trial_file_name,
            pmm_metadata_dict=pmm_metadata_dict,
            cam_monte_carlo_dict=cam_monte_carlo_dict,
            guided_cam_monte_carlo_dict=guided_cam_monte_carlo_dict)

    else:
        backwards_opt.write_pmm_file(
            pickle_file_name=output_file_name,
            list_of_mean_input_matrices=list_of_mean_input_matrices,
            list_of_mean_optimized_matrices=copy.deepcopy(
                monte_carlo_dict[monte_carlo.TRIAL_PMM_MATRICES_KEY]
            ),
            mean_initial_activation=numpy.mean(
                trial_dict[backwards_opt.INITIAL_ACTIVATIONS_KEY]
            ),
            mean_final_activation=numpy.mean(
                trial_dict[backwards_opt.FINAL_ACTIVATIONS_KEY]
            ),
            threshold_count_matrix=None,
            model_file_name=trial_dict[backwards_opt.MODEL_FILE_KEY],
            standard_bwo_file_name=trial_file_name,
            pmm_metadata_dict=pmm_metadata_dict,
            monte_carlo_dict=monte_carlo_dict)
Example 13
def average_predictions(
        prediction_dict,
        use_pmm,
        max_pmm_percentile_level=DEFAULT_MAX_PMM_PERCENTILE_LEVEL):
    """Averages predicted and target values over many examples.

    H = number of heights
    T_s = number of scalar targets
    T_v = number of vector targets

    :param prediction_dict: See doc for `write_file`.
    :param use_pmm: Boolean flag.  If True, will use probability-matched means
        for vector fields (vertical profiles).  If False, will use arithmetic
        means for vector fields.
    :param max_pmm_percentile_level: [used only if `use_pmm == True`]
        Max percentile level for probability-matched means.
    :return: mean_prediction_dict: Dictionary with the following keys.
    mean_prediction_dict['scalar_target_matrix']: numpy array (1 x T_s) with
        mean target (actual) values for scalar variables.
    mean_prediction_dict['scalar_prediction_matrix']: Same but with predicted
        values.
    mean_prediction_dict['vector_target_matrix']: numpy array (1 x H x T_v) with
        mean target (actual) values for vector variables.
    mean_prediction_dict['vector_prediction_matrix']: Same but with predicted
        values.
    mean_prediction_dict['heights_m_agl']: length-H numpy array of heights
        (metres above ground level).
    mean_prediction_dict['model_file_name']: Path to file with trained model
        (readable by `neural_net.read_model`).
    mean_prediction_dict['isotonic_model_file_name']: Path to file with trained
        isotonic-regression models (readable by `isotonic_regression.read_file`)
        used to make predictions.  If isotonic regression was not used, this is
        None.
    """

    error_checking.assert_is_boolean(use_pmm)
    error_checking.assert_is_geq(max_pmm_percentile_level, 90.)
    error_checking.assert_is_leq(max_pmm_percentile_level, 100.)

    mean_scalar_target_matrix = numpy.mean(prediction_dict[SCALAR_TARGETS_KEY],
                                           axis=0)
    mean_scalar_target_matrix = numpy.expand_dims(mean_scalar_target_matrix,
                                                  axis=0)

    mean_scalar_prediction_matrix = numpy.mean(
        prediction_dict[SCALAR_PREDICTIONS_KEY], axis=0)
    mean_scalar_prediction_matrix = numpy.expand_dims(
        mean_scalar_prediction_matrix, axis=0)

    if use_pmm:
        mean_vector_target_matrix = pmm.run_pmm_many_variables(
            input_matrix=prediction_dict[VECTOR_TARGETS_KEY],
            max_percentile_level=max_pmm_percentile_level)
    else:
        mean_vector_target_matrix = numpy.mean(
            prediction_dict[VECTOR_TARGETS_KEY], axis=0)

    mean_vector_target_matrix = numpy.expand_dims(mean_vector_target_matrix,
                                                  axis=0)

    if use_pmm:
        mean_vector_prediction_matrix = pmm.run_pmm_many_variables(
            input_matrix=prediction_dict[VECTOR_PREDICTIONS_KEY],
            max_percentile_level=max_pmm_percentile_level)
    else:
        mean_vector_prediction_matrix = numpy.mean(
            prediction_dict[VECTOR_PREDICTIONS_KEY], axis=0)

    mean_vector_prediction_matrix = numpy.expand_dims(
        mean_vector_prediction_matrix, axis=0)

    return {
        SCALAR_TARGETS_KEY: mean_scalar_target_matrix,
        SCALAR_PREDICTIONS_KEY: mean_scalar_prediction_matrix,
        VECTOR_TARGETS_KEY: mean_vector_target_matrix,
        VECTOR_PREDICTIONS_KEY: mean_vector_prediction_matrix,
        HEIGHTS_KEY: prediction_dict[HEIGHTS_KEY],
        MODEL_FILE_KEY: prediction_dict[MODEL_FILE_KEY],
        ISOTONIC_MODEL_FILE_KEY: prediction_dict[ISOTONIC_MODEL_FILE_KEY]
    }
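
A toy, numpy-only comparison of the two averaging modes used above: the arithmetic mean flattens a peak that occurs at different heights in different examples, while rank-matching against the pooled distribution (the idea behind PMM) preserves its amplitude.

import numpy

profiles = numpy.array([
    [0., 0., 10., 0., 0.],
    [0., 0., 0., 10., 0.]
])

print(numpy.mean(profiles, axis=0))        # [0. 0. 5. 5. 0.] -- peak halved

mean_profile = numpy.mean(profiles, axis=0)
ranks = numpy.argsort(numpy.argsort(mean_profile))
pooled_values = numpy.sort(numpy.ravel(profiles))

# Map each rank in the mean profile to the matching quantile of the pooled
# values.
pmm_profile = pooled_values[numpy.round(
    ranks * (pooled_values.size - 1.) / (ranks.size - 1.)
).astype(int)]

print(pmm_profile)                         # [ 0.  0.  0. 10.  0.] -- peak kept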
Example 14
def run_monte_carlo_test(list_of_baseline_matrices, list_of_trial_matrices,
                         max_pmm_percentile_level, num_iterations):
    """Runs Monte Carlo significance test.

    E = number of examples in each set
    T = number of matrices in each set

    :param list_of_baseline_matrices: length-T list of numpy arrays, where the
        first axis of each numpy array has length E.
    :param list_of_trial_matrices: See above.
    :param max_pmm_percentile_level: Max percentile for probability-matched
        means (PMM).  For more details, see documentation for
        `pmm.run_pmm_many_variables`.  If you want to use pixelwise means, make
        this None.
    :param num_iterations: Number of Monte Carlo iterations.
    :return: monte_carlo_dict: Dictionary with the following keys.
    monte_carlo_dict['list_of_trial_pmm_matrices']: length-T list of numpy
        arrays, where list_of_trial_pmm_matrices[i] is the PMM composite over
        list_of_trial_matrices[i].  Thus, list_of_trial_pmm_matrices[i] has the
        same dimensions as list_of_trial_matrices[i], except without the first
        axis.
    monte_carlo_dict['list_of_p_value_matrices']: Same as above but containing
        p-values.
    monte_carlo_dict['list_of_percentile_matrices']: Same as above but
        containing percentiles.
    monte_carlo_dict['max_pmm_percentile_level']: Same as input.
    monte_carlo_dict['num_iterations']: Same as input.
    """

    num_examples_per_set = _check_input_args(
        list_of_baseline_matrices=list_of_baseline_matrices,
        list_of_trial_matrices=list_of_trial_matrices,
        num_iterations=num_iterations)

    example_indices = numpy.linspace(0,
                                     2 * num_examples_per_set - 1,
                                     num=2 * num_examples_per_set,
                                     dtype=int)

    num_matrices = len(list_of_trial_matrices)
    list_of_shuffled_pmm_matrices = [None] * num_matrices
    print(SEPARATOR_STRING)

    for i in range(num_iterations):
        if numpy.mod(i, 25) == 0:
            print('Have run {0:d} of {1:d} Monte Carlo iterations...'.format(
                i, num_iterations))

        these_indices = numpy.random.choice(example_indices,
                                            size=num_examples_per_set,
                                            replace=False)

        these_baseline_indices = these_indices[
            these_indices < num_examples_per_set]

        these_trial_indices = (
            these_indices[these_indices >= num_examples_per_set] -
            num_examples_per_set)

        for j in range(num_matrices):
            if list_of_trial_matrices[j] is None:
                continue

            this_shuffled_matrix = numpy.concatenate(
                (list_of_baseline_matrices[j][these_baseline_indices, ...],
                 list_of_trial_matrices[j][these_trial_indices, ...]))

            if max_pmm_percentile_level is None:
                this_shuffled_pmm_matrix = numpy.mean(this_shuffled_matrix,
                                                      axis=0)
            else:
                this_shuffled_pmm_matrix = pmm.run_pmm_many_variables(
                    input_matrix=this_shuffled_matrix,
                    max_percentile_level=max_pmm_percentile_level)

            if list_of_shuffled_pmm_matrices[j] is None:
                dimensions = numpy.array(
                    (num_iterations, ) + this_shuffled_pmm_matrix.shape,
                    dtype=int)
                list_of_shuffled_pmm_matrices[j] = numpy.full(
                    dimensions, numpy.nan)

            list_of_shuffled_pmm_matrices[j][i, ...] = this_shuffled_pmm_matrix

    print('Have run all {0:d} Monte Carlo iterations!'.format(num_iterations))
    print(SEPARATOR_STRING)

    list_of_trial_pmm_matrices = [None] * num_matrices
    list_of_percentile_matrices = [None] * num_matrices
    list_of_p_value_matrices = [None] * num_matrices

    for j in range(num_matrices):
        if list_of_trial_matrices[j] is None:
            continue

        if max_pmm_percentile_level is None:
            list_of_trial_pmm_matrices[j] = numpy.mean(
                list_of_trial_matrices[j], axis=0)
        else:
            list_of_trial_pmm_matrices[j] = pmm.run_pmm_many_variables(
                input_matrix=list_of_trial_matrices[j],
                max_percentile_level=max_pmm_percentile_level)

        trial_pmm_values = numpy.ravel(list_of_trial_pmm_matrices[j])
        percentiles = numpy.full(trial_pmm_values.shape, numpy.nan)

        for linear_index in range(len(trial_pmm_values)):
            if numpy.mod(linear_index, 25) == 0:
                print('Have computed {0:d} of {1:d} p-values...'.format(
                    linear_index, len(trial_pmm_values)))

            these_indices = numpy.unravel_index(
                linear_index, list_of_trial_pmm_matrices[j].shape)
            shuffled_pmm_matrix = list_of_shuffled_pmm_matrices[j] + 0.

            for this_index in these_indices[::-1]:
                shuffled_pmm_matrix = shuffled_pmm_matrix[..., this_index]

            percentiles[linear_index] = 0.01 * percentileofscore(
                a=numpy.ravel(shuffled_pmm_matrix),
                score=trial_pmm_values[linear_index],
                kind='mean')

        list_of_percentile_matrices[j] = numpy.reshape(
            percentiles, list_of_trial_pmm_matrices[j].shape)

        p_values = percentiles + 0.
        bottom_indices = numpy.where(p_values < 0.5)[0]
        top_indices = numpy.where(p_values >= 0.5)[0]
        p_values[bottom_indices] = 2 * p_values[bottom_indices]
        p_values[top_indices] = 2 * (1. - p_values[top_indices])

        print('Fraction of p-values <= 0.05: {0:.4f}'.format(
            numpy.mean(p_values <= 0.05)))

        list_of_p_value_matrices[j] = numpy.reshape(
            p_values, list_of_trial_pmm_matrices[j].shape)

    return {
        TRIAL_PMM_MATRICES_KEY: list_of_trial_pmm_matrices,
        PERCENTILE_MATRICES_KEY: list_of_percentile_matrices,
        P_VALUE_MATRICES_KEY: list_of_p_value_matrices,
        MAX_PMM_PERCENTILE_KEY: max_pmm_percentile_level,
        NUM_ITERATIONS_KEY: num_iterations
    }
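
A standalone illustration of the percentile-to-p-value conversion above: percentiles near 0 or 1 (trial value deep in either tail of the shuffled distribution) map to small two-sided p-values.

import numpy

percentiles = numpy.array([0.01, 0.30, 0.50, 0.97])
p_values = numpy.where(
    percentiles < 0.5, 2 * percentiles, 2 * (1. - percentiles))

print(p_values)   # [0.02 0.6  1.   0.06]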
Example 15
def _average_bwo_results(input_file_name, use_pmm, max_pmm_percentile_level,
                         output_file_name):
    """Averages results of backwards optimization.

    :param input_file_name: See documentation at top of file.
    :param use_pmm: Same.
    :param max_pmm_percentile_level: Same.
    :param output_file_name: Same.
    """

    print('Reading backwards-optimization results from: "{0:s}"...'.format(
        input_file_name))
    bwo_dict = bwo.read_file(input_file_name)

    init_vector_predictor_matrix = bwo_dict[bwo.INIT_VECTOR_PREDICTORS_KEY]
    final_vector_predictor_matrix = bwo_dict[bwo.FINAL_VECTOR_PREDICTORS_KEY]
    init_scalar_predictor_matrix = bwo_dict[bwo.INIT_SCALAR_PREDICTORS_KEY]
    final_scalar_predictor_matrix = bwo_dict[bwo.FINAL_SCALAR_PREDICTORS_KEY]

    if init_vector_predictor_matrix.size == 0:
        init_vector_predictor_matrix = init_vector_predictor_matrix[0, ...]
        final_vector_predictor_matrix = final_vector_predictor_matrix[0, ...]
    elif use_pmm:
        init_vector_predictor_matrix = pmm.run_pmm_many_variables(
            input_matrix=init_vector_predictor_matrix,
            max_percentile_level=max_pmm_percentile_level)
        final_vector_predictor_matrix = pmm.run_pmm_many_variables(
            input_matrix=final_vector_predictor_matrix,
            max_percentile_level=max_pmm_percentile_level)
    else:
        init_vector_predictor_matrix = numpy.mean(init_vector_predictor_matrix,
                                                  axis=0)
        final_vector_predictor_matrix = numpy.mean(
            final_vector_predictor_matrix, axis=0)

    if init_scalar_predictor_matrix.size == 0:
        init_scalar_predictor_matrix = init_scalar_predictor_matrix[0, ...]
        final_scalar_predictor_matrix = final_scalar_predictor_matrix[0, ...]
    else:
        init_scalar_predictor_matrix = numpy.mean(init_scalar_predictor_matrix,
                                                  axis=0)
        final_scalar_predictor_matrix = numpy.mean(
            final_scalar_predictor_matrix, axis=0)

    init_vector_predictor_matrix = numpy.expand_dims(
        init_vector_predictor_matrix, axis=0)
    final_vector_predictor_matrix = numpy.expand_dims(
        final_vector_predictor_matrix, axis=0)
    init_scalar_predictor_matrix = numpy.expand_dims(
        init_scalar_predictor_matrix, axis=0)
    final_scalar_predictor_matrix = numpy.expand_dims(
        final_scalar_predictor_matrix, axis=0)

    if use_pmm:
        example_id_strings = [bwo.DUMMY_EXAMPLE_ID_PMM]
    else:
        example_id_strings = [bwo.DUMMY_EXAMPLE_ID_AVERAGE]

    initial_activations = numpy.array(
        [numpy.mean(bwo_dict[bwo.INITIAL_ACTIVATIONS_KEY])])
    final_activations = numpy.array(
        [numpy.mean(bwo_dict[bwo.FINAL_ACTIVATIONS_KEY])])

    print(('Writing average backwards-optimization results to: "{0:s}"...'
           ).format(output_file_name))

    bwo.write_file(netcdf_file_name=output_file_name,
                   init_scalar_predictor_matrix=init_scalar_predictor_matrix,
                   final_scalar_predictor_matrix=final_scalar_predictor_matrix,
                   init_vector_predictor_matrix=init_vector_predictor_matrix,
                   final_vector_predictor_matrix=final_vector_predictor_matrix,
                   initial_activations=initial_activations,
                   final_activations=final_activations,
                   example_id_strings=example_id_strings,
                   model_file_name=bwo_dict[bwo.MODEL_FILE_KEY],
                   layer_name=bwo_dict[bwo.LAYER_NAME_KEY],
                   neuron_indices=bwo_dict[bwo.NEURON_INDICES_KEY],
                   ideal_activation=bwo_dict[bwo.IDEAL_ACTIVATION_KEY],
                   num_iterations=bwo_dict[bwo.NUM_ITERATIONS_KEY],
                   learning_rate=bwo_dict[bwo.LEARNING_RATE_KEY],
                   l2_weight=bwo_dict[bwo.L2_WEIGHT_KEY])
Example 16
def _average_saliency_maps_all_targets(input_file_name, use_pmm,
                                       max_pmm_percentile_level,
                                       output_file_name):
    """Averages saliency maps for each target variable.

    :param input_file_name: See documentation at top of file.
    :param use_pmm: Same.
    :param max_pmm_percentile_level: Same.
    :param output_file_name: Same.
    """

    print('Reading saliency maps from: "{0:s}"...'.format(input_file_name))
    saliency_dict = saliency.read_all_targets_file(input_file_name)

    saliency_matrix_vector_p_scalar_t = (
        saliency_dict[saliency.SALIENCY_VECTOR_P_SCALAR_T_KEY])
    num_scalar_targets = saliency_matrix_vector_p_scalar_t.shape[-1]

    if saliency_matrix_vector_p_scalar_t.size != 0:
        for k in range(num_scalar_targets):
            if use_pmm:
                saliency_matrix_vector_p_scalar_t[0, ..., k] = (
                    pmm.run_pmm_many_variables(
                        input_matrix=saliency_matrix_vector_p_scalar_t[..., k],
                        max_percentile_level=max_pmm_percentile_level))
            else:
                saliency_matrix_vector_p_scalar_t[0, ..., k] = numpy.mean(
                    saliency_matrix_vector_p_scalar_t[..., k], axis=0)

    saliency_matrix_scalar_p_scalar_t = (
        saliency_dict[saliency.SALIENCY_SCALAR_P_SCALAR_T_KEY])

    if saliency_matrix_scalar_p_scalar_t.size != 0:
        for k in range(num_scalar_targets):
            if use_pmm and len(saliency_matrix_scalar_p_scalar_t.shape) == 4:
                saliency_matrix_scalar_p_scalar_t[0, ..., k] = (
                    pmm.run_pmm_many_variables(
                        input_matrix=saliency_matrix_scalar_p_scalar_t[..., k],
                        max_percentile_level=max_pmm_percentile_level))
            else:
                saliency_matrix_scalar_p_scalar_t[0, ..., k] = numpy.mean(
                    saliency_matrix_scalar_p_scalar_t[..., k], axis=0)

    saliency_matrix_vector_p_vector_t = (
        saliency_dict[saliency.SALIENCY_VECTOR_P_VECTOR_T_KEY])
    num_heights = saliency_matrix_vector_p_vector_t.shape[-2]
    num_vector_targets = saliency_matrix_vector_p_vector_t.shape[-1]

    if saliency_matrix_vector_p_vector_t.size != 0:
        for j in range(num_heights):
            for k in range(num_vector_targets):
                this_input_matrix = (
                    saliency_matrix_vector_p_vector_t[..., j, k])

                if use_pmm:
                    saliency_matrix_vector_p_vector_t[0, ..., j, k] = (
                        pmm.run_pmm_many_variables(
                            input_matrix=this_input_matrix,
                            max_percentile_level=max_pmm_percentile_level))
                else:
                    saliency_matrix_vector_p_vector_t[0, ..., j, k] = (
                        numpy.mean(this_input_matrix, axis=0))

    saliency_matrix_scalar_p_vector_t = (
        saliency_dict[saliency.SALIENCY_SCALAR_P_VECTOR_T_KEY])

    if saliency_matrix_scalar_p_vector_t.size != 0:
        for j in range(num_heights):
            for k in range(num_vector_targets):
                this_input_matrix = (
                    saliency_matrix_scalar_p_vector_t[..., j, k])

                if (use_pmm
                        and len(saliency_matrix_scalar_p_vector_t.shape) == 5):
                    saliency_matrix_scalar_p_vector_t[0, ..., j, k] = (
                        pmm.run_pmm_many_variables(
                            input_matrix=this_input_matrix,
                            max_percentile_level=max_pmm_percentile_level))
                else:
                    saliency_matrix_scalar_p_vector_t[0, ..., j, k] = (
                        numpy.mean(this_input_matrix, axis=0))

    saliency_matrix_vector_p_scalar_t = (
        saliency_matrix_vector_p_scalar_t[[0], ...])
    saliency_matrix_scalar_p_scalar_t = (
        saliency_matrix_scalar_p_scalar_t[[0], ...])
    saliency_matrix_vector_p_vector_t = (
        saliency_matrix_vector_p_vector_t[[0], ...])
    saliency_matrix_scalar_p_vector_t = (
        saliency_matrix_scalar_p_vector_t[[0], ...])

    if use_pmm:
        example_id_strings = [saliency.DUMMY_EXAMPLE_ID_PMM]
    else:
        example_id_strings = [saliency.DUMMY_EXAMPLE_ID_AVERAGE]

    print('Writing average saliency maps to: "{0:s}"...'.format(
        output_file_name))

    saliency.write_all_targets_file(
        netcdf_file_name=output_file_name,
        saliency_matrix_scalar_p_scalar_t=saliency_matrix_scalar_p_scalar_t,
        saliency_matrix_vector_p_scalar_t=saliency_matrix_vector_p_scalar_t,
        saliency_matrix_scalar_p_vector_t=saliency_matrix_scalar_p_vector_t,
        saliency_matrix_vector_p_vector_t=saliency_matrix_vector_p_vector_t,
        example_id_strings=example_id_strings,
        model_file_name=saliency_dict[saliency.MODEL_FILE_KEY],
        ideal_activation=saliency_dict[saliency.IDEAL_ACTIVATION_KEY])
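
A toy illustration of the write-back pattern used throughout this example: the composite for each target is stored in the first example slot, and the array is then trimmed with `[[0], ...]`, which keeps the example axis with length 1 (unlike `[0, ...]`, which would drop it). The shapes are assumptions.

import numpy

# E x H x P_v x T_s (assumed sizes)
saliency_matrix = numpy.random.rand(50, 73, 14, 4)

for k in range(saliency_matrix.shape[-1]):
    saliency_matrix[0, ..., k] = numpy.mean(saliency_matrix[..., k], axis=0)

saliency_matrix = saliency_matrix[[0], ...]
print(saliency_matrix.shape)   # (1, 73, 14, 4)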