Example no. 1
0
    def test_mnist_unet_with_shape_valid(self):
        """Fit a U-Net explanation model on non-flattened MNIST for several
        downsampling factors, alternating batch normalization on and off,
        and check that each attribution map matches the input shape.
        """
        num_subsamples = 100
        (x_train,
         y_train), (x_test,
                    y_test) = TestUtil.get_mnist(flattened=False,
                                                 num_subsamples=num_subsamples)

        # Small Keras classifier to act as the model being explained.
        explained_model_builder = MLPModelBuilder(num_layers=2,
                                                  num_units=64,
                                                  activation="relu",
                                                  p_dropout=0.2,
                                                  verbose=0,
                                                  batch_size=256,
                                                  learning_rate=0.001,
                                                  num_epochs=2,
                                                  early_stopping_patience=128)
        input_shape = x_train.shape[1:]
        input_layer = Input(shape=input_shape)
        last_layer = Flatten()(input_layer)
        last_layer = explained_model_builder.build(last_layer)
        last_layer = Dense(y_train.shape[-1], activation="softmax")(last_layer)
        explained_model = Model(input_layer, last_layer)
        explained_model.compile(loss="categorical_crossentropy",
                                optimizer="adam")
        explained_model.fit(x_train, y_train)
        masking_operation = ZeroMasking()
        loss = categorical_crossentropy

        downsample_factors = [(2, 2), (4, 4), (4, 7), (7, 4), (7, 7)]
        # Alternate batch normalization on/off across the factors.
        # (A comparison already yields a bool; no conditional expression needed.)
        with_bns = [i % 2 == 0 for i in range(len(downsample_factors))]
        for downsample_factor, with_bn in zip(downsample_factors, with_bns):
            model_builder = UNetModelBuilder(downsample_factor,
                                             num_layers=2,
                                             num_units=64,
                                             activation="relu",
                                             p_dropout=0.2,
                                             verbose=0,
                                             batch_size=256,
                                             learning_rate=0.001,
                                             num_epochs=2,
                                             early_stopping_patience=128,
                                             with_bn=with_bn)

            explainer = CXPlain(explained_model,
                                model_builder,
                                masking_operation,
                                loss,
                                downsample_factors=downsample_factor)

            explainer.fit(x_train, y_train)
            eval_score = explainer.score(x_test, y_test)
            train_score = explainer.get_last_fit_score()
            median = explainer.predict(x_test)
            # Attribution maps must have the same shape as the inputs.
            self.assertTrue(median.shape == x_test.shape)
Example no. 2
0
    def test_boston_housing_no_fit_invalid(self):
        """predict()/score() on an unfitted CXPlain explainer must raise
        AssertionError."""
        (x_train, y_train), (x_test, y_test) = TestUtil.get_boston_housing()

        forest = RandomForestRegressor(n_estimators=64, max_depth=5,
                                       random_state=1)
        forest.fit(x_train, y_train)

        builder = MLPModelBuilder(num_layers=2, num_units=32,
                                  activation="relu", p_dropout=0.2, verbose=0,
                                  batch_size=32, learning_rate=0.001,
                                  num_epochs=2, early_stopping_patience=128)

        explainer = CXPlain(forest, builder, ZeroMasking(),
                            mean_squared_error)

        # Neither prediction nor scoring is valid before fit().
        with self.assertRaises(AssertionError):
            explainer.predict(x_test, y_test)

        with self.assertRaises(AssertionError):
            explainer.score(x_test, y_test)
Example no. 3
0
    def test_boston_housing_valid(self):
        """End-to-end CXPlain fit/score/predict on the Boston housing data."""
        (x_train, y_train), (x_test, y_test) = TestUtil.get_boston_housing()

        forest = RandomForestRegressor(n_estimators=64, max_depth=5,
                                       random_state=1)
        forest.fit(x_train, y_train)

        builder = MLPModelBuilder(num_layers=2, num_units=32,
                                  activation="relu", p_dropout=0.2, verbose=0,
                                  batch_size=32, learning_rate=0.001,
                                  num_epochs=2, early_stopping_patience=128)

        explainer = CXPlain(forest, builder, ZeroMasking(),
                            mean_squared_error)

        explainer.fit(x_train, y_train)
        # The explanation model emits one importance score per input feature.
        self.assertEqual(explainer.prediction_model.output_shape,
                         (None, np.prod(x_test.shape[1:])))

        eval_score = explainer.score(x_test, y_test)
        train_score = explainer.get_last_fit_score()
        median = explainer.predict(x_test)
        # Attributions must mirror the input shape.
        self.assertTrue(median.shape == x_test.shape)
Example no. 4
0
    def test_boston_housing_confidence_level_invalid(self):
        """Out-of-range confidence levels passed to predict() must raise
        ValueError."""
        (x_train, y_train), (x_test, y_test) = TestUtil.get_boston_housing()

        forest = RandomForestRegressor(n_estimators=64, max_depth=5,
                                       random_state=1)
        forest.fit(x_train, y_train)

        builder = MLPModelBuilder(num_layers=2, num_units=32,
                                  activation="relu", p_dropout=0.2, verbose=0,
                                  batch_size=32, learning_rate=0.001,
                                  num_epochs=3, early_stopping_patience=128)

        explainer = CXPlain(forest, builder, ZeroMasking(),
                            mean_squared_error, num_models=2)

        explainer.fit(x_train, y_train)

        # Each value lies outside the valid confidence-level range.
        for bad_level in (1.01, -0.5, -0.01):
            with self.assertRaises(ValueError):
                explainer.predict(x_test, confidence_level=bad_level)
Example no. 5
0
    def test_boston_housing_load_save_valid(self):
        """Saving and re-loading a fitted explainer must reproduce its
        predictions exactly.

        Also checks that save() refuses to overwrite an existing directory
        unless overwrite=True is passed. The temporary directory is removed
        in a finally block so a failing assertion no longer leaks it.
        """
        (x_train, y_train), (x_test, y_test) = TestUtil.get_boston_housing()
        explained_model = RandomForestRegressor(n_estimators=64, max_depth=5, random_state=1)
        explained_model.fit(x_train, y_train)

        model_builder = MLPModelBuilder(num_layers=2, num_units=32, activation="relu", p_dropout=0.2, verbose=0,
                                        batch_size=32, learning_rate=0.001, num_epochs=2, early_stopping_patience=128)
        masking_operation = ZeroMasking()
        loss = mean_squared_error

        num_models_settings = [1, 2]
        for num_models in num_models_settings:
            explainer = CXPlain(explained_model, model_builder, masking_operation, loss,
                                num_models=num_models)

            explainer.fit(x_train, y_train)
            median_1 = explainer.predict(x_test)

            tmp_dir_name = tempfile.mkdtemp()
            try:
                explainer.save(tmp_dir_name)

                # A second save without overwrite must be rejected.
                with self.assertRaises(ValueError):
                    explainer.save(tmp_dir_name, overwrite=False)

                explainer.save(tmp_dir_name, overwrite=True)
                explainer.load(tmp_dir_name)
                median_2 = explainer.predict(x_test)

                # Round-tripping must not change the attributions.
                self.assertTrue(np.array_equal(median_1, median_2))
            finally:
                shutil.rmtree(tmp_dir_name)  # Cleanup even on failure.
Example no. 6
0
    def test_mnist_valid(self):
        """CXPlain with an sklearn MLP on MNIST across several downsampling
        factors."""
        num_subsamples = 100
        (x_train, y_train), (x_test, y_test) = TestUtil.get_mnist(
            flattened=False, num_subsamples=num_subsamples)

        clf = MLPClassifier(solver='lbfgs', alpha=1e-5,
                            hidden_layer_sizes=(64, 32), random_state=1)
        # The sklearn model expects flat inputs.
        clf.fit(x_train.reshape((len(x_train), -1)), y_train)

        builder = MLPModelBuilder(num_layers=2, num_units=64,
                                  activation="relu", p_dropout=0.2, verbose=0,
                                  batch_size=256, learning_rate=0.001,
                                  num_epochs=2, early_stopping_patience=128)

        for factor in ((2, 2), (4, 4), (4, 7), (7, 4), (7, 7)):
            explainer = CXPlain(clf, builder, ZeroMasking(),
                                categorical_crossentropy,
                                downsample_factors=factor,
                                flatten_for_explained_model=True)

            explainer.fit(x_train, y_train)

            # One importance score per input feature.
            self.assertEqual(explainer.prediction_model.output_shape,
                             (None, np.prod(x_test.shape[1:])))

            eval_score = explainer.score(x_test, y_test)
            train_score = explainer.get_last_fit_score()
            median = explainer.predict(x_test)
            self.assertTrue(median.shape == x_test.shape)
Example no. 7
0
    def test_mnist_valid(self):
        """Ensemble CXPlain (num_models=2) on MNIST: median attributions and
        confidence intervals must have consistent shapes and ordering."""
        num_subsamples = 100
        (x_train, y_train), (x_test, y_test) = TestUtil.get_mnist(
            flattened=False, num_subsamples=num_subsamples)

        clf = MLPClassifier(solver='lbfgs', alpha=1e-5,
                            hidden_layer_sizes=(64, 32), random_state=1)
        # The sklearn model expects flat inputs.
        clf.fit(x_train.reshape((len(x_train), -1)), y_train)

        builder = MLPModelBuilder(num_layers=2, num_units=64,
                                  activation="relu", p_dropout=0.2, verbose=0,
                                  batch_size=256, learning_rate=0.001,
                                  num_epochs=3, early_stopping_patience=128)

        for factor in ((2, 2), (4, 4), (4, 7), (7, 4), (7, 7)):
            explainer = CXPlain(clf, builder, ZeroMasking(),
                                categorical_crossentropy,
                                num_models=2,
                                downsample_factors=factor,
                                flatten_for_explained_model=True)

            explainer.fit(x_train, y_train)
            eval_score = explainer.score(x_test, y_test)
            train_score = explainer.get_last_fit_score()
            median, confidence = explainer.predict(x_test,
                                                   confidence_level=0.95)

            self.assertTrue(median.shape == x_test.shape)
            # Intervals carry a (low, high) pair per spatial position.
            self.assertTrue(confidence.shape == x_test.shape[:-1] + (2, ))

            # Flatten so each (sample, feature) pair is addressable below.
            median = median.reshape((len(x_test), -1))
            confidence = confidence.reshape((len(x_test), -1, 2))

            for i in range(len(x_test)):
                for j in range(len(x_test[i])):
                    low = confidence[i][j][0]
                    high = confidence[i][j][1]
                    # The median must lie inside its own interval...
                    self.assertTrue(low <= median[i][j] <= high)
                    # ...and both bounds must be non-negative.
                    self.assertTrue(low >= 0)
                    self.assertTrue(high >= 0)
Example no. 8
0
    def test_overwrite_ensemble_model_invalid(self):
        """save(overwrite=False) must raise ValueError whenever any target
        file already exists — both before and after fitting the ensemble.

        The common file list is built once and extended with the per-member
        model files after fitting, instead of duplicating the literal.
        """
        (x_train, y_train), (x_test, y_test) = TestUtil.get_boston_housing()

        model_builder = MLPModelBuilder()
        explained_model = RandomForestRegressor(n_estimators=64,
                                                max_depth=5,
                                                random_state=1)
        explained_model.fit(x_train, y_train)
        masking_operation = ZeroMasking()
        loss = binary_crossentropy
        num_models = 5
        explainer = CXPlain(explained_model,
                            model_builder,
                            masking_operation,
                            loss,
                            num_models=num_models)

        # Files written for every explainer, trained or not.
        base_file_names = [
            CXPlain.get_config_file_name(),
            CXPlain.get_explained_model_file_name(".pkl"),
            CXPlain.get_loss_pkl_file_name(),
            CXPlain.get_model_builder_pkl_file_name(),
            CXPlain.get_masking_operation_pkl_file_name()
        ]

        # Test with untrained explanation model.
        for file_name in base_file_names:
            tmp_dir = TestExplanationModel.make_at_tmp(file_name)
            with self.assertRaises(ValueError):
                explainer.save(tmp_dir, overwrite=False)

        # Test with trained explanation model; one extra h5 file exists
        # per ensemble member after fitting.
        explainer.fit(x_train, y_train)

        file_names = base_file_names + [
            CXPlain.get_prediction_model_h5_file_name(i)
            for i in range(num_models)
        ]

        for file_name in file_names:
            tmp_dir = TestExplanationModel.make_at_tmp(file_name)
            with self.assertRaises(ValueError):
                explainer.save(tmp_dir, overwrite=False)
Example no. 9
0
    def test_boston_housing_valid(self):
        """Ensemble CXPlain on Boston housing for several ensemble sizes:
        median attributions and confidence intervals must be shape-consistent
        and correctly ordered."""
        (x_train, y_train), (x_test, y_test) = TestUtil.get_boston_housing()
        forest = RandomForestRegressor(n_estimators=64, max_depth=5,
                                       random_state=1)
        forest.fit(x_train, y_train)

        builder = MLPModelBuilder(num_layers=2, num_units=32,
                                  activation="relu", p_dropout=0.2, verbose=0,
                                  batch_size=32, learning_rate=0.001,
                                  num_epochs=3, early_stopping_patience=128)

        for ensemble_size in (2, 5, 10):
            explainer = CXPlain(forest, builder, ZeroMasking(),
                                mean_squared_error,
                                num_models=ensemble_size)

            explainer.fit(x_train, y_train)
            eval_score = explainer.score(x_test, y_test)
            train_score = explainer.get_last_fit_score()
            median, confidence = explainer.predict(x_test,
                                                   confidence_level=0.95)

            self.assertTrue(median.shape == x_test.shape)
            # Intervals carry a (low, high) pair per feature.
            self.assertTrue(confidence.shape == x_test.shape + (2, ))

            # Flatten so each (sample, feature) pair is addressable below.
            median = median.reshape((len(x_test), -1))
            confidence = confidence.reshape((len(x_test), -1, 2))

            for i in range(len(x_test)):
                for j in range(len(x_test[i])):
                    low = confidence[i][j][0]
                    high = confidence[i][j][1]
                    # The median must lie inside its own interval...
                    self.assertTrue(low <= median[i][j] <= high)
                    # ...and both bounds must be non-negative.
                    self.assertTrue(low >= 0)
                    self.assertTrue(high >= 0)
Example no. 10
0
def find_genes_CX(drug, model, meta, gdsc_expr, gdsc_dr, test_tcga_expr,
                  save_dir):
    """Fit a CXPlain explainer for `model` on GDSC expression/response data
    and attribute the TCGA test set.

    Writes `<save_dir>/genes.csv` with the top genes by Borda rank and by
    mean absolute attribution, and saves the explainer under
    `<save_dir>/explainer/`.

    NOTE(review): `drug` and `meta` are accepted but unused here — presumably
    kept for interface parity with sibling `find_genes_*` functions; confirm.
    """
    torch.manual_seed(SEED)
    np.random.seed(SEED)

    print('obtaining masked data...')
    masked_data = get_masked_data_for_CXPlain(model, gdsc_expr)
    print('obtained masked data...')

    # TF v1 graph mode + a fresh session before building the CXPlain graph.
    import tensorflow as tf
    tf.compat.v1.disable_v2_behavior()
    tf.keras.backend.clear_session()
    tf.random.set_seed(SEED)

    from tensorflow.python.keras.losses import mean_squared_error as loss
    from cxplain import MLPModelBuilder, CXPlain
    model_builder = MLPModelBuilder(num_layers=2,
                                    num_units=512,
                                    batch_size=8,
                                    learning_rate=0.001)

    print(gdsc_expr.values.shape, gdsc_dr.values.shape)

    print("Fitting CXPlain model")
    # Masking operation is None because precomputed masked_data is supplied.
    explainer = CXPlain(model, model_builder, None, loss)
    explainer.fit(gdsc_expr.values, gdsc_dr.values, masked_data=masked_data)
    print("Attributing using CXPlain")

    attr = explainer.explain(test_tcga_expr.values)
    # NOTE(review): `dataset.hgnc` is a module-level global assumed to hold
    # gene symbols aligned with the expression columns — confirm at call site.
    attr = pd.DataFrame(attr, index=test_tcga_expr.index, columns=dataset.hgnc)
    borda = get_ranked_list(attr)

    attr_mean = list(np.abs(attr).mean(axis=0).nlargest(200).index)
    out = pd.DataFrame(columns=['borda', 'mean'])
    out['borda'] = borda
    out['mean'] = attr_mean

    out.to_csv(save_dir + '/genes.csv', index=False)

    # makedirs(exist_ok=True) is race-free, unlike the previous
    # `if not exists: mkdir` check-then-act pattern.
    os.makedirs(save_dir + '/explainer/', exist_ok=True)

    explainer.save(save_dir + '/explainer/')
Example no. 11
0
    def test_mnist_confidence_levels_valid(self):
        """predict() must reject confidence levels outside the open interval
        (0, 1), including the boundary values themselves."""
        num_subsamples = 100
        (x_train, y_train), (x_test, y_test) = TestUtil.get_mnist(
            flattened=False, num_subsamples=num_subsamples)

        clf = MLPClassifier(solver='lbfgs', alpha=1e-5,
                            hidden_layer_sizes=(64, 32), random_state=1)
        # The sklearn model expects flat inputs.
        clf.fit(x_train.reshape((len(x_train), -1)), y_train)

        builder = MLPModelBuilder(num_layers=2, num_units=64,
                                  activation="relu", p_dropout=0.2, verbose=0,
                                  batch_size=256, learning_rate=0.001,
                                  num_epochs=3, early_stopping_patience=128)

        for bad_level in (0.0, 1.0, 1.01, -0.01):
            explainer = CXPlain(clf, builder, ZeroMasking(),
                                categorical_crossentropy,
                                num_models=2,
                                downsample_factors=(2, 2),
                                flatten_for_explained_model=True)

            explainer.fit(x_train, y_train)

            with self.assertRaises(ValueError):
                _ = explainer.predict(x_test, confidence_level=bad_level)