class TestCBRW(unittest.TestCase):
    """
    Unit tests for CBRW
    """

    observations = [{
        'feature_a': 'a_val_1',
        'feature_b': 'b_val_1',
        'feature_c': 'c_val_1'
    }, {
        'feature_b': 'b_val_1',
        'feature_c': 'c_val_2',
        'feature_a': 'a_val_1'
    }]

    def setUp(self):
        self.cbrw = CBRW()
        self.cbrw.add_observations(self.observations)

    def test_get_feature_relevance(self):
        self.cbrw._feature_relevance = {'feature_a': 0.5}
        # get relevance for valid feature tuple
        rel = self.cbrw._get_feature_relevance(('feature_a', 'a_val_1'))
        self.assertEqual(rel, 0.5)
        # get relevance for tuple with valid feature name
        rel = self.cbrw._get_feature_relevance(('feature_a', 'xxx'))
        self.assertEqual(rel, 0.5)
        # get relevance for invalid feature name
        rel = self.cbrw._get_feature_relevance(('xxx', 'xxx'))
        self.assertEqual(rel, 0)

    def test_compute_biases(self):
        bias_dict = self.cbrw._compute_biases()
        self.assertEqual(bias_dict[('feature_a', 'a_val_1')], 0)
        self.assertEqual(bias_dict[('feature_b', 'b_val_1')], 0)
        self.assertEqual(bias_dict[('feature_c', 'c_val_1')], 0.25)
        self.assertEqual(bias_dict[('feature_c', 'c_val_2')], 0.25)

    def test_compute_biased_transition_matrix(self):
        transition_matrix = self.cbrw._compute_biased_transition_matrix()
        self.assertIsInstance(transition_matrix, csr_matrix)
        self.assertTupleEqual(transition_matrix.shape, (4, 4))
        self.assertTrue((transition_matrix.data > 0).all())
        self.assertTrue((transition_matrix.data <= 1).all())

    def test_fit_no_data(self):
        self.cbrw = CBRW()
        with self.assertRaises(CBRWFitError):
            self.cbrw.fit()

    def test_fit(self):
        self.cbrw.fit()
        self.assertIsNotNone(self.cbrw._stationary_prob)
        self.assertIsNotNone(self.cbrw._feature_relevance)

    def test_score_before_fit(self):
        with self.assertRaises(CBRWScoreError):
            _ = self.cbrw.score(self.observations)

    def test_score(self):
        self.cbrw.fit()

        # score observation where all features and values
        # have been previously observed
        to_be_scored = self.observations[0]
        score = self.cbrw.score(to_be_scored)
        score = score[0]
        self.assertGreaterEqual(score, 0)
        self.assertLessEqual(score, 1)
        # actual score is approximately 0.2759 so test this
        # value in case implementation changes
        self.assertAlmostEqual(score, 0.2759, places=4)

    def test_score_unknown_features_default(self):
        self.cbrw.fit()

        # score observation where all features but not all
        # values have been previously observed
        to_be_scored = {
            'feature_a': 'a_val_x',
            'feature_b': 'b_val_1',
            'feature_c': 'c_val_1'
        }
        score = self.cbrw.score(to_be_scored)
        self.assertTrue(isnan(score[0]))

        # score observation where a feature has not
        # been previously observed
        to_be_scored = {
            'feature_x': 'x_val_x',
            'feature_b': 'b_val_1',
            'feature_c': 'c_val_1'
        }
        score = self.cbrw.score(to_be_scored)
        self.assertTrue(isnan(score[0]))

        # score valid and invalid observations in one call
        to_be_scored = [
            self.observations[0], {
                'feature_x': 'x_val_x',
                'feature_b': 'b_val_1'
            }
        ]
        scores = self.cbrw.score(to_be_scored)
        valid_score = scores[0]
        invalid_score = scores[1]
        self.assertFalse(isnan(valid_score))
        self.assertGreaterEqual(valid_score, 0)
        self.assertLessEqual(valid_score, 1)
        self.assertTrue(isnan(invalid_score))

    def test_score_unknown_features_ignore(self):
        self.cbrw = CBRW(ignore_unknown=True)
        self.cbrw.add_observations(self.observations)
        self.cbrw.fit()

        # score observation where all features but not all
        # values have been previously observed
        to_be_scored = {
            'feature_a': 'a_val_x',
            'feature_b': 'b_val_1',
            'feature_c': 'c_val_1'
        }
        actually_scored = {'feature_b': 'b_val_1', 'feature_c': 'c_val_1'}
        score = self.cbrw.score(to_be_scored)
        actual_score = self.cbrw.score(actually_scored)
        self.assertFalse(isnan(score[0]))
        self.assertEqual(score, actual_score)

        # score observation where a feature has not
        # been previously observed
        to_be_scored = {
            'feature_x': 'x_val_x',
            'feature_b': 'b_val_1',
            'feature_c': 'c_val_1'
        }
        actually_scored = {'feature_b': 'b_val_1', 'feature_c': 'c_val_1'}
        score = self.cbrw.score(to_be_scored)
        actual_score = self.cbrw.score(actually_scored)
        self.assertFalse(isnan(score[0]))
        self.assertEqual(score, actual_score)

        # score observation where no features have
        # previously been observed
        to_be_scored = {
            'feature_x': 'x_val_x',
            'feature_y': 'y_val_1',
            'feature_z': 'z_val_1'
        }
        score = self.cbrw.score(to_be_scored)
        self.assertFalse(isnan(score[0]))
        self.assertEqual(score[0], 0)

    def test_score_with_nans_default(self):
        obs = deepcopy(self.observations)
        obs[0]['feautre_a'] = np.nan

        to_be_scored = {
            'feature_a': np.nan,
            'feature_b': 'b_val_1',
            'feature_c': 'c_val_1'
        }

        # score observation with nan value
        self.cbrw.fit()
        score = self.cbrw.score(to_be_scored)
        self.assertTrue(isnan(score[0]))

        # fit includes observation with nan value
        self.cbrw = CBRW()
        self.cbrw.add_observations(obs)
        self.cbrw.fit()
        score = self.cbrw.score(to_be_scored)
        self.assertTrue(isnan(score[0]))

    def test_score_with_nans_ignore(self):
        obs = deepcopy(self.observations)
        obs[0]['feautre_a'] = np.nan

        to_be_scored = {
            'feature_a': np.nan,
            'feature_b': 'b_val_1',
            'feature_c': 'c_val_1'
        }
        actually_scored = {'feature_b': 'b_val_1', 'feature_c': 'c_val_1'}

        # score observation with nan value
        self.cbrw = CBRW(ignore_unknown=True)
        self.cbrw.add_observations(self.observations)
        self.cbrw.fit()
        score = self.cbrw.score(to_be_scored)
        actual_score = self.cbrw.score(actually_scored)
        self.assertFalse(isnan(score[0]))
        self.assertEqual(score, actual_score)

        # fit includes observation with nan value
        self.cbrw = CBRW(ignore_unknown=True)
        self.cbrw.add_observations(obs)
        self.cbrw.fit()
        score = self.cbrw.score(to_be_scored)
        actual_score = self.cbrw.score(actually_scored)
        self.assertFalse(isnan(score[0]))
        self.assertEqual(score, actual_score)

    def test_value_scores_before_fit(self):
        with self.assertRaises(CBRWScoreError):
            _ = self.cbrw.value_scores(self.observations)

    def test_value_scores(self):
        self.cbrw.fit()

        # score observation where all features and values
        # have been previously observed
        to_be_scored = self.observations[0]
        value_scores = self.cbrw.value_scores(to_be_scored)
        value_scores = value_scores[0]
        self.assertCountEqual(value_scores.keys(), to_be_scored.keys())
        for vs in value_scores.values():
            self.assertGreaterEqual(vs, 0)
            self.assertLessEqual(vs, 1)

    def test_value_scores_unknown_features_default(self):
        self.cbrw.fit()

        # score observation where all features but not all
        # values have been previously observed
        to_be_scored = {
            'feature_a': 'a_val_x',
            'feature_b': 'b_val_1',
            'feature_c': 'c_val_1'
        }
        value_scores = self.cbrw.value_scores(to_be_scored)
        value_scores = value_scores[0]
        self.assertTrue(isnan(value_scores['feature_a']))
        self.assertFalse(isnan(value_scores['feature_b']))
        self.assertFalse(isnan(value_scores['feature_c']))

        # score observation where a feature has not
        # been previously observed
        to_be_scored = {
            'feature_x': 'x_val_x',
            'feature_b': 'b_val_1',
            'feature_c': 'c_val_1'
        }
        value_scores = self.cbrw.value_scores(to_be_scored)
        value_scores = value_scores[0]
        self.assertTrue(isnan(value_scores['feature_x']))
        self.assertFalse(isnan(value_scores['feature_b']))
        self.assertFalse(isnan(value_scores['feature_c']))

        # score valid and invalid observations in one call
        to_be_scored = [
            self.observations[0], {
                'feature_x': 'x_val_x',
                'feature_b': 'b_val_1'
            }
        ]
        value_scores = self.cbrw.value_scores(to_be_scored)
        valid_scores = value_scores[0]
        invalid_scores = value_scores[1]
        self.assertTrue(
            all(not isnan(valid_score)
                for valid_score in valid_scores.values()))
        self.assertTrue(
            any(
                isnan(invalid_score)
                for invalid_score in invalid_scores.values()))

    def test_value_scores_unknown_features_ignore(self):
        self.cbrw = CBRW(ignore_unknown=True)
        self.cbrw.add_observations(self.observations)
        self.cbrw.fit()

        # score observation where all features but not all
        # values have been previously observed
        to_be_scored = {
            'feature_a': 'a_val_x',
            'feature_b': 'b_val_1',
            'feature_c': 'c_val_1'
        }
        actually_scored = {'feature_b': 'b_val_1', 'feature_c': 'c_val_1'}
        value_scores = self.cbrw.value_scores(to_be_scored)[0]
        actual_value_scores = self.cbrw.value_scores(actually_scored)[0]
        self.assertTrue(all(not isnan(vs) for vs in value_scores.values()))
        self.assertEqual(value_scores['feature_a'], 0)
        self.assertEqual(value_scores['feature_b'],
                         actual_value_scores['feature_b'])
        self.assertEqual(value_scores['feature_c'],
                         actual_value_scores['feature_c'])

        # score observation where a feature has not
        # been previously observed
        to_be_scored = {
            'feature_x': 'x_val_x',
            'feature_b': 'b_val_1',
            'feature_c': 'c_val_1'
        }
        actually_scored = {'feature_b': 'b_val_1', 'feature_c': 'c_val_1'}
        value_scores = self.cbrw.value_scores(to_be_scored)[0]
        actual_value_scores = self.cbrw.value_scores(actually_scored)[0]
        self.assertTrue(all(not isnan(vs) for vs in value_scores.values()))
        self.assertEqual(value_scores['feature_x'], 0)
        self.assertEqual(value_scores['feature_b'],
                         actual_value_scores['feature_b'])
        self.assertEqual(value_scores['feature_c'],
                         actual_value_scores['feature_c'])

        # score observation where no features have
        # previously been observed
        to_be_scored = {
            'feature_x': 'x_val_x',
            'feature_y': 'y_val_1',
            'feature_z': 'z_val_1'
        }
        value_scores = self.cbrw.value_scores(to_be_scored)[0]
        self.assertTrue(all(not isnan(vs) for vs in value_scores.values()))
        self.assertTrue(all(vs == 0 for vs in value_scores.values()))

    def test_value_scores_with_nans_default(self):
        obs = deepcopy(self.observations)
        obs[0]['feautre_a'] = np.nan

        to_be_scored = {
            'feature_a': np.nan,
            'feature_b': 'b_val_1',
            'feature_c': 'c_val_1'
        }

        # score observation with nan value
        self.cbrw.fit()
        value_scores = self.cbrw.value_scores(to_be_scored)
        self.assertTrue(isnan(value_scores[0]['feature_a']))

        # fit includes observation with nan value
        self.cbrw = CBRW()
        self.cbrw.add_observations(obs)
        self.cbrw.fit()
        value_scores = self.cbrw.value_scores(to_be_scored)
        self.assertTrue(isnan(value_scores[0]['feature_a']))

    def test_value_scores_with_nans_ignore(self):
        obs = deepcopy(self.observations)
        obs[0]['feautre_a'] = np.nan

        to_be_scored = {
            'feature_a': np.nan,
            'feature_b': 'b_val_1',
            'feature_c': 'c_val_1'
        }
        actually_scored = {'feature_b': 'b_val_1', 'feature_c': 'c_val_1'}

        # score observation with nan value
        self.cbrw = CBRW(ignore_unknown=True)
        self.cbrw.add_observations(self.observations)
        self.cbrw.fit()
        value_scores = self.cbrw.value_scores(to_be_scored)[0]
        actual_value_scores = self.cbrw.value_scores(actually_scored)[0]
        self.assertTrue(all(not isnan(vs) for vs in value_scores.values()))
        self.assertEqual(value_scores['feature_a'], 0)
        self.assertEqual(value_scores['feature_b'],
                         actual_value_scores['feature_b'])
        self.assertEqual(value_scores['feature_c'],
                         actual_value_scores['feature_c'])

        # fit includes observation with nan value
        self.cbrw = CBRW(ignore_unknown=True)
        self.cbrw.add_observations(obs)
        self.cbrw.fit()
        value_scores = self.cbrw.value_scores(to_be_scored)[0]
        actual_value_scores = self.cbrw.value_scores(actually_scored)[0]
        self.assertTrue(all(not isnan(vs) for vs in value_scores.values()))
        self.assertEqual(value_scores['feature_a'], 0)
        self.assertEqual(value_scores['feature_b'],
                         actual_value_scores['feature_b'])
        self.assertEqual(value_scores['feature_c'],
                         actual_value_scores['feature_c'])
class TestCBRW(unittest.TestCase):
    """
    Unit tests for CBRW
    """

    observations = [
        {'feature_a': 'a_val_1', 'feature_b': 'b_val_1', 'feature_c': 'c_val_1'},
        {'feature_b': 'b_val_1', 'feature_c': 'c_val_2', 'feature_a': 'a_val_1'}
    ]

    def setUp(self):
        self.cbrw = CBRW()
        self.cbrw.add_observations(self.observations)

    def test_get_feature_relevance(self):
        self.cbrw._feature_relevance = {
            'feature_a': 0.5
        }
        # get relevance for valid feature tuple
        rel = self.cbrw._get_feature_relevance(('feature_a', 'a_val_1'))
        self.assertEqual(rel, 0.5)
        # get relevance for tuple with valid feature name
        rel = self.cbrw._get_feature_relevance(('feature_a', 'xxx'))
        self.assertEqual(rel, 0.5)
        # get relevance for invalid feature name
        rel = self.cbrw._get_feature_relevance(('xxx', 'xxx'))
        self.assertEqual(rel, 0)

    def test_compute_biases(self):
        bias_dict = self.cbrw._compute_biases()
        self.assertEqual(bias_dict[('feature_a', 'a_val_1')], 0)
        self.assertEqual(bias_dict[('feature_b', 'b_val_1')], 0)
        self.assertEqual(bias_dict[('feature_c', 'c_val_1')], 0.25)
        self.assertEqual(bias_dict[('feature_c', 'c_val_2')], 0.25)

    def test_compute_biased_transition_matrix(self):
        transition_matrix = self.cbrw._compute_biased_transition_matrix()
        self.assertIsInstance(transition_matrix, csr_matrix)
        self.assertTupleEqual(transition_matrix.shape, (4, 4))
        self.assertTrue((transition_matrix.data > 0).all())
        self.assertTrue((transition_matrix.data <= 1).all())

    def test_fit_no_data(self):
        self.cbrw = CBRW()
        with self.assertRaises(CBRWFitError):
            self.cbrw.fit()
    
    def test_fit(self):
        self.cbrw.fit()
        self.assertIsNotNone(self.cbrw._stationary_prob)
        self.assertIsNotNone(self.cbrw._feature_relevance)

    def test_score_before_fit(self):
        with self.assertRaises(CBRWScoreError):
            _ = self.cbrw.score(self.observations)
    
    def test_score(self):
        self.cbrw.fit()

        # score observation where all features and values
        # have been previously observed
        to_be_scored = self.observations[0]
        score = self.cbrw.score(to_be_scored)
        score = score[0]
        self.assertGreaterEqual(score, 0)
        self.assertLessEqual(score, 1)

    def test_score_unknown_features_default(self):
        self.cbrw.fit()

        # score observation where all features but not all
        # values have been previously observed
        to_be_scored = {
            'feature_a': 'a_val_x',
            'feature_b': 'b_val_1',
            'feature_c': 'c_val_1'
        }
        score = self.cbrw.score(to_be_scored)
        self.assertTrue(isnan(score[0]))

        # score observation where a feature has not
        # been previously observed
        to_be_scored = {
            'feature_x': 'x_val_x',
            'feature_b': 'b_val_1',
            'feature_c': 'c_val_1'
        }
        score = self.cbrw.score(to_be_scored)
        self.assertTrue(isnan(score[0]))
        
        # score valid and invalid observations in one call
        to_be_scored = [
            self.observations[0],
            {'feature_x': 'x_val_x', 'feature_b': 'b_val_1'}
        ]
        scores = self.cbrw.score(to_be_scored)
        valid_score = scores[0]
        invalid_score = scores[1]
        self.assertFalse(isnan(valid_score))
        self.assertGreaterEqual(valid_score, 0)
        self.assertLessEqual(valid_score, 1)
        self.assertTrue(isnan(invalid_score))

    def test_score_unknown_features_ignore(self):
        self.cbrw = CBRW(ignore_unknown=True)
        self.cbrw.add_observations(self.observations)
        self.cbrw.fit()

        # score observation where all features but not all
        # values have been previously observed
        to_be_scored = {
            'feature_a': 'a_val_x',
            'feature_b': 'b_val_1',
            'feature_c': 'c_val_1'
        }
        actually_scored = {
            'feature_b': 'b_val_1',
            'feature_c': 'c_val_1'
        }
        score = self.cbrw.score(to_be_scored)
        actual_score = self.cbrw.score(actually_scored)
        self.assertFalse(isnan(score[0]))
        self.assertEqual(score, actual_score)

        # score observation where a feature has not
        # been previously observed
        to_be_scored = {
            'feature_x': 'x_val_x',
            'feature_b': 'b_val_1',
            'feature_c': 'c_val_1'
        }
        actually_scored = {
            'feature_b': 'b_val_1',
            'feature_c': 'c_val_1'
        }
        score = self.cbrw.score(to_be_scored)
        actual_score = self.cbrw.score(actually_scored)
        self.assertFalse(isnan(score[0]))
        self.assertEqual(score, actual_score)
        
        # score observation where all features not
        # previously observed
        to_be_scored = {
            'feature_x': 'x_val_x',
            'feature_y': 'y_val_1',
            'feature_z': 'z_val_1'
        }
        score = self.cbrw.score(to_be_scored)
        self.assertFalse(isnan(score[0]))
        self.assertEqual(score[0], 0)