Beispiel #1
0
 def test_cohort_estimator_counts(self):
     """Check that the estimator's total count equals the number of events with Timestep below 4."""
     csv_file = source_path + "datasets/" + 'synthetic_data5.csv'
     frame = pd.read_csv(csv_file)

     # Expected value: non-null IDs among the rows whose Timestep is below 4.
     early_rows = frame[frame['Timestep'] < 4]
     expected_total = early_rows['ID'].count()

     state_space = tm.StateSpace([
         ('0', "Stage 1"),
         ('1', "Stage 2"),
         ('2', "Stage 3"),
     ])
     ordered = frame.sort_values(by=['ID', 'Timestep'], ascending=[True, True])

     estimator = es.CohortEstimator(
         states=state_space,
         ci=dict(method='goodman', alpha=0.05),
     )
     # Only the side effect of fitting is needed; the returned value is not inspected.
     estimator.fit(ordered)

     self.assertEqual(expected_total, estimator.counts)
    def test_roundtrip_identity(self):
        """
        Round-trip test: generate data with ``Identity``, estimate, and check the diagonal.

        Data for a four-state space are produced by
        ``dataset_generators.long_format`` (100 entities, 2 timesteps,
        'Canonical' mode), converted with ``to_compact``, binned into a single
        cohort and fed to the cohort estimator. Every diagonal element of the
        estimated average matrix is expected to equal 1.0 to
        ``ACCURATE_DIGITS`` decimal places.

        """
        definition = [('0', "A"), ('1', "B"), ('2', "C"), ('3', "D")]
        myState = tm.StateSpace(definition)
        input_data = dataset_generators.long_format(myState,
                                                    Identity,
                                                    n=100,
                                                    timesteps=2,
                                                    mode='Canonical')
        compact_data = to_compact(input_data)
        cohort_data, cohort_bounds = tm.utils.bin_timestamps(compact_data,
                                                             cohorts=1)
        sorted_data = cohort_data.sort_values(['ID', 'Time'],
                                              ascending=[True, True])
        myEstimator = es.CohortEstimator(states=myState,
                                         cohort_bounds=cohort_bounds,
                                         ci={
                                             'method': 'goodman',
                                             'alpha': 0.05
                                         })
        # The fit is run for its effect on the estimator; the return value is not used.
        myEstimator.fit(sorted_data,
                        labels={
                            'Time': 'Time',
                            'State': 'State',
                            'ID': 'ID'
                        })
        myMatrix = tm.TransitionMatrix(myEstimator.average_matrix)

        # Check the diagonal entry of every state. The unrolled version
        # asserted [2, 2] twice and never looked at [3, 3].
        for i in range(len(definition)):
            self.assertAlmostEqual(myMatrix[i, i],
                                   1.0,
                                   places=ACCURATE_DIGITS,
                                   msg="diagonal element [{0}, {0}]".format(i))
Beispiel #3
0
    def test_cohort_estimator_counts(self):
        """
        Verify that the estimator's total count matches the number of events in the dataset

        """
        frame = pd.read_csv(source_path + "datasets/" + 'synthetic_data5.csv')

        # Every row with a non-null ID counts as an event (no filter on 'Time').
        id_column = frame['ID']
        expected_total = id_column.count()

        state_space = tm.StateSpace([
            ('0', "Stage 1"),
            ('1', "Stage 2"),
            ('2', "Stage 3"),
        ])
        ordered = frame.sort_values(by=['ID', 'Time'], ascending=[True, True])

        confidence = dict(method='goodman', alpha=0.05)
        estimator = es.CohortEstimator(states=state_space,
                                       cohort_bounds=[0, 1, 2, 3, 4],
                                       ci=confidence)
        # Fitting populates the estimator; its return value is not needed here.
        estimator.fit(ordered)

        self.assertEqual(expected_total, estimator.counts)
                   ('4', "BB"), ('5', "B"), ('6', "CCC"), ('7', "D")]

    # Build the state space from `description` (its definition starts above
    # this fragment) and print a description, the states and their labels.
    myState = tm.StateSpace(description)
    myState.describe()
    print(myState.get_states())
    print(myState.get_state_labels())

    # Load the sample dataset, reading the 'State' column as strings
    # NOTE(review): presumably so values match the string state identifiers - confirm
    data = pd.read_csv(dataset_path + 'synthetic_data4.csv',
                       dtype={'State': str})
    # Order observations by ID and then by Timestep, both ascending
    sorted_data = data.sort_values(['ID', 'Timestep'], ascending=[True, True])
    # Print the outcome of validating the sorted data against the state space
    print(myState.validate_dataset(dataset=sorted_data))

    # compute confidence interval using goodman method at 95% confidence level
    myEstimator = es.CohortEstimator(states=myState,
                                     ci={
                                         'method': 'goodman',
                                         'alpha': 0.05
                                     })
    # Fit the estimator on the sorted observations; `result` feeds the matrix set below
    result = myEstimator.fit(sorted_data)

    # Print confidence intervals
    myEstimator.summary()

    # Print the estimated results
    # The fit output is wrapped in a matrix set declared with temporal type 'Incremental'
    myMatrixSet = tm.TransitionMatrixSet(values=result,
                                         temporal_type='Incremental')
    print(myMatrixSet.temporal_type)
    myMatrixSet.print()

elif example == 2:
    # Example: IFRS 9 Style Migration Matrix
Beispiel #5
0
    def test_cohort_estimator_matrix(self):
        """
        Verify that the estimated average matrix reproduces the matrix used to generate the data

        matrix = [[0.8, 0.15, 0.05],
                  [0.1, 0.7, 0.2],
                  [0.0, 0.0, 1.0]]

        """
        expected_rows = (
            (0.8, 0.15, 0.05),
            (0.1, 0.7, 0.2),
            (0.0, 0.0, 1.0),
        )

        frame = pd.read_csv(source_path + "datasets/" + 'synthetic_data5.csv')
        state_space = tm.StateSpace([
            ('0', "Stage 1"),
            ('1', "Stage 2"),
            ('2', "Stage 3"),
        ])
        ordered = frame.sort_values(by=['ID', 'Time'], ascending=[True, True])

        estimator = es.CohortEstimator(
            states=state_space,
            cohort_bounds=[0, 1, 2, 3, 4],
            ci=dict(method='goodman', alpha=0.05),
        )
        # Fitting populates `average_matrix`; the returned value is not inspected.
        estimator.fit(ordered)
        estimated = estimator.average_matrix

        # Compare element by element in row-major order.
        for row, expected_row in enumerate(expected_rows):
            for col, expected_value in enumerate(expected_row):
                self.assertAlmostEqual(estimated[row, col],
                                       expected_value,
                                       places=ACCURATE_DIGITS)