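# Module-level imports these tests rely on. The quantipy import paths below are
# assumptions and may differ between quantipy versions.
import json
import unittest

import numpy
import pandas as pd

from quantipy.core.weights.rim import Rim
from quantipy.core.weights.weight_engine import WeightEngine

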
class TestEngine(unittest.TestCase):

    def setUp(self):
        ''' Simple engine without meta - engine_A
        '''
        path_data = 'tests/engine_A_data.csv'
        data = pd.read_csv(path_data)

        # Setup engine_A
        self.engine_A = WeightEngine(data=data)

        self.scheme_name_A1 = 'scheme_name_A1'
        self.scheme_name_A2 = 'scheme_name_A2'
        self.scheme_name_A3 = 'scheme_name_A3'

        # Setup schemes to use in tests
        self.scheme_A1 = Rim(self.scheme_name_A1)
        self.scheme_A1.lists = ['column1', 'column2']
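        # Each group below supplies rim targets for the two listed variables;
        # the percentages per variable sum to (roughly) 100 within the group's filter.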
        self.scheme_A1.add_group(name='Senior Type 1', filter='column3==1', 
            targets={
                'column1': [32.00, 31.00, 37.00],
                'column2': [23.13, 14.32, 4.78, 4.70, 2.65, 2.61, 3.47, 31.04, 13.3]
            })
        self.scheme_A1.add_group(name='Senior Type 2', filter='column3==1', 
            targets={
                'column1': [33.40, 33.40, 33.20],
                'column2': [11.11, 11.11, 11.11, 11.11, 11.11, 11.11, 11.11, 11.11, 11.11]
            })
        self.scheme_A1.add_group(name='Senior Type 3', filter='column3==3',
            targets={
                'column1': [33.2, 29.7, 37.1],
                'column2': [23.13, 14.32, 4.78, 4.70, 2.65, 2.61, 3.47, 31.04, 13.3]
            })
        self.scheme_A1.add_group(name='Senior Type 4', filter='column3==4',
            targets={
                'column1': [33.2, 29.7, 37.1],
                'column2': [23.13, 14.32, 4.78, 4.70, 2.65, 2.61, 3.47, 32.34, 12.00]
            })

        self.scheme_A2 = Rim(self.scheme_name_A2)
        self.scheme_A2.lists = ['column1', 'column2']
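        # scheme_A2 repeats the scheme_A1 groups with the same target values
        # listed in a rotated order.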
        self.scheme_A2.add_group(name='Senior Type 1', filter='column3==1', 
            targets={
                'column1': [37.00, 32.00, 31.00],
                'column2': [13.3, 23.13, 14.32, 4.78, 4.70, 2.65, 2.61, 3.47, 31.04]
            })
        self.scheme_A2.add_group(name='Senior Type 2', filter='column3==1', 
            targets={
                'column1': [33.2, 33.40, 33.40],
                'column2': [11.11, 11.11, 11.11, 11.11, 11.11, 11.11, 11.11, 11.11, 11.11]
            })
        self.scheme_A2.add_group(name='Senior Type 3', filter='column3==3',
            targets={
                'column1': [37.1, 33.2, 29.7],
                'column2': [13.3, 23.13, 14.32, 4.78, 4.70, 2.65, 2.61, 3.47, 31.04]
            })
        self.scheme_A2.add_group(name='Senior Type 4', filter='column3==4',
            targets={
                'column1': [37.1, 33.2, 29.7],
                'column2': [12.00, 23.13, 14.32, 4.78, 4.70, 2.65, 2.61, 3.47, 32.34]
            })

        self.scheme_A3 = Rim(self.scheme_name_A3)
        self.scheme_A3.lists = ['profile_gender']
        self.scheme_A3.targets = {'profile_gender' : [47, 53]}
        self.scheme_A3.add_group(
            name='11-19', filter='age_group==2', targets=self.scheme_A3.targets
        )
        self.scheme_A3.add_group(
            name='31-39', filter='age_group==4', targets=self.scheme_A3.targets
        )
        self.scheme_A3.add_group(
            name='41-49', filter='age_group==5', targets=self.scheme_A3.targets
        )
        self.scheme_A3.add_group(
            name='51-59', filter='age_group==6', targets=self.scheme_A3.targets
        )
        self.scheme_A3.group_targets({
             '11-19': 25, 
             '31-39': 25, 
             '41-49': 25, 
             '51-59': 25
        })
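        # scheme_A3: a 47/53 profile_gender split is applied inside each of the
        # four age groups, and group_targets gives every age group 25% of the
        # total weighted base.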
        
        ''' Complex engine with meta - engine_B
        '''
        path_data = 'tests/engine_B_data.csv'
        path_meta = 'tests/engine_B_meta.json'

        data = pd.read_csv(path_data)
        with open(path_meta) as f:
            meta = json.load(f)

        self.scheme_name_B1 = 'scheme_name_B1'

        engine_B = WeightEngine(data=data, meta=meta)
        
        # Setup schemes to use in tests
        self.scheme_B1 = Rim(self.scheme_name_B1)
        self.scheme_B1.lists = ['profile_gender', 'age_group']
        # self.scheme_B1.set_targets()

    def test_constructor(self):
        path_data = 'tests/engine_B_data.csv'
        path_meta = 'tests/engine_B_meta.json'

        data = pd.read_csv(path_data)
        with open(path_meta) as f:
            meta = json.load(f)
        
        engine_B = WeightEngine(data=data, meta=meta)

        self.assertIsNotNone(engine_B._df)
        self.assertTrue(engine_B.dropna)
        self.assertEqual(engine_B.schemes, {})
        self.assertIsInstance(engine_B.schemes, dict)

    def test_add_scheme_and_dataframe(self):
        # A list of the scheme names defined in setUp, used for comparison below
        scheme_names = [self.scheme_name_A1, self.scheme_name_A2]

        self.engine_A.add_scheme(scheme=self.scheme_A2, key='identity')
        # Should now contain a dict with scheme_name_A2 as the first key
        self.assertEqual(list(self.engine_A.schemes.keys())[0], self.scheme_name_A2)

        self.engine_A.add_scheme(scheme=self.scheme_A1, key='identity')
        # Should now contain a dict with scheme_name_A2 and scheme_name_A1 as keys
        for key in self.engine_A.schemes:
            self.assertIn(key, scheme_names)
            self.assertIn('identity', self.engine_A.schemes[key]['key'])

        # Sets weights_scheme_name_A1 and weights_scheme_name_A2 to ones
        self.engine_A._df[self.scheme_A1.weight_name()] = numpy.ones(len(self.engine_A._df))
        self.engine_A._df[self.scheme_A2.weight_name()] = numpy.ones(len(self.engine_A._df))

        for key in self.engine_A.schemes:
            weight_scheme = self.engine_A._df['weights_' + key]
            # all ones before the scheme is run ...
            boolean_vector = (weight_scheme == numpy.ones(len(weight_scheme)))
            self.assertTrue(boolean_vector.all())
            self.engine_A.run(schemes=[key])
            # ... and no longer all ones afterwards
            boolean_vector = (weight_scheme == numpy.ones(len(weight_scheme)))
            self.assertFalse(boolean_vector.all())

    def test_add_scheme_no_key(self):
        self.engine_A.add_scheme(scheme=self.scheme_A1, key='identity')
        self.assertIsNotNone(self.engine_A.schemes[self.scheme_name_A1]['key'])

    def test_weight_lazy(self):
        # The early return below deliberately short-circuits this test; the
        # assertions that follow are kept for reference only.
        return
        self.engine_A.add_scheme(scheme=self.scheme_A2, key='identity')
        self.engine_A.add_scheme(scheme=self.scheme_A1, key='identity')
        self.assertNotIn('weights_scheme_name_A2', self.engine_A._df.columns)
        self.engine_A.weight()

        self.assertIn('weights_%s' % self.scheme_name_A1, self.engine_A._df.columns)
        self.assertIn('weights_%s' % self.scheme_name_A2, self.engine_A._df.columns)

    def test_group_targets(self):
        path_data = 'tests/engine_B_data.csv'
        path_meta = 'tests/engine_B_meta.json'

        data = pd.read_csv(path_data)
        with open(path_meta) as f:
            meta = json.load(f)
        
        weight = '_'.join(['weights', self.scheme_name_A3])
        
        # Run weights for scheme_A3
        engine_B = WeightEngine(data=data, meta=meta)
        engine_B.add_scheme(scheme=self.scheme_A3, key='identity')
        engine_B.run()

        data_A3 = engine_B.dataframe("scheme_name_A3")
        
        # check that the weighted gender frequencies are identical across age group columns
        df = data_A3.pivot_table(
            values=[weight], 
            index=['profile_gender'], 
            columns=['age_group'], 
            aggfunc='sum'
        )  
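        # With 14 cases and 25% per age group, each group carries a weighted
        # base of 14 * 0.25 = 3.5; the 47/53 gender split then gives
        # 3.5 * 0.47 = 1.645 and 3.5 * 0.53 = 1.855 per column.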
        for column in df.columns.tolist():
            self.assertTrue(
                numpy.allclose(df[column].values, numpy.array([1.645, 1.855]))
            ) 
        
        #check the weight column counts & sum are equal to index length (14)
        a = numpy.asscalar(data_A3[weight].count())
        b = numpy.asscalar(data_A3[weight].sum())
        c = data_A3.shape[0]
        self.assertTrue(int(a) == int(b) == int(c))

        # check the weighted group frequencies have equal proportions (14 * 0.25 = 3.5 per group)
        values = data_A3.pivot_table(
            values=[weight], 
            index=['age_group'], 
            aggfunc='sum'
        ).values
        self.assertTrue(numpy.allclose(values, 3.5))
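
# Allow the test module to be run directly.
if __name__ == '__main__':
    unittest.main()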