Example #1
0
 def test_interlock(self):
     """Interlock ``q4``, banded ``age`` and ``gender`` into ``q4AgeGen``.

     Checks the value counts of the new column, its value labels, its
     variable text and that it is reported as a delimited set.
     """
     dataset = self._get_dataset()
     # Keep the reference that is taken before interlock() runs; the
     # counts below are read from this very object.
     frame = dataset._data
     new_name = 'q4AgeGen'
     new_label = 'q4 Age Gender'
     age_bands = [
         (1, '18-35', {'age': frange('18-35')}),
         (2, '30-49', {'age': frange('30-49')}),
         (3, '50+', {'age': is_ge(50)}),
     ]
     dataset.interlock(new_name, new_label,
                       ['q4', {'age': age_bands}, 'gender'])

     # Reference frequencies, keyed by the delimited-set string.
     expected_counts = pd.Series(
         [1367, 1109, 1036, 831, 736, 579, 571, 550, 454, 438, 340, 244],
         index=['10;', '8;', '9;', '7;', '3;', '8;10;', '1;', '4;', '2;',
                '7;9;', '1;3;', '2;4;'],
         name='q4AgeGen')
     observed_counts = frame['q4AgeGen'].value_counts()
     self.assertTrue(all(expected_counts == observed_counts))

     # Codes 1..12 enumerate answer x age band x gender, gender varying
     # fastest.
     labels = [
         u'{}/{}/{}'.format(answer, band, sex)
         for answer in (u'Yes', u'No')
         for band in (u'18-35', u'30-49', u'50+')
         for sex in (u'Male', u'Female')
     ]
     expected_values = list(enumerate(labels, 1))
     self.assertEqual(expected_values, dataset.values('q4AgeGen'))
     self.assertEqual('q4 Age Gender', dataset.text('q4AgeGen'))
     self.assertTrue(dataset.is_delimited_set('q4AgeGen'))
Example #2
0
 def test_derotate_df(self):
     """Check the case data returned by ``dataset.derotate()``.

     The first ten rows (NaN rendered as ``'nan'`` so rows can be compared
     with ``==``), the column order and the total row count are compared
     against fixed reference values. The ``.json``/``.csv`` files located
     via ``ds.path``/``ds.name`` are removed in a ``finally`` clause so a
     failing assertion does not leave them behind.
     """
     dataset = self._get_dataset()
     levels = {'visit': ['visit_1', 'visit_2', 'visit_3']}
     mapper = [{'q14r{:02}'.format(r): ['q14r{0:02}c{1:02}'.format(r, c)
               for c in range(1, 4)]} for r in frange('1-5')]
     result_df = [[1.0, 2.0, 1.0, 4.0, 4.0, 4.0, 8.0, 1.0, 2.0, 4.0, 2.0, 3.0, 1.0],
                  [1.0, 2.0, 2.0, 4.0, 4.0, 4.0, 8.0, 3.0, 3.0, 2.0, 4.0, 3.0, 1.0],
                  [1.0, 3.0, 1.0, 1.0, 1.0, 8.0, 'nan', 4.0, 3.0, 1.0, 3.0, 1.0, 2.0],
                  [1.0, 4.0, 1.0, 5.0, 5.0, 4.0, 8.0, 2.0, 3.0, 2.0, 3.0, 1.0, 1.0],
                  [1.0, 4.0, 2.0, 4.0, 5.0, 4.0, 8.0, 2.0, 1.0, 3.0, 2.0, 1.0, 1.0],
                  [1.0, 5.0, 1.0, 3.0, 3.0, 5.0, 8.0, 4.0, 2.0, 2.0, 1.0, 3.0, 1.0],
                  [1.0, 5.0, 2.0, 5.0, 3.0, 5.0, 8.0, 3.0, 3.0, 3.0, 1.0, 2.0, 1.0],
                  [1.0, 6.0, 1.0, 2.0, 2.0, 8.0, 'nan', 4.0, 2.0, 3.0, 4.0, 2.0, 1.0],
                  [1.0, 7.0, 1.0, 3.0, 3.0, 3.0, 8.0, 2.0, 1.0, 3.0, 2.0, 4.0, 1.0],
                  [1.0, 7.0, 2.0, 3.0, 3.0, 3.0, 8.0, 3.0, 2.0, 1.0, 2.0, 3.0, 1.0]]
     result_columns = ['@1', 'record_number', 'visit', 'visit_levelled',
                       'visit_1', 'visit_2', 'visit_3', 'q14r01', 'q14r02',
                       'q14r03', 'q14r04', 'q14r05', 'gender']
     df_len = 18520
     ds = dataset.derotate(levels, mapper, 'gender', 'record_number')
     try:
         df_h = ds._data.head(10)
         # NaN never equals itself, so swap it for a comparable marker.
         df_val = [[x if not np.isnan(x) else 'nan' for x in line]
                   for line in df_h.values.tolist()]
         self.assertEqual(df_val, result_df)
         self.assertEqual(df_h.columns.tolist(), result_columns)
         self.assertEqual(len(ds._data.index), df_len)
     finally:
         # Clean up even when an assertion above failed.
         os.remove('{}/{}.json'.format(ds.path, ds.name))
         os.remove('{}/{}.csv'.format(ds.path, ds.name))
Example #3
0
 def test_add_y_on_y(self):
     """Add two y-on-y entries and check what the batch meta records.

     ``'back'`` is added with ``None`` as its filter, which must show up
     unchanged in ``y_filter_map``; both names must be listed in
     ``y_on_y`` in the order they were added.
     """
     batch, _ = _get_batch('test', full=True)
     # Fetched before the calls below; the assertions rely on this object
     # reflecting the later changes.
     meta = _get_meta(batch)
     age_filter = {'age': frange('20-30')}
     batch.add_y_on_y('cross', age_filter, 'extend')
     batch.add_y_on_y('back', None, 'replace')
     self.assertEqual(meta['y_filter_map']['back'], None)
     self.assertEqual(meta['y_on_y'], ['cross', 'back'])
Example #4
0
 def test_derotate_meta(self):
     """Check that the derotated dataset validates cleanly.

     ``ds.validate(False)`` is expected to return ``None``. The
     ``.json``/``.csv`` files located via ``ds.path``/``ds.name`` are
     removed in a ``finally`` clause so a failing assertion does not leave
     them behind.
     """
     dataset = self._get_dataset()
     levels = {'visit': ['visit_1', 'visit_2', 'visit_3']}
     mapper = [{'q14r{:02}'.format(r): ['q14r{0:02}c{1:02}'.format(r, c)
               for c in range(1, 4)]} for r in frange('1-5')]
     ds = dataset.derotate(levels, mapper, 'gender', 'record_number')
     try:
         err = ds.validate(False)
         err_s = None
         self.assertEqual(err_s, err)
     finally:
         # Clean up even when the assertion above failed.
         os.remove('{}/{}.json'.format(ds.path, ds.name))
         os.remove('{}/{}.csv'.format(ds.path, ds.name))
Example #5
0
 def test_from_batch(self):
     """Derive a dataset from batch ``'test1'`` and inspect the result.

     Checks the remaining ``q1`` codes, the variable list, the ``gender``
     counts and the value texts per language, and that requesting the
     text key ``'fr-FR'`` raises ``ValueError``.
     """
     ds = _get_dataset()
     ds.force_texts('de-DE', 'en-GB')

     # Main batch: German texts, some q1 codes hidden, others sliced.
     main_batch, ds = _get_batch('test1', ds, full=True)
     main_batch.set_language('de-DE')
     main_batch.hiding('q1', frange('8,9,96-99'))
     main_batch.slicing('q1', frange('9-4'))

     # Second batch, attached to the main one as an addition.
     extra_batch, ds = _get_batch('test2', ds)
     extra_batch.add_downbreak('q1')
     extra_batch.add_crossbreak('Wave')
     extra_batch.as_addition('test1')

     subset = ds.from_batch('test1', 'RecordNo', 'de-DE', True, 'variables')

     self.assertEqual(subset.codes('q1'), [7, 6, 5, 4])
     expected_variables = [
         u'age', u'gender', u'q1', u'q2', u'q6', u'q8a', u'q9a', u'Wave',
         u'weight_a', u'RecordNo'
     ]
     self.assertEqual(subset.variables(), expected_variables)
     gender_counts = subset['gender'].value_counts().values.tolist()
     self.assertEqual(gender_counts, [3952])
     self.assertEqual(subset.value_texts('gender', 'en-GB'), [None, None])
     self.assertEqual(subset.value_texts('gender', 'de-DE'),
                      [u'Male', u'Female'])
     with self.assertRaises(ValueError):
         ds.from_batch('test1', 'RecordNo', 'fr-FR')
Example #6
0
 def test_extend_filter(self):
     """Extend the batch filter per variable and check the meta entries.

     After the call, ``filter_names`` must list the base filter plus one
     derived name per extended variable, and ``x_filter_map`` must map
     every x variable to the name of the filter that applies to it.
     """
     batch, _ = _get_batch('test', full=True)
     # Fetched before the call below; the assertions rely on this object
     # reflecting the later changes.
     meta = _get_meta(batch)
     batch.extend_filter({
         'q1': {'age': frange('20-25')},
         ('q2', 'q6'): {'age': frange('30-35')},
     })

     expected_names = [
         'men_only', 'men_only_q1', 'men_only_q2', 'men_only_q6'
     ]
     self.assertEqual(meta['filter_names'], expected_names)

     expected_map = OrderedDict()
     expected_map['q1'] = 'men_only_q1'
     expected_map['q2'] = 'men_only_q2'
     # q6 and its items q6_1..q6_3 all map to the q6 filter.
     for q6_name in ('q6', 'q6_1', 'q6_2', 'q6_3'):
         expected_map[q6_name] = 'men_only_q6'
     expected_map['age'] = 'men_only'
     self.assertEqual(meta['x_filter_map'], expected_map)
Example #7
0
    def show_items(self, array, text_key=None):
        """
        Display items of arrays in different DataSets.

        Parameters
        ----------
        array: str/ list of str
            Displays items for these variables. A single name is treated
            like a one-element list.
        text_key: str
            Text key for text-based label information. Can be provided as
            ``'x edits~tk'`` or ``'y edits~tk'``, then the edited text is taken.
            If None is provided, the item name will be displayed instead of
            the item label.

        Returns
        -------
        pandas.DataFrame or None
            One row per ``(array, item)`` pair and one column per dataset
            that contains the array. ``None`` (after printing a notice)
            if there is nothing to show.

        Raises
        ------
        ValueError
            If a requested name is not an array.
        """
        # A bare string would otherwise be iterated character by character.
        if isinstance(array, str):
            array = [array]
        if not text_key:
            label = False
            etk = None
        else:
            label = True
            # NOTE(review): for 'x edits~tk' this yields
            # text_key='x edits' and etk='tk' - confirm this matches the
            # argument order ds.text() expects.
            text_key = text_key.split('~')
            etk = text_key[1].split()[0] if len(text_key) > 1 else None
            text_key = text_key[0]
        df_all_v = []
        for a in array:
            if not self._is_array(a):
                raise ValueError('{} is not an array.'.format(a))
            all_df = []
            for name in list(self.ds_alias.values()):
                ds = self[name]
                if a not in ds:
                    continue
                sources = ds.sources(a)
                if label:
                    # Rows are keyed by item name, cells hold the labels.
                    val = [ds.text(s, True, text_key, etk) for s in sources]
                    ind = sources
                else:
                    # Rows are keyed by item position, cells hold the names.
                    val = sources
                    ind = frange('1-{}'.format(len(val)))
                index = pd.MultiIndex.from_tuples([(a, n) for n in ind])
                all_df.append(pd.DataFrame(val, index=index, columns=[name]))
            if not all_df:
                # No dataset holds this array; pd.concat([]) would raise.
                continue
            df_all_v.append(pd.concat(all_df, axis=1))
        if not df_all_v:
            print('No variables to show.')
        else:
            return pd.concat(df_all_v, axis=0)
Example #8
0
 def test_filter(self):
     """Filter the dataset in place and verify alias, size and codes."""
     dataset = self._get_dataset()
     # NOTE(review): the alias text says 'men' while the filter uses
     # gender code 2 - verify the label matches the code's meaning.
     alias = 'men: 35 to 45 years old'
     condition = intersection([{'gender': [2]},
                               {'age': frange('35-45')}])
     dataset.filter(alias, condition, inplace=True)

     # The alias must be stored on the dataset.
     self.assertEqual(dataset.filtered, alias)

     # Only the matching cases may remain.
     n_cases = 1509
     self.assertEqual(len(dataset._data.index), n_cases)
     self.assertEqual(dataset['age'].value_counts().sum(), n_cases)

     gender_codes = dataset['gender'].value_counts().index.tolist()
     self.assertTrue(gender_codes == [2])
     age_codes = sorted(dataset['age'].value_counts().index.tolist())
     self.assertTrue(age_codes == list(range(35, 46)))
Example #9
0
 def test_derotate_freq(self):
     """Check per-variable frequencies of the derotated dataset.

     For every variable in ``val_c`` the reference counts (``'val'``,
     ordered by ``'index'``) are compared with ``value_counts()`` of the
     matching column. The ``.json``/``.csv`` files located via
     ``ds.path``/``ds.name`` are removed in a ``finally`` clause so a
     failing assertion does not leave them behind.
     """
     dataset = self._get_dataset()
     levels = {'visit': ['visit_1', 'visit_2', 'visit_3']}
     mapper = [{'q14r{:02}'.format(r): ['q14r{0:02}c{1:02}'.format(r, c)
               for c in range(1, 4)]} for r in frange('1-5')]
     val_c = {'visit': {'val': {1: 8255, 2: 6174, 3: 4091},
                'index': [1, 2, 3]},
              'visit_levelled': {'val': {4: 3164, 1: 3105, 5: 3094, 6: 3093, 3: 3082, 2: 2982},
                                'index': [4, 1, 5, 6, 3,2]},
              'visit_1': {'val': {4: 3225, 6: 3136, 3: 3081, 2: 3069, 1: 3029, 5: 2980},
                          'index': [4, 6, 3, 2, 1, 5]},
              'visit_2': {'val': {1: 2789, 6: 2775, 5: 2765, 3: 2736, 4: 2709, 2: 2665, 8: 2081},
                          'index': [1, 6, 5, 3, 4, 2, 8]},
              'visit_3': {'val': {8: 4166, 5: 2181, 4: 2112, 3: 2067, 1: 2040, 6: 2001, 2: 1872},
                          'index': [8, 5, 4, 3, 1, 6, 2]},
              'q14r01': {'val': {3: 4683, 1: 4653, 4: 4638, 2: 4546},
                         'index': [3, 1, 4, 2]},
              'q14r02': {'val': {4: 4749, 2: 4622, 1: 4598, 3: 4551},
                         'index': [4, 2, 1, 3]},
              'q14r03': {'val': {1: 4778, 4: 4643, 3: 4571, 2: 4528},
                         'index': [1, 4, 3, 2]},
              'q14r04': {'val': {1: 4665, 2: 4658, 4: 4635, 3: 4562},
                         'index': [1, 2, 4, 3]},
              'q14r05': {'val': {2: 4670, 4: 4642, 1: 4607, 3: 4601},
                        'index': [2, 4, 1, 3]},
              'gender': {'val': {2: 9637, 1: 8883},
                         'index': [2, 1]}}
     ds = dataset.derotate(levels, mapper, 'gender', 'record_number')
     try:
         for var, expected in val_c.items():
             series = pd.Series(expected['val'], index=expected['index'])
             compare = all(series == ds._data[var].value_counts())
             # Name the variable so a failure is easy to locate.
             self.assertTrue(
                 compare, 'unexpected frequencies for {}'.format(var))
     finally:
         # Clean up even when an assertion above failed.
         os.remove('{}/{}.json'.format(ds.path, ds.name))
         os.remove('{}/{}.csv'.format(ds.path, ds.name))
Example #10
0
 def test_extend_filter(self):
     """Extend the batch filter per variable and check the meta entries.

     After the call, ``filter_names`` must list the base filter plus one
     combined name per affected variable, and ``x_filter_map`` must map
     every x variable to ``{filter name: filter logic}``.
     """
     batch, _ = _get_batch('test', full=True)
     # Fetched before the call below; the assertions rely on this object
     # reflecting the later changes.
     meta = _get_meta(batch)
     batch.extend_filter({
         'q1': {'age': frange('20-25')},
         ('q2', 'q6'): {'age': frange('30-35')},
     })

     def combined_name(var):
         # Name pattern expected for the base filter extended on ``var``.
         return '(men only)+({})'.format(var)

     def men_aged(low, high):
         # Expected logic: base filter AND age within [low, high].
         return intersection([{'gender': 1},
                              {'age': list(range(low, high + 1))}])

     # q6 is expected to fan out to its items q6_1..q6_3.
     older_vars = ['q2', 'q6', u'q6_1', u'q6_2', u'q6_3']

     expected_names = ['men only', combined_name('q1')]
     expected_names.extend(combined_name(var) for var in older_vars)
     self.assertEqual(meta['filter_names'], expected_names)

     expected_map = OrderedDict()
     expected_map['q1'] = {combined_name('q1'): men_aged(20, 25)}
     for var in older_vars:
         expected_map[var] = {combined_name(var): men_aged(30, 35)}
     # Variables without an extension keep the plain base filter.
     expected_map['age'] = {'men only': {'gender': 1}}
     self.assertEqual(meta['x_filter_map'], expected_map)