def _combine_filters(self, ext_filters):
    """
    Combines existing filter in ``self.filter`` with additional filters.

    Only the first item of ``ext_filters`` is used; any further items are
    ignored.

    Parameters
    ----------
    ext_filters: dict
        dict with variable name as key, str or tuple of str, and logic
        as value. For example:
        ext_filters = {'q1': {'gender': 1}}

    Returns
    -------
    new_filter: dict
        {'new_filter_name': intersection([old_logic, new_logic])}
        When ``self.filter`` equals ``'no_filter'`` the new logic is
        returned unchanged under the name ``'(no_filter)+(<new name>)'``.

    Raises
    ------
    IndexError
        If ``ext_filters`` (or a dict-valued ``self.filter``) is empty.
    """
    old_filter = self.filter

    # ``d.keys()[0]`` only works where keys() returns a list (Python 2).
    # Materialising the items first behaves the same on Python 2 and 3 and
    # still raises IndexError for an empty dict.
    new_filter_name, new_filter_logic = list(ext_filters.items())[0]

    if old_filter == 'no_filter':
        combined_name = '(no_filter)+({})'.format(new_filter_name)
        return {combined_name: new_filter_logic}

    old_filter_name, old_filter_logic = list(old_filter.items())[0]
    combined_name = '({})+({})'.format(old_filter_name, new_filter_name)
    combined_logic = intersection([old_filter_logic, new_filter_logic])
    return {combined_name: combined_logic}
def test_nested_logic_list(self):
    """Check index and key for a union nested inside an intersection.

    The index returned for the nested logic must hold the same items as
    the indexes of the three leaf logics combined by hand as
    ``(all ∪ any) ∩ count``.
    """
    series = self.example_data_A_data['q2']
    nested_logic = intersection([
        union([has_all([1, 2]), has_any([3, 4])]),
        has_count([3]),
    ])

    # get_logic_index is unpacked as an (index, key) pair.
    nested_idx, nested_key = get_logic_index(series, nested_logic)
    all_idx, _ = get_logic_index(series, has_all([1, 2]))
    any_idx, _ = get_logic_index(series, has_any([3, 4]))
    count_idx, _ = get_logic_index(series, has_count([3]))

    # NOTE(review): assertItemsEqual exists only in Python 2's unittest
    # (Python 3 names it assertCountEqual) - confirm the target interpreter.
    by_hand = all_idx.union(any_idx).intersection(count_idx)
    self.assertItemsEqual(nested_idx, by_hand)
    self.assertEqual(nested_key, 'x[(({1&2},{3,4})&{3})]:y')
def test_logic_list(self):
    """Check index and key for each list-style logic operator.

    For union, intersection, difference and symmetric_difference the test
    builds a three-part logic, resolves it with ``get_logic_index`` and
    compares the resulting index against the three part indexes combined
    with the same-named index method. The generated key is compared
    against the expected literal.
    """
    q2 = self.example_data_A_data['q2']

    # (logic builder, index method used to combine the parts, expected key)
    cases = [
        (union, 'union', 'x[({1&2},{3,4},{3})]:y'),
        (intersection, 'intersection', 'x[({1&2}&{3,4}&{3})]:y'),
        (difference, 'difference', 'x[({1&2}~{3,4}~{3})]:y'),
        (symmetric_difference, 'symmetric_difference',
         'x[({1&2}^{3,4}^{3})]:y'),
    ]

    for build_logic, index_method, expected_key in cases:
        test_logic = build_logic(
            [has_all([1, 2]), has_any([3, 4]), has_count([3])])
        idx, vkey = get_logic_index(q2, test_logic)

        # test_logic[1][i] is passed back in as a logic of its own;
        # presumably these are the three operands given above.
        idx1, idx2, idx3 = [
            get_logic_index(q2, test_logic[1][position])[0]
            for position in range(3)
        ]

        # The former pandas-version switch had two identical arms, so a
        # single code path covers every operator.
        expected_idx = getattr(idx1, index_method)(idx2)
        expected_idx = getattr(expected_idx, index_method)(idx3)

        self.assertCountEqual(idx, expected_idx, msg=index_method)
        self.assertEqual(vkey, expected_key)
def test_filter(self):
    """Filter the dataset in place and verify the alias and remaining rows.

    After filtering, ``dataset.filtered`` must equal the alias, the data
    must have the expected number of rows, and only the expected gender
    and age codes may remain.
    """
    dataset = self._get_dataset()
    f = intersection([{'gender': [2]}, {'age': frange('35-45')}])
    alias = 'men: 35 to 45 years old'
    dataset.filter(alias, f, inplace=True)

    # alias copied correctly?
    self.assertEqual(dataset.filtered, alias)

    # correctly sliced?
    expected_index_len = 1509
    self.assertEqual(len(dataset._data.index), expected_index_len)
    self.assertEqual(dataset['age'].value_counts().sum(), expected_index_len)

    expected_gender_codes = [2]
    expected_age_codes = list(range(35, 46))
    # assertEqual instead of assertTrue(a == b): same pass/fail outcome,
    # but a failure reports both lists instead of "False is not true".
    self.assertEqual(
        dataset['gender'].value_counts().index.tolist(),
        expected_gender_codes)
    self.assertEqual(
        sorted(dataset['age'].value_counts().index.tolist()),
        expected_age_codes)
def test_logic_list(self):
    """Check index and key for each list-style logic operator.

    Each scenario builds a three-part logic, resolves it with
    ``get_logic_index`` and compares the result against the part indexes
    combined with the same-named index method, plus the expected key.
    """
    # NOTE(review): assertItemsEqual is Python 2 only, and Index.sym_diff
    # is not available in newer pandas - confirm the targeted versions.
    scenarios = (
        (union, 'union', 'x[((1&2),(3,4),{3})]:y'),
        (intersection, 'intersection', 'x[((1&2)&(3,4)&{3})]:y'),
        (difference, 'difference', 'x[((1&2)~(3,4)~{3})]:y'),
        (sym_diff, 'sym_diff', 'x[((1&2)^(3,4)^{3})]:y'),
    )

    for build_logic, index_method, expected_key in scenarios:
        series = self.example_data_A_data['q2']
        logic = build_logic(
            [has_all([1, 2]), has_any([3, 4]), has_count([3])])
        combined_idx, combined_key = get_logic_index(series, logic)

        # Resolve each of the three entries of logic[1] on its own.
        first, second, third = [
            get_logic_index(series, logic[1][position])[0]
            for position in range(3)
        ]

        by_hand = getattr(first, index_method)(second)
        by_hand = getattr(by_hand, index_method)(third)

        self.assertItemsEqual(combined_idx, by_hand)
        self.assertEqual(combined_key, expected_key)
def test_get_logic_key(self):
    """Check the key string ``get_logic_key`` produces for several logics."""

    def expect_key(logic, expected, *extra):
        # Compare the generated key with the expected literal; ``extra``
        # carries the optional second argument of get_logic_key.
        self.assertEqual(get_logic_key(logic, *extra), expected)

    expect_key(has_all([1, 2, 3], True), 'x[e{1&2&3}]:y')

    expect_key(
        has_count([is_ge(1), [5, 6, 7, 8, 9]]),
        'x[(5,6,7,8,9){>=1}]:y')

    expect_key(
        not_count([is_ge(1), [5, 6, 7, 8, 9]]),
        'x[(5,6,7,8,9)~{>=1}]:y')

    expect_key(
        union([has_any([1, 2]), has_all([3, 4]), not_any([5, 6])]),
        'x[({1,2},{3&4},~{5,6})]:y')

    # A logic that names another column ('Wave') is resolved with the
    # example data passed as second argument.
    wave_logic = union([
        intersection([has_any([1, 2]), not_any([3])]),
        {'Wave': has_any([1, 2])},
    ])
    expect_key(
        wave_logic,
        'x[(({1,2}&~{3}),Wave={1,2})]:y',
        self.example_data_A_data)
def test_get_logic_key(self):
    """Check the key string ``get_logic_key`` produces for several logics."""
    # has_all with a second positional argument of True
    all_logic = has_all([1, 2, 3], True)
    all_key = get_logic_key(all_logic)
    self.assertEqual(all_key, 'x[e(1&2&3)]:y')

    # has_count with a comparison and a list of codes
    count_logic = has_count([is_ge(1), [5, 6, 7, 8, 9]])
    count_key = get_logic_key(count_logic)
    self.assertEqual(count_key, 'x[(5,6,7,8,9){>=1}]:y')

    # the negated counterpart
    negated_logic = not_count([is_ge(1), [5, 6, 7, 8, 9]])
    negated_key = get_logic_key(negated_logic)
    self.assertEqual(negated_key, 'x[(5,6,7,8,9)~{>=1}]:y')

    # flat union of three logics
    flat_logic = union([has_any([1, 2]), has_all([3, 4]), not_any([5, 6])])
    flat_key = get_logic_key(flat_logic)
    self.assertEqual(flat_key, 'x[((1,2),(3&4),~(5,6))]:y')

    # union of a nested intersection and a logic on the 'Wave' column;
    # here the example data is passed as second argument
    nested_part = intersection([has_any([1, 2]), not_any([3])])
    wave_part = {'Wave': has_any([1, 2])}
    mixed_logic = union([nested_part, wave_part])
    mixed_key = get_logic_key(mixed_logic, self.example_data_A_data)
    self.assertEqual(mixed_key, 'x[(((1,2)&~(3)),Wave=(1,2))]:y')
def test_get_logic_key(self):
    """Verify ``get_logic_key`` output for simple, negated and nested logics."""
    check = self.assertEqual

    # Single-operator logics.
    check(
        get_logic_key(has_all([1, 2, 3], True)),
        'x[e(1&2&3)]:y',
    )
    check(
        get_logic_key(has_count([is_ge(1), [5, 6, 7, 8, 9]])),
        'x[(5,6,7,8,9){>=1}]:y',
    )
    check(
        get_logic_key(not_count([is_ge(1), [5, 6, 7, 8, 9]])),
        'x[(5,6,7,8,9)~{>=1}]:y',
    )

    # A union of three logics.
    three_way = union([
        has_any([1, 2]),
        has_all([3, 4]),
        not_any([5, 6]),
    ])
    check(get_logic_key(three_way), 'x[((1,2),(3&4),~(5,6))]:y')

    # A union mixing a nested intersection with a logic keyed by the
    # 'Wave' column; the example data is supplied as second argument.
    with_column = union([
        intersection([has_any([1, 2]), not_any([3])]),
        {'Wave': has_any([1, 2])},
    ])
    check(
        get_logic_key(with_column, self.example_data_A_data),
        'x[(((1,2)&~(3)),Wave=(1,2))]:y',
    )
def test_nested_logic_list(self):
    """Check index and key for a union nested inside an intersection."""
    data = self.example_data_A_data['q2']

    inner_union = union([has_all([1, 2]), has_any([3, 4])])
    logic = intersection([inner_union, has_count([3])])
    result_idx, result_key = get_logic_index(data, logic)

    # Resolve the three leaf logics separately and combine them by hand in
    # the same shape as the nested logic: (all | any) & count.
    leaf_all = get_logic_index(data, has_all([1, 2]))[0]
    leaf_any = get_logic_index(data, has_any([3, 4]))[0]
    leaf_count = get_logic_index(data, has_count([3]))[0]
    manual_idx = leaf_all.union(leaf_any).intersection(leaf_count)

    # NOTE(review): assertItemsEqual exists only in Python 2's unittest
    # (Python 3 names it assertCountEqual) - confirm the target interpreter.
    self.assertItemsEqual(result_idx, manual_idx)
    self.assertEqual(result_key, 'x[(((1&2),(3,4))&{3})]:y')
def test_extend_filter(self):
    """Extend a batch's filter per variable and check the resulting meta.

    After ``extend_filter`` the batch meta must list one combined filter
    name per listed variable and map each variable to the intersection of
    the 'men only' logic with its extra age logic. 'age' keeps the plain
    'men only' filter.
    """
    batch, _ = _get_batch('test', full=True)
    # The meta is fetched before extend_filter is called, so the
    # assertions below rely on it reflecting the later change.
    b_meta = _get_meta(batch)

    batch.extend_filter({
        'q1': {'age': frange('20-25')},
        ('q2', 'q6'): {'age': frange('30-35')},
    })

    expected_names = ['men only']
    expected_names.extend(
        '(men only)+({})'.format(suffix)
        for suffix in ('q1', 'q2', 'q6', 'q6_1', 'q6_2', 'q6_3')
    )
    self.assertEqual(b_meta['filter_names'], expected_names)

    def men_with_ages(first_age, last_age):
        # 'men only' logic intersected with an inclusive list of age codes.
        ages = list(range(first_age, last_age + 1))
        return intersection([{'gender': 1}, {'age': ages}])

    expected_map = OrderedDict()
    expected_map['q1'] = {'(men only)+(q1)': men_with_ages(20, 25)}
    for variable in ('q2', 'q6', u'q6_1', u'q6_2', u'q6_3'):
        combined_name = '(men only)+({})'.format(variable)
        expected_map[variable] = {combined_name: men_with_ages(30, 35)}
    expected_map['age'] = {'men only': {'gender': 1}}

    self.assertEqual(b_meta['x_filter_map'], expected_map)
def test_logic_list(self):
    """Check index and key for union, intersection, difference and sym_diff.

    Every section builds a three-part logic, resolves it as a whole and
    part by part, and compares the whole against the parts combined with
    the matching index method.
    """
    data = self.example_data_A_data['q2']

    def operands():
        # Fresh copies of the three logics used in every section.
        return [has_all([1, 2]), has_any([3, 4]), has_count([3])]

    def part_indexes(logic):
        # Resolve the three entries of logic[1] individually.
        return [
            get_logic_index(data, logic[1][position])[0]
            for position in range(3)
        ]

    # NOTE(review): assertItemsEqual is Python 2 only, and Index.sym_diff
    # is not available in newer pandas - confirm the targeted versions.

    # union
    logic = union(operands())
    whole_idx, whole_key = get_logic_index(data, logic)
    a, b, c = part_indexes(logic)
    self.assertItemsEqual(whole_idx, a.union(b).union(c))
    self.assertEqual(whole_key, 'x[((1&2),(3,4),{3})]:y')

    # intersection
    logic = intersection(operands())
    whole_idx, whole_key = get_logic_index(data, logic)
    a, b, c = part_indexes(logic)
    self.assertItemsEqual(whole_idx, a.intersection(b).intersection(c))
    self.assertEqual(whole_key, 'x[((1&2)&(3,4)&{3})]:y')

    # difference
    logic = difference(operands())
    whole_idx, whole_key = get_logic_index(data, logic)
    a, b, c = part_indexes(logic)
    self.assertItemsEqual(whole_idx, a.difference(b).difference(c))
    self.assertEqual(whole_key, 'x[((1&2)~(3,4)~{3})]:y')

    # symmetric difference
    logic = sym_diff(operands())
    whole_idx, whole_key = get_logic_index(data, logic)
    a, b, c = part_indexes(logic)
    self.assertItemsEqual(whole_idx, a.sym_diff(b).sym_diff(c))
    self.assertEqual(whole_key, 'x[((1&2)^(3,4)^{3})]:y')