def test__has_not_count_errors(self):
    """Check that _has_count and _not_count reject an unsupported dtype.

    The 'start_time' column is cast to datetime64 and handed to both
    functions. Each call must raise a TypeError whose message starts
    with the function-specific "must be a supported dtype" text.
    """
    # Build a series with a dtype the count functions should refuse
    start_time = self.example_data_A_data['start_time']
    start_time = start_time.astype(np.datetime64)
    test_values = [1, 2]

    # Test has version: _has_count raises TypeError
    with self.assertRaises(TypeError) as error:
        _has_count(start_time, test_values)
    # str(exception) is used instead of ``exception.message`` because
    # the ``message`` attribute does not exist on Python 3 exceptions.
    self.assertEqual(
        str(error.exception)[:58],
        "The series given to has_count() must be a supported dtype."
    )

    # Test not version: _not_count raises TypeError
    with self.assertRaises(TypeError) as error:
        _not_count(start_time, test_values)
    self.assertEqual(
        str(error.exception)[:58],
        "The series given to not_count() must be a supported dtype."
    )
def test__has_not_count_errors(self):
    """Check that _has_count and _not_count reject an unsupported dtype.

    The 'start_time' column is cast to datetime64 and handed to both
    functions. Each call must raise a TypeError whose message starts
    with the function-specific "must be a supported dtype" text.
    """
    # Build a series with a dtype the count functions should refuse
    start_time = self.example_data_A_data['start_time']
    start_time = start_time.astype(np.datetime64)
    test_values = [1, 2]

    # Test has version: _has_count raises TypeError
    with self.assertRaises(TypeError) as error:
        _has_count(start_time, test_values)
    # str(exception) is used instead of ``exception.message`` because
    # the ``message`` attribute does not exist on Python 3 exceptions.
    self.assertEqual(
        str(error.exception)[:58],
        "The series given to has_count() must be a supported dtype."
    )

    # Test not version: _not_count raises TypeError
    with self.assertRaises(TypeError) as error:
        _not_count(start_time, test_values)
    self.assertEqual(
        str(error.exception)[:58],
        "The series given to not_count() must be a supported dtype."
    )
def test__has_not_count(self):
    """Exercise _has_count and _not_count on set and single variables.

    For each of the 'q2', 'gender' and 'locality' columns three
    scenarios are checked:

    1. plain count conditions such as ``[1]``, ``[1, 3]`` and
       ``[1, 3, [1, 2, 3]]``;
    2. operator-led conditions of the form ``[(op_func, 3)]`` and
       ``[(op_func, 3), [1, 2, 3]]`` for every ``_is_*`` helper;
    3. the ``[1, 3, [1, 2, 3]]`` condition with ``True`` passed as the
       third positional argument (the exclusivity case).

    In every scenario the rows selected by _has_count are re-counted
    from the dummies returned by ``self.get_count_nums`` and the index
    returned by _not_count is verified with
    ``self.confirm_inverse_index``.
    """
    test_vars = [
        'q2',        # Test on delimited set
        'gender',    # Test on single stored as int64
        'locality',  # Test on single stored as float64
    ]

    # Test non-operator-lead logical comparisons
    for var_name in test_vars:
        test_var = self.example_data_A_data[var_name]

        response_tests = [
            [1],
            [1, 2],
            [1, 3],
            [1, 3, [1, 2, 3]],
        ]
        for test_responses in response_tests:

            # Test _has_count returns correct results
            idx = _has_count(test_var, test_responses)

            # Determine min/max values for logic and
            # slice dummies column-wise for targeted values subset
            dummies, _min, _max = self.get_count_nums(
                test_var[idx],
                test_responses
            )
            # Count the number of responses per row
            test_var_counts = dummies.sum(axis=1).unique()

            if len(test_responses) == 1:
                # Test single targeted response count. Compare as a
                # plain list: assertEqual on an ndarray relies on the
                # array's truth value, which is an error (not a test
                # failure) when more than one distinct count exists.
                self.assertEqual(list(test_var_counts), [_min])
            else:
                value_range = range(_min, _max + 1)
                # Positive test range of response count
                self.assertTrue(all(
                    c in value_range
                    for c in test_var_counts
                ))

            # Test inverse index produced by not version
            not_idx = _not_count(test_var, test_responses)
            self.confirm_inverse_index(test_var, idx, not_idx)

    # Test operator-lead logical comparisons
    # Each logic helper is paired with the plain comparison used to
    # re-check the per-row counts.
    op_pairs = [
        (_is_lt, lt),
        (_is_le, le),
        (_is_eq, eq),
        (_is_ne, ne),
        (_is_ge, ge),
        (_is_gt, gt),
    ]
    for op_func, compare in op_pairs:

        for var_name in test_vars:
            test_var = self.example_data_A_data[var_name]

            response_tests = [
                [(op_func, 3)],
                [(op_func, 3), [1, 2, 3]],
            ]
            for test_responses in response_tests:

                # Test _has_count returns correct results
                idx = _has_count(test_var, test_responses)

                # Determine min/max values for logic and
                # slice dummies column-wise for targeted values subset
                dummies, dum_func, _max = self.get_count_nums(
                    test_var[idx],
                    test_responses
                )
                # Element 1 of the second return value is used as the
                # right-hand operand of the comparison below.
                numerator = dum_func[1]

                # Only conditions that carry a values list (second
                # element) restrict the dummies to those columns.
                if len(test_responses) > 1:
                    values = [
                        str(v)
                        for v in test_responses[1]
                        if str(v) in dummies.columns
                    ]
                    dummies = dummies[values]

                # Count the number of responses per row
                test_var_counts = dummies.sum(axis=1).unique()

                # Positive test range of response count
                self.assertTrue(all(compare(
                    test_var_counts,
                    numerator
                )))

                if op_func in [_is_ge, _is_eq] and numerator > 0:
                    incl_na = False
                elif op_func in [_is_gt]:
                    incl_na = False
                else:
                    incl_na = True

                # Test inverse index produced by not version
                not_idx = _not_count(test_var, test_responses)
                self.confirm_inverse_index(
                    test_var,
                    idx,
                    not_idx,
                    incl_na
                )

    # Test non-operator-lead logical comparisons with
    # exclusivity
    for var_name in test_vars:
        test_var = self.example_data_A_data[var_name]

        response_tests = [
            [1, 3, [1, 2, 3]],
        ]
        for test_responses in response_tests:

            # Test _has_count returns correct results
            idx = _has_count(test_var, test_responses, True)

            # Determine min/max values for logic and
            # slice dummies column-wise for targeted values subset
            dummies, _min, _max = self.get_count_nums(
                test_var[idx],
                test_responses
            )
            # Count the number of responses per row
            test_var_counts = dummies.sum(axis=1).unique()

            value_range = range(_min, _max + 1)
            # Positive test range of response count
            self.assertTrue(all(
                c in value_range
                for c in test_var_counts
            ))

            # Negative test for exclusivity: no selected row may also
            # have a response in a column outside the targeted ones.
            all_dummies = test_var.astype('object').str.get_dummies(';')
            other_cols = [
                c for c in all_dummies.columns
                if c not in dummies.columns
            ]
            other_dummies = all_dummies[other_cols]
            other_any_mask = other_dummies.any(axis=1)
            other_dummies = other_dummies[other_any_mask]
            self.assertEqual(
                other_dummies.index.intersection(dummies.index).size,
                0
            )

            # Test inverse index produced by not version
            not_idx = _not_count(test_var, test_responses, True)
            self.confirm_inverse_index(test_var, idx, not_idx)
def test__has_not_count(self):
    """Exercise _has_count and _not_count on set and single variables.

    For each of the 'q2', 'gender' and 'locality' columns three
    scenarios are checked:

    1. plain count conditions such as ``[1]``, ``[1, 3]`` and
       ``[1, 3, [1, 2, 3]]``;
    2. operator-led conditions of the form ``[(op_func, 3)]`` and
       ``[(op_func, 3), [1, 2, 3]]`` for every ``_is_*`` helper;
    3. the ``[1, 3, [1, 2, 3]]`` condition with ``True`` passed as the
       third positional argument (the exclusivity case).

    In every scenario the rows selected by _has_count are re-counted
    from the dummies returned by ``self.get_count_nums`` and the index
    returned by _not_count is verified with
    ``self.confirm_inverse_index``.
    """
    test_vars = [
        'q2',        # Test on delimited set
        'gender',    # Test on single stored as int64
        'locality',  # Test on single stored as float64
    ]

    # Test non-operator-lead logical comparisons
    for var_name in test_vars:
        test_var = self.example_data_A_data[var_name]

        response_tests = [
            [1],
            [1, 2],
            [1, 3],
            [1, 3, [1, 2, 3]],
        ]
        for test_responses in response_tests:

            # Test _has_count returns correct results
            idx = _has_count(test_var, test_responses)

            # Determine min/max values for logic and
            # slice dummies column-wise for targeted values subset
            dummies, _min, _max = self.get_count_nums(
                test_var[idx],
                test_responses
            )
            # Count the number of responses per row
            test_var_counts = dummies.sum(axis=1).unique()

            if len(test_responses) == 1:
                # Test single targeted response count. Compare as a
                # plain list: assertEqual on an ndarray relies on the
                # array's truth value, which is an error (not a test
                # failure) when more than one distinct count exists.
                self.assertEqual(list(test_var_counts), [_min])
            else:
                value_range = range(_min, _max + 1)
                # Positive test range of response count
                self.assertTrue(all(
                    c in value_range
                    for c in test_var_counts
                ))

            # Test inverse index produced by not version
            not_idx = _not_count(test_var, test_responses)
            self.confirm_inverse_index(test_var, idx, not_idx)

    # Test operator-lead logical comparisons
    # Each logic helper is paired with the plain comparison used to
    # re-check the per-row counts.
    op_pairs = [
        (_is_lt, lt),
        (_is_le, le),
        (_is_eq, eq),
        (_is_ne, ne),
        (_is_ge, ge),
        (_is_gt, gt),
    ]
    for op_func, compare in op_pairs:

        for var_name in test_vars:
            test_var = self.example_data_A_data[var_name]

            response_tests = [
                [(op_func, 3)],
                [(op_func, 3), [1, 2, 3]],
            ]
            for test_responses in response_tests:

                # Test _has_count returns correct results
                idx = _has_count(test_var, test_responses)

                # Determine min/max values for logic and
                # slice dummies column-wise for targeted values subset
                dummies, dum_func, _max = self.get_count_nums(
                    test_var[idx],
                    test_responses
                )
                # Element 1 of the second return value is used as the
                # right-hand operand of the comparison below.
                numerator = dum_func[1]

                # Only conditions that carry a values list (second
                # element) restrict the dummies to those columns.
                if len(test_responses) > 1:
                    values = [
                        str(v)
                        for v in test_responses[1]
                        if str(v) in dummies.columns
                    ]
                    dummies = dummies[values]

                # Count the number of responses per row
                test_var_counts = dummies.sum(axis=1).unique()

                # Positive test range of response count
                self.assertTrue(all(compare(
                    test_var_counts,
                    numerator
                )))

                if op_func in [_is_ge, _is_eq] and numerator > 0:
                    incl_na = False
                elif op_func in [_is_gt]:
                    incl_na = False
                else:
                    incl_na = True

                # Test inverse index produced by not version
                not_idx = _not_count(test_var, test_responses)
                self.confirm_inverse_index(
                    test_var,
                    idx,
                    not_idx,
                    incl_na
                )

    # Test non-operator-lead logical comparisons with
    # exclusivity
    for var_name in test_vars:
        test_var = self.example_data_A_data[var_name]

        response_tests = [
            [1, 3, [1, 2, 3]],
        ]
        for test_responses in response_tests:

            # Test _has_count returns correct results
            idx = _has_count(test_var, test_responses, True)

            # Determine min/max values for logic and
            # slice dummies column-wise for targeted values subset
            dummies, _min, _max = self.get_count_nums(
                test_var[idx],
                test_responses
            )
            # Count the number of responses per row
            test_var_counts = dummies.sum(axis=1).unique()

            value_range = range(_min, _max + 1)
            # Positive test range of response count
            self.assertTrue(all(
                c in value_range
                for c in test_var_counts
            ))

            # Negative test for exclusivity: no selected row may also
            # have a response in a column outside the targeted ones.
            all_dummies = test_var.astype('object').str.get_dummies(';')
            other_cols = [
                c for c in all_dummies.columns
                if c not in dummies.columns
            ]
            other_dummies = all_dummies[other_cols]
            other_any_mask = other_dummies.any(axis=1)
            other_dummies = other_dummies[other_any_mask]
            self.assertEqual(
                other_dummies.index.intersection(dummies.index).size,
                0
            )

            # Test inverse index produced by not version
            not_idx = _not_count(test_var, test_responses, True)
            self.confirm_inverse_index(test_var, idx, not_idx)