Exemple #1
0
    def test__has_not_count_errors(self):
        """Check that _has_count and _not_count reject an unsupported dtype.

        The ``start_time`` column of ``self.example_data_A_data`` is cast
        with ``astype(np.datetime64)`` and passed to both helpers. Each
        call must raise ``TypeError`` with a message that starts with the
        expected text.
        """
        # Test unsupported dtype series is given
        start_time = self.example_data_A_data['start_time']
        start_time = start_time.astype(np.datetime64)
        test_values = [1, 2]

        # (function under test, expected start of the error message)
        expectations = [
            (
                _has_count,
                "The series given to has_count() must be a supported dtype."
            ),
            (
                _not_count,
                "The series given to not_count() must be a supported dtype."
            ),
        ]

        for count_func, expected in expectations:
            with self.assertRaises(TypeError) as error:
                count_func(start_time, test_values)
            # Use str(exception) instead of exception.message: the
            # ``message`` attribute does not exist on Python 3 exceptions.
            self.assertEqual(
                str(error.exception)[:len(expected)],
                expected
            )
 def test__has_not_count_errors(self):
     """Check that _has_count and _not_count reject an unsupported dtype.

     The ``start_time`` column of ``self.example_data_A_data`` is cast
     with ``astype(np.datetime64)`` and passed to both helpers. Each
     call must raise ``TypeError`` with a message that starts with the
     expected text.
     """
     # Test unsupported dtype series is given
     start_time = self.example_data_A_data['start_time']
     start_time = start_time.astype(np.datetime64)
     test_values = [1, 2]

     # (function under test, expected start of the error message)
     expectations = [
         (
             _has_count,
             "The series given to has_count() must be a supported dtype."
         ),
         (
             _not_count,
             "The series given to not_count() must be a supported dtype."
         ),
     ]

     for count_func, expected in expectations:
         with self.assertRaises(TypeError) as error:
             count_func(start_time, test_values)
         # Use str(exception) instead of exception.message: the
         # ``message`` attribute does not exist on Python 3 exceptions.
         self.assertEqual(
             str(error.exception)[:len(expected)],
             expected
         )
Exemple #3
0
    def test__has_not_count(self):
        """Exercise _has_count and _not_count on three example variables.

        Three scenarios are run against ``self.example_data_A_data``:

        1. count conditions without a comparison operator,
        2. count conditions led by a comparison operator
           (``_is_lt`` through ``_is_gt``),
        3. count conditions with the third positional argument set to
           ``True`` (exclusivity).

        In every scenario the rows selected by ``_has_count`` are summarised
        through ``self.get_count_nums`` and the index returned by
        ``_not_count`` is checked with ``self.confirm_inverse_index``.
        """
        test_vars = [
            'q2',       # Test on delimited set
            'gender',   # Test on single stored as int64
            'locality'  # Test on single stored as float64
        ]

        # Test non-operator-lead logical comparisons
        for var_name in test_vars:
            test_var = self.example_data_A_data[var_name]
            response_tests = [
                [1],
                [1, 2],
                [1, 3],
                [1, 3, [1, 2, 3]]
            ]
            for test_responses in response_tests:

                # Test _has_count returns correct results
                idx = _has_count(test_var, test_responses)

                # Determine min/max values for logic and
                # slice dummies column-wise for targeted values subset
                dummies, _min, _max = self.get_count_nums(
                    test_var[idx],
                    test_responses
                )

                # Count the number of responses per row
                test_var_counts = dummies.sum(axis=1).unique()

                if len(test_responses) == 1:
                    # Test single targeted response count.
                    # Compare as a plain list: handing the array itself to
                    # assertEqual raises ValueError (ambiguous truth value)
                    # instead of a clean failure when several distinct
                    # counts are present.
                    self.assertEqual(list(test_var_counts), [_min])
                else:
                    value_range = range(_min, _max + 1)
                    # Positive test range of response count
                    self.assertTrue(all(
                        c in value_range
                        for c in test_var_counts
                    ))

                # Test inverse index produced by not version
                not_idx = _not_count(test_var, test_responses)
                self.confirm_inverse_index(test_var, idx, not_idx)

        # Test operator-lead logical comparisons
        # Maps each operator marker to the plain comparison function used
        # to verify the per-row counts.
        op_map = {
            _is_lt: lt, _is_le: le,
            _is_eq: eq, _is_ne: ne,
            _is_ge: ge, _is_gt: gt
        }
        for op_func in [_is_lt, _is_le, _is_eq, _is_ne, _is_ge, _is_gt]:

            for var_name in test_vars:
                test_var = self.example_data_A_data[var_name]
                response_tests = [
                    [(op_func, 3)],
                    [(op_func, 3), [1, 2, 3]]
                ]
                for test_responses in response_tests:

                    # Test _has_count returns correct results
                    idx = _has_count(test_var, test_responses)

                    # Determine min/max values for logic and
                    # slice dummies column-wise for targeted values subset
                    dummies, dum_func, _max = self.get_count_nums(
                        test_var[idx],
                        test_responses
                    )
                    numerator = dum_func[1]

                    # Best effort: restrict dummies to the explicit values
                    # subset when one is given. For the single-element
                    # test_responses the [1] lookup fails and dummies is
                    # left untouched. Exception (not a bare except) keeps
                    # KeyboardInterrupt/SystemExit from being swallowed.
                    try:
                        values = test_responses[1]
                        values = [
                            str(v) for v in values
                            if str(v) in dummies.columns
                        ]
                        dummies = dummies[values]
                    except Exception:
                        pass

                    # Count the number of responses per row
                    test_var_counts = dummies.sum(axis=1).unique()

                    # Positive test range of response count
                    self.assertTrue(all(op_map[op_func](
                        test_var_counts,
                        numerator
                    )))

                    if op_func in [_is_ge, _is_eq] and numerator > 0:
                        incl_na = False
                    elif op_func in [_is_gt]:
                        incl_na = False
                    else:
                        incl_na = True

                    # Test inverse index produced by not version
                    not_idx = _not_count(test_var, test_responses)
                    self.confirm_inverse_index(
                        test_var,
                        idx,
                        not_idx,
                        incl_na
                    )

        # Test non-operator-lead logical comparisons with
        # exclusivity
        for var_name in test_vars:
            test_var = self.example_data_A_data[var_name]
            response_tests = [
                [1, 3, [1, 2, 3]]
            ]
            for test_responses in response_tests:

                # Test _has_count returns correct results
                idx = _has_count(test_var, test_responses, True)

                # Determine min/max values for logic and
                # slice dummies column-wise for targeted values subset
                dummies, _min, _max = self.get_count_nums(
                    test_var[idx],
                    test_responses
                )

                # Count the number of responses per row
                test_var_counts = dummies.sum(axis=1).unique()

                value_range = range(_min, _max + 1)
                # Positive test range of response count
                self.assertTrue(all(
                    c in value_range
                    for c in test_var_counts
                ))
                # Negative test for exclusivity: no selected row may carry
                # a response in any column outside the targeted ones.
                all_dummies = test_var.astype('object').str.get_dummies(';')
                other_cols = [
                    c for c in all_dummies.columns
                    if c not in dummies.columns
                ]
                other_dummies = all_dummies[other_cols]
                other_any_mask = other_dummies.any(axis=1)
                other_dummies = other_dummies[other_any_mask]
                self.assertEqual(
                    other_dummies.index.intersection(dummies.index).size,
                    0
                )

                # Test inverse index produced by not version
                not_idx = _not_count(test_var, test_responses, True)
                self.confirm_inverse_index(test_var, idx, not_idx)
 def test__has_not_count(self):
     """Exercise _has_count and _not_count on three example variables.

     Three scenarios are run against ``self.example_data_A_data``:

     1. count conditions without a comparison operator,
     2. count conditions led by a comparison operator
        (``_is_lt`` through ``_is_gt``),
     3. count conditions with the third positional argument set to
        ``True`` (exclusivity).

     In every scenario the rows selected by ``_has_count`` are summarised
     through ``self.get_count_nums`` and the index returned by
     ``_not_count`` is checked with ``self.confirm_inverse_index``.
     """
     test_vars = [
         'q2',       # Test on delimited set
         'gender',   # Test on single stored as int64
         'locality'  # Test on single stored as float64
     ]

     # Test non-operator-lead logical comparisons
     for var_name in test_vars:
         test_var = self.example_data_A_data[var_name]
         response_tests = [
             [1],
             [1, 2],
             [1, 3],
             [1, 3, [1, 2, 3]]
         ]
         for test_responses in response_tests:

             # Test _has_count returns correct results
             idx = _has_count(test_var, test_responses)

             # Determine min/max values for logic and
             # slice dummies column-wise for targeted values subset
             dummies, _min, _max = self.get_count_nums(
                 test_var[idx],
                 test_responses
             )

             # Count the number of responses per row
             test_var_counts = dummies.sum(axis=1).unique()

             if len(test_responses) == 1:
                 # Test single targeted response count.
                 # Compare as a plain list: handing the array itself to
                 # assertEqual raises ValueError (ambiguous truth value)
                 # instead of a clean failure when several distinct
                 # counts are present.
                 self.assertEqual(list(test_var_counts), [_min])
             else:
                 value_range = range(_min, _max + 1)
                 # Positive test range of response count
                 self.assertTrue(all(
                     c in value_range
                     for c in test_var_counts
                 ))

             # Test inverse index produced by not version
             not_idx = _not_count(test_var, test_responses)
             self.confirm_inverse_index(test_var, idx, not_idx)

     # Test operator-lead logical comparisons
     # Maps each operator marker to the plain comparison function used
     # to verify the per-row counts.
     op_map = {
         _is_lt: lt, _is_le: le,
         _is_eq: eq, _is_ne: ne,
         _is_ge: ge, _is_gt: gt
     }
     for op_func in [_is_lt, _is_le, _is_eq, _is_ne, _is_ge, _is_gt]:

         for var_name in test_vars:
             test_var = self.example_data_A_data[var_name]
             response_tests = [
                 [(op_func, 3)],
                 [(op_func, 3), [1, 2, 3]]
             ]
             for test_responses in response_tests:

                 # Test _has_count returns correct results
                 idx = _has_count(test_var, test_responses)

                 # Determine min/max values for logic and
                 # slice dummies column-wise for targeted values subset
                 dummies, dum_func, _max = self.get_count_nums(
                     test_var[idx],
                     test_responses
                 )
                 numerator = dum_func[1]

                 # Best effort: restrict dummies to the explicit values
                 # subset when one is given. For the single-element
                 # test_responses the [1] lookup fails and dummies is
                 # left untouched. Exception (not a bare except) keeps
                 # KeyboardInterrupt/SystemExit from being swallowed.
                 try:
                     values = test_responses[1]
                     values = [
                         str(v) for v in values
                         if str(v) in dummies.columns
                     ]
                     dummies = dummies[values]
                 except Exception:
                     pass

                 # Count the number of responses per row
                 test_var_counts = dummies.sum(axis=1).unique()

                 # Positive test range of response count
                 self.assertTrue(all(op_map[op_func](
                     test_var_counts,
                     numerator
                 )))

                 if op_func in [_is_ge, _is_eq] and numerator > 0:
                     incl_na = False
                 elif op_func in [_is_gt]:
                     incl_na = False
                 else:
                     incl_na = True

                 # Test inverse index produced by not version
                 not_idx = _not_count(test_var, test_responses)
                 self.confirm_inverse_index(
                     test_var,
                     idx,
                     not_idx,
                     incl_na
                 )

     # Test non-operator-lead logical comparisons with
     # exclusivity
     for var_name in test_vars:
         test_var = self.example_data_A_data[var_name]
         response_tests = [
             [1, 3, [1, 2, 3]]
         ]
         for test_responses in response_tests:

             # Test _has_count returns correct results
             idx = _has_count(test_var, test_responses, True)

             # Determine min/max values for logic and
             # slice dummies column-wise for targeted values subset
             dummies, _min, _max = self.get_count_nums(
                 test_var[idx],
                 test_responses
             )

             # Count the number of responses per row
             test_var_counts = dummies.sum(axis=1).unique()

             value_range = range(_min, _max + 1)
             # Positive test range of response count
             self.assertTrue(all(
                 c in value_range
                 for c in test_var_counts
             ))
             # Negative test for exclusivity: no selected row may carry
             # a response in any column outside the targeted ones.
             all_dummies = test_var.astype('object').str.get_dummies(';')
             other_cols = [
                 c for c in all_dummies.columns
                 if c not in dummies.columns
             ]
             other_dummies = all_dummies[other_cols]
             other_any_mask = other_dummies.any(axis=1)
             other_dummies = other_dummies[other_any_mask]
             self.assertEqual(
                 other_dummies.index.intersection(dummies.index).size,
                 0
             )

             # Test inverse index produced by not version
             not_idx = _not_count(test_var, test_responses, True)
             self.confirm_inverse_index(test_var, idx, not_idx)