def setUp(self):
    """Create the name/dtype fixtures, one single task, a task list and a Tasks instance."""
    self.names = ['a', 'b', 'c', 'd']
    self.dtypes = ['str', 'int', 'float', 'bool']

    # The single task is built from the first dtype/name pair.
    first_dtype, first_name = self.dtypes[0], self.names[0]
    self.task = task_factory(first_dtype, first_name)

    # One task per (dtype, name) pair, in fixture order.
    self.tasks = list(map(task_factory, self.dtypes, self.names))
    self.instance = Tasks(self.tasks)
def extract_tasks_from_df(df, instructions=None):
    """
    Extract tasks from dataframe by inferring task kind from dtypes.

    Each column's dtype name is compared against the ``dtype`` of the
    entries in ``REGISTRY``; the first matching entry determines the task
    kind. Instructions may be passed as a separate list.

    Parameters:
    - df: dataframe whose columns become tasks (column label -> task name).
    - instructions: optional per-column instructions, aligned with
      ``df.columns``.

    Returns:
    A list with one task per column, built with ``task_factory``.

    Raises:
    ValueError: if a column's dtype matches no entry in ``REGISTRY``.
    Previously this case either failed with an unbound local variable or
    silently reused the kind inferred for the preceding column.

    Limitations:
    - The str and regex tasks have the same dtype (object).
    - When introducing NA the dtype may have been promoted.
      This happens to boolean tasks that are nullable.
      The incorrect inference will be made that the task is str.
      See: https://pandas.pydata.org/pandas-docs/stable/user_guide/gotchas.html#na-type-promotions
    """
    instructions = pd.Series(instructions, index=df.columns)
    tasks = pd.concat([df.dtypes, instructions], axis=1)
    tasks.columns = ['dtype', 'instruction']

    # Collect the arguments for every column first, so that no task is
    # created unless the kind of all columns could be inferred.
    args = []
    for task in tasks.itertuples():
        name = task.Index
        dtype_name = task.dtype.name

        # The first registry entry with a matching dtype wins.
        for item in REGISTRY.values():
            if item.dtype == dtype_name:
                kind = item.kind
                break
        else:
            raise ValueError(
                f"Cannot infer task kind for column {name!r}: "
                f"dtype {dtype_name!r} does not match any registered task."
            )

        kwargs = {}
        if kind == 'category':
            # Category tasks need the categories carried by the dtype.
            kwargs['categories'] = task.dtype.categories
        kwargs['instruction'] = task.instruction
        args.append((kind, name, kwargs))

    return [
        task_factory(kind, name, **kwargs)
        for kind, name, kwargs in args
    ]
def test_inequality(self):
    """Tasks built from rotated names and dtypes must not equal the fixture tasks."""
    # Rotate both fixture lists in place: the last element moves to the front.
    for fixture in (self.names, self.dtypes):
        fixture.insert(0, fixture.pop())

    rotated = list(map(task_factory, self.dtypes, self.names))
    self.assertNotEqual(self.tasks, rotated)
def __setitem__(self, id, value):
    """
    Store a task under ``id`` and then call ``self._set_pos_in_tasks()``.

    ``value`` may be:
    - a ``Task`` instance: stored as is; if its name differs from ``id`` a
      warning is issued and the name is overwritten with ``id``.
    - a tuple ``(kind, *args)`` or ``(kind, *args, kwargs_dict)``: passed to
      ``task_factory(kind, id, *args, **kwargs_dict)``. A trailing element
      is treated as keyword arguments only when it is a ``dict``. A
      one-element tuple ``(kind,)`` is accepted and is equivalent to
      passing ``kind`` on its own.
    - anything else: used as the kind, i.e. ``task_factory(value, id)``.

    Raises:
    ValueError: if ``value`` is an empty tuple (there is no kind to unpack).
    """
    if isinstance(value, Task):
        if id != value.name:
            warnings.warn(f"The task name '{value.name}' "
                          f"does not match the id '{id}'. "
                          f"Task name is set to '{id}'.")
            value.name = id
    elif isinstance(value, tuple):
        # Only the kind is mandatory; unpacking `kind, *args, kwargs`
        # would reject a one-element tuple.
        kind, *args = value
        kwargs = args.pop() if args and isinstance(args[-1], dict) else {}
        value = task_factory(kind, id, *args, **kwargs)
    else:
        value = task_factory(value, id)
    self.tasks[id] = value
    self._set_pos_in_tasks()
def test_dependency_from_tuple(self):
    """A (condition, value) tuple passed as dependencies yields an equal Dependency."""
    condition, value = "`relevant` == True", None
    expected = Dependency(condition, value)

    task = task_factory(
        'str',
        'topic',
        nullable=True,
        dependencies=(condition, value),
    )

    first_dependency = task.dependencies[0]
    self.assertEqual(first_dependency, expected)
def setUp(self):
    """Create names, dtypes and instructions, and an Annotations instance built from them."""
    self.names = ['a', 'b', 'c', 'd']
    self.dtypes = ['str', 'int', 'float', 'bool']
    self.instructions = ['Enter string', 'Enter integer', None, None]

    # One task per (dtype, name, instruction) triple, in fixture order.
    specs = zip(self.dtypes, self.names, self.instructions)
    tasks = [
        task_factory(kind, name, instruction=text)
        for kind, name, text in specs
    ]
    self.instance = Annotations(tasks)
def test_created_str_task_attributes(self):
    """Check each expected attribute of a freshly created str task in its own sub-test."""
    expected = {
        'kind': 'str',
        'dtype': 'object',
        'name': 'a',
        'instruction': 'eat my shorts \n',
        'nullable': False,
    }
    task = task_factory('str', 'a', instruction='eat my shorts')

    for attribute, wanted in expected.items():
        with self.subTest(i=attribute):
            actual = getattr(task, attribute)
            self.assertEqual(actual, wanted)
def test_equality(self):
    """Tasks rebuilt from the same dtypes and names equal the fixture tasks."""
    rebuilt = [
        task_factory(kind, name)
        for kind, name in zip(self.dtypes, self.names)
    ]
    self.assertEqual(self.tasks, rebuilt)
def test_validation_valid_bool(self):
    """A bool task maps the first BOOLEAN_STATES key to its associated value."""
    first_state = next(iter(BOOLEAN_STATES.items()))
    raw, expected = first_state
    task = task_factory('bool', 'a')
    self.assertEqual(task(raw), expected)
def test_validation_invalid_regex(self):
    """A regex task returns an Invalid for the input 'f0084r!'."""
    pattern = r'[fs]\d{4}r'
    task = task_factory('regex', 'a', regex=pattern)
    result = task('f0084r!')
    self.assertIsInstance(result, Invalid)
def test_validation_valid_regex(self):
    """A regex task returns the input 'f0084r' unchanged."""
    pattern = r'[fs]\d{4}r'
    task = task_factory('regex', 'a', regex=pattern)
    result = task('f0084r')
    self.assertEqual(result, 'f0084r')
def test_validation_invalid_int(self):
    """An int task returns an Invalid for the input '1.0'."""
    int_task = task_factory('int', 'a')
    result = int_task('1.0')
    self.assertIsInstance(result, Invalid)
def test_none_if_nullable(self):
    """A nullable str task returns None when called with KEYS.none."""
    nullable_task = task_factory('str', 'a', nullable=True)
    result = nullable_task(KEYS.none)
    self.assertEqual(result, None)
def test_equality_with_task_from_iterable(self):
    """A task created from a list of values equals the fixture task self.task."""
    values = ['x', 'y', 'z']
    from_iterable = task_factory(values, 'a')
    self.assertEqual(from_iterable, self.task)
def setUp(self):
    """Create the category task fixture named 'a' with categories x, y and z."""
    categories = ['x', 'y', 'z']
    self.task = task_factory('category', 'a', categories=categories)
def test_inequality_name(self):
    """Two int tasks with different names must not compare equal."""
    named_a, named_b = task_factory('int', 'a'), task_factory('int', 'b')
    self.assertNotEqual(named_a, named_b)
def test_quality(self):
    """Two int tasks created with the same kind and name compare equal."""
    first, second = task_factory('int', 'a'), task_factory('int', 'a')
    self.assertEqual(first, second)
def test_created_int_task_dtype(self):
    """An int task reports the dtype 'Int64'."""
    int_task = task_factory('int', 'a')
    reported_dtype = int_task.dtype
    self.assertEqual(reported_dtype, 'Int64')
def test_contains(self):
    """A str task named 'a' is reported as contained in the fixture instance."""
    candidate = task_factory('str', 'a')
    is_member = candidate in self.instance
    self.assertTrue(is_member)
def test_validation_valid_category(self):
    """A task built from ['x', 'y', 'z'] returns 'x' for the input '1'."""
    choices = ['x', 'y', 'z']
    category_task = task_factory(choices, 'a')
    result = category_task('1')
    self.assertEqual(result, 'x')
def test_none_if_not_nullable(self):
    """A non-nullable int task returns an Invalid when called with KEYS.none."""
    strict_task = task_factory('int', 'a', nullable=False)
    result = strict_task(KEYS.none)
    self.assertIsInstance(result, Invalid)
def test_validation_invalid_category(self):
    """A task built from ['x', 'y', 'z'] returns an Invalid for the input 'u'."""
    choices = ['x', 'y', 'z']
    category_task = task_factory(choices, 'a')
    result = category_task('u')
    self.assertIsInstance(result, Invalid)
def test_validation_valid_int(self):
    """An int task converts the input '1337' to the integer 1337."""
    int_task = task_factory('int', 'a')
    result = int_task('1337')
    self.assertEqual(result, 1337)
def test_validation_invalid_bool(self):
    """A bool task returns an Invalid for the input 'u'."""
    bool_task = task_factory('bool', 'a')
    result = bool_task('u')
    self.assertIsInstance(result, Invalid)