def condition(function=None, on=ConditionOperator.START, cache=False, vectorized=False):
    """
    Register the decorated function as a condition of its schematic.

    Usable bare (``@condition``) or with options (``@condition(on=...)``).

    :param function: function being decorated; ``None`` when the decorator is
        called with keyword options only.
    :param on: where the condition applies; must be one of
        ``ConditionOperator.START``, ``ConditionOperator.END`` or
        ``ConditionOperator.GROUPS``.
    :param cache: when truthy, the function is wrapped with ``lru_cache``
        before it is registered and returned.
    :param vectorized: forwarded unchanged to ``ConditionOperator``.
    :return: the (possibly cached) function, or a partially applied decorator
        when ``function`` is ``None``.
    :raises InvalidConditionLocationError: if ``on`` is not a valid location.
    """
    from remake.schematic.cache import SchematicCache

    if function is None:
        # Called as ``@condition(...)``: return a decorator that remembers
        # the options and waits for the function.
        return partial(condition, on=on, cache=cache, vectorized=vectorized)

    transformer, name, sources = get_info(function)

    valid_locations = [
        ConditionOperator.START,
        ConditionOperator.END,
        ConditionOperator.GROUPS,
    ]
    # Validate before wrapping so an invalid location never builds a cache.
    if on not in valid_locations:
        msg = (
            f"{name}(..., on={on}) is not a valid condition location. "
            f"Please use one of {valid_locations}"
        )
        raise InvalidConditionLocationError(msg)

    if cache:
        function = lru_cache(function)

    spec = ConditionOperator(
        name=name,
        on=on,
        sources=sources,
        transform=function,
        vectorized=vectorized,
    )
    SchematicCache.add_condition(transformer, spec)
    return function
def named_split(function):
    """
    Register the decorated function as a named split of its schematic.

    A named split takes a dataframe and converts it into a dictionary with
    dataframes as values.

    :param function: the split function being decorated.
    :return: ``function`` unchanged, so the decorated name stays bound to the
        callable instead of being rebound to ``None``.
    """
    from remake.schematic.cache import SchematicCache

    schematic, name, _ = get_info(function)
    SchematicCache.add_split(schematic, SplitOperator(name, function, named=True))
    return function
def split(function):
    """
    Register the decorated function as an unnamed split of its schematic.

    A split takes a dataframe and converts it into a list of dataframes.

    :param function: the split function being decorated.
    :return: ``function`` unchanged, so the decorated name stays bound to the
        callable instead of being rebound to ``None``.
    """
    from remake.schematic.cache import SchematicCache

    schematic, name, _ = get_info(function)
    SchematicCache.add_split(schematic, SplitOperator(name, function, named=False))
    return function
def schematic(cls):
    """
    Class decorator that assembles a schematic transformation from ``cls``.

    The schematics of the direct base classes are merged first, then merged
    with the schematic cached under the class name. Attributes of ``cls``
    whose names neither start nor end with an underscore are carried over
    to the assembled class.

    :param cls: the class being decorated.
    :return: the result of ``assemble_schematic`` for the merged schematic.
    :raises ValueError: if ``cls`` is not a class.
    """
    if not inspect.isclass(cls):
        raise ValueError(
            f"@schematic can only be used as a class decorator, it cannot decorate {cls}"
        )

    cls_name = cls.__name__

    # Fold every direct base class's schematic into one core, in base order.
    inherited_core = SchematicCore()
    for base in cls.__bases__:
        inherited_core = merge_schematics(inherited_core, recursive_merge(base))

    own_core = SchematicCache.get(cls_name)
    merged_core = merge_schematics(inherited_core, own_core)

    # Copy all functionality of the original class
    public_attributes = {
        attr_name: attr
        for attr_name, attr in vars(cls).items()
        if not (attr_name.startswith("_") or attr_name.endswith("_"))
    }

    transform_base = type(f"{cls_name}Schematic", (SchematicTransformation,), {})
    return assemble_schematic(transform_base, merged_core, public_attributes)
def column(function=None, temporary=False, cache=False):
    """
    Register the decorated function as a column of its schematic.

    Usable bare (``@column``) or with options (``@column(temporary=True)``).

    :param function: function being decorated; ``None`` when the decorator is
        called with keyword options only.
    :param temporary: passed to ``ColumnOperator`` as ``is_view``.
    :param cache: when truthy, the function is wrapped with ``lru_cache``
        before it is registered and returned.
    :return: the (possibly cached) function, or a partially applied decorator
        when ``function`` is ``None``.
    """
    from remake.schematic.cache import SchematicCache

    if function is None:
        # Options-only call: hand back a decorator that waits for the function.
        return partial(column, temporary=temporary, cache=cache)

    transformer, column_name, input_columns = get_info(function)
    if cache:
        function = lru_cache(function)

    operator = ColumnOperator(
        name=column_name,
        input_columns=input_columns,
        is_view=temporary,
        transform=function,
    )
    SchematicCache.add_column(transformer, operator)
    return function
def transformation(function=None, on=TransformationOperator.START, after=None):
    """
    Register the decorated function as a transformation of its schematic.

    Usable bare (``@transformation``) or with options
    (``@transformation(on=..., after=...)``).

    :param function: function being decorated; ``None`` when the decorator is
        called with keyword options only.
    :param on: where the transformation applies; must be one of
        ``TransformationOperator.START``, ``TransformationOperator.END`` or
        ``TransformationOperator.GROUPS``.
    :param after: forwarded unchanged to ``TransformationOperator``.
    :return: ``function`` unchanged, or a partially applied decorator when
        ``function`` is ``None``.
    :raises SchematicError: if ``on`` is not a valid location.
    """
    from remake.schematic.cache import SchematicCache

    if function is None:
        # Called as ``@transformation(...)``: return a decorator that
        # remembers the options and waits for the function.
        return partial(transformation, on=on, after=after)

    # The third item returned by get_info is not needed for transformations.
    transformer, name, _ = get_info(function)

    valid_locations = [
        TransformationOperator.START,
        TransformationOperator.END,
        TransformationOperator.GROUPS,
    ]
    if on not in valid_locations:
        msg = (
            f"{name}(..., on={on}) is not a valid transformation location. "
            f"Please use one of {valid_locations}"
        )
        raise SchematicError(msg)

    spec = TransformationOperator(name=name, on=on, after=after, transform=function)
    SchematicCache.add_transformation(transformer, spec)
    return function
def index(*columns):
    """
    Register an index over ``columns`` for the class currently being defined.

    The class name is read from the ``__qualname__`` local of the calling
    frame, so this is meant to be called directly inside a class body.

    :param columns: the column names making up the index.
    """
    from remake.schematic.cache import SchematicCache

    # Entry 1 of the stack is the caller; item 0 of that entry is its frame.
    caller_record = inspect.stack()[1]
    caller_frame = caller_record[0]
    class_name = caller_frame.f_locals["__qualname__"]

    operator = IndexOperator(list(columns))
    SchematicCache.add_index(class_name, operator)
def test_core_generation():
    """
    Register one spec of each kind under a single key and check the core.

    After registration, the core fetched from ``SchematicCache`` is expected
    to hold exactly one entry in each checked collection.
    """
    key = "TEST"

    SchematicCache.add_column(key, ColumnSpec("test", [], False, lambda: True, False))
    SchematicCache.add_condition(key, ConditionSpec("test", [], lambda: True, ""))
    SchematicCache.add_copy(key, CopySpec("test", "test"))
    SchematicCache.add_group(
        key, GroupSpec(name="test", sources=[], transform=None, sort_by=None)
    )
    SchematicCache.add_index(key, IndexSpec("test", []))
    SchematicCache.add_parameter(
        key, ParameterSpec("Test", "Test", None, None, None, None, None)
    )
    SchematicCache.add_sorts(key, SortBySpec([]))
    SchematicCache.add_explode(key, ExplodeSpec([], None))
    SchematicCache.add_expand(key, ExpandSpec([], None))
    SchematicCache.add_join(key, JoinSpec([], None))

    core = SchematicCache.get(key)

    # NOTE(review): a condition is registered above but no condition
    # collection is checked here - confirm whether that is intentional.
    checked_collections = (
        "columns",
        "copies",
        "groups",
        "indexes",
        "parameters",
        "sorts",
        "explodes",
        "expands",
        "joins",
    )
    for collection in checked_collections:
        assert len(getattr(core, collection)) == 1