def add_copy(self, copy: CopyOperator):
    """Register a copy operator under its name.

    Args:
        copy: The operator to register. It is stored in ``self._copies``
            keyed by ``copy.name``.

    Raises:
        SchematicError: If a copy with the same name is already registered,
            or if the name is already present in ``self._columns``.
    """
    # Test membership on the registries themselves. Indexing them by name
    # first (``self._copies[copy.name]``) looks up an entry that does not
    # exist yet for every new name, so the duplicate check never worked.
    if copy.name in self._copies:
        raise SchematicError(
            f"There are duplicate copies with name '{copy.name}'")
    if copy.name in self._columns:
        raise SchematicError(
            f"The copy '{copy.name}' clashes with a column with the same name"
        )
    self._copies[copy.name] = copy
def add_join(self, join: JoinOperator):
    """Register a join operator under its name.

    Args:
        join: The operator to register. It is stored in ``self._joins``
            keyed by ``join.name``.

    Raises:
        SchematicError: If the name is already used by a registered join or
            by a registered parameter.
    """
    key = join.name

    # Joins and parameters must not reuse a name, so both registries are
    # checked before anything is stored.
    if key in self._joins:
        raise SchematicError(
            f"There are duplicate joins with name '{join.name}'")
    if key in self._parameters:
        raise SchematicError(
            f"The join '{join.name}' clashes with a parameter with the same name"
        )

    self._joins[key] = join
def add_parameter(self, parameter: ParameterOperator):
    """Register a parameter operator under its name.

    Args:
        parameter: The operator to register. It is stored in
            ``self._parameters`` keyed by ``parameter.name``.

    Raises:
        SchematicError: If the name is already used by a registered
            parameter or by a registered join.
    """
    key = parameter.name

    # Parameters and joins must not reuse a name, so both registries are
    # checked before anything is stored.
    if key in self._parameters:
        raise SchematicError(
            f"There are duplicate parameters with name '{parameter.name}'")
    if key in self._joins:
        raise SchematicError(
            f"The parameter '{parameter.name}' clashes with a join with the same name"
        )

    self._parameters[key] = parameter
def add_group(self, group: GroupOperator):
    """Attach the schematic's single group operator.

    Stores ``group`` on ``self.group``, forwards ``group.sort_by`` to
    ``self.add_sorts`` and registers, via ``self.add_copy``, one
    ``CopyOperator`` per entry in ``group.sources`` whose name and source
    are both that entry.

    Args:
        group: The group operator to attach.

    Raises:
        SchematicError: If ``self.flat_group`` is already set, or if a group
            has already been attached.
    """
    # The flat_group conflict is checked first, so it wins when both a
    # flat_group and a group are already present.
    if self.flat_group is not None:
        raise SchematicError(
            "A schematic cannot both have a group and a flat_group")
    if self.group is not None:
        raise SchematicError("A schematic can only have one group")

    self.group = group
    self.add_sorts(group.sort_by)
    for source_name in group.sources:
        self.add_copy(CopyOperator(name=source_name, source=source_name))
def transformation(function=None, on=TransformationOperator.START, after=None):
    """Register ``function`` as a transformation; usable as a decorator.

    Works both bare (``@transformation``) and with keyword arguments
    (``@transformation(on=..., after=...)``). When called without a
    function it returns a ``partial`` of itself that still expects the
    function.

    Args:
        function: The callable to register, or ``None`` when only keyword
            arguments are being supplied.
        on: Where the transformation applies. Must be one of
            ``TransformationOperator.START``, ``TransformationOperator.END``
            or ``TransformationOperator.GROUPS``.
        after: Passed through unchanged to ``TransformationOperator``.

    Returns:
        ``function`` itself, unmodified, or a ``partial`` of this decorator
        when ``function`` is ``None``.

    Raises:
        SchematicError: If ``on`` is not one of the valid locations.
    """
    # NOTE(review): imported at call time rather than module level,
    # presumably to avoid a circular import - confirm before hoisting.
    from remake.schematic.cache import SchematicCache

    if function is None:
        return partial(transformation, on=on, after=after)

    # The third value returned by get_info is not needed here.
    transformer, name, _ = get_info(function)

    valid_locations = [
        TransformationOperator.START,
        TransformationOperator.END,
        TransformationOperator.GROUPS,
    ]
    if on not in valid_locations:
        # Name the keyword the caller actually passed (``on``); the message
        # previously reported it as ``at``.
        msg = (f"{name}(..., on={on}) is not a valid location. "
               f"Please use one of {valid_locations}")
        raise SchematicError(msg)

    spec = TransformationOperator(
        name=name, on=on, after=after, transform=function)
    SchematicCache.add_transformation(transformer, spec)
    return function
def add_split(self, split: SplitOperator):
    """Attach the schematic's single split operator.

    Args:
        split: The split operator to store on ``self.split``.

    Raises:
        SchematicError: If a split has already been attached.
    """
    already_has_split = self.split is not None
    if already_has_split:
        raise SchematicError("A schematic can only have one split")
    self.split = split
def get(schematic_name):
    """Remove and return the entry cached under ``schematic_name``.

    The entry is popped from ``SchematicCache.__cores__``, so a second call
    with the same name raises unless the entry has been added again.

    Args:
        schematic_name: Key to look up in ``SchematicCache.__cores__``.

    Returns:
        The value that was stored under ``schematic_name``.

    Raises:
        SchematicError: If ``schematic_name`` is not in the cache.
    """
    cores = SchematicCache.__cores__
    if schematic_name not in cores:
        raise SchematicError(f"{schematic_name} is not a valid schematic")
    return cores.pop(schematic_name)