def _parse_presort_exp(  # noqa: C901
    self, presort: Any
) -> IndexedOrderedDict[str, bool]:
    """Parse a presort expression into an ordered ``column -> ascending`` map.

    :param presort: ``None``, a string such as ``"a, b desc"``, or any other
        object, which is handed to the ``IndexedOrderedDict`` constructor
        as is (no validation is done on non-string input)
    :raises SyntaxError: if a comma separated item is empty, has more than
        two tokens, has a direction other than ``asc``/``desc`` (case
        insensitive), or names a column that already appeared
    :return: columns in the order they appear in the expression, mapped to
        ``True`` for ascending and ``False`` for descending
    """
    if presort is None:
        presort = ""
    if not isinstance(presort, str):
        return IndexedOrderedDict(presort)
    presort = presort.strip()
    res: IndexedOrderedDict[str, bool] = IndexedOrderedDict()
    if presort == "":
        return res
    for p in presort.split(","):
        # split() without arguments already drops surrounding whitespace
        pp = p.split()
        # An empty item ("a,,b" or a trailing comma) produces no tokens;
        # report it like any other malformed expression instead of letting
        # pp[0] fail with IndexError.
        if len(pp) == 0 or len(pp) > 2:
            raise SyntaxError(f"Invalid expression {presort}")
        key = pp[0]
        if len(pp) == 1:
            # direction omitted: ascending
            value = True
        else:
            direction = pp[1].lower()
            if direction == "asc":
                value = True
            elif direction == "desc":
                value = False
            else:
                raise SyntaxError(f"Invalid expression {presort}")
        if key in res:
            raise SyntaxError(f"Invalid expression {presort} duplicated key {key}")
        res[key] = value
    return res
def __init__(
    self,
    spec: WorkflowSpec,
    ctx: WorkflowContext,
    parent_workflow: Optional["_Workflow"] = None,
):
    """Run the base initializer, then create an empty ``tasks`` mapping.

    :param spec: the workflow specification, forwarded to the base initializer
    :param ctx: the workflow context, forwarded to the base initializer
    :param parent_workflow: the enclosing workflow if there is one, forwarded
        to the base initializer, defaults to None
    """
    super().__init__(spec, ctx, parent_workflow)
    # Created empty; nothing is added to it in this method.
    self.tasks = IndexedOrderedDict()
class _Workflow(_Task):
    """A task that owns child tasks built from a ``WorkflowSpec``.

    Children are stored in ``self.tasks`` under the keys of ``spec.tasks``,
    in the iteration order of that mapping.
    """

    def __init__(
        self,
        spec: WorkflowSpec,
        ctx: WorkflowContext,
        parent_workflow: Optional["_Workflow"] = None,
    ):
        """Run the base initializer and start with no child tasks."""
        super().__init__(spec, ctx, parent_workflow)
        self.tasks = IndexedOrderedDict()

    def _init_tasks(self):
        """Build every child task, then connect this workflow's outputs."""
        for name, child_spec in self.spec.tasks.items():
            self.tasks[name] = self._build_task(child_spec)
        self._set_outputs()

    def _build_task(self, spec: TaskSpec) -> _Task:
        """Create the child for ``spec`` and connect its configs and inputs.

        A ``WorkflowSpec`` becomes a nested ``_Workflow`` whose own children
        are built before it is returned; any other spec becomes a ``_Task``.
        """
        task: _Task
        if not isinstance(spec, WorkflowSpec):
            task = _Task(spec, self.ctx, self)
        else:
            task = _Workflow(spec, self.ctx, self)
        self._set_configs(task, spec)
        self._set_inputs(task, spec)
        # internal initialization must be after external initialization
        if isinstance(task, _Workflow):
            task._init_tasks()
        return task

    def _set_inputs(self, task: _Task, spec: TaskSpec) -> None:
        """Connect each input of ``task`` to the source named in its spec.

        An expression without a dot names one of this workflow's inputs;
        ``"<task>.<output>"`` names an output of an already built child.
        """
        for name, to_expr in spec.node_spec.dependency.items():
            owner, dot, field = to_expr.partition(".")
            link = task.inputs[name].set_dependency
            link(self.tasks[owner].outputs[field] if dot else self.inputs[owner])

    def _set_configs(self, task: _Task, spec: TaskSpec) -> None:
        """Set literal configs on ``task``, then link the dependent ones to
        this workflow's configs."""
        for name, literal in spec.node_spec.config.items():
            task.configs[name].set(literal)
        for name, source in spec.node_spec.config_dependency.items():
            task.configs[name].set_dependency(self.configs[source])

    def _set_outputs(self) -> None:
        """Connect each output of this workflow to the source named in
        ``spec.internal_dependency`` (same expression format as inputs)."""
        assert isinstance(self.spec, WorkflowSpec)
        for name, to_expr in self.spec.internal_dependency.items():
            owner, dot, field = to_expr.partition(".")
            link = self.outputs[name].set_dependency
            link(self.tasks[owner].outputs[field] if dot else self.inputs[owner])

    def _register(self, temp: List[_Task]) -> None:
        """Forward ``temp`` to ``_register`` of every child task."""
        for child in self.tasks.values():
            child._register(temp)

    def update_by_cache(self) -> None:
        """Call ``_ensure_fully_connected``, then ask every child to update
        itself from the cache."""
        self._ensure_fully_connected()
        for child in self.tasks.values():
            # NOTE(review): the call goes through ``child.task``; that
            # attribute is not defined in this class -- confirm it exists
            # on ``_Task``.
            child.task.update_by_cache()
def update_by_cache(self) -> None:
    """Restore every output from the cache and move to ``FINISHED``.

    Nothing happens when the spec is not deterministic, or when the cache
    reports no value for at least one output; in the latter case no output
    is modified.
    """
    if not self.spec.deterministic:
        return
    # Look everything up first, so that one cache miss leaves all outputs
    # untouched.
    cached = IndexedOrderedDict()
    for name, output in self.outputs.items():
        found, skipped, value = self.ctx.cache.get(output.__uuid__())
        if not found:
            return
        cached[name] = (skipped, value)
    for name, (skipped, value) in cached.items():
        target = self.outputs[name]
        if skipped:
            target.skip(from_cache=True)
        else:
            target.set(value, from_cache=True)
    self._transit(_State.FINISHED)
def test_dependencydict():
    """``_DependencyDict`` exposes the values of the wrapped config variables
    and rejects every write."""
    owner = MockTaskForVar()
    var_a = _ConfigVar(owner, ConfigSpec("a", int, True, False, 1))
    var_b = _ConfigVar(owner, ConfigSpec("b", int, True, False, 2))
    deps = _DependencyDict(IndexedOrderedDict([("a", var_a), ("b", var_b)]))

    assert len(deps) == 2
    assert deps["a"] == 1
    assert deps["b"] == 2

    # a change of the underlying variable is visible through the dict
    var_b.set(3)
    assert deps["b"] == 3
    assert list(deps.items()) == [("a", 1), ("b", 3)]

    # new keys, existing keys and bulk updates are all refused
    with raises(InvalidOperationError):
        deps["c"] = 1
    with raises(InvalidOperationError):
        deps["b"] = 1
    with raises(InvalidOperationError):
        deps.update(dict())
    assert deps["b"] == 3

    # typed access
    assert deps.get_or_throw("b", str) == "3"
    assert deps.get("b", "x") == "3"
    assert deps.get("d", 0) == 0
    with raises(KeyError):
        deps.get_or_throw("d", str)
def _make_dict(
    self, data: Iterable[Any], out_type: Type[T]
) -> IndexedOrderedDict[str, T]:
    """Wrap every item of ``data`` with ``out_type`` and index it by name.

    :param data: items exposing a ``name`` attribute
    :param out_type: callable invoked as ``out_type(self, item)``
    :return: a readonly dictionary from ``item.name`` to the wrapped item,
        in the order of ``data``
    """
    wrapped: IndexedOrderedDict[str, T] = IndexedOrderedDict(
        [(item.name, out_type(self, item)) for item in data]
    )
    wrapped.set_readonly()
    return wrapped
def _parse_spec_collection(
    self, obj: Any, to_type: Type[T]
) -> IndexedOrderedDict[str, T]:
    """Parse a list of raw specs into a dictionary keyed by spec name.

    :param obj: ``None`` (treated as no specs) or a list of raw specs, each
        of which is passed to ``self._parse_spec`` together with ``to_type``
    :param to_type: the type forwarded to ``self._parse_spec``
    :return: parsed specs by ``name``, in the order of ``obj``

    ``aot`` is called with a message when ``obj`` is neither ``None`` nor a
    list, and with a ``KeyError`` when two specs share a name.
    """
    if obj is None:
        return IndexedOrderedDict()
    aot(isinstance(obj, List), "Spec collection must be a list")
    parsed: IndexedOrderedDict[str, T] = IndexedOrderedDict()
    for raw in obj:
        s = self._parse_spec(raw, to_type)
        aot(s.name not in parsed, KeyError(f"Duplicated key {s.name}"))
        parsed[s.name] = s
    return parsed
def get_sorts(self, schema: Schema) -> IndexedOrderedDict[str, bool]:
    """Get the keys for sorting inside a partition: the partition keys
    (always ascending) followed by the presort keys with their own order.

    Every key is checked against ``schema``; ``aot`` is called with a
    ``KeyError`` for a key that is not in it.

    :param schema: the dataframe schema this partition spec operates on
    :return: an ordered dictionary of key, order pairs

    :Example:

    >>> p = PartitionSpec(by=["a"],presort="b , c dESc")
    >>> schema = Schema("a:int,b:int,c:int,d:int")
    >>> assert p.get_sorts(schema) == {"a":True, "b":True, "c": False}
    """
    sorts: IndexedOrderedDict[str, bool] = IndexedOrderedDict()

    def _put(p: str, ascending: bool) -> None:
        # validate first, so that an unknown key is never stored
        aot(p in schema, KeyError(f"{p} not in {schema}"))
        sorts[p] = ascending

    for by_key in self.partition_by:
        _put(by_key, True)
    for sort_key, ascending in self.presort.items():
        _put(sort_key, ascending)
    return sorts
def parse_presort_exp(presort: Any) -> IndexedOrderedDict[str, bool]:  # noqa [C901]
    """Returns ordered column sorting direction where ascending order
    would return as true, and descending as false.

    :param presort: one of

        * ``None`` or a blank string: no presort
        * an ``IndexedOrderedDict``: returned as is, without validation
        * a string of comma separated ``column [asc|desc]`` items, the
          direction is case insensitive and defaults to ascending
        * a list whose items are either column names (ascending) or
          ``(column, ascending)`` tuples with a boolean direction

        Any other input type yields an empty result.
    :type presort: Any
    :raises SyntaxError: if an item is empty or malformed, if a direction is
        not recognized, or if a column appears more than once
    :return: column and boolean sorting direction of column, order matters.
    :rtype: IndexedOrderedDict[str, bool]

    :Example:

    >>> parse_presort_exp("b desc, c asc")
    >>> parse_presort_exp([("b", False), ("c", True)])
    both return IndexedOrderedDict([("b", False), ("c", True)])
    """
    if isinstance(presort, IndexedOrderedDict):
        return presort

    presort_list: List[Tuple[str, bool]] = []
    res: IndexedOrderedDict[str, bool] = IndexedOrderedDict()
    if presort is None:
        return res

    if isinstance(presort, str):
        presort = presort.strip()
        if presort == "":
            return res
        for p in presort.split(","):
            # split() without arguments already drops surrounding whitespace
            pp = p.split()
            # An empty item ("a,,b" or a trailing comma) produces no tokens;
            # report it as a syntax error instead of failing on pp[0].
            if len(pp) == 0 or len(pp) > 2:
                raise SyntaxError(f"Invalid expression {presort}")
            if len(pp) == 1 or pp[1].lower() == "asc":
                presort_list.append((pp[0], True))
            elif pp[1].lower() == "desc":
                presort_list.append((pp[0], False))
            else:
                raise SyntaxError(f"Invalid expression {presort}")
    elif isinstance(presort, list):
        for p in presort:
            if isinstance(p, str):
                if len(p.split()) != 1:
                    raise SyntaxError(f"Invalid expression {presort}")
                presort_list.append((p.strip(), True))
            elif (
                # Check the container type before touching len() or the
                # elements, and short-circuit, so that any malformed item
                # ends in SyntaxError rather than TypeError/KeyError.
                isinstance(p, tuple)
                and len(p) == 2
                and isinstance(p[0], str)
                and isinstance(p[1], bool)
            ):
                presort_list.append((p[0].strip(), p[1]))
            else:
                raise SyntaxError(f"Invalid expression {presort}")
    # NOTE(review): any other input type falls through to an empty result,
    # as before -- confirm whether that should be an error instead.

    for key, value in presort_list:
        if key in res:
            raise SyntaxError(f"Invalid expression {presort} duplicated key {key}")
        res[key] = value
    return res
def test_parse_presort_exp():
    """Check the inputs ``parse_presort_exp`` accepts and the ones it rejects
    with ``SyntaxError``."""
    # empty input and pass-through of an existing dictionary
    assert parse_presort_exp(None) == IndexedOrderedDict()
    assert parse_presort_exp(
        IndexedOrderedDict([("c", True)])
    ) == IndexedOrderedDict([("c", True)])

    # string expressions: direction defaults to ascending, whitespace and
    # letter case of the direction are ignored
    assert parse_presort_exp("c") == IndexedOrderedDict([("c", True)])
    assert parse_presort_exp("         c") == IndexedOrderedDict([("c", True)])
    assert parse_presort_exp("c           desc") == IndexedOrderedDict(
        [("c", False)]
    )
    assert parse_presort_exp("b desc, c asc") == IndexedOrderedDict(
        [("b", False), ("c", True)]
    )
    # a column may be named like a direction keyword
    assert parse_presort_exp("DESC DESC, ASC ASC") == IndexedOrderedDict(
        [("DESC", False), ("ASC", True)]
    )
    assert parse_presort_exp("B DESC, C ASC") == IndexedOrderedDict(
        [("B", False), ("C", True)]
    )

    # list of (column, ascending) tuples
    assert parse_presort_exp([("b", False), ("c", True)]) == IndexedOrderedDict(
        [("b", False), ("c", True)]
    )

    with raises(SyntaxError):
        parse_presort_exp("b dsc, c asc")  # misspelling of desc

    with raises(SyntaxError):
        parse_presort_exp("c true")  # string format needs desc/asc

    with raises(SyntaxError):
        # rejected because of the invalid direction, before the duplicate
        # check is reached
        parse_presort_exp("c true, c true")

    # cannot contain duplicates
    with raises(SyntaxError):
        parse_presort_exp("c, c")
    with raises(SyntaxError):
        parse_presort_exp("c asc, c desc")
    with raises(SyntaxError):
        parse_presort_exp([("c", True), ("c", False)])

    with raises(SyntaxError):
        parse_presort_exp("c asc desc")  # at most two tokens per column

    with raises(SyntaxError):
        # instead of desc and asc, needs to be bool
        parse_presort_exp([("b", "desc"), ("c", "asc")])
def test_using_indexed_ordered_dict():
    """``IndexedOrderedDict[str, int]`` is usable as a parameter annotation,
    and an instance built from a plain dict can be passed for it."""

    def size_of(mapping: IndexedOrderedDict[str, int]):
        return len(mapping)

    single = IndexedOrderedDict({"a": 1})
    assert size_of(single) == 1
def test_indexed_orderd_dict():
    """Exercise ``IndexedOrderedDict``: index based access and the
    ``_need_reindex`` flag, readonly mode, ``popitem``, ``move_to_end``,
    ``copy``/``deepcopy``, pickling and ``equals``.

    The assertions on ``_need_reindex`` depend on the exact order of the
    statements, so keep the sequence intact when editing.
    """
    d = IndexedOrderedDict([("b", 2), ("a", 1)])
    d1 = IndexedOrderedDict([("a", 1), ("b", 2)])
    # equal to a plain dict with the same items, but not to another
    # IndexedOrderedDict holding them in a different order
    assert dict(a=1, b=2) == d
    assert d1 != d
    # a fresh instance needs indexing; the first index lookup clears the flag
    assert d._need_reindex
    assert 1 == d.index_of_key("a")
    assert not d._need_reindex
    assert "a" == d.get_key_by_index(1)
    assert 2 == d.get_value_by_index(0)
    assert ("a", 1) == d.get_item_by_index(1)
    assert not d._need_reindex
    # replacing a value by index leaves the flag cleared
    d.set_value_by_index(1, 10)
    assert not d._need_reindex
    assert ("a", 10) == d.get_item_by_index(1)
    # removing an item sets the flag again
    assert ("b", 2) == d.pop_by_index(0)
    assert d._need_reindex
    assert 1 == len(d)
    assert 0 == d.index_of_key("a")
    assert not d._need_reindex
    # setdefault sets the flag only when it inserts a new key
    assert 10 == d.setdefault("a", 20)
    assert not d._need_reindex
    assert 30 == d.setdefault("b", 30)
    assert d._need_reindex
    d.clear()
    assert d._need_reindex
    # a lookup that fails with KeyError still clears the flag
    raises(KeyError, lambda: d.index_of_key("a"))
    assert not d._need_reindex
    assert 0 == len(d)

    # readonly: writes and deletes are rejected, reads still work
    d = IndexedOrderedDict([("b", 2), ("a", 1)])
    assert not d.readonly
    d.set_readonly()
    assert d.readonly
    raises(InvalidOperationError, lambda: d.__setitem__("b", "3"))
    raises(InvalidOperationError, lambda: d.__delitem__("b"))
    assert 2 == d["b"]

    # popitem
    d = IndexedOrderedDict([("b", 2), ("a", 1), ("c", 3)])
    assert 1 == d.index_of_key("a")
    assert not d._need_reindex
    assert ("b", 2) == d.popitem(last=False)
    assert d._need_reindex
    assert ("c", 3) == d.popitem(last=True)
    assert 0 == d.index_of_key("a")
    assert not d._need_reindex
    d.set_readonly()
    raises(InvalidOperationError, lambda: d.popitem())

    # move_to_end
    d = IndexedOrderedDict([("b", 2), ("a", 1), ("c", 3)])
    d1 = IndexedOrderedDict([("b", 2), ("c", 3), ("a", 1)])
    assert d != d1
    d.move_to_end("a")
    assert d == d1
    d.set_readonly()
    raises(InvalidOperationError, lambda: d.move_to_end("b"))

    # copy and deepcopy
    d = IndexedOrderedDict([("b", 2), ("a", 1), ("c", 3)])
    d.set_readonly()
    d.index_of_key("a")
    assert not d._need_reindex
    d1 = d.copy()
    assert isinstance(d1, IndexedOrderedDict)
    assert not d1._need_reindex
    assert d == d1
    assert 1 == d1.index_of_key("a")
    assert not d1.readonly  # after copy, readonly is set to False
    del d1["a"]  # will not affect the original
    assert 1 == d.index_of_key("a")

    # copy.copy is shallow: nested values are shared with the original
    d = IndexedOrderedDict([("b", [1, IndexedOrderedDict([("x", [2, 4])]), 3])])
    d.set_readonly()
    d1 = copy(d)
    assert not d1.readonly  # after copy, readonly is set to False
    d1["b"][0] = 10
    assert 10 == d["b"][0]
    d1["b"][1]["x"][0] = 200
    assert 200 == d["b"][1]["x"][0]
    d.index_of_key("b")
    assert not d._need_reindex
    # deepcopy: changes to nested values do not reach the original
    d2 = deepcopy(d)
    assert d2._need_reindex  # after deepcopy, reindex is required
    assert not d2.readonly  # after deepcopy, readonly is set to False
    d2["b"][0] = 20
    assert 10 == d["b"][0]
    d2["b"][1]["x"][0] = 300
    assert 200 == d["b"][1]["x"][0]

    # pickle: unlike copy, the round trip keeps the readonly flag
    d = IndexedOrderedDict([("b", 2), ("a", 1), ("c", 3)])
    d.set_readonly()
    d.index_of_key("a")
    assert not d._need_reindex
    d1 = pickle.loads(pickle.dumps(d))
    assert isinstance(d1, IndexedOrderedDict)
    assert not d1._need_reindex
    assert d == d1
    assert 1 == d1.index_of_key("a")
    assert d1.readonly

    # equals: the second argument switches order sensitive comparison on/off;
    # lists of pairs and plain dicts are accepted as the other operand
    d = IndexedOrderedDict([("b", 2), ("a", 1), ("c", 3)])
    d.set_readonly()
    d1 = IndexedOrderedDict([("b", 2), ("c", 3), ("a", 1)])
    d2 = [("b", 2), ("a", 1), ("c", 3)]
    d3 = [("b", 2), ("c", 3), ("a", 1)]
    d4 = dict([("b", 2), ("c", 3), ("a", 1)])
    assert not d.equals(d1, True)
    assert d.equals(d1, False)
    assert d.equals(d2, True)
    assert d.equals(d2, False)
    assert not d.equals(d3, True)
    assert d.equals(d3, False)
    assert not d.equals(d4, True)
    assert d.equals(d4, False)