예제 #1
0
파일: partition.py 프로젝트: zywillc/fugue
 def _parse_presort_exp(  # noqa: C901
     self, presort: Any
 ) -> IndexedOrderedDict[str, bool]:
     """Parse a presort expression into ordered column sort directions.

     Ascending order maps to ``True`` and descending order to ``False``.

     :param presort: ``None`` or a blank string (both give an empty result),
         a comma separated string such as ``"b desc, c asc"`` where the
         direction is case insensitive and defaults to ascending, or any
         non-string object, which is passed directly to the
         ``IndexedOrderedDict`` constructor without validation
     :raises SyntaxError: if a segment is empty, has more than two tokens,
         has a direction other than ``asc``/``desc``, or repeats a column
     :return: column name to ascending flag, in the order of the expression
     """
     if presort is None:
         presort = ""
     if not isinstance(presort, str):
         return IndexedOrderedDict(presort)
     presort = presort.strip()
     res: IndexedOrderedDict[str, bool] = IndexedOrderedDict()
     if presort == "":
         return res
     for p in presort.split(","):
         # split() without arguments already drops surrounding whitespace
         pp = p.split()
         if len(pp) == 1:
             key, value = pp[0], True
         elif len(pp) == 2 and pp[1].lower() in ("asc", "desc"):
             key, value = pp[0], pp[1].lower() == "asc"
         else:
             # also covers empty segments such as "a,,b" or "a,", which
             # would otherwise fail with IndexError on pp[0]
             raise SyntaxError(f"Invalid expression {presort}")
         if key in res:
             raise SyntaxError(f"Invalid expression {presort} duplicated key {key}")
         res[key] = value
     return res
예제 #2
0
 def __init__(
     self,
     spec: WorkflowSpec,
     ctx: WorkflowContext,
     parent_workflow: Optional["_Workflow"] = None,
 ):
     """Initialize the base task state and start with no child tasks.

     All three arguments are forwarded unchanged to the parent initializer.

     :param spec: the workflow specification
     :param ctx: the workflow context
     :param parent_workflow: the enclosing workflow, defaults to None
     """
     super().__init__(spec, ctx, parent_workflow)
     # ordered container for child tasks; it is empty after construction
     self.tasks = IndexedOrderedDict()
예제 #3
0
class _Workflow(_Task):
    """Task made of child tasks that are built from a ``WorkflowSpec``.

    Child tasks are stored in ``self.tasks`` keyed by the names used in
    ``spec.tasks``.
    """

    def __init__(
        self,
        spec: WorkflowSpec,
        ctx: WorkflowContext,
        parent_workflow: Optional["_Workflow"] = None,
    ):
        super().__init__(spec, ctx, parent_workflow)
        # child task name -> child task, filled by _init_tasks
        self.tasks = IndexedOrderedDict()

    def _init_tasks(self):
        """Build every child task, then wire this workflow's outputs."""
        for name, child_spec in self.spec.tasks.items():
            self.tasks[name] = self._build_task(child_spec)
        self._set_outputs()

    def _build_task(self, spec: TaskSpec) -> _Task:
        """Create the child task for ``spec`` and connect it to this workflow.

        :param spec: specification of the child; a ``WorkflowSpec`` yields a
            nested ``_Workflow``, anything else a plain ``_Task``
        :return: the connected child task
        """
        task_type = _Workflow if isinstance(spec, WorkflowSpec) else _Task
        task: _Task = task_type(spec, self.ctx, self)
        self._set_configs(task, spec)
        self._set_inputs(task, spec)
        if isinstance(task, _Workflow):
            # internal initialization must be after external initialization
            task._init_tasks()
        return task

    def _set_inputs(self, task: _Task, spec: TaskSpec) -> None:
        """Point each input of ``task`` at the source named in its spec.

        An expression without a dot names an input of this workflow;
        ``"<task>.<output>"`` names an output of a sibling task.
        """
        for input_name, expr in spec.node_spec.dependency.items():
            target = task.inputs[input_name]
            parts = expr.split(".", 1)
            if len(parts) == 2:
                source = self.tasks[parts[0]].outputs[parts[1]]
            else:
                source = self.inputs[parts[0]]
            target.set_dependency(source)

    def _set_configs(self, task: _Task, spec: TaskSpec) -> None:
        """Apply literal config values, then config dependencies, to ``task``."""
        node_spec = spec.node_spec
        for config_name, literal in node_spec.config.items():
            task.configs[config_name].set(literal)
        for config_name, parent_name in node_spec.config_dependency.items():
            task.configs[config_name].set_dependency(self.configs[parent_name])

    def _set_outputs(self) -> None:
        """Point each output of this workflow at the source named in its spec.

        Expressions follow the same convention as in ``_set_inputs``.
        """
        assert isinstance(self.spec, WorkflowSpec)
        for output_name, expr in self.spec.internal_dependency.items():
            target = self.outputs[output_name]
            parts = expr.split(".", 1)
            if len(parts) == 2:
                source = self.tasks[parts[0]].outputs[parts[1]]
            else:
                source = self.inputs[parts[0]]
            target.set_dependency(source)

    def _register(self, temp: List[_Task]) -> None:
        """Delegate registration to every child task."""
        for child in self.tasks.values():
            child._register(temp)

    def update_by_cache(self) -> None:
        """Run the cache update of every child after a connectivity check."""
        self._ensure_fully_connected()
        for child in self.tasks.values():
            # NOTE(review): the update goes through ``child.task`` while
            # ``_register`` calls the child directly - confirm this attribute
            # is intended
            child.task.update_by_cache()
예제 #4
0
 def update_by_cache(self) -> None:
     """Fulfil all outputs from the cache when every one of them is cached.

     Nothing happens if the spec is not deterministic or if any output is
     missing from the cache. Otherwise each output is either skipped or set
     to its cached value and the task transits to ``FINISHED``.
     """
     if not self.spec.deterministic:
         return
     # collect first so that a single cache miss leaves all outputs untouched
     cached = IndexedOrderedDict()
     for name, output in self.outputs.items():
         found, was_skipped, cached_value = self.ctx.cache.get(output.__uuid__())
         if not found:
             return
         cached[name] = (was_skipped, cached_value)
     for name, (was_skipped, cached_value) in cached.items():
         if was_skipped:
             self.outputs[name].skip(from_cache=True)
         else:
             self.outputs[name].set(cached_value, from_cache=True)
     self._transit(_State.FINISHED)
def test_dependencydict():
    """Check that _DependencyDict exposes variable values and rejects writes."""
    owner = MockTaskForVar()
    var_a = _ConfigVar(owner, ConfigSpec("a", int, True, False, 1))
    var_b = _ConfigVar(owner, ConfigSpec("b", int, True, False, 2))
    deps = _DependencyDict(IndexedOrderedDict([("a", var_a), ("b", var_b)]))

    # lookups return the current values of the wrapped variables
    assert 2 == len(deps)
    assert 1 == deps["a"]
    assert 2 == deps["b"]
    var_b.set(3)
    assert 3 == deps["b"]
    assert [("a", 1), ("b", 3)] == list(deps.items())

    # any kind of write through the dict is rejected
    with raises(InvalidOperationError):
        deps["c"] = 1
    with raises(InvalidOperationError):
        deps["b"] = 1
    with raises(InvalidOperationError):
        deps.update(dict())
    assert 3 == deps["b"]

    # typed and defaulted getters
    assert "3" == deps.get_or_throw("b", str)
    assert "3" == deps.get("b", "x")
    assert 0 == deps.get("d", 0)
    with raises(KeyError):
        deps.get_or_throw("d", str)
예제 #6
0
 def _make_dict(self, data: Iterable[Any],
                out_type: Type[T]) -> IndexedOrderedDict[str, T]:
     """Wrap each item of ``data`` in ``out_type`` and index it by name.

     :param data: items exposing a ``name`` attribute
     :param out_type: callable invoked as ``out_type(self, item)``
     :return: a readonly ordered dictionary of name to wrapped item
     """
     wrapped: IndexedOrderedDict[str, T] = IndexedOrderedDict()
     for item in data:
         instance = out_type(self, item)
         wrapped[item.name] = instance
     wrapped.set_readonly()
     return wrapped
예제 #7
0
 def _parse_spec_collection(self, obj: Any,
                            to_type: Type[T]) -> IndexedOrderedDict[str, T]:
     """Parse a list of raw specs into an ordered dictionary keyed by name.

     :param obj: ``None`` (gives an empty result) or a list of raw specs
     :param to_type: target type handed to ``_parse_spec`` for each item
     :return: parsed specs in input order, keyed by their ``name``
     """
     specs: IndexedOrderedDict[str, T] = IndexedOrderedDict()
     if obj is not None:
         aot(isinstance(obj, List), "Spec collection must be a list")
         for raw in obj:
             parsed = self._parse_spec(raw, to_type)
             # names must be unique within one collection
             aot(parsed.name not in specs,
                 KeyError(f"Duplicated key {parsed.name}"))
             specs[parsed.name] = parsed
     return specs
예제 #8
0
파일: partition.py 프로젝트: zywillc/fugue
    def get_sorts(self, schema: Schema) -> IndexedOrderedDict[str, bool]:
        """Get the sort keys within a partition: the partition keys (always
        ascending) followed by the presort keys

        :param schema: the dataframe schema this partition spec operates on
        :return: an ordered dictionary of key, order pairs

        :Example:

        >>> p = PartitionSpec(by=["a"],presort="b , c dESc")
        >>> schema = Schema("a:int,b:int,c:int,d:int")
        >>> assert p.get_sorts(schema) == {"a":True, "b":True, "c": False}
        """

        def _in_schema(column):
            # every sort key has to be a column of the given schema
            aot(column in schema, KeyError(f"{column} not in {schema}"))
            return column

        sorts: IndexedOrderedDict[str, bool] = IndexedOrderedDict()
        for column in self.partition_by:
            sorts[_in_schema(column)] = True
        for column, ascending in self.presort.items():
            sorts[_in_schema(column)] = ascending
        return sorts
예제 #9
0
def parse_presort_exp(
        presort: Any) -> IndexedOrderedDict[str, bool]:  # noqa [C901]
    """Returns ordered column sorting direction where ascending order
    would return as true, and descending as false.

    :param presort: one of the following

        * ``None`` or a blank string, giving an empty result
        * an ``IndexedOrderedDict``, returned as is without validation
        * a comma separated string such as ``"b desc, c asc"``; the
          direction is case insensitive and defaults to ascending
        * a list whose items are column names (ascending) or
          ``(column, ascending)`` tuples with a boolean direction

        Input of any other type is not parsed and gives an empty result.
    :type presort: Any

    :raises SyntaxError: if a string segment is empty, has more than two
        tokens or an unknown direction, if a list item is not a single
        column name or a ``(str, bool)`` tuple, or if a column is repeated

    :return: column and boolean sorting direction of column, order matters.
    :rtype: IndexedOrderedDict[str, bool]

    :Example:

    >>> parse_presort_exp("b desc, c asc")
    >>> parse_presort_exp([("b", False), ("c", True)])
    both return IndexedOrderedDict([("b", False), ("c", True)])
    """

    if isinstance(presort, IndexedOrderedDict):
        return presort

    res: IndexedOrderedDict[str, bool] = IndexedOrderedDict()
    if presort is None:
        return res

    presort_list: List[Tuple[str, bool]] = []
    if isinstance(presort, str):
        presort = presort.strip()
        if presort == "":
            return res
        for p in presort.split(","):
            # split() without arguments already drops surrounding whitespace
            pp = p.split()
            if len(pp) == 1:
                presort_list.append((pp[0], True))
            elif len(pp) == 2 and pp[1].lower() in ("asc", "desc"):
                presort_list.append((pp[0], pp[1].lower() == "asc"))
            else:
                # also covers empty segments such as "a,,b" or "a,", which
                # would otherwise fail with IndexError on pp[0]
                raise SyntaxError(f"Invalid expression {presort}")

    elif isinstance(presort, list):
        for p in presort:
            if isinstance(p, str):
                if len(p.split()) != 1:
                    raise SyntaxError(f"Invalid expression {presort}")
                presort_list.append((p.strip(), True))
            elif (
                # check the type before len() and indexing so that any
                # malformed item ends in SyntaxError, not TypeError
                isinstance(p, tuple)
                and len(p) == 2
                and isinstance(p[0], str)
                and isinstance(p[1], bool)
            ):
                presort_list.append((p[0].strip(), p[1]))
            else:
                raise SyntaxError(f"Invalid expression {presort}")

    for key, value in presort_list:
        if key in res:
            raise SyntaxError(
                f"Invalid expression {presort} duplicated key {key}")
        res[key] = value
    return res
예제 #10
0
def test_parse_presort_exp():
    """Check the accepted input forms and the rejected expressions of
    parse_presort_exp."""

    # empty input and pass-through of an existing IndexedOrderedDict
    assert parse_presort_exp(None) == IndexedOrderedDict()
    assert parse_presort_exp(IndexedOrderedDict([
        ('c', True)
    ])) == IndexedOrderedDict([('c', True)])

    # string form: direction defaults to ascending, whitespace is ignored
    assert parse_presort_exp("c") == IndexedOrderedDict([('c', True)])
    assert parse_presort_exp("         c") == IndexedOrderedDict([('c', True)])
    assert parse_presort_exp("c           desc") == IndexedOrderedDict([
        ('c', False)
    ])
    assert parse_presort_exp("b desc, c asc") == IndexedOrderedDict([
        ('b', False), ('c', True)
    ])

    # column names keep their case, even when they look like directions
    assert parse_presort_exp("DESC DESC, ASC ASC") == IndexedOrderedDict([
        ('DESC', False), ('ASC', True)
    ])
    assert parse_presort_exp("B DESC, C ASC") == IndexedOrderedDict([
        ('B', False), ('C', True)
    ])

    # list form with boolean directions
    assert parse_presort_exp([("b", False), ("c", True)
                              ]) == IndexedOrderedDict([('b', False),
                                                        ('c', True)])

    with raises(SyntaxError):
        parse_presort_exp("b dsc, c asc")  # misspelling of desc

    with raises(SyntaxError):
        parse_presort_exp("c true")  # string format needs desc/asc

    # the directions are valid here, so only the duplicate check can fail
    with raises(SyntaxError):
        parse_presort_exp("c asc, c desc")  # cannot contain duplicates

    with raises(SyntaxError):
        parse_presort_exp([("c", True), ("c", False)
                           ])  # duplicates are rejected in list form too

    with raises(SyntaxError):
        parse_presort_exp([("b", "desc"), ("c", "asc")
                           ])  # instead of desc and asc, needs to be bool
예제 #11
0
def test_using_indexed_ordered_dict():
    """Check that IndexedOrderedDict can be subscripted in an annotation."""

    def size_of(mapping: IndexedOrderedDict[str, int]):
        return len(mapping)

    single = IndexedOrderedDict({"a": 1})
    assert size_of(single) == 1
예제 #12
0
def test_indexed_orderd_dict():
    """Exercise IndexedOrderedDict: equality, index based access, readonly
    mode, popitem, move_to_end, copying, pickling and ``equals``.

    The assertions on the private ``_need_reindex`` flag depend on the exact
    order of the statements.
    """
    # equal to a plain dict with the same items, but not to an
    # IndexedOrderedDict holding them in a different order
    d = IndexedOrderedDict([("b", 2), ("a", 1)])
    d1 = IndexedOrderedDict([("a", 1), ("b", 2)])
    assert dict(a=1, b=2) == d
    assert d1 != d
    # the flag is set after construction and cleared by an index lookup
    assert d._need_reindex
    assert 1 == d.index_of_key("a")
    assert not d._need_reindex
    assert "a" == d.get_key_by_index(1)
    assert 2 == d.get_value_by_index(0)
    assert ("a", 1) == d.get_item_by_index(1)
    assert not d._need_reindex
    # replacing a value by index leaves the flag cleared
    d.set_value_by_index(1, 10)
    assert not d._need_reindex
    assert ("a", 10) == d.get_item_by_index(1)
    # removing an item sets the flag again
    assert ("b", 2) == d.pop_by_index(0)
    assert d._need_reindex
    assert 1 == len(d)
    assert 0 == d.index_of_key("a")
    assert not d._need_reindex
    # setdefault sets the flag only when it inserts a new key
    assert 10 == d.setdefault("a", 20)
    assert not d._need_reindex
    assert 30 == d.setdefault("b", 30)
    assert d._need_reindex
    d.clear()
    assert d._need_reindex
    # a failed lookup on the emptied dict still clears the flag
    raises(KeyError, lambda: d.index_of_key("a"))
    assert not d._need_reindex
    assert 0 == len(d)

    # readonly: writes raise InvalidOperationError, reads still work
    d = IndexedOrderedDict([("b", 2), ("a", 1)])
    assert not d.readonly
    d.set_readonly()
    assert d.readonly
    raises(InvalidOperationError, lambda: d.__setitem__("b", "3"))
    raises(InvalidOperationError, lambda: d.__delitem__("b"))
    assert 2 == d["b"]

    # popitem
    d = IndexedOrderedDict([("b", 2), ("a", 1), ("c", 3)])
    assert 1 == d.index_of_key("a")
    assert not d._need_reindex
    assert ("b", 2) == d.popitem(last=False)
    assert d._need_reindex
    assert ("c", 3) == d.popitem(last=True)
    assert 0 == d.index_of_key("a")
    assert not d._need_reindex
    d.set_readonly()
    raises(InvalidOperationError, lambda: d.popitem())

    # move_to_end
    d = IndexedOrderedDict([("b", 2), ("a", 1), ("c", 3)])
    d1 = IndexedOrderedDict([("b", 2), ("c", 3), ("a", 1)])
    assert d != d1
    d.move_to_end("a")
    assert d == d1
    d.set_readonly()
    raises(InvalidOperationError, lambda: d.move_to_end("b"))

    # copy and deepcopy
    d = IndexedOrderedDict([("b", 2), ("a", 1), ("c", 3)])
    d.set_readonly()
    d.index_of_key("a")
    assert not d._need_reindex
    d1 = d.copy()
    assert isinstance(d1, IndexedOrderedDict)
    assert not d1._need_reindex
    assert d == d1
    assert 1 == d1.index_of_key("a")
    assert not d1.readonly  # after copy, readonly is set to False
    del d1["a"]  # will not affect the original
    assert 1 == d.index_of_key("a")

    # a shallow copy shares nested values with the original, a deep copy
    # does not
    d = IndexedOrderedDict([("b", [1,
                                   IndexedOrderedDict([("x", [2, 4])]), 3])])
    d.set_readonly()
    d1 = copy(d)
    assert not d1.readonly  # after copy, readonly is set to False
    d1["b"][0] = 10
    assert 10 == d["b"][0]
    d1["b"][1]["x"][0] = 200
    assert 200 == d["b"][1]["x"][0]
    d.index_of_key("b")
    assert not d._need_reindex
    d2 = deepcopy(d)
    assert d2._need_reindex  # after deepcopy, reindex is required
    assert not d2.readonly  # after deepcopy, readonly is set to False
    d2["b"][0] = 20
    assert 10 == d["b"][0]
    d2["b"][1]["x"][0] = 300
    assert 200 == d["b"][1]["x"][0]

    # pickle
    # unlike copy, the round trip keeps the readonly flag
    d = IndexedOrderedDict([("b", 2), ("a", 1), ("c", 3)])
    d.set_readonly()
    d.index_of_key("a")
    assert not d._need_reindex
    d1 = pickle.loads(pickle.dumps(d))
    assert isinstance(d1, IndexedOrderedDict)
    assert not d1._need_reindex
    assert d == d1
    assert 1 == d1.index_of_key("a")
    assert d1.readonly

    # equals
    # per the assertions below, True as the second argument makes the item
    # order matter, False ignores it; item lists and plain dicts are accepted
    d = IndexedOrderedDict([("b", 2), ("a", 1), ("c", 3)])
    d.set_readonly()
    d1 = IndexedOrderedDict([("b", 2), ("c", 3), ("a", 1)])
    d2 = [("b", 2), ("a", 1), ("c", 3)]
    d3 = [("b", 2), ("c", 3), ("a", 1)]
    d4 = dict([("b", 2), ("c", 3), ("a", 1)])
    assert not d.equals(d1, True)
    assert d.equals(d1, False)
    assert d.equals(d2, True)
    assert d.equals(d2, False)
    assert not d.equals(d3, True)
    assert d.equals(d3, False)
    assert not d.equals(d4, True)
    assert d.equals(d4, False)