Exemple #1
0
    def case(self, *operands) -> SeriesOrScalar:
        """
        Evaluate a CASE-like chain: `then` where `where` holds, else `other`.

        The operands are read as ``(where, then, other)``. When more than
        three operands are given, everything after the first pair is
        evaluated recursively and used as `other`.
        """
        assert operands

        where, then = operands[0], operands[1]

        # Any operands beyond the first triple form a nested case expression.
        other = self.case(*operands[2:]) if len(operands) > 3 else operands[2]

        if is_frame(then):
            return then.where(where, other=other)
        if is_frame(other):
            return other.where(~where, other=then)
        if not is_frame(where):
            # Pure scalar evaluation.
            return then if where else other

        # Only the condition is a frame, both results are scalars.
        # To still be able to use the frame's "where" function, build a
        # temporary frame with the properties of `where` that is filled
        # with the content of `then`.
        filled = where.apply(lambda _: then, meta=(where.name, type(then)))
        return filled.where(where, other=other)
Exemple #2
0
 def not_(
     self,
     df: SeriesOrScalar,
 ) -> SeriesOrScalar:
     """
     Negate `df`, which may be a frame or a plain scalar.

     Frames are cast to the "boolean" dtype and inverted with `~`;
     scalars are negated with Python's `not`.
     """
     if not is_frame(df):
         return not df

     as_boolean = df.astype("boolean")
     return ~as_boolean
Exemple #3
0
    def null(
        self,
        df: SeriesOrScalar,
    ) -> SeriesOrScalar:
        """
        Returns true where `df` is null (where `df` can also be just a scalar).

        For frames the check is done element-wise via `isna()`.
        For scalars, `None` and everything `pd.isna` treats as missing
        count as null; any other scalar (including strings) is not null.
        """
        if is_frame(df):
            return df.isna()

        # np.isnan is deliberately not used here: it raises a TypeError for
        # non-numeric scalars such as strings, and pd.isna already covers
        # NaN for numeric scalars.
        return df is None or pd.isna(df)
Exemple #4
0
    def position(self, search, s, start=None):
        """
        Return the 1-based position of `search` in `s` (0 if `find` reports -1).

        Attention: SQL starts counting at 1, so the optional `start`
        is shifted down by one before it is handed to `find`, and the
        result of `find` is shifted up by one again.
        """
        haystack = s.str if is_frame(s) else s

        # A missing or non-positive start means "search from the beginning".
        offset = 0 if start is None or start <= 0 else start - 1

        return haystack.find(search, offset) + 1
Exemple #5
0
 def not_(
     self,
     df: Union[dd.Series, Any],
 ) -> Union[dd.Series, Any]:
     """
     Negate `df`, which may be a frame or a plain scalar.

     Frames are cast to the "boolean" dtype and inverted with `~`;
     scalars are negated with Python's `not`.
     """
     if not is_frame(df):
         return not df  # pragma: no cover

     as_boolean = df.astype("boolean")
     return ~as_boolean
Exemple #6
0
    def null(
        self,
        df: Union[dd.Series, Any],
    ) -> Union[dd.Series, Any]:
        """
        Returns true where `df` is null (where `df` can also be just a scalar).

        For frames the check is done element-wise via `isna()`.
        For scalars, `None` and everything `pd.isna` treats as missing
        count as null; any other scalar (including strings) is not null.
        """
        if is_frame(df):
            return df.isna()

        # np.isnan is deliberately not used here: it raises a TypeError for
        # non-numeric scalars such as strings, and pd.isna already covers
        # NaN for numeric scalars.
        return df is None or pd.isna(df)
Exemple #7
0
    def true_(
        self,
        df: SeriesOrScalar,
    ) -> SeriesOrScalar:
        """
        Returns true where `df` is true (where `df` can also be just a scalar).
        Returns false on nan.

        For frames, missing values are filled with False.
        For scalars, a missing value (as judged by `pd.isna`, which
        includes `None`) gives False; otherwise the truthiness of `df`
        is returned.
        """
        if is_frame(df):
            return df.fillna(False)

        # pd.isna already handles None and NaN; an additional np.isnan call
        # would only add a TypeError for non-numeric scalars.
        return not pd.isna(df) and bool(df)
Exemple #8
0
    def trim(self, flags, search, s):
        """
        Strip the characters in `search` from `s`.

        `flags` selects the side: "LEADING" strips from the left,
        "TRAILING" from the right, anything else from both sides.
        For frames the string accessor (`s.str`) is used.
        """
        target = s.str if is_frame(s) else s

        if flags == "LEADING":
            method_name = "lstrip"
        elif flags == "TRAILING":
            method_name = "rstrip"
        else:
            method_name = "strip"

        return getattr(target, method_name)(search)
Exemple #9
0
    def false_(
        self,
        df: Union[dd.Series, Any],
    ) -> Union[dd.Series, Any]:
        """
        Returns true where `df` is false (where `df` can also be just a scalar).
        Returns false on nan.

        For frames, missing values are filled with True before the
        result is inverted, so they end up as False.
        For scalars, a missing value (as judged by `pd.isna`, which
        includes `None`) gives False; otherwise the negated truthiness
        of `df` is returned.
        """
        if is_frame(df):
            return ~df.fillna(True)

        # pd.isna already handles None and NaN; an additional np.isnan call
        # would only add a TypeError for non-numeric scalars.
        return not pd.isna(df) and not bool(df)
Exemple #10
0
    def cast(self, operand, rex=None) -> SeriesOrScalar:
        """
        Cast a frame `operand` to the type reported by `rex.getType()`.

        Scalars are returned unchanged. For frames, the SQL type name is
        upper-cased, translated with `sql_to_python_type` and applied with
        `cast_column_to_type`; if that yields None, the original
        operand is returned as-is.
        """
        if not is_frame(operand):
            return operand

        sql_type_name = str(rex.getType())
        python_type = sql_to_python_type(sql_type_name.upper())

        converted = cast_column_to_type(operand, python_type)
        return operand if converted is None else converted
Exemple #11
0
 def case(
     self,
     where: Union[dd.Series, Any],
     then: Union[dd.Series, Any],
     other: Union[dd.Series, Any],
 ) -> Union[dd.Series, Any]:
     """
     Pick `then` wherever `where` holds and `other` everywhere else.

     Each of the three arguments may be a frame or a scalar.
     """
     if is_frame(then):
         return then.where(where, other=other)
     if is_frame(other):
         return other.where(~where, other=then)
     if not is_frame(where):
         # Pure scalar evaluation.
         return then if where else other

     # Only the condition is a frame, both results are scalars.
     # To still be able to use the frame's "where" function, build a
     # temporary frame with the properties of `where` that is filled
     # with the content of `then`.
     filled = where.apply(lambda _: then, meta=(where.name, type(then)))
     return filled.where(where, other=other)
Exemple #12
0
    def substring(self, s, start, length=None):
        """
        Return the part of `s` beginning at the 1-based position `start`.

        Attention: SQL starts counting at 1, so `start` is shifted down by
        one; non-positive values are clamped to the beginning of the string.
        If `length` is given, at most that many characters are returned
        (a length of 0 gives an empty string); otherwise everything up
        to the end of the string is returned.
        `s` may be a frame (sliced via `s.str.slice`) or a plain string.
        """
        if start <= 0:
            start = 0
        else:
            start -= 1

        # Compare against None explicitly: a length of 0 is a valid request
        # for an empty result and must not be treated as "no length given".
        end = length + start if length is not None else None

        if is_frame(s):
            return s.str.slice(start, end)

        # Slicing with an end of None runs to the end of the string.
        return s[start:end]
Exemple #13
0
    def overlay(self, s, replace, start, length=None):
        """
        Replace a section of `s`, beginning at 1-based `start`, by `replace`.

        Attention: SQL starts counting at 1, so `start` is shifted down by
        one; non-positive values are clamped to the beginning of the string.
        When `length` is not given, the length of `replace` is used
        as the size of the replaced section.
        """
        begin = 0 if start <= 0 else start - 1
        span = len(replace) if length is None else length
        stop = span + begin

        if is_frame(s):
            return s.str.slice_replace(begin, stop, replace)

        return s[:begin] + replace + s[stop:]
Exemple #14
0
    def datetime_sub(self, *operands, rex=None):
        """
        Subtract the operands and convert the result to the interval unit
        named in the type reported by `rex.getType()`.

        The type string must start with "INTERVAL"; its second word
        (lower-cased) is taken as the interval unit.
        """
        sql_type = str(rex.getType())
        assert sql_type.startswith("INTERVAL")
        unit = sql_type.split()[1].lower()

        subtract = ReduceOperation(
            operation=operator.sub, unary_operation=lambda x: -x
        )
        difference = subtract(*operands)

        # Everything that is not month-based is expressed in milliseconds.
        if unit not in {"year", "quarter", "month"}:
            return difference.astype("timedelta64[ms]")

        # Special case for month-based units:
        # if the unit is INTERVAL YEAR, Calcite will convert to months.
        if is_frame(difference):
            return difference / np.timedelta64(1, "M")

        # Numpy doesn't allow division by the month time unit
        months = difference.astype("timedelta64[M]")
        # numpy negative timedeltas are off by one vs SQL when cast to month
        if months < 0:
            months = months + 1
        return months
Exemple #15
0
    def regex(
        self,
        test: SeriesOrScalar,
        regex: str,
        escape: str = None,
    ) -> SeriesOrScalar:
        """
        Check whether the string `test` matches the SQL pattern `regex`.

        The pattern is translated character by character into a Python
        regular expression (honouring the optional `escape` character,
        which defaults to a backslash) and is anchored so that it has to
        cover the full string.
        """
        if not escape:
            escape = "\\"

        # SQL's like syntax is not directly a regular expression,
        # so it needs to be translated.
        # SQL knows about the following wildcards:
        # %, ?, [], _, #
        pieces = []
        pending_escape = False
        inside_brackets = False
        for symbol in regex:
            if pending_escape:
                # The previous character was the escape character:
                # emit this one with a leading "\".
                pieces.append("\\" + symbol)
                pending_escape = False
            elif inside_brackets:
                # Character ranges [...] are copied as they are
                inside_brackets = symbol != "]"
                pieces.append(symbol)
            elif symbol in self.replacement_chars:
                # Special meaning in regex but not in SQL:
                # needs additional escaping
                pieces.append("\\" + symbol)
            elif symbol == "[":
                inside_brackets = True
                pieces.append(symbol)
            elif symbol == escape:
                # Emit nothing now; the "\" is added together with
                # the next character.
                pending_escape = True
            elif symbol == "%":
                # An unescaped "%" in SQL is a .*
                pieces.append(".*")
            elif symbol == "_":
                # An unescaped "_" in SQL is a .
                pieces.append(".")
            else:
                pieces.append(symbol)

        # the SQL like always goes over the full string
        pattern = "^" + "".join(pieces) + "$"

        if not is_frame(test):
            return bool(re.match(pattern, test))

        return test.str.match(pattern).astype("boolean")
0
def test_is_frame_for_none():
    """`None` must not be recognized as a frame."""
    outcome = is_frame(None)
    assert not outcome
Exemple #17
0
def test_is_frame_for_number():
    """Plain numbers (int and float) must not be recognized as frames."""
    for number in (3, 3.5):
        assert not is_frame(number)
Exemple #18
0
    def apply(self, *operands):
        """
        Call the stored function matching the kind of the first operand:
        `tensor_f` if it is a frame, `scalar_f` otherwise.
        """
        chosen = self.tensor_f if is_frame(operands[0]) else self.scalar_f
        return chosen(*operands)
Exemple #19
0
def test_is_frame_for_frame():
    """A dask dataframe built from a pandas dataframe must be a frame."""
    pandas_frame = pd.DataFrame({"a": [1]})
    dask_frame = dd.from_pandas(pandas_frame, npartitions=1)
    assert is_frame(dask_frame)