コード例 #1
0
ファイル: services.py プロジェクト: Korulag/ipz
 def create_question(
     self,
     category: non_null(non_blank(str)),
     question: non_null(non_blank(str)),
     q_type: non_null(non_blank(str)) = 'range'
 ) -> Union[BooleanMarkQuestion, RangeMarkQuestion]:
     """Create a mark question of the requested type.

     Dispatches to the range-question service when ``q_type`` is
     ``'range'`` (the default), otherwise to the boolean-question service.
     """
     service = (
         RangeMarkQuestionService
         if q_type == 'range'
         else BooleanMarkQuestionService
     )
     return service.create(category, question)
コード例 #2
0
 def __init__(
         self,
         h3_resolutions: non_blank(List[int]),
         lat_column: non_blank(str),
         lng_column: non_blank(str),
 ):
     """Store the configuration for the H3-based transform.

     Args:
         h3_resolutions: list of resolution levels (presumably H3 cell
             resolutions — verify against the transform that consumes them).
         lat_column: name of the latitude column.
         lng_column: name of the longitude column.
     """
     super().__init__()
     self.lat_column = lat_column
     self.lng_column = lng_column
     self.h3_resolutions = h3_resolutions
     # Lazily populated elsewhere; nothing is stacked at construction time.
     self.stack_transform = None
コード例 #3
0
ファイル: services.py プロジェクト: Korulag/ipz
 def create(
     cls, name: non_null(non_blank(str)), surname: non_null(non_blank(str)),
     position: non_null(non_blank(str)), level: non_null(non_blank(str)),
     email: non_null(non_blank(str)), pay: non_negative(non_null(int))
 ) -> User:
     """Validate the email, then persist and return a new User.

     The hire date is always set to today; all other fields come straight
     from the arguments.
     """
     validate_email(email)
     return cls.model.objects.create(
         name=name,
         surname=surname,
         position=position,
         hire_date=date.today(),
         level=level,
         email=email,
         pay=pay,
     )
コード例 #4
0
ファイル: feature.py プロジェクト: zuston/butterfree
 def __init__(
     self,
     name: non_blank(str),
     description: non_blank(str),
     dtype: non_blank(DataType) = None,
     from_column: non_blank(str) = None,
     transformation: non_null(TransformComponent) = None,
 ) -> None:
     """Record the feature's metadata.

     Args:
         name: feature name.
         description: human-readable description.
         dtype: optional declared data type.
         from_column: optional source column to derive the feature from.
         transformation: optional transform component applied to the feature.
     """
     self.name = name
     self.description = description
     self.dtype = dtype
     self.from_column = from_column
     self.transformation = transformation
コード例 #5
0
def foo(
    a: non_blank(),
    b: non_null(),
    c: non_empty(),
    d: no_whitespaces(),
    e: non_negative(),
    f: strongly_typed(List),
):
    """Return all six validated arguments unchanged, as a tuple."""
    return (a, b, c, d, e, f)
コード例 #6
0
ファイル: services.py プロジェクト: Korulag/ipz
 def create(cls, category: non_null(non_blank(str)),
            question: non_null(non_blank(str))):
     """Persist and return a new question row under the given category."""
     return cls.model.objects.create(
         category=category,
         question_string=question,
     )
コード例 #7
0
def forward_fill(
    dataframe: DataFrame,
    partition_by: non_blank(Union[str, List[str]]),
    order_by: non_blank(Union[str, List[str]]),
    fill_column: non_blank(str),
    filled_column: non_blank(str) = None,
):
    """Applies a forward fill to a single column.

    Filling null values with the last known non-null value, leaving leading nulls alone.

    Attributes:
        dataframe: dataframe to be transformed.
        partition_by: list of columns' names to be used as partition for the operation.
        order_by: list of columns' names to be used when sorting column values.
        fill_column: column to be forward filled.
        filled_column: new column name. Optional. When none, operation will be inplace.

    Example:
        >>> dataframe.orderBy("ts", "sensor_type", "location").show()
        +-----------+-------------------+--------+-----------+
        |sensor_type|                 ts|location|temperature|
        +-----------+-------------------+--------+-----------+
        |          1|2017-09-09 12:00:00|   shade|   18.83018|
        |          1|2017-09-09 12:00:00|     sun|       null|
        |          2|2017-09-09 12:00:00|   shade|   18.61258|
        |          2|2017-09-09 12:00:00|     sun|    25.4986|
        |          1|2017-09-09 13:00:00|   shade|   18.78458|
        |          1|2017-09-09 13:00:00|     sun|   25.68457|
        |          2|2017-09-09 13:00:00|   shade|       null|
        |          2|2017-09-09 13:00:00|     sun|       null|
        |          1|2017-09-09 14:00:00|   shade|   17.98115|
        |          1|2017-09-09 14:00:00|     sun|   24.15754|
        |          2|2017-09-09 14:00:00|   shade|   18.61258|
        |          2|2017-09-09 14:00:00|     sun|       null|
        +-----------+-------------------+--------+-----------+
        >>> filled_df = forward_fill(
        ...     dataframe,
        ...     ["sensor_type", "location"],
        ...     "ts",
        ...     "temperature",
        ...     "temperature_filled"
        ... )
        >>> filled_df.orderBy("ts", "sensor_type", "location").show()
        +-----------+-------------------+--------+-----------+------------------+
        |sensor_type|                 ts|location|temperature|temperature_filled|
        +-----------+-------------------+--------+-----------+------------------+
        |          1|2017-09-09 12:00:00|   shade|   18.83018|          18.83018|
        |          1|2017-09-09 12:00:00|     sun|       null|              null|
        |          2|2017-09-09 12:00:00|   shade|   18.61258|          18.61258|
        |          2|2017-09-09 12:00:00|     sun|    25.4986|           25.4986|
        |          1|2017-09-09 13:00:00|   shade|   18.78458|          18.78458|
        |          1|2017-09-09 13:00:00|     sun|   25.68457|          25.68457|
        |          2|2017-09-09 13:00:00|   shade|       null|          18.61258|
        |          2|2017-09-09 13:00:00|     sun|       null|           25.4986|
        |          1|2017-09-09 14:00:00|   shade|   17.98115|          17.98115|
        |          1|2017-09-09 14:00:00|     sun|   24.15754|          24.15754|
        |          2|2017-09-09 14:00:00|   shade|   18.61258|          18.61258|
        |          2|2017-09-09 14:00:00|     sun|       null|           25.4986|
        +-----------+-------------------+--------+-----------+------------------+
        >>> # inplace forward fill
        >>> filled_df = forward_fill(
        ...     dataframe,
        ...     ["sensor_type", "location"],
        ...     "ts",
        ...     "temperature"
        ... )
        >>> filled_df.orderBy("ts", "sensor_type", "location").show()
        +-----------+-------------------+--------+-----------+
        |sensor_type|                 ts|location|temperature|
        +-----------+-------------------+--------+-----------+
        |          1|2017-09-09 12:00:00|   shade|   18.83018|
        |          1|2017-09-09 12:00:00|     sun|       null|
        |          2|2017-09-09 12:00:00|   shade|   18.61258|
        |          2|2017-09-09 12:00:00|     sun|    25.4986|
        |          1|2017-09-09 13:00:00|   shade|   18.78458|
        |          1|2017-09-09 13:00:00|     sun|   25.68457|
        |          2|2017-09-09 13:00:00|   shade|   18.61258|
        |          2|2017-09-09 13:00:00|     sun|    25.4986|
        |          1|2017-09-09 14:00:00|   shade|   17.98115|
        |          1|2017-09-09 14:00:00|     sun|   24.15754|
        |          2|2017-09-09 14:00:00|   shade|   18.61258|
        |          2|2017-09-09 14:00:00|     sun|    25.4986|
        +-----------+-------------------+--------+-----------+
    """
    # Window spanning every row from the start of the partition up to the
    # current row, so last(..., ignorenulls=True) yields the most recent
    # non-null value. Window.unboundedPreceding is Spark's documented
    # sentinel for this; it replaces the previous -sys.maxsize magic value
    # with the same semantics and clearer intent.
    window = (
        Window.partitionBy(partition_by)
        .orderBy(order_by)
        .rowsBetween(Window.unboundedPreceding, Window.currentRow)
    )

    # When no target column name is given, overwrite fill_column in place.
    return dataframe.withColumn(
        filled_column or fill_column,
        functions.last(dataframe[fill_column], ignorenulls=True).over(window),
    )
 def positional_args_validations_method(self, a: non_blank(str), b, *,
                                        c: dict, d):
     """Do nothing; exercises a validated positional plus plain params
     on an instance method (presumably a validator test subject)."""
コード例 #9
0
 def __init__(
     self, expression: non_blank(str),
 ):
     """Store the expression string after running the parent initializer."""
     super().__init__()
     self.expression = expression
コード例 #10
0
 def guinea_pig(s: no_whitespaces(non_blank(str)) = default_value):
     """Return the argument unchanged; the annotation carries the
     validation under test."""
     return s
コード例 #11
0
def pivot(
    dataframe: DataFrame,
    group_by_columns: non_blank(List[str]),
    pivot_column: non_blank(str),
    agg_column: non_blank(str),
    aggregation: non_null(Callable),
    mock_value: non_null(object) = None,
    mock_type: non_null(object) = None,
    with_forward_fill: non_null(bool) = False,
):
    """Defines a pivot transformation.

    Attributes:
        dataframe: dataframe to be pivoted.
        group_by_columns: list of columns' names to be grouped.
        pivot_column: column to be pivoted.
        agg_column: column to be aggregated by pivoted category.
        aggregation: desired spark aggregation function to be performed.
            An example: spark_agg(col_name). See docs for all spark_agg:
            https://spark.apache.org/docs/2.3.1/api/python/_modules/pyspark/sql/functions.html
        mock_value: value used to make a difference between true nulls resulting from
            the aggregation and empty values from the pivot transformation.
        mock_type: mock_value data type (compatible with spark).
        with_forward_fill: applies a forward fill to null values after the pivot
            operation.

    Example:
        >>> dataframe.orderBy("ts", "id", "amenity").show()
        +---+---+-------+-----+
        | id| ts|amenity|  has|
        +---+---+-------+-----+
        |  1|  1| fridge|false|
        |  1|  1|   oven| true|
        |  1|  1|   pool|false|
        |  2|  2|balcony|false|
        |  1|  3|balcony| null|
        |  1|  4|   oven| null|
        |  1|  4|   pool| true|
        |  1|  5|balcony| true|
        +---+---+-------+-----+
        >>> pivoted = pivot(dataframe, ["id", "ts"], "amenity", "has", functions.first)
        >>> pivoted.orderBy("ts", "id").show()
        +---+---+-------+------+----+-----+
        | id| ts|balcony|fridge|oven| pool|
        +---+---+-------+------+----+-----+
        |  1|  1|   null| false|true|false|
        |  2|  2|  false|  null|null| null|
        |  1|  3|   null|  null|null| null|
        |  1|  4|   null|  null|null| true|
        |  1|  5|   true|  null|null| null|
        +---+---+-------+------+----+-----+

        But, sometimes, you would like to keep the last values that some feature has
        assumed from previous modifications. In this example, amenity "oven" for the
        id=1 was set to null and "pool" was set to true at ts=4. All other amenities
        should then be kept to their actual state at that ts. To do that, we will use
        a technique called forward fill:

        >>> pivoted = pivot(
        ...     dataframe,
        ...     ["id", "ts"],
        ...     "amenity",
        ...     "has",
        ...     functions.first,
        ...     with_forward_fill=True
        ...)
        >>> pivoted.orderBy("ts", "id").show()
        +---+---+-------+------+----+-----+
        | id| ts|balcony|fridge|oven| pool|
        +---+---+-------+------+----+-----+
        |  1|  1|   null| false|true|false|
        |  2|  2|  false|  null|null| null|
        |  1|  3|   null| false|true|false|
        |  1|  4|   null| false|true| true|
        |  1|  5|   true| false|true| true|
        +---+---+-------+------+----+-----+

        Great! Now every amenity that didn't have been changed kept it's state. BUT,
        the force change to null for amenity "oven" on id=1 at ts=4 was ignored during
        forward fill. If the user wants to respect this change, it must provide a mock
        value and type to be used as a signal for "true nulls". In other words, we want
        to forward fill only nulls that were created by the pivot transformation.

        In this example, amenities only assume boolean values. So there is no mock
        values for a boolean. It is only true or false. So users can give a mock value
        of another type (for which the column can be cast to). Check this out:

        >>> pivoted = pivot(
        ...     dataframe,
        ...     ["id", "ts"],
        ...     "amenity",
        ...     "has",
        ...     functions.first,
        ...     with_forward_fill=True,
        ...     mock_value=-1,
        ...     mock_type="int"
        ...)
        >>> pivoted.orderBy("ts", "id").show()
        +---+---+-------+------+----+-----+
        | id| ts|balcony|fridge|oven| pool|
        +---+---+-------+------+----+-----+
        |  1|  1|   null| false|true|false|
        |  2|  2|  false|  null|null| null|
        |  1|  3|   null| false|true|false|
        |  1|  4|   null| false|null| true|
        |  1|  5|   true| false|null| true|
        +---+---+-------+------+----+-----+

        During transformation, this method will cast the agg_column to mock_type
        data type and fill all "true nulls" with the mock_value. After pivot and forward
        fill are applied, all new pivoted columns will then return to the original type
        with all mock values replaced by null.
    """
    agg_column_type = None
    if mock_value is not None:
        if mock_type is None:
            # Fixed typo in the user-facing message: "proving" -> "providing".
            raise AttributeError(
                "When providing a mock value, users must inform the data type,"
                " which should be supported by Spark.")
        # Remember the original type so pivoted columns can be cast back,
        # then mark "true nulls" with the mock value before pivoting.
        agg_column_type = dict(dataframe.dtypes).get(agg_column)
        dataframe = dataframe.withColumn(
            agg_column,
            functions.col(agg_column).cast(mock_type)).fillna(
                {agg_column: mock_value})

    pivoted = (dataframe.groupBy(*group_by_columns).pivot(pivot_column).agg(
        aggregation(agg_column)))

    # Columns created by the pivot (everything that isn't a grouping key).
    new_columns = [c for c in pivoted.columns if c not in group_by_columns]

    if with_forward_fill:
        # Partition by all grouping keys except the last, which is used as
        # the ordering column for the fill.
        for c in new_columns:
            pivoted = forward_fill(
                dataframe=pivoted,
                partition_by=group_by_columns[:-1],
                order_by=group_by_columns[-1],
                fill_column=c,
            )

    if mock_value is not None:
        # Turn mock values back into nulls and restore the original dtype;
        # when() without otherwise() yields null for non-matching rows.
        for c in new_columns:
            pivoted = pivoted.withColumn(
                c,
                functions.when(
                    functions.col(c) != mock_value,
                    functions.col(c)).cast(agg_column_type),
            )
    return pivoted
def args_validations_function(a: non_blank(str), b, *, c: non_empty(dict), d):
    """Do nothing; exercises validators on positional and keyword-only
    parameters of a plain function."""
コード例 #13
0
 def __init__(self, func: non_blank(callable), data_type: non_blank(DataType)):
     """Bind the callable and its declared output data type."""
     self.data_type = data_type
     self.func = func
コード例 #14
0
def bar(arg: non_blank(str)):
    """Do nothing; minimal function with one validated argument."""
コード例 #15
0
 def __init__(self, functions: non_blank(List[Function])):
     """Keep the list of functions; the windows list starts out empty."""
     super().__init__()
     # Populated later by window-definition calls (not visible here).
     self._windows = []
     self.functions = functions
 def args_validations_staticmethod(
         a: non_blank(str), b, *, c: non_empty(dict), d):
     """Do nothing; validator coverage for a staticmethod-style signature."""
 def args_validations_method(self, a: non_blank(str), b, *,
                             c: non_empty(dict), d):
     """Do nothing; validator coverage for an instance-method signature."""
 def positional_args_validations_staticmethod(
         a: non_blank(str), b, *, c: dict, d):
     """Do nothing; mixes one validated positional with plain annotations."""
コード例 #19
0
 def __init__(self,
              functions: non_blank(List[Function]),
              filter_expression: str = None):
     """Initialize the aggregated transform.

     Args:
         functions: functions used by the aggregation.
         filter_expression: optional filter string; its exact semantics
             depend on the consumer (not visible in this file).
     """
     # Zero-argument super() — consistent with every other __init__ in
     # this file; the explicit two-argument form was redundant Python 2
     # style with identical behavior.
     super().__init__()
     self.functions = functions
     self.filter_expression = filter_expression
def positional_args_validations_function(a: non_blank(str), b, *, c: dict, d):
    """Do nothing; validator coverage mixing a validated positional with
    bare and plainly-annotated parameters."""