Ejemplo n.º 1
0
 def create_question(
     self,
     category: non_null(non_blank(str)),
     question: non_null(non_blank(str)),
     q_type: non_null(non_blank(str)) = 'range'
 ) -> Union[BooleanMarkQuestion, RangeMarkQuestion]:
     """Create a mark question through the service selected by ``q_type``.

     Args:
         category: forwarded unchanged to the chosen service's ``create``.
         question: forwarded unchanged to the chosen service's ``create``.
         q_type: ``'range'`` selects ``RangeMarkQuestionService``; every
             other value selects ``BooleanMarkQuestionService``.

     Returns:
         Whatever the chosen service's ``create`` call returns.
     """
     # Only the exact string 'range' picks the range service; anything else
     # falls through to the boolean one.
     service = (RangeMarkQuestionService
                if q_type == 'range' else BooleanMarkQuestionService)
     return service.create(category, question)
Ejemplo n.º 2
0
 def create(cls, employee_id: non_negative(non_null(int))) -> model:
     """Fetch one feedback row for an employee; report logic is pending.

     Args:
         employee_id: value matched against the ``user_id`` field of
             ``Feedback``.

     Returns:
         Currently always ``None``: the fetched feedback is not used yet
         (see the TODO below).
     """
     related_fields = ('user', 'personal_satisfaction', 'team_satisfaction',
                       'company_satisfaction')
     by_descending_id = (
         Feedback.objects.select_related(*related_fields).order_by('-id'))
     # Descending id order means ``first()`` yields the highest-id match.
     feedback = by_descending_id.filter(user_id=employee_id).first()
     # TODO: add report generation logic
     return
def foo(
    a: non_blank(),
    b: non_null(),
    c: non_empty(),
    d: no_whitespaces(),
    e: non_negative(),
    f: strongly_typed(List),
):
    """Return the six arguments unchanged, packed in order into a tuple."""
    values = (a, b, c, d, e, f)
    return values
Ejemplo n.º 4
0
 def __init__(
     self,
     name: non_blank(str),
     description: non_blank(str),
     dtype: non_blank(DataType) = None,
     from_column: non_blank(str) = None,
     transformation: non_null(TransformComponent) = None,
 ) -> None:
     """Store the constructor arguments on the instance.

     Args:
         name: stored as ``self.name``.
         description: stored as ``self.description``.
         dtype: stored as ``self.dtype``; defaults to ``None``.
         from_column: stored as ``self.from_column``; defaults to ``None``.
         transformation: stored as ``self.transformation``; defaults to
             ``None``.
     """
     # Assignment order is kept (name, description, transformation, dtype,
     # from_column) in case any of these attributes is backed by a setter.
     self.name, self.description = name, description
     self.transformation = transformation
     self.dtype, self.from_column = dtype, from_column
Ejemplo n.º 5
0
 def create(
     cls, name: non_null(non_blank(str)), surname: non_null(non_blank(str)),
     position: non_null(non_blank(str)), level: non_null(non_blank(str)),
     email: non_null(non_blank(str)), pay: non_negative(non_null(int))
 ) -> User:
     """Validate the e-mail and create a record through ``cls.model``.

     Args:
         name: stored in the ``name`` field.
         surname: stored in the ``surname`` field.
         position: stored in the ``position`` field.
         level: stored in the ``level`` field.
         email: passed to ``validate_email`` first, then stored in the
             ``email`` field.
         pay: stored in the ``pay`` field.

     Returns:
         The result of ``cls.model.objects.create``.
     """
     # Runs before anything is created; presumably raises on a malformed
     # address -- confirm against the validate_email in use.
     validate_email(email)
     fields = dict(
         name=name,
         surname=surname,
         position=position,
         hire_date=date.today(),  # hire date is always the creation day
         level=level,
         email=email,
         pay=pay,
     )
     return cls.model.objects.create(**fields)
Ejemplo n.º 6
0
def pivot(
    dataframe: DataFrame,
    group_by_columns: non_blank(List[str]),
    pivot_column: non_blank(str),
    agg_column: non_blank(str),
    aggregation: non_null(Callable),
    mock_value: non_null(object) = None,
    mock_type: non_null(object) = None,
    with_forward_fill: non_null(bool) = False,
):
    """Pivot a dataframe, optionally forward filling the pivoted columns.

    Args:
        dataframe: dataframe to be pivoted.
        group_by_columns: names of the columns to group by. When forward fill
            is enabled, the last name is used for ordering and the preceding
            ones for partitioning.
        pivot_column: column whose values become the new columns.
        agg_column: column to be aggregated for each pivoted category.
        aggregation: Spark aggregation function, called with the name of the
            aggregated column, e.g. ``functions.first``. See the Spark docs
            for the available functions:
            https://spark.apache.org/docs/2.3.1/api/python/_modules/pyspark/sql/functions.html
        mock_value: value used to tell true nulls that come from the data
            apart from the empty values created by the pivot transformation.
        mock_type: data type of ``mock_value`` (compatible with Spark).
            Required whenever ``mock_value`` is given.
        with_forward_fill: whether to forward fill null values after the
            pivot operation.

    Returns:
        The pivoted dataframe.

    Raises:
        AttributeError: if ``mock_value`` is given without ``mock_type``.

    Example:
        >>> dataframe.orderBy("ts", "id", "amenity").show()
        +---+---+-------+-----+
        | id| ts|amenity|  has|
        +---+---+-------+-----+
        |  1|  1| fridge|false|
        |  1|  1|   oven| true|
        |  1|  1|   pool|false|
        |  2|  2|balcony|false|
        |  1|  3|balcony| null|
        |  1|  4|   oven| null|
        |  1|  4|   pool| true|
        |  1|  5|balcony| true|
        +---+---+-------+-----+
        >>> pivoted = pivot(dataframe, ["id", "ts"], "amenity", "has", functions.first)
        >>> pivoted.orderBy("ts", "id").show()
        +---+---+-------+------+----+-----+
        | id| ts|balcony|fridge|oven| pool|
        +---+---+-------+------+----+-----+
        |  1|  1|   null| false|true|false|
        |  2|  2|  false|  null|null| null|
        |  1|  3|   null|  null|null| null|
        |  1|  4|   null|  null|null| true|
        |  1|  5|   true|  null|null| null|
        +---+---+-------+------+----+-----+

        Sometimes, however, you want to keep the last value a feature
        assumed in previous modifications. In this example, amenity "oven"
        for id=1 was set to null and "pool" was set to true at ts=4. All other
        amenities should keep the state they had at that ts. To achieve that,
        we use a technique called forward fill:

        >>> pivoted = pivot(
        ...     dataframe,
        ...     ["id", "ts"],
        ...     "amenity",
        ...     "has",
        ...     functions.first,
        ...     with_forward_fill=True
        ... )
        >>> pivoted.orderBy("ts", "id").show()
        +---+---+-------+------+----+-----+
        | id| ts|balcony|fridge|oven| pool|
        +---+---+-------+------+----+-----+
        |  1|  1|   null| false|true|false|
        |  2|  2|  false|  null|null| null|
        |  1|  3|   null| false|true|false|
        |  1|  4|   null| false|true| true|
        |  1|  5|   true| false|true| true|
        +---+---+-------+------+----+-----+

        Great! Now every amenity that was not changed kept its state. BUT the
        forced change to null for amenity "oven" on id=1 at ts=4 was ignored
        by the forward fill. If the user wants to respect this change, they
        must provide a mock value and type to be used as a signal for "true
        nulls". In other words, we want to forward fill only the nulls that
        were created by the pivot transformation.

        In this example, amenities only assume boolean values, and a boolean
        has no spare value to use as a mock: it is either true or false. So
        users can give a mock value of another type (one the column can be
        cast to). Check this out:

        >>> pivoted = pivot(
        ...     dataframe,
        ...     ["id", "ts"],
        ...     "amenity",
        ...     "has",
        ...     functions.first,
        ...     with_forward_fill=True,
        ...     mock_value=-1,
        ...     mock_type="int"
        ... )
        >>> pivoted.orderBy("ts", "id").show()
        +---+---+-------+------+----+-----+
        | id| ts|balcony|fridge|oven| pool|
        +---+---+-------+------+----+-----+
        |  1|  1|   null| false|true|false|
        |  2|  2|  false|  null|null| null|
        |  1|  3|   null| false|true|false|
        |  1|  4|   null| false|null| true|
        |  1|  5|   true| false|null| true|
        +---+---+-------+------+----+-----+

        During the transformation, this method casts the agg_column to the
        mock_type data type and fills all "true nulls" with the mock_value.
        After pivot and forward fill are applied, all new pivoted columns are
        cast back to the original type with every mock value replaced by null.
    """
    use_mock = mock_value is not None
    if use_mock and mock_type is None:
        raise AttributeError(
            "When proving a mock value, users must inform the data type,"
            " which should be supported by Spark.")

    original_type = None
    if use_mock:
        # Remember the source type so pivoted columns can be cast back later.
        original_type = dict(dataframe.dtypes).get(agg_column)
        as_mock_type = functions.col(agg_column).cast(mock_type)
        dataframe = dataframe.withColumn(agg_column, as_mock_type)
        # Nulls already present in the data are tagged with the mock value so
        # they can be told apart from the nulls the pivot creates.
        dataframe = dataframe.fillna({agg_column: mock_value})

    grouped = dataframe.groupBy(*group_by_columns)
    pivoted = grouped.pivot(pivot_column).agg(aggregation(agg_column))

    # Everything that is not a grouping column was produced by the pivot.
    pivoted_names = [
        name for name in pivoted.columns if name not in group_by_columns
    ]

    if with_forward_fill:
        for name in pivoted_names:
            pivoted = forward_fill(
                dataframe=pivoted,
                partition_by=group_by_columns[:-1],
                order_by=group_by_columns[-1],
                fill_column=name,
            )

    if use_mock:
        for name in pivoted_names:
            current = functions.col(name)
            # `when` without `otherwise` gives null where the condition does
            # not hold, which turns the mock values back into nulls.
            restored = functions.when(current != mock_value, current)
            pivoted = pivoted.withColumn(name, restored.cast(original_type))
    return pivoted
Ejemplo n.º 7
0
 def create(cls, category: non_null(non_blank(str)),
            question: non_null(non_blank(str))):
     """Create a record through ``cls.model`` for the given question.

     Args:
         category: stored in the ``category`` field.
         question: stored in the ``question_string`` field.

     Returns:
         The result of ``cls.model.objects.create``.
     """
     manager = cls.model.objects
     return manager.create(category=category, question_string=question)
Ejemplo n.º 8
0
 def guinea_pig(front: str, back: non_null(str)):
     """Join ``front`` and ``back`` with a hyphen.

     Args:
         front: leading part; when falsy (``None`` or empty) nothing is
             joined.
         back: trailing part appended after the hyphen.

     Returns:
         ``front + '-' + back``, or ``None`` when ``front`` is falsy.
     """
     if not front:
         return None
     return front + '-' + back