예제 #1
0
    def ageCalculate(self, column, dateFormat, nameColAge):
        """
        This method compute the age of clients based on their born dates.
        :param  column      Name of the column born dates column.
        :param  dateFormat  String format date of the column provided.
        :param  nameColAge  Name of the new column, the new columns is the resulting column of ages.

        """
        # Check if column argument a string datatype:
        self.__assertTypeStr(column, "column")

        # Check if dateFormat argument a string datatype:
        self.__assertTypeStr(dateFormat, "dateFormat")

        # Asserting if column if in dataFrame:
        assert column in self.__df.columns, "Error: Column assigned in column argument does not exist in dataFrame"

        # Output format date
        Format = "yyyy-MM-dd"  # Some SimpleDateFormat string

        exprs = format_number(
            mag(
                months_between(
                    date_format(
                        unix_timestamp(column, dateFormat).cast("timestamp"),
                        Format), current_date()) / 12), 4).alias(nameColAge)

        self.__df = self.__df.withColumn(nameColAge, exprs)

        self.__addTransformation()  # checkpoint in case

        return self
예제 #2
0
    def ageCalculate(self, column, dateFormat, nameColAge):
        """
        This method compute the age of clients based on their born dates.
        :param  column      Name of the column born dates column.
        :param  dateFormat  String format date of the column provided.
        :param  nameColAge  Name of the new column, the new columns is the resulting column of ages.

        """
        # Check if column argument a string datatype:
        self.__assertTypeStr(column, "column")

        # Check if dateFormat argument a string datatype:
        self.__assertTypeStr(dateFormat, "dateFormat")

        # Asserting if column if in dataFrame:
        assert column in self.__df.columns, "Error: Column assigned in column argument does not exist in dataFrame"

        # Output format date
        Format = "yyyy-MM-dd"  # Some SimpleDateFormat string

        exprs = format_number(
            mag(
                months_between(date_format(
                    unix_timestamp(column, dateFormat).cast("timestamp"), Format), current_date()) / 12), 4).alias(
            nameColAge)

        self.__df = self.__df.withColumn(nameColAge, exprs)

        self.__addTransformation()  # checkpoint in case

        return self