Beispiel #1
0
    def Gis_DataWithLabel(self, labels: Union[str, Sequence]) -> None:
        """
        Setter for the GIS data combined with label features.

        Selects the GIS columns listed in ``self._gis_features`` and the label columns from ``self.input``,
        concatenates both column-wise and stores a ``GisAnalyzerWithClusterLabel`` built from the result.

        The label argument is either one of the entries of ``self.__dtypes``, in which case all columns of that type
        are selected, or a str/list of column names, in which case exactly those columns are kept.

        If fewer than two GIS features are defined, an error is logged and nothing is changed.

        :param labels: indicates the labels for the GisDataWithLabel object
        :type labels:  Union[str, Sequence]
        :return: None
        """
        # The first two GIS features are used below; without them the indexing would fail.
        if len(self._gis_features) < 2:
            log.error(
                "At least two GIS features are required to create the GIS data with labels. "
                "You first need to set them using gis_features")
            return

        first_gis_feature, second_gis_feature = (self._gis_features[0],
                                                 self._gis_features[1])
        gis_data = NameSelector(self._gis_features).fit_transform(self.input)

        # A known data type selects every column of that type, anything else is treated as column name(s).
        label_transformer = (TypeSelector(labels) if labels in self.__dtypes
                             else NameSelector(labels))
        label_data = label_transformer.fit_transform(self.input)

        result = pd.concat([gis_data, label_data], axis=1)
        # NOTE(review): the analyzer is stored in ``_data_grouped`` - confirm that this is the attribute
        # the matching getter reads.
        self._data_grouped = GisAnalyzerWithClusterLabel(
            result, first_gis_feature, second_gis_feature, labels)
Beispiel #2
0
    def String_Data(self):
        """
        Build a Categorical Analyzer from all string columns of the input.

        When ``self.len_string_features`` is falsy, nothing is created and ``None`` is returned.

        :return: a Categorical Analyzer with all string features, or None when there are none
        :rtype: CategoricalAnalyzer
        """
        # Guard clause: no string features means there is nothing to analyze.
        if not self.len_string_features:
            return None

        selector = NameSelector(self.string_features)
        string_frame = selector.fit_transform(self.input)
        log.info(
            f"Created string DataFrame of size {self.len_string_features} with the following columns: {self.string_features}"
        )
        return CategoricalAnalyzer(string_frame)
Beispiel #3
0
    def Numerical_Data(self) -> NumericalAnalyzer:
        """
        Build a Numerical Analyzer from all numerical columns of the input.

        When ``self.len_numerical_features`` is falsy, nothing is created and ``None`` is returned.

        :return: a numerical data analyzer, or None when there are no numerical features
        """
        # Guard clause: no numerical features means there is nothing to analyze.
        if not self.len_numerical_features:
            return None

        selector = NameSelector(self.numerical_features)
        numerical_frame = selector.fit_transform(self.input)
        log.info(
            f"Setting numerical DataFrame of size {self.len_numerical_features} with the following columns: {self.numerical_features}"
        )
        return NumericalAnalyzer(numerical_frame)
Beispiel #4
0
    def Date_By_Data(self, dtype: Union[type, str, Sequence]) -> None:
        """
        Setter for the data by date.

        If date features exist, the date columns and the columns described by ``dtype`` are selected from
        ``self.input``, concatenated column-wise and wrapped in a ``DateByAnalyzer`` which is stored in
        ``self._date_by_data``. If there are no date features, nothing is changed.

        The provided argument can either be one of the entries of ``self.__dtypes``, in which case all columns of
        that type are selected, or a str/list of columns which are found in the DataFrame.

        :param dtype: the data for which data shall be analyzed along the date column
        :type dtype: one of the classes defined data types, a column name or a list of column names
        :return: None
        """
        # Without date features there is no date axis to analyze along.
        if not self.len_date_features:
            return

        date_data = NameSelector(self.date_features).fit_transform(self.input)

        # A known data type selects every column of that type, anything else is treated as column name(s).
        x_transformer = (TypeSelector(dtype) if dtype in self.__dtypes else
                         NameSelector(dtype))
        x_data = x_transformer.fit_transform(self.input)

        result = pd.concat([date_data, x_data], axis=1)
        log.info(
            f"Created Date DataFrame of size {self.len_date_features} with the following columns: {self.date_features}"
        )
        self._date_by_data = DateByAnalyzer(result, list(x_data.columns),
                                            self.date_features)
Beispiel #5
0
    def Boolean_Data(self) -> CategoricalAnalyzer:
        """
        Build a Categorical Analyzer from all boolean columns of the input.

        When ``self.len_bool_features`` is falsy, nothing is created and ``None`` is returned.

        :return: A Categorical Analyzer Object containing boolean values, or None when there are none
        :rtype: CategoricalAnalyzer
        """
        # Guard clause: no boolean features means there is nothing to analyze.
        if not self.len_bool_features:
            return None

        selector = NameSelector(self.bool_features)
        bool_frame = selector.fit_transform(self.input)
        log.info(
            f"Created boolean DataFrame of size {self.len_bool_features} with the following columns: {self.bool_features}"
        )
        return CategoricalAnalyzer(bool_frame)
Beispiel #6
0
    def Categorical_Data(self) -> CategoricalAnalyzer:
        """
        Build a Categorical Analyzer from all categorical columns of the input.

        When ``self.len_categorical_features`` is falsy, nothing is created and ``None`` is returned.

        :return: A Categorical Data Analyzer, or None when there are no categorical features
        :rtype: CategoricalAnalyzer
        """
        # Guard clause: no categorical features means there is nothing to analyze.
        if not self.len_categorical_features:
            return None

        selector = NameSelector(self.categorical_features)
        categorical_frame = selector.fit_transform(self.input)
        log.info(
            f"Created categorical DataFrame of size {self.len_categorical_features} with the following columns: {self.categorical_features}"
        )
        return CategoricalAnalyzer(categorical_frame)
Beispiel #7
0
    def GIS_Data(self) -> GisAnalyzer:
        """
        Build a GIS Analyzer from the first and second entry of the GIS feature list.

        The first entry of ``self._gis_features`` is passed as the latitude column and the second entry as the
        longitude column. If fewer than two GIS features are defined, an error is logged and ``None`` is returned.

        :return: a GIS Analyzer for the selected columns, or None when fewer than two GIS features are defined
        :rtype: GisAnalyzer
        """
        # Both a latitude and a longitude column are needed to build the analyzer.
        if self.len_gis_features < 2:
            log.error(
                "No features have been defined as GIS data. You first need to set them using gis_features"
            )
            return None

        data = NameSelector(self._gis_features).fit_transform(self.input)
        latitude, longitude = self._gis_features[0], self._gis_features[1]
        # The trailing space keeps the two sentences separated once the literals are joined.
        log.info(
            f"Setting the GIS DataFrame of size {len(data)} with {latitude} as the latitude column and {longitude} as the longitude column. "
            f"Note that you can reset them if necessary")
        return GisAnalyzer(data, latitude, longitude)
Beispiel #8
0
    def Data_Mixed_Types(self, dtype: Union[Sequence, str, type]):
        """
        Setter property for the data mixed types. It initializes a Mixed Data Analyser object and stores it in
        ``self._data_grouped``. There are several ways to set it:

            1) as a tuple: the first two entries describe the first and the second feature dimension. For each entry
            it is checked whether it is a known data type. If yes, all columns of this data type are selected.
            If not, the entry is treated as column name(s). Example to receive a Mixed Data Type for all boolean
            columns on the one side and the columns "income" and "revenue" on the other side:

            > # Assume an ``explorer`` object named "explorer"
            >  explorer.Data_Mixed_Types = explorer.dtype_bool, ["income", "revenue"]

            Or to get all boolean and numeric types:

            > # Assume an ``explorer`` object named "explorer"
            >  explorer.Data_Mixed_Types = explorer.dtype_bool, explorer.dtype_num

            2) as a string or a single data type: the first feature dimension defaults to the numerical columns.
            The argument describes the second dimension: all columns of a data type if it is a known data type,
            otherwise a single column name. For example, to get all numerical features by the boolean column "Male":

            > # Assume an ``explorer`` object named "explorer"
            >  explorer.Data_Mixed_Types = "Male"

            Or

            > # Assume an ``explorer`` object named "explorer"
            >  explorer.Data_Mixed_Types = bool

            3) as a list: the first feature dimension defaults to the numerical columns. The list contains the column
            names to be kept as second feature dimension. Example to receive a Mixed Data Type for all numerical
            columns on the one side and the columns "gender" and "married" on the other side:

            > # Assume an ``explorer`` object named "explorer"
            >  explorer.Data_Mixed_Types = ["gender", "married"]

        Any other argument is rejected: an error is logged and ``self._data_grouped`` is left unchanged.

        :param dtype: As described in the description, different ways exist to set this property
        :type dtype: Union[Sequence, str, type]
        :return: None
        """
        if isinstance(dtype, tuple):
            # first case: a tuple, whose first two entries describe the two feature dimensions
            log.info(
                f'Received the following two inputs "{dtype[0]}" and "{dtype[1]}". Deriving the data frame to be generated out of it. '
            )
            subset_1, subset_2 = dtype[0], dtype[1]
            selector_1 = (TypeSelector(subset_1) if subset_1 in self.__dtypes
                          else NameSelector(subset_1))
            df_1 = selector_1.fit_transform(self.input)
            selector_2 = (TypeSelector(subset_2) if subset_2 in self.__dtypes
                          else NameSelector(subset_2))
            df_2 = selector_2.fit_transform(self.input)
            first_columns = list(df_1.columns)

        elif isinstance(dtype, list):
            # second case: a list of column names; the first dimension is the numerical features
            log.info(
                f"Received a single input containing a list of features. The first data type dimension is automatically set to numerical. "
                f'The second is derived from the input "{dtype}"')
            df_1 = TypeSelector(self.dtype_num).fit_transform(self.input)
            df_2 = NameSelector(dtype).fit_transform(self.input)
            first_columns = self.numerical_features

        elif isinstance(dtype, str) or dtype in self.__dtypes:
            # third case: a column name or a single known data type (e.g. ``bool``);
            # the first dimension is the numerical features
            log.info(
                f"Received a single input. The first data type dimension is automatically set to numerical. "
                f'The second is derived from the input "{dtype}"')
            group = (TypeSelector(dtype)
                     if dtype in self.__dtypes else NameSelector(dtype))
            df_1 = TypeSelector(self.dtype_num).fit_transform(self.input)
            df_2 = group.fit_transform(self.input)
            first_columns = self.numerical_features

        else:
            # Unsupported input: without this branch the summary log below would report a stale analyzer
            # (or fail if none has been set yet).
            log.error(
                f'Cannot derive mixed data types from the input "{dtype}". '
                f"Provide a tuple, a list of column names, a column name or a known data type.")
            return

        result = pd.concat([df_1, df_2], axis=1)
        self._data_grouped = MixedDataAnalyser(result, first_columns,
                                               list(df_2.columns))

        log.info(
            f"Setting Grouped DataFrame of size {len(self._data_grouped.data)} where the first feature dimension contains columns"
            f": {list(self._data_grouped.nums)} and the second dimension {list(self._data_grouped.group)}"
        )