Esempio n. 1
0
    def __init__(self,
                 dim: str = 'col',
                 digits: int = 5,
                 seed: int = 90210,
                 size: (tuple, int) = (30, 30)):
        """
        Constructor / Initiate the class

        Parameters
        ----------
        dim     : str
                  indicate whether one wants to test for normality along the columns 'col' or rows
                  'row', default is 'col'
        digits  : int
                  number of decimal places to round down
        seed    : int
                  User can set a seed parameter to generate deterministic, non-random output
        size    : tuple of integers, integer
                  dimensions or range of numbers in generated df, default is (30, 30)

        Raises
        ------
        BaseClassCannotBeInstantiated
                  if Generator itself, rather than a subclass, is being instantiated
        ValueError
                  if any dimension in 'size' is negative

        """
        if type(self) is Generator:
            raise BaseClassCannotBeInstantiated(
                "base class '{}' cannot be instantiated".format(
                    self.__class__.__name__))

        # 'size' may be a bare integer, which cannot be iterated. Wrap scalars so the
        # sign check below handles both documented forms the same way.
        try:
            extents = tuple(size)
        except TypeError:
            extents = (size,)

        if any(extent < 0 for extent in extents):
            raise ValueError("dimensions in size cannot be negative")

        Assertor.evaluate_data_type({dim: str, digits: int, seed: int})
        self.dim = dim
        self.digits = digits
        self.seed = seed
        self.size = size
Esempio n. 2
0
    def normality_report(self,
                         file_dir: str = "reports/txt",
                         dim: str = 'col',
                         digits: int = 5,
                         ds: bool = False):
        """
        Method that writes a text report containing the results of the Normality tests

        The report is saved in 'file_dir' as "NormalityReport_<timestamp>.txt", where the
        timestamp is the current local time in ISO format with ':' and '.' replaced by '-'.

        Parameters
        ----------
        file_dir    : str
                      directory to save the file, created if it does not exist
        dim         : str
                      indicate whether one wants to test for normality along the columns
                      'col' or rows 'row', default is 'col'
        digits      : int
                      number of decimal places to round down
        ds          : bool
                      indicating if one wants additional table with descriptive
                      statistics of the data

        Raises
        ------
        OSError
                      if the output directory cannot be created

        """
        # The arguments are used as dictionary keys, so equal keys collapse into one entry.
        # 1 == True and 0 == False hash alike, hence 'digits' and 'ds' are validated in
        # separate calls; otherwise digits=1 with ds=True would be checked against bool.
        Assertor.evaluate_data_type({file_dir: str, dim: str})
        Assertor.evaluate_data_type({digits: int})
        Assertor.evaluate_data_type({ds: bool})

        try:
            # exist_ok avoids the race between checking for the directory and creating it
            os.makedirs(file_dir, exist_ok=True)
        except (OSError, ValueError) as e:
            raise OSError("creation of dir " + file_dir + " failed with: " +
                          str(e)) from e

        local_time = datetime.datetime.now().isoformat().replace(
            ":", "-").replace(".", "-")
        report_path = os.path.join(file_dir,
                                   "NormalityReport_" + local_time + ".txt")

        # Build the full content before touching the file system, so that a failure in
        # the computations does not leave an empty or half-written report behind.
        summary, mn, un = self.result_summary(dim=dim, digits=digits)
        title = Figlet(font="slant").renderText("normb")

        sections = [
            title,
            'Version: ' + __version__ + '\n\n',
            summary + '\n',
            mn + '\n',
            un + '\n',
        ]
        if ds:
            sections.append(self.descriptive_statistics(dim, digits))

        # The context manager guarantees the handle is closed even if a write fails
        with open(report_path, "w") as report:
            report.writelines(sections)
Esempio n. 3
0
    def uniform_data_frame(self, limits: tuple = (-1, 1), excel: bool = False):
        """
        Build a df of uniformly distributed floating point values drawn between the two
        bounds in 'limits', with the dimensions stored in the 'size' attribute.

        NumPy's global random state is re-seeded with the 'seed' attribute on every call.

        Parameters
        ----------
        limits  : tuple
                  (lower, upper) limit of values to be generated in df
        excel   : bool
                  if True the generated df is also passed on to to_excel()

        Returns
        -------
        Out     : pandas.DataFrame
                  n x 1 (if size is integer) or n x m (if size is tuple) dimensional df

        """
        np.random.seed(self.seed)
        Assertor.evaluate_data_type({limits: tuple})

        low, high = limits
        samples = np.random.uniform(low, high, self.size)
        frame = pd.DataFrame(samples)

        if not excel:
            return frame

        self.to_excel(frame)
        return frame
Esempio n. 4
0
    def normal_data_frame(self,
                          mu: (int, float) = 0,
                          sigma: (int, float) = 1,
                          excel: bool = False):
        """
        Build a df of normally distributed floating point values with mean 'mu' and
        standard deviation 'sigma', with the dimensions stored in the 'size' attribute.

        NumPy's global random state is re-seeded with the 'seed' attribute on every call.

        Parameters
        ----------
        mu      : int, float
                  mean value
        sigma   : int, float
                  standard deviation
        excel   : bool
                  if True the generated df is also passed on to to_excel()

        Returns
        -------
        Out     : pandas.DataFrame
                  n x 1 (if size is integer) or n x m (if size is tuple) dimensional df

        """
        np.random.seed(self.seed)
        # NOTE(review): the annotations allow float, but both values are declared as int
        # here - confirm how Assertor.evaluate_data_type treats float input.
        Assertor.evaluate_data_type({mu: int, sigma: int})

        samples = np.random.normal(mu, sigma, self.size)
        frame = pd.DataFrame(samples)

        if not excel:
            return frame

        self.to_excel(frame)
        return frame
Esempio n. 5
0
    def count_astrix(string: str):
        """
        Count the groups of consecutive '*' characters in a result string. Each group is the
        astrix notation attached to one statistical test.

        Parameters
        ----------
        string  : str
                  string with results

        Returns
        -------
        Out     : int
                  number of separate runs of one or more '*' found in the string

        """
        Assertor.evaluate_data_type({string: str})

        # A run is counted at its first '*', i.e. a '*' that is either the very first
        # character or is preceded by a non-'*' character.
        return sum(1 for pos, char in enumerate(string)
                   if char == '*' and (pos == 0 or string[pos - 1] != '*'))
Esempio n. 6
0
    def astrix(p_value: float):
        """
        Append the astrix (significance star) notation to a p-value

        The number of stars is decided by where the p-value falls among the thresholds
        0.0001, 0.001, 0.01 and 0.05: four stars at or below the smallest threshold, one
        star less per threshold exceeded, and no star above 0.05.

        Parameters
        ----------
        p_value   : float
                    p-value to be looked-up
        Returns
        -------
        Out     : string
                  the p-value followed by its astrix notation

        """
        Assertor.evaluate_data_type({p_value: float})

        thresholds = (0.0001, 0.001, 0.01, 0.05)
        # bisect_left yields how many thresholds lie strictly below the p-value, and
        # every such threshold removes one star from the maximum of four.
        exceeded = bisect_left(thresholds, p_value)
        stars = '*' * (len(thresholds) - exceeded)
        return "{}{}".format(p_value, stars)
Esempio n. 7
0
    def test_access_static_evaluate_pd_data_frame_method(self, invalid_object):
        """
        Calling evaluate_pd_dataframe() directly on the Assertor class, i.e. without an
        instance, with the supplied invalid object must raise a TypeError

        """
        # Resolved on the class itself - no Assertor instance is created
        check_data_frame = Assertor.evaluate_pd_dataframe
        with pt.raises(TypeError):
            check_data_frame(invalid_object)
Esempio n. 8
0
    def test_access_static_evaluate_data_type_method(self, invalid_object):
        """
        Calling evaluate_data_type() directly on the Assertor class, i.e. without an
        instance, with the supplied invalid object declared as a list must raise a TypeError

        """
        with pt.raises(TypeError):
            # The dictionary is built inside the context on purpose: a TypeError raised
            # while hashing the object as a key is captured here as well.
            Assertor.evaluate_data_type({invalid_object: list})
Esempio n. 9
0
    def __init__(self, seed: int, size: (tuple, int)):
        """
        Initiates the class by validating the arguments and handing them to the parent
        constructor

        Parameters
        ----------
        seed    : int
                  User can set a seed parameter to generate deterministic, non-random output
        size    : tuple of integers, int
                  dimensions or range of numbers in generated df

        """
        # NOTE(review): 'size' is annotated as tuple or int but declared as tuple here -
        # confirm whether integer sizes are meant to pass this check.
        expected_types = {seed: int, size: tuple}
        Assertor.evaluate_data_type(expected_types)
        super().__init__(seed=seed, size=size)
Esempio n. 10
0
    def test_assertor_cannot_be_instantiated(self):
        """
        Instantiating the Assertor base class directly must raise a
        BaseClassCannotBeInstantiated exception

        """
        expected_error = BaseClassCannotBeInstantiated
        with pt.raises(expected_error):
            Assertor()
Esempio n. 11
0
    def __init__(self, df: pd.DataFrame):
        """
        Constructor / Initiate the class

        Parameters
        ----------
        df      : pandas.DataFrame
                  Dataframe for which one wants to test for normality

        Raises
        ------
        ValueError
                  if the product of the df dimensions is below 400

        """
        for check in (Assertor.evaluate_pd_dataframe,
                      Assertor.evaluate_numeric_df):
            check(df)

        # Total number of cells, i.e. rows times columns
        n_observations = np.prod(df.shape)
        if n_observations < 400:
            message = (
                "pd.DataFrame must have at least 400 observations, i.e. (20 x 20) in order to "
                "conduct any meaningful normality tests, got {}")
            raise ValueError(message.format(df.shape))

        self.df = df
Esempio n. 12
0
    def __init__(self, df: pd.DataFrame = None):
        """
        Constructor / Initiate the class

        Runs an R snippet that installs the "MVN" package when it is not among the
        installed packages, and stores the df converted via numpy2ri.

        Parameters
        ----------
        df      : pandas.DataFrame
                  df to be analysed

        Raises
        ------
        BaseClassCannotBeInstantiated
                  if NormalityTest itself, rather than a subclass, is being instantiated

        """
        own_class = type(self)
        if own_class is NormalityTest:
            raise BaseClassCannotBeInstantiated(
                "base class '{}' cannot be instantiated".format(
                    own_class.__name__))

        Assertor.evaluate_pd_dataframe(df)

        # R code: install MVN (with dependencies) only when it is not installed yet
        ensure_mvn_installed = (
            'if (!is.element("MVN", installed.packages()[,1])){ '
            'install.packages("MVN", dep = TRUE)}')
        r(ensure_mvn_installed)

        self.df = numpy2ri.numpy2ri(np.array(df))
        gc.collect()
Esempio n. 13
0
    def __init__(self,
                 df: pd.DataFrame,
                 mn: str,
                 un: str,
                 dim: str = 'col',
                 digits: int = 5):
        """
        Constructor / Initiate the class

        The parent constructor is called with 'dim' and 'digits' first, then all arguments
        are validated and stored on the instance.

        Parameters
        ----------
        df      : pandas.DataFrame
                  DataFrame used for analysis
        mn      : str
                  string with all the results from the multivariate normality tests
        un      : str
                  string with all the results from the univariate normality tests
        dim     : str
                  indicate whether one wants to test for normality along the columns 'col' or rows
                  'row', default is 'col'
        digits  : int
                  number of decimal places to round down

        """
        super().__init__(dim=dim, digits=digits)

        for check in (Assertor.evaluate_pd_dataframe,
                      Assertor.evaluate_numeric_df):
            check(df)
        expected_types = {mn: str, un: str, dim: str, digits: int}
        Assertor.evaluate_data_type(expected_types)

        self.df, self.mn, self.un = df, mn, un
        self.dim, self.digits = dim, digits
Esempio n. 14
0
    def to_excel(df: pd.DataFrame,
                 file_dir: str = "reports/xlsx",
                 header: bool = True,
                 index: bool = True):
        """
        Method that converts dataframe (df) to Excel

        The workbook is saved in 'file_dir' as "ExcelDataFrame_<timestamp>.xlsx", where the
        timestamp is the current local time in ISO format with ':' and '.' replaced by '-'.

        Parameters
        ----------
        df      : pandas.DataFrame
                  dataframe to be converted into excel
        file_dir: str
                  directory to save the file, created if it does not exist
        header  : bool
                  Write out the column names
        index   : bool
                  Write row names

        Raises
        ------
        OSError
                  if the output directory cannot be created

        """
        Assertor.evaluate_pd_dataframe(df)
        Assertor.evaluate_data_type({file_dir: str})

        local_time = datetime.datetime.now().isoformat().replace(
            ":", "-").replace(".", "-")
        filepath = os.path.join(file_dir,
                                "ExcelDataFrame_" + local_time + ".xlsx")

        try:
            # exist_ok avoids the race between checking for the directory and creating it
            os.makedirs(file_dir, exist_ok=True)
        except (OSError, ValueError) as e:
            raise OSError("creation of dir " + file_dir + " failed with: " +
                          str(e)) from e

        df.to_excel(filepath, header=header, index=index)
Esempio n. 15
0
    def mixed_data_frame(self,
                         mu: (int, float) = 0,
                         sigma: (int, float) = 1,
                         limits: tuple = (-1, 1),
                         excel: bool = False):
        """
        Generates a df that mixes uniformly and normally distributed values.

        A uniform df and a normal df of the same dimensions are stacked on top of each
        other, every column is shuffled independently, and only as many rows as a single
        df has are kept.

        Parameters
        ----------
        mu      : integer, float
                  mean value
        sigma   : integer, float
                  standard deviation
        limits  : tuple
                  (lower, upper) limit of values to be generated in df
        excel   : bool
                  indicating if one wants to output to excel

        Returns
        -------
        Out     : pandas.DataFrame
                  n x 1 (if size is integer) or n x m (if size is tuple) dimensional df

        """
        np.random.seed(self.seed)
        Assertor.evaluate_data_type({mu: int, sigma: int, limits: tuple})

        uniform_df = self.uniform_data_frame(limits)
        normal_df = self.normal_data_frame(mu, sigma)

        # DataFrame.append was removed in pandas 2.0; pd.concat stacks the rows the same way
        stacked_df = pd.concat([uniform_df, normal_df], ignore_index=True)

        # 'size' may be a bare integer (n x 1 df), in which case it is the row count itself
        n_rows = self.size if np.ndim(self.size) == 0 else self.size[0]
        df = stacked_df.apply(np.random.permutation).head(n_rows)

        if excel:
            self.to_excel(df)
        return df
Esempio n. 16
0
    def __init__(self, df: pd.DataFrame, dim: str = 'col', digits: int = 5):
        """
        Constructor / Initiate the class

        The parent constructor is called with 'dim' and 'digits' first, then all arguments
        are validated and stored on the instance.

        Parameters
        ----------
        df      : pandas.DataFrame
                  Dataframe for which one wants to generate / test
        dim     : str
                  indicate whether one wants to test for normality along the columns 'col' or rows
                  'row', default is 'col'
        digits  : int
                  number of decimal places to round down

        """
        super().__init__(dim=dim, digits=digits)

        for check in (Assertor.evaluate_pd_dataframe,
                      Assertor.evaluate_numeric_df):
            check(df)
        expected_types = {dim: str, digits: int}
        Assertor.evaluate_data_type(expected_types)

        self.df = df
        self.dim, self.digits = dim, digits