Example #1
0
    def __init__(self,
                 df: pd.DataFrame,
                 mn: str,
                 un: str,
                 dim: str = 'col',
                 digits: int = 5):
        """
        Initialise the object from a dataframe and two result strings

        The parent initialiser is called with dim and digits first. The arguments are then
        passed through the Assertor checks and finally stored on the instance.

        Parameters
        ----------
        df      : pandas.DataFrame
                  DataFrame used for analysis, checked with Assertor.evaluate_pd_dataframe()
                  and Assertor.evaluate_numeric_df()
        mn      : str
                  string with all the results from the multivariate normality tests
        un      : str
                  string with all the results from the univariate normality tests
        dim     : str
                  indicate whether one wants to test for normality along the columns 'col' or
                  rows 'row', default is 'col'
        digits  : int
                  number of decimal places used for rounding, default is 5

        """
        super().__init__(dim=dim, digits=digits)

        Assertor.evaluate_pd_dataframe(df)
        Assertor.evaluate_numeric_df(df)

        # the mapping is keyed by the argument *values*, each paired with its annotated type;
        # values therefore have to be hashable and equal values share a single entry
        expected_types = {mn: str, un: str, dim: str, digits: int}
        Assertor.evaluate_data_type(expected_types)

        self.df, self.mn, self.un = df, mn, un
        self.dim, self.digits = dim, digits
Example #2
0
    def test_access_static_evaluate_pd_data_frame_method(self, invalid_object):
        """
        Call evaluate_pd_dataframe() directly on the Assertor class, i.e. without creating an
        Assertor instance, and expect a TypeError for the supplied invalid_object

        Parameters
        ----------
        invalid_object  : object
                          value handed to evaluate_pd_dataframe(); how it is supplied (fixture
                          or parametrisation) is not visible in this method

        """
        expected_error = TypeError
        with pt.raises(expected_error):
            Assertor.evaluate_pd_dataframe(invalid_object)
Example #3
0
    def __init__(self, df: pd.DataFrame):
        """
        Initialise the object with the dataframe to be tested for normality

        The dataframe is passed through the Assertor checks and must hold at least 400
        cells (product of its shape) before it is stored on the instance.

        Parameters
        ----------
        df      : pandas.DataFrame
                  Dataframe for which one wants to test for normality

        Raises
        ------
        ValueError
                  if the product of the dimensions in df.shape is below 400

        """
        Assertor.evaluate_pd_dataframe(df)
        Assertor.evaluate_numeric_df(df)

        min_observations = 400
        n_observations = np.prod(df.shape)  # rows times columns

        if n_observations < min_observations:
            template = (
                "pd.DataFrame must have at least 400 observations, i.e. (20 x 20) in order to "
                "conduct any meaningful normality tests, got {}")
            raise ValueError(template.format(df.shape))

        self.df = df
Example #4
0
    def __init__(self, df: pd.DataFrame = None):
        """
        Initialise a concrete normality test with the data to be analysed

        Refuses to build an object whose type is exactly NormalityTest, validates df, runs an
        R snippet that installs the "MVN" package when it is not yet installed and stores df
        on the instance as the result of numpy2ri.numpy2ri() applied to its numpy array.

        Parameters
        ----------
        df      : pandas.DataFrame
                  df to be analysed, default is None; the value is passed unchanged to
                  Assertor.evaluate_pd_dataframe()

        Raises
        ------
        BaseClassCannotBeInstantiated
                  if the object being initialised is of type NormalityTest itself

        """
        # identity check on the exact type: instances of subclasses pass this guard
        if type(self) is NormalityTest:
            class_name = self.__class__.__name__
            raise BaseClassCannotBeInstantiated(
                "base class '{}' cannot be instantiated".format(class_name))

        Assertor.evaluate_pd_dataframe(df)

        # R code: install MVN (with dependencies) unless it is listed by installed.packages()
        install_mvn_if_missing = (
            'if (!is.element("MVN", installed.packages()[,1])){ '
            'install.packages("MVN", dep = TRUE)}')
        r(install_mvn_if_missing)

        values = np.array(df)
        self.df = numpy2ri.numpy2ri(values)

        gc.collect()
Example #5
0
    def __init__(self, df: pd.DataFrame, dim: str = 'col', digits: int = 5):
        """
        Initialise the object with a dataframe, a test dimension and a rounding precision

        The parent initialiser is called with dim and digits first. The arguments are then
        passed through the Assertor checks and finally stored on the instance.

        Parameters
        ----------
        df      : pandas.DataFrame
                  Dataframe for which one wants to generate / test, checked with
                  Assertor.evaluate_pd_dataframe() and Assertor.evaluate_numeric_df()
        dim     : str
                  indicate whether one wants to test for normality along the columns 'col' or
                  rows 'row', default is 'col'
        digits  : int
                  number of decimal places used for rounding, default is 5

        """
        super().__init__(dim=dim, digits=digits)

        Assertor.evaluate_pd_dataframe(df)
        Assertor.evaluate_numeric_df(df)

        # the mapping is keyed by the argument *values*, each paired with its annotated type
        expected_types = {dim: str, digits: int}
        Assertor.evaluate_data_type(expected_types)

        self.df, self.dim, self.digits = df, dim, digits
Example #6
0
    def to_excel(df: pd.DataFrame,
                 file_dir: str = "reports/xlsx",
                 header: bool = True,
                 index: bool = True):
        """
        Method that converts dataframe (df) to a time-stamped Excel file inside file_dir

        The file is named "ExcelDataFrame_<timestamp>.xlsx", where <timestamp> is the local
        time in ISO format with ':' and '.' replaced by '-'. The directory file_dir is
        created (including parents) when it does not exist yet.

        Parameters
        ----------
        df      : pandas.DataFrame
                  dataframe to be converted into excel
        file_dir: str
                  directory to save the file, default is "reports/xlsx"
        header  : bool
                  Write out the column names, default is True
        index   : bool
                  Write row names, default is True

        Raises
        ------
        OSError
                  if file_dir cannot be created, e.g. because of missing permissions or
                  because the path already exists but is not a directory

        """
        Assertor.evaluate_pd_dataframe(df)
        Assertor.evaluate_data_type({file_dir: str})

        # ':' and '.' are replaced so that the timestamp can be used in a file name
        local_time = datetime.datetime.now().isoformat().replace(":", "-").replace(".", "-")
        filepath = os.path.join(file_dir, "ExcelDataFrame_" + local_time + ".xlsx")

        try:
            # exist_ok=True replaces the former "exists() then makedirs()" sequence, which
            # failed when the directory was created by someone else between the two calls
            os.makedirs(file_dir, exist_ok=True)
        except Exception as e:
            # keep the original cause attached to the re-raised error
            raise OSError("creation of dir " + file_dir + " failed with: " +
                          str(e)) from e

        df.to_excel(filepath, header=header, index=index)