def __init__(self, dim: str = 'col', digits: int = 5, seed: int = 90210,
             size: (tuple, int) = (30, 30)):
    """
    Constructor / Initiate the class

    Parameters
    ----------
    dim     : str
              indicate whether one wants to test for normality along the
              columns 'col' or rows 'row', default is 'col'
    digits  : int
              number of decimal places to round down
    seed    : int
              User can set a seed parameter to generate deterministic,
              non-random output
    size    : tuple of integers, integer
              dimensions or range of numbers in generated df,
              default is (30, 30)

    Raises
    ------
    BaseClassCannotBeInstantiated
              if the object being built is exactly a Generator
              (subclasses pass this check)
    ValueError
              if any dimension in size is negative

    """
    # Exact-type comparison on purpose: only the base class itself is
    # rejected, subclasses must still be able to run this constructor.
    if type(self) is Generator:
        raise BaseClassCannotBeInstantiated(
            "base class '{}' cannot be instantiated".format(
                self.__class__.__name__))

    # A bare integer is a single dimension; wrap it so the check below can
    # iterate instead of failing with "'int' object is not iterable".
    dimensions = (size,) if isinstance(size, int) else size
    if any(extent < 0 for extent in dimensions):
        raise ValueError("dimensions in size cannot be negative")

    Assertor.evaluate_data_type({dim: str, digits: int, seed: int})

    self.dim = dim
    self.digits = digits
    self.seed = seed
    self.size = size
def normality_report(self, file_dir: str = "reports/txt", dim: str = 'col',
                     digits: int = 5, ds: bool = False):
    """
    Method that writes a text report containing the results of the
    Normality tests to a time-stamped file in 'file_dir'

    Parameters
    ----------
    file_dir    : str
                  directory to save the file, created if it does not exist
    dim         : str
                  indicate whether one wants to test for normality along the
                  columns 'col' or rows 'row', default is 'col'
    digits      : int
                  number of decimal places to round down
    ds          : bool
                  indicating if one wants additional table with descriptive
                  statistics of the data

    Raises
    ------
    OSError
                  if the output directory cannot be created

    """
    # digits and ds are validated in separate calls: as keys of one dict,
    # 0/False (and 1/True) compare equal and collapse into a single entry,
    # so digits would end up paired with the bool type.
    Assertor.evaluate_data_type({file_dir: str, dim: str})
    Assertor.evaluate_data_type({digits: int})
    Assertor.evaluate_data_type({ds: bool})

    try:
        # exist_ok avoids the race between an existence check and creation
        os.makedirs(file_dir, exist_ok=True)
    except Exception as e:
        # broad catch kept so every failure still surfaces as OSError
        raise OSError("creation of dir " + file_dir + " failed with: " +
                      str(e)) from e

    local_time = datetime.datetime.now().isoformat().replace(
        ":", "-").replace(".", "-")
    report_path = os.path.join(file_dir,
                               "NormalityReport_" + local_time + ".txt")

    # Build the whole report before touching the file so that a failing
    # computation does not leave an empty report behind.
    summary, mn, un = self.result_summary(dim=dim, digits=digits)
    figlet = Figlet(font="slant")
    title = figlet.renderText("normb")

    sections = [
        title,
        'Version: ' + __version__ + '\n\n',
        summary + '\n',
        mn + '\n',
        un + '\n',
    ]
    if ds:
        sections.append(self.descriptive_statistics(dim, digits))

    # the context manager closes the handle even if a write fails
    with open(report_path, "w") as file:
        file.writelines(sections)
def uniform_data_frame(self, limits: tuple = (-1, 1), excel: bool = False):
    """
    Build a df of floating point values drawn uniformly between the two
    bounds in 'limits', with dimensions taken from the instance's 'size'.

    Parameters
    ----------
    limits  : tuple
              (lower, upper) limit of values to be generated in df
    excel   : bool
              when True the df is also handed to to_excel()

    Returns
    -------
    Out     : pandas.DataFrame
              n x 1 (if size is integer) or n x m (if size is tuple)
              dimensional df

    """
    # re-seed numpy's global generator with the instance seed before drawing
    np.random.seed(self.seed)
    Assertor.evaluate_data_type({limits: tuple})

    low, high = limits
    samples = np.random.uniform(low, high, self.size)
    frame = pd.DataFrame(samples)

    if excel:
        self.to_excel(frame)
    return frame
def normal_data_frame(self, mu: (int, float) = 0, sigma: (int, float) = 1,
                      excel: bool = False):
    """
    Build a df of floating point values drawn from a normal distribution
    with mean 'mu' and standard deviation 'sigma', with dimensions taken
    from the instance's 'size'.

    Parameters
    ----------
    mu      : int, float
              mean value
    sigma   : int, float
              standard deviation
    excel   : bool
              when True the df is also handed to to_excel()

    Returns
    -------
    Out     : pandas.DataFrame
              n x 1 (if size is integer) or n x m (if size is tuple)
              dimensional df

    """
    # re-seed numpy's global generator with the instance seed before drawing
    np.random.seed(self.seed)

    # NOTE(review): the annotations allow float, but only int is named here;
    # whether floats pass depends on Assertor - confirm.
    Assertor.evaluate_data_type({mu: int, sigma: int})

    samples = np.random.normal(mu, sigma, self.size)
    frame = pd.DataFrame(samples)

    if excel:
        self.to_excel(frame)
    return frame
def count_astrix(string: str):
    """
    Count the groups of consecutive astrix ('*') characters in a string.
    Each unbroken run of one or more '*' counts once.

    Parameters
    ----------
    string  : str
              string with results

    Returns
    -------
    Out     : int
              number of astrix groups found in the string

    """
    Assertor.evaluate_data_type({string: str})

    groups = 0
    inside_group = False
    for symbol in string:
        is_star = symbol == '*'
        # a group is counted once, at the moment it starts
        if is_star and not inside_group:
            groups += 1
        inside_group = is_star
    return groups
def astrix(p_value: float):
    """
    Append the astrix notation that matches a p-value to that p-value.

    Values up to 0.0001 get '****', up to 0.001 '***', up to 0.01 '**',
    up to 0.05 '*', and anything larger gets no astrix.

    Parameters
    ----------
    p_value : float
              p-value to be looked-up

    Returns
    -------
    Out     : string
              the p-value followed by its astrix notation

    """
    Assertor.evaluate_data_type({p_value: float})

    thresholds = (0.0001, 0.001, 0.01, 0.05)
    # one more marker than thresholds: the last covers p-values above 0.05
    markers = ('****', '***', '**', '*', '')

    position = bisect_left(thresholds, p_value)
    stars = markers[position]
    return "{}{}".format(p_value, stars)
def test_access_static_evaluate_pd_data_frame_method(self, invalid_object):
    """
    Check that evaluate_pd_dataframe() can be called directly on the
    Assertor class, without an instance, and raises TypeError for an
    invalid object

    """
    expected_error = TypeError
    with pt.raises(expected_error):
        Assertor.evaluate_pd_dataframe(invalid_object)
def test_access_static_evaluate_data_type_method(self, invalid_object):
    """
    Check that evaluate_data_type() can be called directly on the Assertor
    class, without an instance, and raises TypeError when an invalid
    object is paired with the list type

    """
    with pt.raises(TypeError):
        Assertor.evaluate_data_type({invalid_object: list})
def __init__(self, seed: int, size: (tuple, int)):
    """
    Initiates the class by validating the arguments and passing them on to
    the parent constructor

    Parameters
    ----------
    seed    : int
              User can set a seed parameter to generate deterministic,
              non-random output
    size    : tuple of integers, int
              dimensions or range of numbers in generated df

    """
    # NOTE(review): size is checked against tuple only, although the
    # annotation also lists int - confirm which is intended.
    expected_types = {seed: int, size: tuple}
    Assertor.evaluate_data_type(expected_types)

    super().__init__(seed=seed, size=size)
def test_assertor_cannot_be_instantiated(self):
    """
    Check that constructing the Assertor base class directly raises
    BaseClassCannotBeInstantiated

    """
    expected_error = BaseClassCannotBeInstantiated
    with pt.raises(expected_error):
        Assertor()
def __init__(self, df: pd.DataFrame):
    """
    Constructor / Initiate the class

    Parameters
    ----------
    df      : pandas.DataFrame
              Dataframe for which one wants to test for normality; the
              product of its dimensions must be at least 400

    Raises
    ------
    ValueError
              if the df holds fewer than 400 observations

    """
    Assertor.evaluate_pd_dataframe(df)
    Assertor.evaluate_numeric_df(df)

    # total number of cells, i.e. rows times columns
    observations = np.prod(df.shape)
    if observations < 400:
        template = (
            "pd.DataFrame must have at least 400 observations, i.e. (20 x 20) in order to "
            "conduct any meaningful normality tests, got {}")
        raise ValueError(template.format(df.shape))

    self.df = df
def __init__(self, df: pd.DataFrame = None):
    """
    Constructor / Initiate the class

    Rejects direct construction of NormalityTest itself, validates the df,
    runs an R snippet that installs the "MVN" package when it is not among
    the installed packages, and stores the df converted through numpy2ri.

    Parameters
    ----------
    df      : pandas.DataFrame
              df to be analysed

    Raises
    ------
    BaseClassCannotBeInstantiated
              if the object being built is exactly a NormalityTest

    """
    if type(self) == NormalityTest:
        message = "base class '{}' cannot be instantiated".format(
            self.__class__.__name__)
        raise BaseClassCannotBeInstantiated(message)

    Assertor.evaluate_pd_dataframe(df)

    # R code: install MVN (with dependencies) only if it is not yet installed
    ensure_mvn = ('if (!is.element("MVN", installed.packages()[,1])){ '
                  'install.packages("MVN", dep = TRUE)}')
    r(ensure_mvn)

    as_array = np.array(df)
    self.df = numpy2ri.numpy2ri(as_array)
    gc.collect()
def __init__(self, df: pd.DataFrame, mn: str, un: str, dim: str = 'col',
             digits: int = 5):
    """
    Constructor / Initiate the class

    Parameters
    ----------
    df      : pandas.DataFrame
              DataFrame used for analysis
    mn      : str
              string with all the results from the multivariate normality
              tests
    un      : str
              string with all the results from the univariate normality
              tests
    dim     : str
              indicate whether one wants to test for normality along the
              columns 'col' or rows 'row', default is 'col'
    digits  : int
              number of decimal places to round down

    """
    # the parent constructor runs first, before any validation done here
    super().__init__(dim=dim, digits=digits)

    Assertor.evaluate_pd_dataframe(df)
    Assertor.evaluate_numeric_df(df)
    expected_types = {mn: str, un: str, dim: str, digits: int}
    Assertor.evaluate_data_type(expected_types)

    self.df, self.mn, self.un = df, mn, un
    self.dim, self.digits = dim, digits
def to_excel(df: pd.DataFrame, file_dir: str = "reports/xlsx",
             header: bool = True, index: bool = True):
    """
    Method that converts dataframe (df) to Excel, writing a time-stamped
    .xlsx file into 'file_dir'

    Parameters
    ----------
    df      : pandas.DataFrame
              dataframe to be converted into excel
    file_dir: str
              directory to save the file, created if it does not exist
    header  : bool
              Write out the column names
    index   : bool
              Write row names

    Raises
    ------
    OSError
              if the output directory cannot be created

    """
    Assertor.evaluate_pd_dataframe(df)
    Assertor.evaluate_data_type({file_dir: str})

    local_time = datetime.datetime.now().isoformat().replace(
        ":", "-").replace(".", "-")
    filepath = os.path.join(file_dir,
                            "ExcelDataFrame_" + local_time + ".xlsx")

    try:
        # exist_ok avoids the race between an existence check and creation
        os.makedirs(file_dir, exist_ok=True)
    except Exception as e:
        # broad catch kept so every failure still surfaces as OSError
        raise OSError("creation of dir " + file_dir + " failed with: " +
                      str(e)) from e

    df.to_excel(filepath, header=header, index=index)
def mixed_data_frame(self, mu: (int, float) = 0, sigma: (int, float) = 1,
                     limits: tuple = (-1, 1), excel: bool = False):
    """
    Generates a df that mixes uniformly and normally distributed values.

    A uniform df and a normal df are stacked on top of each other, every
    column of the stacked df is shuffled independently, and the first
    rows (as many as the first dimension of 'size') are kept.

    Parameters
    ----------
    mu      : integer, float
              mean value
    sigma   : integer, float
              standard deviation
    limits  : tuple
              (lower, upper) limit of values to be generated in df
    excel   : bool
              indicating if one wants to output to excel

    Returns
    -------
    Out     : pandas.DataFrame
              n x 1 (if size is integer) or n x m (if size is tuple)
              dimensional df

    """
    np.random.seed(self.seed)
    Assertor.evaluate_data_type({mu: int, sigma: int, limits: tuple})

    uniform_df = self.uniform_data_frame(limits)
    normal_df = self.normal_data_frame(mu, sigma)

    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0
    mixed_df = pd.concat([uniform_df, normal_df], ignore_index=True)

    # size may be a bare integer (a single dimension) as well as a tuple
    n_rows = self.size if isinstance(self.size, int) else self.size[0]
    df = mixed_df.apply(np.random.permutation).head(n_rows)

    if excel:
        self.to_excel(df)
    return df
def __init__(self, df: pd.DataFrame, dim: str = 'col', digits: int = 5):
    """
    Constructor / Initiate the class

    Parameters
    ----------
    df      : pandas.DataFrame
              Dataframe for which one wants to generate / test
    dim     : str
              indicate whether one wants to test for normality along the
              columns 'col' or rows 'row', default is 'col'
    digits  : int
              number of decimal places to round down

    """
    # the parent constructor runs first, before any validation done here
    super().__init__(dim=dim, digits=digits)

    Assertor.evaluate_pd_dataframe(df)
    Assertor.evaluate_numeric_df(df)
    expected_types = {dim: str, digits: int}
    Assertor.evaluate_data_type(expected_types)

    self.df = df
    self.dim, self.digits = dim, digits