def create_anova_section(
        analysis_tracker: AnalysisTracker
) -> Union[Tuple[str, List[str]], None]:
    """Build the ANOVA section of the report.

    Each pair of numerical and categorical attributes has one
    corresponding ANOVA analysis in the tracker.

    Args:
        analysis_tracker: (AnalysisTracker), holder for all the analysis

    Returns:
        Union[Tuple[str, List[str]], None],
        (section_content, List[warning]) when `get_analysis` yields a
        non-empty result for ANOVA, otherwise None
    """
    anova_name = run_metadata_pb2.Analysis.Name.Name(
        run_metadata_pb2.Analysis.ANOVA)
    anova_results = analysis_tracker.get_analysis(anova_name)

    # nothing to render when no ANOVA result is available
    if not anova_results:
        return None

    section = utils.create_order_pair_metric_section(
        analysis_list=anova_results,
        same_match_value='NA')

    # keep only the non-empty outcomes of the p-value check
    notes = [
        note
        for note in map(recommendation.check_p_value, anova_results)
        if note
    ]

    if not notes:
        return section, notes

    # warning notes are appended right after the table
    return section + utils.create_warning_notes(notes), notes
def create_pearson_correlation_section(
        analysis_tracker: AnalysisTracker,
        figure_base_path: str
) -> Union[Tuple[str, List[str]], None]:
    """Build the correlation section for numerical attributes.

    Each pair of numerical attributes has one corresponding
    Pearson-correlation analysis in the tracker.

    Args:
        analysis_tracker: (AnalysisTracker), holder for all the analysis
        figure_base_path: (string), the folder for holding figures

    Returns:
        Union[Tuple[str, List[str]], None],
        (section_content, List[warning]) when `get_analysis` yields a
        non-empty result for PEARSON_CORRELATION, otherwise None
    """
    pearson_name = run_metadata_pb2.Analysis.Name.Name(
        run_metadata_pb2.Analysis.PEARSON_CORRELATION)
    pearson_results = analysis_tracker.get_analysis(pearson_name)

    # nothing to render when no correlation result is available
    if not pearson_results:
        return None

    section = utils.create_no_order_pair_metric_section(
        analysis_list=pearson_results,
        same_match_value=1.0,
        table_name="Correlation",
        figure_base_path=figure_base_path)

    # keep only the non-empty outcomes of the correlation condition check
    notes = [
        note
        for note in map(recommendation.check_pearson_correlation,
                        pearson_results)
        if note
    ]

    if not notes:
        return section, notes

    # warning notes are appended right after the table
    return section + utils.create_warning_notes(notes), notes
def create_chi_square_section(
        analysis_tracker: AnalysisTracker,
        figure_base_path: Text = ''
) -> Union[Tuple[Text, List[Text]], Tuple[None, None]]:
    """Build the chi-square section of the report.

    Each pair of categorical attributes has one corresponding chi-square
    analysis in the tracker. If the chi-square test was not performed,
    (None, None) is returned.

    Args:
        analysis_tracker: (AnalysisTracker), holder for all the analysis
        figure_base_path: (string), not used, kept for signature
            consistency with the other section builders

    Returns:
        Union[Tuple[Text, List[Text]], Tuple[None, None]],
        (section_content, List[warning])
    """
    chi_square_name = Analysis.Name.Name(Analysis.CHI_SQUARE)
    chi_square_results = analysis_tracker.get_analysis_by_name(
        chi_square_name)

    # nothing to render when no chi-square result is available
    if not chi_square_results:
        return None, None

    # the table builder receives the literal 'NA' as figure path, not the
    # `figure_base_path` argument of this function
    section = utils.create_no_order_pair_metric_section(
        analysis_list=chi_square_results,
        same_match_value='NA',
        figure_base_path='NA')

    # keep only the non-empty outcomes of the p-value check
    notes = [
        note
        for note in map(recommendation.check_p_value, chi_square_results)
        if note
    ]

    if not notes:
        return section, notes

    # warning notes are appended right after the table
    return section + utils.create_warning_notes(notes), notes
def create_anova_section(
        analysis_tracker: AnalysisTracker,
        figure_base_path: Text = ''
) -> Union[Tuple[Text, List[Text]], Tuple[None, None]]:
    """Build the ANOVA section of the report.

    Each pair of numerical and categorical attributes has one
    corresponding ANOVA analysis in the tracker. If the ANOVA test was
    not performed, (None, None) is returned.

    Args:
        analysis_tracker: (AnalysisTracker), holder for all the analysis
        figure_base_path: (string), the folder for holding figures;
            not read by this function

    Returns:
        Union[Tuple[Text, List[Text]], Tuple[None, None]],
        (section_content, List[warning])
    """
    anova_name = Analysis.Name.Name(Analysis.ANOVA)
    anova_results = analysis_tracker.get_analysis_by_name(anova_name)

    # nothing to render when no ANOVA result is available
    if not anova_results:
        return None, None

    section = utils.create_order_pair_metric_section(
        analysis_list=anova_results,
        same_match_value='NA')

    # keep only the non-empty outcomes of the p-value check
    notes = [
        note
        for note in map(recommendation.check_p_value, anova_results)
        if note
    ]

    if not notes:
        return section, notes

    # warning notes are appended right after the table
    return section + utils.create_warning_notes(notes), notes
def create_descriptive_section(
        analysis_tracker: AnalysisTracker,
        figure_base_path: str
) -> Tuple[str, List[str]]:
    """Create descriptive section of the report.

    One table row is produced per attribute, numerical attributes first
    and categorical attributes after. A missing-value check is run for
    every attribute; a cardinality check is additionally run for
    categorical attributes.

    Args:
        analysis_tracker: (AnalysisTracker), holder for all the analysis
        figure_base_path: (string), the folder for holding figures

    Returns:
        Tuple[str, List[str]], (section_content, List[warnings])
    """
    numerical_attributes = analysis_tracker.get_numerical_attributes()
    categorical_attributes = analysis_tracker.get_categorical_attributes()

    analysis_enum = run_metadata_pb2.Analysis
    # base analysis is the one holding basic descriptive statistics
    descriptive_name = analysis_enum.Name.Name(analysis_enum.DESCRIPTIVE)

    # (attributes, name of the additional analysis, run cardinality check)
    # numerical attributes are complemented by a histogram, categorical
    # attributes by value counts
    attribute_groups = (
        (numerical_attributes,
         analysis_enum.Name.Name(analysis_enum.HISTOGRAM),
         False),
        (categorical_attributes,
         analysis_enum.Name.Name(analysis_enum.VALUE_COUNTS),
         True),
    )

    # holders for section content and warnings based on descriptive analysis
    contents = []
    warnings = []

    for attributes, additional_name, with_cardinality in attribute_groups:
        for att in attributes:
            # NOTE(review): `[0]` assumes the tracker returns at least one
            # analysis of each kind per attribute - confirm against caller
            base_analysis = analysis_tracker.get_attribute_analysis(
                att, descriptive_name)[0]
            additional_analysis = analysis_tracker.get_attribute_analysis(
                att, additional_name)[0]

            contents.append(
                utils.create_table_descriptive_row_from_analysis(
                    attribute_name=att,
                    base_analysis=base_analysis,
                    additional_analysis=additional_analysis,
                    figure_base_path=figure_base_path))

            # check missing value condition
            missing_check = recommendation.check_missing(att, base_analysis)
            if missing_check:
                warnings.append(missing_check)

            # check cardinality condition (categorical attributes only)
            if with_cardinality:
                cardinality_check = recommendation.check_cardinality(
                    att, base_analysis)
                if cardinality_check:
                    warnings.append(cardinality_check)

    # all rows are organised in a single table
    table_content = template.TABLE_DESCRIPTIVE_TEMPLATE.format(
        row_content=''.join(contents))
    if warnings:
        table_content = table_content + utils.create_warning_notes(warnings)

    return table_content, warnings
def create_descriptive_section(
        analysis_tracker: AnalysisTracker,
        figure_base_path: Text
) -> Tuple[Text, List[Text]]:
    """Create descriptive section of the report.

    One table row is produced per attribute, numerical attributes first
    and categorical attributes after. Checking based on the descriptive
    results is performed: a missing-value check for every attribute and
    an additional cardinality check for categorical attributes.

    Args:
        analysis_tracker: (AnalysisTracker), holder for all the analysis
        figure_base_path: (string), the folder for holding figures

    Returns:
        Tuple[Text, List[Text]], (section_content, List[warnings])
    """
    numerical_attributes = analysis_tracker.get_num_attribute_names()
    categorical_attributes = analysis_tracker.get_cat_attribute_names()

    # base analysis is the one holding basic descriptive statistics
    descriptive_name = Analysis.Name.Name(Analysis.DESCRIPTIVE)

    # (attributes, name of the additional analysis, run cardinality check)
    # numerical attributes are complemented by a histogram, categorical
    # attributes by value counts
    attribute_groups = (
        (numerical_attributes,
         Analysis.Name.Name(Analysis.HISTOGRAM),
         False),
        (categorical_attributes,
         Analysis.Name.Name(Analysis.VALUE_COUNTS),
         True),
    )

    # holders for section content and warnings based on descriptive analysis
    contents = []
    warnings = []

    for attributes, additional_name, with_cardinality in attribute_groups:
        for att in attributes:
            # NOTE(review): `[0]` assumes the tracker returns at least one
            # analysis of each kind per attribute - confirm against caller
            base_analysis = (
                analysis_tracker.get_analysis_by_attribute_and_name(
                    att, descriptive_name)[0])
            additional_analysis = (
                analysis_tracker.get_analysis_by_attribute_and_name(
                    att, additional_name)[0])

            contents.append(
                utils.create_table_descriptive_row_from_analysis(
                    attribute_name=att,
                    base_analysis=base_analysis,
                    additional_analysis=additional_analysis,
                    figure_base_path=figure_base_path))

            # check missing value condition
            missing_check = recommendation.check_missing(att, base_analysis)
            if missing_check:
                warnings.append(missing_check)

            # check cardinality condition (categorical attributes only)
            if with_cardinality:
                cardinality_check = recommendation.check_cardinality(
                    att, base_analysis)
                if cardinality_check:
                    warnings.append(cardinality_check)

    # finally all the descriptive analysis result is organised in a table
    table_content = template.TABLE_DESCRIPTIVE_TEMPLATE.format(
        row_content=''.join(contents))
    if warnings:
        table_content = table_content + utils.create_warning_notes(warnings)

    return table_content, warnings