def _descriptive_result_to_proto(
      analysis_result: Dict[str, Dict[str, float]],
      attribute_name_map: Dict[str, analysis_entity_pb2.Attribute]
  ) -> Iterator[analysis_entity_pb2.Analysis]:
    """Yield one DESCRIPTIVE Analysis proto per attribute in the result.

    Each inner dictionary is turned into two parallel lists: the metric
    names, resolved through analysis_entity_pb2.ScalarMetric.Name.Value,
    and the matching metric values.

    Args:
        analysis_result: (Dict[str, Dict[str, float]]), keyed by attribute
        name; each value maps a metric name to the metric value
        attribute_name_map: (Dict[str, analysis_entity_pb2.Attribute]), lookup
        from attribute name to the Attribute instance

    Returns:
        Iterator[analysis_entity_pb2.Analysis]
    """
    for name, metrics in analysis_result.items():
      # Keys and values of the same dict iterate in the same order, so the
      # two lists stay aligned position by position.
      enum_names = [
          analysis_entity_pb2.ScalarMetric.Name.Value(key) for key in metrics
      ]
      values = list(metrics.values())

      yield utils.create_analysis_proto_from_scalar_metrics(
          analysis_name=analysis_entity_pb2.Analysis.DESCRIPTIVE,
          attributes=[attribute_name_map[name]],
          metric_names=enum_names,
          metric_values=values)
    def _run_single_chi_square(
        self, categorical_attribute_one: run_metadata_pb2.Attribute,
        categorical_attribute_two: run_metadata_pb2.Attribute
    ) -> run_metadata_pb2.Analysis:
        """Run a chi-square test on a pair of categorical attributes.

        The aggregation for the two columns comes from the data extractor,
        the p-value from the analyzer's chi_square; the p-value is logged
        and returned as the P_VALUE metric of a CHI_SQUARE Analysis.

        Args:
            categorical_attribute_one: (run_metadata_pb2.Attribute)
            categorical_attribute_two: (run_metadata_pb2.Attribute)

        Returns:
            run_metadata_pb2.Analysis
        """
        name_one = categorical_attribute_one.name
        name_two = categorical_attribute_two.name

        aggregated_df = self._data_extractor.extract_categorical_aggregation(
            categorical_columns=[name_one, name_two])
        p_value = self._data_analyzer.chi_square(aggregated_df)

        message = (
            'P-value for {cat_one} and {cat_two} is {p_value} under Chi-square test'
        ).format(cat_one=name_one, cat_two=name_two, p_value=p_value)
        logging.info(message)

        attributes = [categorical_attribute_one, categorical_attribute_two]
        metric_names = [run_metadata_pb2.ScalarMetric.P_VALUE]
        return utils.create_analysis_proto_from_scalar_metrics(
            run_metadata_pb2.Analysis.CHI_SQUARE, attributes, metric_names,
            [p_value])
    def run_pearson_correlation(self) -> List[run_metadata_pb2.Analysis]:
        """Compute pearson correlation for the numerical attributes.

        Every key of the analyzer result is split on '_vs_' into exactly two
        attribute names, which are looked up among the numerical attributes
        of the data definition. A name that itself contains '_vs_' would
        therefore break the two-way unpacking.

        Returns:
            List[run_metadata_pb2.Analysis], one PEARSON_CORRELATION entry
            per key of the analyzer result, each holding a single
            CORRELATION_COEFFICIENT metric
        """
        attribute_by_name = {
            attribute.name: attribute
            for attribute in self._data_def.numerical_attributes
        }

        corr_df = self._data_extractor.extract_pearson_correlation_data(
            numerical_columns=attribute_by_name.keys())
        corrs = self._data_analyzer.pearson_correlation(corr_df)

        message = 'The correlations are: {corr_result}'.format(
            corr_result=corrs)
        logging.info(message)

        def _to_analysis(pair_key):
            # Result keys have the form '<name_one>_vs_<name_two>'.
            first_name, second_name = pair_key.split('_vs_')
            pair = [
                attribute_by_name[first_name], attribute_by_name[second_name]
            ]
            return utils.create_analysis_proto_from_scalar_metrics(
                run_metadata_pb2.Analysis.PEARSON_CORRELATION, pair,
                [run_metadata_pb2.ScalarMetric.CORRELATION_COEFFICIENT],
                [corrs[pair_key]])

        return [_to_analysis(pair_key) for pair_key in corrs]
    def _run_single_anova(
        self, categorical_attribute: run_metadata_pb2.Attribute,
        numerical_attribute: run_metadata_pb2.Attribute
    ) -> run_metadata_pb2.Analysis:
        """Run a one-way ANOVA test for a categorical/numerical attribute pair.

        The data for the two columns is pulled through the data extractor
        and handed to the analyzer's anova_one_way. The result is logged and
        recorded as the F_STATISTIC scalar metric of an ANOVA Analysis.

        Args:
            categorical_attribute: (run_metadata_pb2.Attribute)
            numerical_attribute: (run_metadata_pb2.Attribute)

        Returns:
            run_metadata_pb2.Analysis
        """
        categorical_column = categorical_attribute.name
        numerical_column = numerical_attribute.name

        anova_df = self._data_extractor.extract_anova_data(
            categorical_column, numerical_column)
        f_stat = self._data_analyzer.anova_one_way(anova_df)
        # The value is stored below under ScalarMetric.F_STATISTIC, so the
        # log line labels it as an F-statistic rather than a P-value.
        # pylint: disable-msg=logging-format-interpolation
        logging.info(
            'F-statistic for {cat} and {numeric} is {f_statistic} under ANOVA test'
            .format(cat=categorical_column,
                    numeric=numerical_column,
                    f_statistic=f_stat))
        return utils.create_analysis_proto_from_scalar_metrics(
            run_metadata_pb2.Analysis.ANOVA,
            [categorical_attribute, numerical_attribute],
            [run_metadata_pb2.ScalarMetric.F_STATISTIC], [f_stat])
    def _run_single_information_gain(
        self, categorical_attribute_one: run_metadata_pb2.Attribute,
        categorical_attribute_two: run_metadata_pb2.Attribute
    ) -> run_metadata_pb2.Analysis:
        """Compute information gain for a pair of categorical attributes.

        The aggregation for the two columns comes from the data extractor,
        the value from the analyzer's information_gain; it is logged and
        returned as the INFORMATION_GAIN metric of an INFORMATION_GAIN
        Analysis.

        Args:
            categorical_attribute_one: (run_metadata_pb2.Attribute)
            categorical_attribute_two: (run_metadata_pb2.Attribute)

        Returns:
            run_metadata_pb2.Analysis
        """
        name_one = categorical_attribute_one.name
        name_two = categorical_attribute_two.name

        aggregated_df = self._data_extractor.extract_categorical_aggregation(
            categorical_columns=[name_one, name_two])
        gain = self._data_analyzer.information_gain(aggregated_df)

        message = 'Information gain for {cat_one} and {cat_two} is {value}'.format(
            cat_one=name_one, cat_two=name_two, value=gain)
        logging.info(message)

        attributes = [categorical_attribute_one, categorical_attribute_two]
        metric_names = [run_metadata_pb2.ScalarMetric.INFORMATION_GAIN]
        return utils.create_analysis_proto_from_scalar_metrics(
            run_metadata_pb2.Analysis.INFORMATION_GAIN, attributes,
            metric_names, [gain])