def test_generate_summary(self):
        """Exercises Summarizer.generate_summary on three fixture dataframes.

        The simple, stack-lines and multi-cluster fixtures are each wrapped
        in a Summarizer (sharing self.config) and the resulting summary
        dataframe is checked for the expected rows and column values.
        """
        # --- simple fixture ---
        simple_summary = Summarizer(
            self.simple_dataframe, self.config).generate_summary()
        self.assertEqual(len(simple_summary), 3)
        self.assertIn(1, simple_summary['Size'].values)
        simple_texts = simple_summary['Text'].values
        for expected_text in ('test', 'test2', 'test3'):
            self.assertIn(expected_text, simple_texts)
        self.assertIn('', simple_summary['ClassLines'].values)

        # --- stack-lines fixture ---
        stack_summary = Summarizer(
            self.stack_lines_dataframe, self.config).generate_summary()
        stack_class_lines = stack_summary['ClassLines'].values
        for expected_class in ('some.class.java',
                               'some.class2.java',
                               'some.class3.java'):
            self.assertIn(expected_class, stack_class_lines)
        self.assertIn('', stack_summary['Text'].values)

        # --- multi-cluster fixture ---
        cluster_summary = Summarizer(
            self.multi_cluster_dataframe, self.config).generate_summary()
        self.assertEqual(len(cluster_summary), 2)
        for expected_size in (2, 1):
            self.assertIn(expected_size, cluster_summary['Size'].values)
Example #2
0
def run_classification_summary(df, classifier_config):
    """Classifies the errors in a dataframe and returns their summary.

    The error-code matcher runs first, then the k-means clusterer, and
    finally the summarizer. All three are constructed from the same `df`
    and `classifier_config`.

    Args:
      df: pandas dataframe containing the error information we wish to
        classify and summarize.
      classifier_config: config_pb2 proto specified by the configuration
        file.

    Returns:
      The pandas dataframe returned by Summarizer.generate_summary(),
      summarizing what the classification algorithms found for `df`.
    """
    # Classification passes. Their return values are discarded, so they
    # presumably record results on `df` itself -- TODO confirm.
    ErrorCodeMatcher(df, classifier_config).match_informative_errors()
    KMeansClusterer(df, classifier_config).cluster_errors()

    # Build the summary from the classified dataframe.
    summary_df = Summarizer(df, classifier_config).generate_summary()
    return summary_df