def test_convert_data_csv(self):
        """Checks the CSV written by `util.convert_comments_data`.

        Writes the example CSV fixture, converts it, reads the converted file
        back with pandas and compares its rows with the single expected
        record.

        The comparison is done on plain row dictionaries. The previous
        version compared the return values of two
        `reset_index(..., inplace=True)` calls, which are both `None`, so the
        assertion could never fail.
        """
        input_file = self._write_csv(self._create_example_csv())
        output_file = util.convert_comments_data(input_file)

        # Remove the quotes around identity terms list that read_csv injects.
        df = pd.read_csv(output_file).replace("'", '', regex=True)

        expected_row = {
            'comment_text': 'comment 1',
            'toxicity': 0.0,
            'gender': [],
            'sexual_orientation': ['bisexual'],
            'race': ['other_race_or_ethnicity'],
            'religion': ['atheist', 'other_religion'],
            'disability': [
                'physical_disability',
                'intellectual_or_learning_disability',
                'psychiatric_or_mental_illness',
                'other_disability',
            ],
        }

        # read_csv returns list-valued cells as text, so render every expected
        # list the same way and strip its quotes exactly as done for `df`.
        # NOTE(review): presumably the converter writes each identity list via
        # its str() form and emits no columns beyond the ones listed here --
        # confirm against util.convert_comments_data.
        expected_record = {
            column: (str(value).replace("'", '')
                     if isinstance(value, list) else value)
            for column, value in expected_row.items()
        }

        # Dict comparison is independent of column order and reports the
        # differing keys/values when the converted data is wrong.
        self.assertEqual(df.to_dict('records'), [expected_record])
    def test_convert_data_tfrecord(self):
        """Checks the TFRecord file written by `util.convert_comments_data`.

        Writes the example TFRecord fixture, converts it, parses every record
        of the converted file as a `tf.train.Example`, and expects exactly one
        example equal to the text-format proto below.
        """
        expected_text = """
        features {
          feature { key: "comment_text"
                    value { bytes_list {value: [ "comment 1" ] }}
                  }
          feature { key: "toxicity" value { float_list { value: [ 0.0 ] }}}
          feature { key: "sexual_orientation"
                    value { bytes_list { value: ["bisexual"] }}
                  }
          feature { key: "gender" value { bytes_list { }}}
          feature { key: "race"
                    value { bytes_list { value: [ "other_race_or_ethnicity" ] }}
                  }
          feature { key: "religion"
                    value { bytes_list {
                      value: [  "atheist", "other_religion" ] }
                    }
                  }
          feature { key: "disability" value { bytes_list {
                    value: [
                      "physical_disability",
                      "intellectual_or_learning_disability",
                      "psychiatric_or_mental_illness",
                      "other_disability"] }}
                  }
        }
        """

        def _decode(record):
            # Deserialize one raw record tensor into an Example proto.
            example = tf.train.Example()
            example.ParseFromString(record.numpy())
            return example

        source_path = self._write_tf_records(self._create_example_tfrecord())
        converted_path = util.convert_comments_data(source_path)

        parsed_examples = [
            _decode(record)
            for record in tf.data.TFRecordDataset(filenames=[converted_path])
        ]

        self.assertEqual(len(parsed_examples), 1)

        expected_example = text_format.Parse(expected_text, tf.train.Example())
        self.assertEqual(parsed_examples[0], expected_example)