def test_stats_gen_with_dataframe_invalid_njobs_negative(self):
    """Check that n_jobs=-2 is rejected with a ValueError.

    Writes the CSV fixture to a temp directory, loads it into a pandas
    DataFrame and calls generate_statistics_from_dataframe with n_jobs=-2,
    expecting a ValueError whose message matches 'Invalid n_jobs parameter.*'.
    """
    records, _, _ = self._get_csv_test(delimiter=',', with_header=True)
    input_data_path = self._write_records_to_csv(
        records, self._get_temp_dir(), 'input_data.csv')
    dataframe = pd.read_csv(input_data_path)
    # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias,
    # which no longer exists on unittest.TestCase as of Python 3.12.
    with self.assertRaisesRegex(ValueError, 'Invalid n_jobs parameter.*'):
        stats_gen_lib.generate_statistics_from_dataframe(
            dataframe=dataframe,
            stats_options=self._default_stats_options,
            n_jobs=-2)
def test_stats_gen_with_dataframe(self):
    """Generate statistics from a DataFrame and compare with the fixture.

    The CSV fixture is written to a temp directory, read back through
    pandas, and passed to generate_statistics_from_dataframe with the
    default stats options. The result must hold exactly one dataset, and
    that dataset must equal the fixture's expected one.
    """
    csv_rows, _, expected_stats = self._get_csv_test(
        delimiter=',', with_header=True)
    csv_path = self._write_records_to_csv(
        csv_rows, self._get_temp_dir(), 'input_data.csv')

    actual_stats = stats_gen_lib.generate_statistics_from_dataframe(
        dataframe=pd.read_csv(csv_path),
        stats_options=self._default_stats_options)

    self.assertLen(actual_stats.datasets, 1)
    actual_dataset = actual_stats.datasets[0]
    expected_dataset = expected_stats.datasets[0]
    test_util.assert_dataset_feature_stats_proto_equal(
        self, actual_dataset, expected_dataset)
def test_stats_gen_with_dataframe_feature_allowlist(self):
    """Generate statistics with a feature allowlist and an extra column.

    The allowlist is set to the DataFrame's columns as loaded from the CSV
    fixture. A further column, 'to_be_removed_column', is added afterwards,
    so it is not in the allowlist. The result must hold exactly one
    dataset, equal to the fixture's expected one.
    """
    csv_rows, _, expected_stats = self._get_csv_test(
        delimiter=',', with_header=True)
    csv_path = self._write_records_to_csv(
        csv_rows, self._get_temp_dir(), 'input_data.csv')
    frame = pd.read_csv(csv_path)

    # NOTE: this is an alias, not a copy, so the allowlist is set directly
    # on self._default_stats_options.
    allowlist_options = self._default_stats_options
    # Snapshot the columns before the extra one is added below.
    allowlist_options.feature_allowlist = list(frame.columns)

    extra_column_values = [[1, 2], [], None, [1], None, [3, 4], [], None]
    frame['to_be_removed_column'] = extra_column_values

    actual_stats = stats_gen_lib.generate_statistics_from_dataframe(
        dataframe=frame, stats_options=allowlist_options, n_jobs=1)

    self.assertLen(actual_stats.datasets, 1)
    test_util.assert_dataset_feature_stats_proto_equal(
        self, actual_stats.datasets[0], expected_stats.datasets[0])