def test_given_a_text_column_when_profiler_using_parallel_then_profiled_dataset_is_returned():
    """Profile the "text" column with no params and compare to the stored CSV."""
    # given: the input frame and the expected result loaded from disk
    input_frame = create_source_dataframe()
    expected_frame = pd.read_csv(
        f'{EXPECTED_DATA_PATH}/expected_profiled_dataframe.csv'
    )

    # when: no params are passed, so the default parallelisation method
    # (joblib Parallel, per the original note on this test) applies
    profiled_frame = apply_text_profiling(input_frame, "text")

    # then: check_like=True makes the comparison ignore index/column order
    assert_frame_equal(expected_frame, profiled_frame, check_like=True)
def test_given_a_text_column_when_profiler_is_applied_then_profiled_dataset_is_returned():
    """Apply text profiling to the "text" column and match the expected CSV."""
    # given
    input_frame = create_source_dataframe()
    expected_path = f'{EXPECTED_DATA_PATH}/expected_profiled_dataframe.csv'
    expected_frame = pd.read_csv(expected_path)

    # when: profiling runs with no explicit params
    profiled_frame = apply_text_profiling(input_frame, "text")

    # then: compare frames, ignoring index/column ordering
    assert_frame_equal(
        expected_frame,
        profiled_frame,
        check_like=True,
    )
def test_given_a_text_column_when_profiler_is_applied_without_granular_analysis_then_profiled_dataset_is_returned():
    """Profile the "text" column with the granular option switched off."""
    # given
    input_frame = create_source_dataframe()
    expected_frame = pd.read_csv(
        f'{EXPECTED_DATA_PATH}/expected_profiled_dataframe_no_granular.csv'
    )

    # when: only the granular option is overridden
    options = {GRANULAR_OPTION: False}
    profiled_frame = apply_text_profiling(input_frame, "text", options)

    # then: compare frames, ignoring index/column ordering
    assert_frame_equal(expected_frame, profiled_frame, check_like=True)
def test_given_a_text_column_when_profiler_is_applied_without_high_level_analysis_then_profiled_dataset_is_returned():
    """Profile the "text" column with high-level and spelling-check options off."""
    # given
    input_frame = create_source_dataframe()
    expected_frame = pd.read_csv(
        f'{EXPECTED_DATA_PATH}/expected_profiled_dataframe_no_high_level.csv'
    )

    # when: both the high-level and the spelling-check options are disabled
    options = {
        HIGH_LEVEL_OPTION: False,
        SPELLING_CHECK_OPTION: False,
    }
    profiled_frame = apply_text_profiling(input_frame, "text", options)

    # then: compare frames, ignoring index/column ordering
    assert_frame_equal(expected_frame, profiled_frame, check_like=True)
def test_given_a_text_column_when_profiler_using_swifter_then_profiled_dataset_is_returned():
    """Profile the "text" column with the swifter parallelisation method selected."""
    # given
    input_frame = create_source_dataframe()
    expected_frame = pd.read_csv(
        f'{EXPECTED_DATA_PATH}/expected_profiled_dataframe.csv'
    )

    # when: the parallelisation method is explicitly set to swifter
    swifter_params = {PARALLELISATION_METHOD_OPTION: SWIFTER_METHOD}
    profiled_frame = apply_text_profiling(
        input_frame, "text", params=swifter_params
    )

    # then: the result is compared with the same expected CSV the
    # default-method tests use
    assert_frame_equal(expected_frame, profiled_frame, check_like=True)
def test_given_a_text_column_when_profiler_is_applied_with_then_all_options_disabled_then_no_profiled_dataset_is_returned():
    """With high-level, spelling-check and granular options off, expect the input back."""
    # given: snapshot the input before profiling so the expectation is
    # independent of anything done to the frame that is passed in
    input_frame = create_source_dataframe()
    untouched_copy = input_frame.copy()

    # when: high-level, spelling-check and granular options are all disabled
    disabled_options = {
        HIGH_LEVEL_OPTION: False,
        SPELLING_CHECK_OPTION: False,
        GRANULAR_OPTION: False,
    }
    profiled_frame = apply_text_profiling(input_frame, "text", disabled_options)

    # then: the result must equal the source dataframe
    assert_frame_equal(profiled_frame, untouched_copy, check_like=True)
def test_given_a_text_column_when_profiler_is_applied_grammar_check_analysis_then_profiled_dataset_is_returned():
    """Profile the "text" column with only the grammar check enabled.

    The result is compared against the stored grammar-check CSV fixture.
    """
    # NOTE(review): a test with this exact name is defined again later in
    # this file; the later definition rebinds the name, so confirm whether
    # this version is still meant to be collected or should be removed.

    # given
    source_dataframe = create_source_dataframe()
    csv_filename = f'{EXPECTED_DATA_PATH}/expected_profiled_dataframe_grammar_check.csv'
    expected_dataframe = pd.read_csv(csv_filename)

    # when: in the interest of time, only perform grammar check
    # other tests are covering for high_level and granular functionality
    actual_dataframe = apply_text_profiling(
        source_dataframe,
        "text",
        {
            'high_level': False,
            'granular': False,
            'grammar_check': True,
        },
    )

    # The expected fixture is deliberately NOT rewritten here: writing the
    # actual output over the expected CSV would mutate checked-in test data
    # and make later runs compare against whatever was produced last.

    # then
    assert_equal(expected_dataframe, actual_dataframe)
def test_given_a_text_column_when_profiler_is_applied_grammar_check_analysis_then_profiled_dataset_is_returned():
    """Profile the "text" column with only the grammar-check option enabled."""
    # given
    input_frame = create_source_dataframe()
    expected_frame = pd.read_csv(
        f'{EXPECTED_DATA_PATH}/expected_profiled_dataframe_grammar_check.csv'
    )

    # when: in the interest of time only the grammar check is performed;
    # high-level and granular functionality is covered by other tests
    grammar_only_options = {
        HIGH_LEVEL_OPTION: False,
        SPELLING_CHECK_OPTION: False,
        GRANULAR_OPTION: False,
        GRAMMAR_CHECK_OPTION: True,
    }
    profiled_frame = apply_text_profiling(
        input_frame, "text", grammar_only_options
    )

    # then
    assert_equal(expected_frame, profiled_frame)