def test_dc_replacements(self):
    '''Check that each rule loaded from the test replacement set is applied.

    A DataClean is built from ``self.tst_replacements``. For every
    ``token -> replacement`` pair the instance exposes through
    ``replacements``, a sample sentence containing the token is processed
    and the result must (a) no longer contain the token and (b) contain
    the replacement. Only rules present in ``curr_dc.replacements`` are
    exercised; the intent stated by the original author is that rules
    with an invalid format never make it into that mapping.
    '''
    curr_dc = DataClean(replacements=self.tst_replacements)
    print('-------------------------')
    for k, v in curr_dc.replacements.items():
        print('%s=%s' % (k, v))
        print('--------------------------------------------------------')
        print('testing for %s, to be replaced with %s' % (k, v))
        test_string = 'recording_7.wav %s Wow that is awesome' % k
        print('...test string >>%s' % test_string)
        # Contents would normally be supplied when the instance is
        # created; they are overwritten here only for testing, so one
        # instance can be reused for every rule.
        curr_dc.contents = test_string
        curr_dc.process_contents(show_results=False)
        # Decode once and reuse the text for the print and the assertions.
        processed = curr_dc.contents.decode()
        print('...processed as>>%s' % processed)
        self.assertNotIn(
            k, processed,
            '...seems the original token is still in the string')
        # Verify the replacement itself; checking the old token a second
        # time could never fail once the assertion above has passed.
        # NOTE(review): assumes process_contents leaves the replacement
        # text verbatim (no case or whitespace normalisation) - confirm.
        self.assertIn(
            v, processed,
            '...seems like the new token was not applied')
def test_dc_process_contents_clean_find_n_replace_symbols(self):
    '''Ensure symbols are replaced at every location where they occur.

    Takes the ``SYMBOL_AS_SPACE`` entry of ``self.dc.replacements``,
    walks its value one character at a time and, for each symbol, cleans
    a sentence that holds the symbol in several positions (on its own,
    three in a row, and at the very end). The symbol must not survive in
    the cleaned text.
    '''
    # Characters that only delimit the symbol list inside the rule value.
    ignore_sym = ['[', ']', ',']
    for k, v in self.dc.replacements.items():
        if k != 'SYMBOL_AS_SPACE':
            continue
        for each in v:
            # NOTE(review): whitespace is skipped as well - it is
            # presumably just padding inside the rule value, and the
            # sentence below always contains spaces, so its removal
            # could not be asserted anyway. Confirm the value format.
            if each in ignore_sym or each.isspace():
                continue
            print(
                '--------------------------------------------------------'
            )
            print(
                'testing for symbol "%s", to be removed '
                '(i.e. replaced with " ")' % each)
            test_string = 'recording_7.wav %s Wow %s%s%s that is awesome %s' % (
                each, each, each, each, each)
            print('...test string >>%s' % test_string)
            tst_dc = DataClean(contents=test_string)
            tst_dc.process_contents(show_results=False)
            # Decode once and reuse the text below.
            processed = tst_dc.contents.decode()
            print('...processed as>>%s' % processed)
            # The leading 'recording_7.wav' token itself contains '_' and
            # '.', so only the text after the first token is inspected.
            # NOTE(review): confirm whether DataClean is expected to
            # clean the file-name token too; if so, check `processed`.
            parts = processed.split(None, 1)
            cleaned_text = parts[-1] if parts else ''
            # Search for the symbol under test, not the rule key: the key
            # 'SYMBOL_AS_SPACE' never appears in the sentence, so looking
            # for it would pass regardless of what was cleaned.
            self.assertNotIn(
                each, cleaned_text,
                '...seems the original token is still in the string')
            # No positive check for the replacement: judging by the rule
            # name it is a space, which the sentence already contains.
def test_dc_process_contents_clean_find_n_replace_end_of_string(self):
    '''Ensure a token is replaced when it is the last thing in the string.

    For every rule in ``self.dc.replacements`` other than
    ``SYMBOL_AS_SPACE``, a sentence ending in the rule's token is cleaned
    by a fresh DataClean; the result must no longer contain the token and
    must contain the rule's replacement.
    '''
    for k, v in self.dc.replacements.items():
        # The symbol list is not a plain token -> replacement rule.
        if k == 'SYMBOL_AS_SPACE':
            continue
        print('--------------------------------------------------------')
        print('testing for %s, to be replaced with %s' % (k, v))
        test_string = 'recording_7.wav Wow that is awesome %s' % k
        print('...test string >>%s' % test_string)
        tst_dc = DataClean(contents=test_string)
        tst_dc.process_contents(show_results=False)
        # Decode once and reuse the text for the print and the assertions.
        processed = tst_dc.contents.decode()
        print('...processed as>>%s' % processed)
        self.assertNotIn(
            k, processed,
            '...seems the original token is still in the string')
        # Verify the replacement itself; checking the old token a second
        # time could never fail once the assertion above has passed.
        # NOTE(review): assumes process_contents leaves the replacement
        # text verbatim, including at the end of the string (no trimming
        # or case normalisation) - confirm.
        self.assertIn(
            v, processed,
            '...seems like the new token was not applied')
def test_dc_datasource_data_to_castable(self):
    '''Clean the file at ``self.full_filename`` and build a castable from it.

    The DataClean is created with the test connection and the file path,
    the whole contents are processed, and the ``'results'`` entry of the
    response is handed to ``create_castable`` under the name
    ``'cool_cas'``. The test makes no assertions of its own; it passes as
    long as none of the calls raises.
    '''
    cleaner = DataClean(
        conn=self.conn,
        contents_as_path=self.full_filename,
    )
    outcome = cleaner.process_contents(show_results=False)
    cleaned_results = outcome['results']
    cleaner.create_castable(
        cleaned_results,
        'cool_cas',
        replace=True,
        promote=True,
    )