Example #1
0
 def test_dc_replacements(self):
     '''Check that each rule loaded from the test replacement set is applied.

     A DataClean is built from ``self.tst_replacements``; for every
     (token, replacement) pair it exposes through ``replacements`` a sample
     line containing the token is processed and the token must be gone
     afterwards.  The intent stated by the original test is that rules
     with an invalid format are not included in ``replacements``; only the
     rules that are present get exercised here.
     '''
     curr_dc = DataClean(replacements=self.tst_replacements)
     print('-------------------------')
     for k, v in curr_dc.replacements.items():
         print('%s=%s' % (k, v))
         print('--------------------------------------------------------')
         print('testing for %s, to be replaced with %s' % (k, v))
         test_string = 'recording_7.wav %s Wow that is awesome' % k
         print('...test string >>%s' % test_string)
         # Contents would normally be supplied when the instance is
         # created; they are assigned directly here only so that a single
         # instance can be reused for every rule under test.
         curr_dc.contents = test_string
         curr_dc.process_contents(show_results=False)
         # Decode once and reuse the text for both the log line and the
         # assertion.
         processed = curr_dc.contents.decode()
         print('...processed as>>%s' % processed)
         # A single membership check replaces the former pair of
         # find()-based assertions: the second one
         # (assertGreater(0, find(k))) could only pass when find(k) == -1,
         # so it repeated the first check and never looked at ``v``.
         # TODO(review): assert that ``v`` appears in ``processed`` once the
         # expected form of the replacement after processing is confirmed.
         self.assertNotIn(
             k,
             processed,
             '...seems the original token is still in the string')
Example #2
0
    def test_dc_process_contents_clean_find_n_replace_symbols(self):
        '''Ensure a symbol is removed at every position it occurs in.

        For each character of the 'SYMBOL_AS_SPACE' rule in
        ``self.dc.replacements`` (except '[', ']' and ',', which are
        skipped), a sample line is built with the symbol standing alone,
        three times in a row, and at the very end.  After processing, the
        symbol must not occur anywhere in the resulting text.
        '''
        ignore_sym = ('[', ']', ',')

        for k, v in self.dc.replacements.items():
            if k != 'SYMBOL_AS_SPACE':
                continue
            for each in v:
                if each in ignore_sym:
                    continue
                print(
                    '--------------------------------------------------------'
                )
                print(
                    'testing for symbol "%s", to be removed (i.e. replaced with '
                    % each)
                test_string = (
                    'recording_7.wav %s Wow %s%s%s that is awesome %s'
                    % ((each,) * 5))
                print('...test string >>%s' % test_string)
                tst_dc = DataClean(contents=test_string)
                tst_dc.process_contents(show_results=False)
                # Decode once; reused for logging and for the assertion.
                processed = tst_dc.contents.decode()
                print('...processed as>>%s' % processed)
                # Check the symbol itself.  The earlier version searched for
                # the rule name ``k`` ('SYMBOL_AS_SPACE'), which never occurs
                # in the test string, so the assertions could not fail.
                # NOTE(review): if a listed symbol is expected to survive
                # inside the leading file name (e.g. '.' or '_'), this check
                # needs to be narrowed to the text after the file name.
                self.assertNotIn(
                    each,
                    processed,
                    '...seems the original token is still in the string'
                )
 def test_dc_process_contents_clean_find_n_replace_end_of_string(self):
     '''Ensure a token placed at the end of the string is replaced.

     Every rule in ``self.dc.replacements`` except 'SYMBOL_AS_SPACE' is
     exercised: the rule's token is appended to a sample line, the line is
     processed by a fresh DataClean, and the token must no longer occur in
     the processed text.
     '''
     for k, v in self.dc.replacements.items():
         if k == 'SYMBOL_AS_SPACE':
             # This rule is skipped here, exactly as in the original test.
             continue
         print('--------------------------------------------------------')
         print('testing for %s, to be replaced with %s' % (k, v))
         test_string = 'recording_7.wav Wow that is awesome %s' % k
         print('...test string >>%s' % test_string)
         tst_dc = DataClean(contents=test_string)
         tst_dc.process_contents(show_results=False)
         # Decode once; reused for logging and for the assertion.
         processed = tst_dc.contents.decode()
         print('...processed as>>%s' % processed)
         # One membership check replaces the former pair of find()-based
         # assertions; the second (assertGreater(0, find(k))) could only
         # pass when find(k) == -1 and so duplicated the first.
         # TODO(review): assert that ``v`` appears in ``processed`` once the
         # expected form of the replacement after processing is confirmed.
         self.assertNotIn(
             k,
             processed,
             '...seems the original token is still in the string')
Example #4
0
    def test_dc_datasource_data_to_castable(self):
        '''Process a file's contents and build the 'cool_cas' castable.

        A DataClean is created with the test connection and the path in
        ``self.full_filename``; the 'results' entry of what
        ``process_contents`` returns is handed to ``create_castable`` with
        ``replace`` and ``promote`` both enabled.  The test makes no explicit
        assertions, so it passes as long as none of the calls raise.
        '''
        cleaner = DataClean(
            conn=self.conn,
            contents_as_path=self.full_filename,
        )
        outcome = cleaner.process_contents(show_results=False)
        results = outcome['results']
        cleaner.create_castable(results, 'cool_cas', replace=True, promote=True)