def test_composite_transducer(self):
    '''Check that a composite transducer chains its member transducers.

    A composite transducer is a list of transducers applied in a fixed
    order. Composing a->b with aa->c is expected to turn 'aaba' into
    'bbbb', unlike the 'cbb' expected in the length ordering test.
    '''
    source_rows = [{"word": "aaba"}]
    expected_rows = [{"word": "bbbb"}]
    source_df = DataFrame(source_rows)
    expected_df = DataFrame(expected_rows)
    composite_path = os.path.join(self.test_transducers_path,
                                  'test_composite.json')
    needed = [{
        'source': 'word',
        'target': 'word',
        'functions': ['test_composite']
    }]
    transducer = Transducer(
        transducers_needed=needed,
        transducers_available_dir=self.test_transducers_path)

    # Run the loaded functions by hand, feeding each output into the next.
    result = source_rows[0]['word']
    for step in transducer.load_composite(composite_path):
        result = step(result)
    self.assertEqual(result, 'bbbb')

    # The same result is expected when going through the data frame API.
    actual_df = transducer.apply_to_data_frame(source_df)
    self.assertTrue(expected_df.equals(actual_df))
def test_intermediate(self):
    '''Check that the output of one rule is not fed into another rule.

    With rules a -> b and b -> c, an 'a' must end up as 'b' and must not
    continue on to 'c', so 'aba' is expected to become 'bcb'.
    '''
    csv_path = os.path.join(self.test_transducers_path,
                            'test_intermediate_transducer.csv')
    convert = Transducer().create_transducer_function(csv_path)
    self.assertEqual('bcb', convert('aba'))
def test_length_ordering(self):
    '''Check that longer rule inputs take precedence over shorter ones.

    With rules a->b and aa->c, 'aaba' is expected to become 'cbb' rather
    than 'bbbb'.
    '''
    csv_path = os.path.join(self.test_transducers_path,
                            'test_length_transducer.csv')
    convert = Transducer().create_transducer_function(csv_path)
    self.assertEqual('cbb', convert('aaba'))
def transduce(self) -> List[Dict[str, Union[ResourceManifest, pd.DataFrame]]]:
    '''Apply each data object's transducers and return the data objects.

    Every entry of ``self.data_objs`` is updated in place: its ``'data'``
    value is replaced by the result of ``Transducer.apply_to_data_frame``.
    The transducers are read from the ``'transducers'`` key of the entry's
    manifest; when that key is absent an empty list is passed instead.

    Returns:
        The transduced data objects, in the order of ``self.data_objs``.
        These are the same (mutated) objects, not copies.
    '''
    transduced_data_objs = []
    for data_obj in self.data_objs:
        df = data_obj['data']
        manifest = data_obj['manifest']
        transducers = manifest['transducers'] if "transducers" in manifest else []
        data_obj['data'] = Transducer(transducers).apply_to_data_frame(df)
        # Collect the object; previously nothing was appended, so the
        # method always returned an empty list.
        transduced_data_objs.append(data_obj)
    return transduced_data_objs
def test_lambda_transducer(self):
    '''Check that a lambda given as source text can act as a transduction.
    '''
    source_df = DataFrame([{"word": "test"}])
    expected_df = DataFrame([{"word": "TEST"}])
    transducer = Transducer([{
        'source': 'word',
        'target': 'word',
        'functions': ['lambda x: x.upper()']
    }])

    # The function is held as source text, so it has to be evaluated before
    # it can be called. It is expected to be the literal passed in above.
    lambda_source = transducer.transducers_needed[0]['functions'][0]
    upper_fn = eval(lambda_source)
    self.assertEqual('TEST', upper_fn('test'))

    actual_df = transducer.apply_to_data_frame(source_df)
    self.assertTrue(expected_df.equals(actual_df))
def test_feeding(self):
    '''Check that feeding between rules is prevented.

    If the rules a->b and b->c both exist, 'aba' is expected to produce
    'bcb', not 'ccc'.
    '''
    source_word = "aba"
    expected_word = "bcb"
    csv_path = os.path.join(self.test_transducers_path, 'test_feeding.csv')
    needed = [{
        'source': 'word',
        'target': 'word',
        'functions': [csv_path]
    }]
    transducer = Transducer(needed)
    convert = transducer.create_transducer_function(csv_path)
    self.assertEqual(expected_word, convert(source_word))
def test_incorrect_source_transducer(self):
    '''Check that an unknown source raises TransducerSourceNotFoundError.

    The transducer names 'foobar' as its source while the data only has a
    'word' entry.
    '''
    frame = DataFrame([{"word": "aaa"}])
    csv_path = os.path.join(self.test_transducers_path,
                            'test_transducer.csv')
    transducer = Transducer([{
        'source': 'foobar',
        'target': 'word',
        'functions': [csv_path]
    }])
    # The returned function is not used, but the call is kept so that
    # building the function from the CSV is still exercised.
    transducer.create_transducer_function(csv_path)
    self.assertRaises(TransducerSourceNotFoundError,
                      transducer.apply_to_data_frame, frame)
def return_formatted_config(self, form: str = "js") -> Union[str, dict]:
    '''Return the config for the Dictionary as an object, JavaScript or JSON.

    Args:
        form: Output format. ``'obj'`` returns the config dict, ``'json'``
            returns it serialised with ``json.dumps`` and ``'js'`` (the
            default) returns a ``var config = ...`` declaration followed by
            one ``var`` declaration per entry in ``self.config['adhoc_vars']``.

    Returns:
        The config in the requested format. Any other ``form`` value
        returns ``None``.
    '''
    config_template_object = {
        "L1": {
            "name": self.config['L1'],
            "lettersInLanguage": self.config['alphabet']
        },
        "L2": {
            "name": self.config['L2']
        },
        "build": datetime.datetime.today().strftime('%Y%m%d%H%M')
    }
    # Add transducer name that converts search queries
    if 'L1_compare_transducer_name' in self.config:
        config_template_object['L1']['compare'] = self.config[
            'L1_compare_transducer_name']
    # Optional paths are copied through unchanged when configured
    for optional_key in ("audio_path", "img_path"):
        if optional_key in self.config:
            config_template_object[optional_key] = self.config[optional_key]

    if form == 'obj':
        return config_template_object

    if form == 'js':
        # Add adhoc_vars: one JavaScript declaration per key/value pair
        adhoc_vars = ''
        if "adhoc_vars" in self.config:
            adhoc_vars = "\n".join(
                f"var {k} = {v};"
                for av in self.config['adhoc_vars']
                for k, v in av.items())

        # Add transducers.
        # NOTE(review): each data object overwrites the previous value, so
        # only the mapping configs of the last data object are kept.
        # Confirm whether they were meant to be merged.
        for data_obj in self.data_objs:
            transducers = []
            if "transducers" in data_obj['manifest']:
                transducers = data_obj['manifest']['transducers']
            transducer_obj = Transducer(transducers)
            config_template_object["L1"]["transducers"] = (
                transducer_obj.return_mapping_configs())

        config_js = f"var config = {json.dumps(config_template_object)}"
        if not adhoc_vars:
            return config_js
        # Terminate the config statement before the extra declarations;
        # without a separator the result is `...}var x = ...`, which is not
        # valid JavaScript.
        return config_js + ";\n" + adhoc_vars

    if form == 'json':
        return json.dumps(config_template_object)

    # Unknown format: kept as a silent None for backward compatibility.
    return None
def test_javascript_transducer(self):
    '''Check the generated JavaScript transducer by evaluating it.

    The template returned for the CSV transducer is placed between a
    declaration of the `mtd` object and a call to
    `mtd.transducers.test_transducer('aaa')`; the evaluated script is
    expected to give 'bbb'.
    '''
    expected_word = "bbb"
    csv_path = os.path.join(self.test_transducers_path,
                            'test_transducer.csv')
    transducer = Transducer([{
        'source': 'word',
        'target': 'word',
        'functions': [csv_path]
    }])
    script = "".join([
        "var mtd = {'transducers': {}};",
        transducer.return_js_template(csv_path),
        "mtd.transducers.test_transducer('aaa');",
    ])
    self.assertEqual(eval_js(script), expected_word)
def test_find_path_to_transducers(self):
    '''Check transducer path and name lookup.

    The 'norm' transducer must resolve to an existing path whose name maps
    back to 'norm'. Looking up an unknown transducer, by name or by path,
    must raise TransducerNotFoundError.
    '''
    transducer = Transducer()
    norm_path = transducer.return_transducer_path('norm')
    self.assertEqual(transducer.return_transducer_name(norm_path), 'norm')
    self.assertTrue(os.path.exists(norm_path))

    self.assertRaises(TransducerNotFoundError,
                      transducer.return_transducer_path, 'foobar')

    missing_csv = os.path.join(self.transducers_path, 'foobar.csv')
    self.assertRaises(TransducerNotFoundError,
                      transducer.return_transducer_name, missing_csv)
def test_chained_transducer(self):
    '''Check that several lambda transductions can be chained.

    Upper-casing, splitting on '-' and joining again is expected to turn
    'test-test' into 'TESTTEST'.
    '''
    transducer = Transducer([{
        'source': 'word',
        'target': 'word',
        'functions': [
            'lambda x: x.upper()', 'lambda x: x.split("-")',
            'lambda x: "".join(x)'
        ]
    }])
    value = 'test-test'
    for needed in transducer.transducers_needed:
        for fn_source in needed['functions']:
            # Each function is source text; evaluate it, then apply it to
            # the output of the previous step.
            value = eval(fn_source)(value)
    self.assertEqual('TESTTEST', value)
def test_normal_transducer(self):
    '''Sanity check an a->b transducer.

    'aaa' is expected to become 'bbb', both when calling the transducer
    function directly and when applying the transducers to a data frame.
    '''
    source_rows = [{"word": "aaa"}]
    expected_rows = [{"word": "bbb"}]
    source_df = DataFrame(source_rows)
    expected_df = DataFrame(expected_rows)

    csv_path = os.path.join(self.test_transducers_path,
                            'test_transducer.csv')
    # NOTE(review): this path also points at the .csv file, so the second
    # transducer below is built from the same file as the first. Confirm
    # whether a .json fixture was intended here.
    second_path = os.path.join(self.test_transducers_path,
                               'test_transducer.csv')

    first_transducer = Transducer([{
        'source': 'word',
        'target': 'word',
        'functions': [csv_path]
    }])
    second_transducer = Transducer([{
        'source': 'word',
        'target': 'word',
        'functions': [second_path]
    }])

    convert = first_transducer.create_transducer_function(csv_path)
    self.assertEqual(expected_rows[0]["word"],
                     convert(source_rows[0]['word']))

    first_result = first_transducer.apply_to_data_frame(source_df)
    self.assertTrue(expected_df.equals(first_result))
    second_result = second_transducer.apply_to_data_frame(source_df)
    self.assertTrue(expected_df.equals(second_result))