예제 #1
0
 def test_from_component(self):
     """Set event-book defaults in the environment, then build 'members' twice.

     The first builder is created without a contract and given an outcome
     file name; the second is created with default arguments. No assertions
     are made, so the test only fails if one of the calls raises.
     """
     # EventBook
     # NOTE(review): the SOURCE handler is set to 'EventPersistHandler' and
     # the PERSIST handler to 'EventSourceHandler' -- this looks swapped;
     # confirm against the handler module before relying on it.
     os.environ.update({
         'HADRON_DEFAULT_PATH': 'eb://grey_storage/',
         'HADRON_DEFAULT_MODULE': 'ds_engines.handlers.event_handlers',
         'HADRON_DEFAULT_SOURCE_HANDLER': 'EventPersistHandler',
         'HADRON_DEFAULT_PERSIST_HANDLER': 'EventSourceHandler',
     })
     # Portfolio
     builder = SyntheticBuilder.from_env('members', has_contract=False)
     builder.set_outcome(uri_file="synthetic_members")
     # second construction, this time with default arguments
     builder = SyntheticBuilder.from_env('members')
 def test_dict_generate(self):
     """Request '@generate' canonicals for task 'generator'.

     Checks the returned columns, an explicit size of 100, and that a
     selection on gender 'M' leaves no 'F' rows.
     """
     builder = SyntheticBuilder.from_env('generator', has_contract=False)
     tools: SyntheticIntentModel = builder.tools
     frame = pd.DataFrame()
     frame['gender'] = tools.get_category(selection=['M', 'F'], column_name='gender')
     frame['age'] = tools.get_number(from_value=18, to_value=90, column_name='age')
     expected_columns = ['age', 'gender']

     # no size given
     request = {'method': '@generate', 'task_name': 'generator'}
     generated = tools._get_canonical(data=request)
     self.assertCountEqual(expected_columns, generated.columns.to_list())

     # explicit size
     request = {**request, 'size': 100}
     generated = tools._get_canonical(data=request)
     self.assertCountEqual(expected_columns, generated.columns.to_list())
     self.assertEqual(100, generated.shape[0])

     # size plus a selection that keeps only gender 'M'
     only_male = [tools.select2dict(column='gender', condition="@=='M'")]
     request = {**request, 'selection': only_male}
     generated = tools._get_canonical(data=request)
     self.assertGreater(generated.shape[0], 0)
     female_rows = generated[generated['gender'] == 'F']
     self.assertEqual(0, female_rows.shape[0])
 def test_model_us_zip(self):
     """Run model_us_zip on a 300-row frame with a state-code filter.

     The filter includes 'FRED'; the test expects only 'NY' and 'TX' to
     appear in the result, along with six named columns and 300 rows.
     """
     builder = SyntheticBuilder.from_env('test',
                                         default_save=False,
                                         default_save_intent=False,
                                         has_contract=False)
     base = pd.DataFrame(index=range(300))
     zips = builder.tools.model_us_zip(base, state_code_filter=['NY', 'TX', 'FRED'])
     seen_states = zips['StateCode'].value_counts().index.to_list()
     self.assertCountEqual(['NY', 'TX'], seen_states)
     expected_columns = ['StateAbbrev', 'Zipcode', 'City', 'State', 'StateCode', 'Phone']
     self.assertCountEqual(expected_columns, zips.columns.to_list())
     self.assertEqual(300, zips.shape[0])
 def test_remove_unwanted_headers(self):
     """frame_selection with `headers` returns only those columns.

     Uses the titanic CSV as the source and selects rows where survived == 1.
     """
     wanted = ('survived', 'sex', 'fare')
     titanic_uri = "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/titanic.csv"
     builder = SyntheticBuilder.from_env('test',
                                         default_save=False,
                                         default_save_intent=False,
                                         has_contract=False)
     builder.set_source_uri(uri=titanic_uri)
     survivors_only = [builder.tools.select2dict(column='survived', condition='==1')]
     subset = builder.tools.frame_selection(canonical=builder.CONNECTOR_SOURCE,
                                            selection=survivors_only,
                                            headers=list(wanted))
     self.assertCountEqual(list(wanted), list(subset.columns))
     self.assertEqual(1, subset['survived'].min())
 def test_runs(self):
     """Basic smoke test: the builder exposes a SyntheticIntentModel.

     Builds a 'tester' instance with saving and template reset disabled and
     checks the type of its ``intent_model`` attribute.
     """
     im = SyntheticBuilder.from_env('tester',
                                    default_save=False,
                                    default_save_intent=False,
                                    reset_templates=False).intent_model
     # assertTrue(cls, type(im)) always passed: the class object is truthy
     # and the second argument was only used as the failure message.
     self.assertIsInstance(im, SyntheticIntentModel)
 def test_model_columns_headers(self):
     """model_concat by columns with `headers` keeps only the named columns.

     Concatenates the titanic source onto an empty 300-row frame and checks
     the resulting column names and row count.
     """
     wanted = ('survived', 'sex', 'fare')
     titanic_uri = "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/titanic.csv"
     builder = SyntheticBuilder.from_env('test',
                                         default_save=False,
                                         default_save_intent=False,
                                         has_contract=False)
     tools: SyntheticIntentModel = builder.tools
     builder.set_source_uri(uri=titanic_uri)
     base = pd.DataFrame(index=range(300))
     combined = tools.model_concat(base,
                                   other=builder.CONNECTOR_SOURCE,
                                   as_rows=False,
                                   headers=list(wanted))
     self.assertCountEqual(list(wanted), list(combined.columns))
     self.assertEqual(300, combined.shape[0])
예제 #7
0
 def test_run_pipeline_with_analytics(self):
     """Register the diabetes sample CSV as 'clinical_health' and load it.

     NOTE(review): no assertions follow the load and `tools` / `df_clinical`
     are never used here -- the test body looks truncated; confirm.
     """
     builder: SyntheticBuilder = SyntheticBuilder.from_env('sample', has_contract=False)
     tools: SyntheticIntentModel = builder.tools
     # remote sample dataset registered under a named connector, then loaded
     connector_name = 'clinical_health'
     clinical_health = 'https://assets.datacamp.com/production/repositories/628/datasets/444cdbf175d5fbf564b564bd36ac21740627a834/diabetes.csv'
     builder.add_connector_uri(connector_name, uri=clinical_health)
     df_clinical = builder.load_canonical(connector_name)
 def test_canonical_run_pipeline_dict(self):
     """Build a 'numbers' column, set up a 'sub_set' builder, then correlate.

     NOTE(review): `sub_set` is assigned but never used and nothing is
     asserted -- the test body looks truncated; confirm.
     """
     intent = self.builder.intent_model
     frame = pd.DataFrame()
     frame['numbers'] = intent.get_number(1, 2, column_name='numbers')
     # create a remote pm contract
     remote = SyntheticBuilder.from_env('sub_set', has_contract=False)
     _ = remote.tools.get_category(selection=['A', 'B'], column_name='value')
     sub_set = SyntheticCommons.param2dict()
     frame['corr_num'] = intent.correlate_numbers(frame,
                                                  offset=1,
                                                  header='numbers',
                                                  column_name='numbers',
                                                  intent_order=1)
 def test_dict_method(self):
     """Resolve a nested action dict: 'model_sample_map' over an '@empty' frame.

     Expects a (100, 5) result with exactly 30 rows of gender 'F' for a
     female_bias of 0.3.
     """
     builder = SyntheticBuilder.from_env('generator', has_contract=False)
     tools: SyntheticIntentModel = builder.tools
     # inner action: an empty canonical of 100 rows to be filled by the sample map
     empty_frame_action = tools.action2dict(method='@empty', size=100)
     request = tools.canonical2dict(method='model_sample_map',
                                    canonical=empty_frame_action,
                                    sample_map='us_persona',
                                    female_bias=0.3)
     personas = tools._get_canonical(data=request)
     self.assertEqual((100, 5), personas.shape)
     gender_counts = personas['gender'].value_counts()
     self.assertEqual(30, gender_counts.loc['F'])
예제 #10
0
 def setUp(self):
     """Prepare working directories and a fresh 'sample' builder per test.

     Sets HADRON_PM_PATH and HADRON_DEFAULT_PATH to directories under
     'work', makes sure both exist, clears the property managers, and stores
     the bootstrapped builder and its tools on the test instance.
     """
     os.environ['HADRON_PM_PATH'] = os.path.join('work', 'config')
     os.environ['HADRON_DEFAULT_PATH'] = os.path.join('work', 'data')
     # Create each directory on its own. Previously both calls shared one
     # bare `except: pass`, so an already-existing config directory skipped
     # creation of the data directory and every other error was hidden.
     for env_key in ('HADRON_PM_PATH', 'HADRON_DEFAULT_PATH'):
         os.makedirs(os.environ[env_key], exist_ok=True)
     PropertyManager._remove_all()
     self.builder: SyntheticBuilder = SyntheticBuilder.from_env(
         'sample', has_contract=False)
     self.builder.setup_bootstrap()
     self.tools: SyntheticIntentModel = self.builder.tools
예제 #11
0
 def test_run_synthetic_pipeline_seed(self):
     """Two pipeline runs with the same seed give the same summary values.

     Compares the gender value counts and the mean age of a first run
     against a second run with identical size and seed.
     """
     builder = SyntheticBuilder.from_env('tester', has_contract=False)
     builder.set_persist()
     tools: SyntheticIntentModel = builder.tools
     _ = tools.get_category(selection=['M', 'F'], relative_freq=[4, 3], column_name='gender')
     _ = tools.get_number(from_value=18, to_value=80, column_name='age')

     # first run: capture the reference values
     builder.run_synthetic_pipeline(size=1000, seed=23)
     first_run = builder.load_synthetic_canonical()
     baseline_counts = first_run['gender'].value_counts().values
     baseline_mean = first_run['age'].mean()

     # second run with the same size and seed
     builder.run_synthetic_pipeline(size=1000, seed=23)
     second_run = builder.load_synthetic_canonical()
     self.assertCountEqual(baseline_counts, second_run['gender'].value_counts().values)
     self.assertEqual(baseline_mean, second_run['age'].mean())
 def test_dict_empty(self):
     """The '@empty' canonical method honours `size` and `headers`.

     Each case is a pair of extra keyword arguments for canonical2dict and
     the shape expected from resolving the resulting action.
     """
     builder = SyntheticBuilder.from_env('generator', has_contract=False)
     tools: SyntheticIntentModel = builder.tools
     cases = [
         ({}, (0, 0)),
         ({'size': 100}, (100, 0)),
         # NOTE(review): same request as the previous case; kept so the
         # sequence of calls matches the original test.
         ({'size': 100}, (100, 0)),
         ({'size': 100, 'headers': ['A', 'B', 'C']}, (100, 3)),
     ]
     for extra_kwargs, expected_shape in cases:
         request = tools.canonical2dict(method='@empty', **extra_kwargs)
         frame = tools._get_canonical(data=request)
         self.assertEqual(expected_shape, frame.shape)
 def test_model_iterator(self):
     """Exercise model_iterator on the titanic connector.

     Covers a plain call, a marker column, a row selection, a fixed number
     of iterations, and a per-iteration action.
     """
     name = 'titanic'
     titanic_uri = "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/titanic.csv"
     builder = SyntheticBuilder.from_env('test',
                                         default_save=False,
                                         default_save_intent=False,
                                         has_contract=False)
     tools: SyntheticIntentModel = builder.tools
     builder.add_connector_uri(name, uri=titanic_uri)

     # do nothing: same shape as the loaded canonical
     iterated = tools.model_iterator(canonical=name)
     self.assertEqual(builder.load_canonical(name).shape, iterated.shape)

     # add marker: one extra column
     iterated = tools.model_iterator(canonical=name, marker_col='marker')
     self.assertEqual(builder.load_canonical(name).shape[1] + 1, iterated.shape[1])

     # with selection: row count matches frame_selection with the same selection
     survivors = [tools.select2dict(column='survived', condition="==1")]
     control = tools.frame_selection(canonical=name, selection=survivors)
     iterated = tools.model_iterator(canonical=name, marker_col='marker', selection=survivors)
     self.assertEqual(control.shape[0], iterated.shape[0])

     # with iteration: marker values 0, 1 and 2
     iterated = tools.model_iterator(canonical=name, marker_col='marker', iter_stop=3)
     markers = iterated['marker'].value_counts().index.to_list()
     self.assertCountEqual([0, 1, 2], markers)

     # with actions: the action keyed by 2 supplies markers 4 and 5
     per_iteration = {2: tools.action2dict(method='get_category', selection=[4, 5])}
     iterated = tools.model_iterator(canonical=name,
                                     marker_col='marker',
                                     iter_stop=3,
                                     iteration_actions=per_iteration)
     markers = iterated['marker'].value_counts().index.to_list()
     self.assertCountEqual([0, 1, 4, 5], markers)
예제 #14
0
 def test_set_report_persist(self):
     """After bootstrap, the last connector URI's file name starts with the project name."""
     builder = SyntheticBuilder.from_env('tester', default_save=False, has_contract=False)
     builder.setup_bootstrap(domain='domain', project_name='project_name', path=None)
     connectors = builder.report_connectors(stylise=False)
     last_uri = connectors.uri.iloc[-1]
     # only the final path component is checked
     file_name = os.path.basename(last_uri)
     self.assertTrue(file_name.startswith('project_name'))
예제 #15
0
 def test_runs(self):
     """Smoke test: from_env('tester', has_contract=False) returns exactly a SyntheticBuilder."""
     instance = SyntheticBuilder.from_env('tester', has_contract=False)
     self.assertEqual(SyntheticBuilder, type(instance))
 def test_runs(self):
     """Smoke test: from_env('tester') with default arguments returns exactly a SyntheticBuilder."""
     instance = SyntheticBuilder.from_env('tester')
     self.assertEqual(SyntheticBuilder, type(instance))
 def test_runs(self):
     """Smoke test: building from the environment under `self.name` must not raise."""
     # the instance itself is not inspected
     _ = SyntheticBuilder.from_env(self.name)
 def builder(self) -> SyntheticBuilder:
     """Return a new 'tester' SyntheticBuilder created with has_contract=False."""
     instance = SyntheticBuilder.from_env('tester', has_contract=False)
     return instance
 def builder(self) -> SyntheticBuilder:
     """Return a new 'tester' SyntheticBuilder created with default arguments."""
     instance = SyntheticBuilder.from_env('tester')
     return instance
 def test_tools(self):
     """The builder's `tool_dir` equals what DataBuilderTools.__dir__() returns."""
     instance = SyntheticBuilder.from_env(self.name)
     actual = instance.tool_dir
     expected = DataBuilderTools.__dir__()
     self.assertEqual(actual, expected)