def test_from_component(self):
    """Create the 'members' component from event-book environment defaults.

    The HADRON_DEFAULT_* variables are applied only for the duration of the
    test so they do not leak into tests that run afterwards. The builder is
    first created with ``has_contract=False``, given an outcome, and then
    created a second time by name only.
    """
    from unittest import mock

    # EventBook
    event_book_env = {
        'HADRON_DEFAULT_PATH': 'eb://grey_storage/',
        'HADRON_DEFAULT_MODULE': 'ds_engines.handlers.event_handlers',
        # NOTE(review): the original assigned 'EventPersistHandler' to the SOURCE
        # variable and 'EventSourceHandler' to the PERSIST variable; the names
        # indicate they were swapped - confirm against ds_engines.handlers.event_handlers.
        'HADRON_DEFAULT_SOURCE_HANDLER': 'EventSourceHandler',
        'HADRON_DEFAULT_PERSIST_HANDLER': 'EventPersistHandler',
    }
    # patch.dict restores os.environ to its previous content when the block exits
    with mock.patch.dict(os.environ, event_book_env):
        # Portfolio
        builder = SyntheticBuilder.from_env('members', has_contract=False)
        builder.set_outcome(uri_file="synthetic_members")
        # second creation omits has_contract; the call completing without error is the check
        SyntheticBuilder.from_env('members')
def test_dict_generate(self):
    """Check the '@generate' canonical dictionary against the 'generator' task.

    Two intents ('gender' and 'age') are registered on the 'generator'
    builder, then ``_get_canonical`` is called with a plain request, a sized
    request, and a sized request carrying a selection on 'gender'.
    """
    builder = SyntheticBuilder.from_env('generator', has_contract=False)
    tools: SyntheticIntentModel = builder.tools
    expected_columns = ['age', 'gender']

    frame = pd.DataFrame()
    frame['gender'] = tools.get_category(selection=['M', 'F'], column_name='gender')
    frame['age'] = tools.get_number(from_value=18, to_value=90, column_name='age')

    # plain request: only the column names are checked
    request = {'method': '@generate', 'task_name': 'generator'}
    generated = tools._get_canonical(data=request)
    self.assertCountEqual(expected_columns, generated.columns.to_list())

    # sized request: the row count must match 'size'
    request = {'method': '@generate', 'task_name': 'generator', 'size': 100}
    generated = tools._get_canonical(data=request)
    self.assertCountEqual(expected_columns, generated.columns.to_list())
    self.assertEqual(100, generated.shape[0])

    # sized request with a selection: no 'F' rows may remain
    only_male = [tools.select2dict(column='gender', condition="@=='M'")]
    request = {
        'method': '@generate',
        'task_name': 'generator',
        'size': 100,
        'selection': only_male,
    }
    generated = tools._get_canonical(data=request)
    self.assertGreater(generated.shape[0], 0)
    female_rows = generated[generated['gender'] == 'F']
    self.assertEqual(0, female_rows.shape[0])
def test_model_us_zip(self):
    """Check ``model_us_zip`` output for a 300-row frame filtered by state code.

    The filter contains 'NY', 'TX' and 'FRED'; the test expects only 'NY' and
    'TX' to appear in 'StateCode', six named columns, and 300 rows.
    """
    builder = SyntheticBuilder.from_env(
        'test',
        default_save=False,
        default_save_intent=False,
        has_contract=False,
    )
    empty_frame = pd.DataFrame(index=range(300))
    modelled = builder.tools.model_us_zip(empty_frame, state_code_filter=['NY', 'TX', 'FRED'])

    state_codes = modelled['StateCode'].value_counts().index.to_list()
    self.assertCountEqual(['NY', 'TX'], state_codes)

    expected_headers = ['StateAbbrev', 'Zipcode', 'City', 'State', 'StateCode', 'Phone']
    self.assertCountEqual(expected_headers, modelled.columns.to_list())
    self.assertEqual(300, modelled.shape[0])
def test_remove_unwanted_headers(self):
    """Check ``frame_selection`` keeps only the requested headers.

    The titanic CSV is set as the builder source, rows are selected with the
    condition '==1' on 'survived', and the result is limited to three headers.
    """
    builder = SyntheticBuilder.from_env(
        'test',
        default_save=False,
        default_save_intent=False,
        has_contract=False,
    )
    builder.set_source_uri(uri="https://raw.githubusercontent.com/mwaskom/seaborn-data/master/titanic.csv")

    wanted = ['survived', 'sex', 'fare']
    survived_only = [builder.tools.select2dict(column='survived', condition='==1')]
    selected = builder.tools.frame_selection(
        canonical=builder.CONNECTOR_SOURCE,
        selection=survived_only,
        headers=['survived', 'sex', 'fare'],
    )

    self.assertCountEqual(wanted, list(selected.columns))
    # with the selection applied, the smallest 'survived' value must be 1
    self.assertEqual(1, selected['survived'].min())
def test_runs(self):
    """Basic smoke test: the builder's intent model is a SyntheticIntentModel."""
    builder = SyntheticBuilder.from_env('tester', default_save=False, default_save_intent=False,
                                        reset_templates=False)
    im = builder.intent_model
    # assertTrue(a, b) uses b only as the failure message and passes for any truthy a,
    # so the original check could never fail; verify the type explicitly instead.
    self.assertIsInstance(im, SyntheticIntentModel)
def test_model_columns_headers(self):
    """Check ``model_concat`` with ``headers`` against the titanic source.

    A 300-row empty frame is concatenated column-wise (``as_rows=False``)
    with the builder source; the result must have exactly the three
    requested columns and 300 rows.
    """
    builder = SyntheticBuilder.from_env(
        'test',
        default_save=False,
        default_save_intent=False,
        has_contract=False,
    )
    tools: SyntheticIntentModel = builder.tools
    builder.set_source_uri(uri="https://raw.githubusercontent.com/mwaskom/seaborn-data/master/titanic.csv")

    base_frame = pd.DataFrame(index=range(300))
    combined = tools.model_concat(
        base_frame,
        other=builder.CONNECTOR_SOURCE,
        as_rows=False,
        headers=['survived', 'sex', 'fare'],
    )

    self.assertCountEqual(['survived', 'sex', 'fare'], list(combined.columns))
    self.assertEqual(300, combined.shape[0])
def test_run_pipeline_with_analytics(self):
    """Register the diabetes CSV as the 'clinical_health' connector and load it.

    No assertions are made in the visible body; the test passes when the
    connector can be added and loaded without raising.
    """
    builder: SyntheticBuilder = SyntheticBuilder.from_env('sample', has_contract=False)
    tools: SyntheticIntentModel = builder.tools

    # load the sample dataset to analyse and rename columns
    clinical_health = 'https://assets.datacamp.com/production/repositories/628/datasets/444cdbf175d5fbf564b564bd36ac21740627a834/diabetes.csv'
    builder.add_connector_uri('clinical_health', uri=clinical_health)
    clinical_frame = builder.load_canonical('clinical_health')
def test_canonical_run_pipeline_dict(self):
    """Exercise number generation, a second 'sub_set' builder and a correlation.

    Uses the fixture builder's intent model to create a 'numbers' column,
    registers a category intent on a separately created 'sub_set' builder,
    and finally correlates the 'numbers' column with an offset of 1.
    No assertions are made in the visible body.
    """
    intent = self.builder.intent_model
    frame = pd.DataFrame()
    frame['numbers'] = intent.get_number(1, 2, column_name='numbers')

    # create a remote pm contract
    remote = SyntheticBuilder.from_env('sub_set', has_contract=False)
    _ = remote.tools.get_category(selection=['A', 'B'], column_name='value')
    sub_set_params = SyntheticCommons.param2dict()

    correlated = intent.correlate_numbers(
        frame,
        offset=1,
        header='numbers',
        column_name='numbers',
        intent_order=1,
    )
    frame['corr_num'] = correlated
def test_dict_method(self):
    """Check a nested canonical dictionary that runs 'model_sample_map'.

    The inner action is an '@empty' canonical of size 100; the outer request
    applies the 'us_persona' sample map with ``female_bias=0.3``. The test
    expects a (100, 5) result with 30 'F' values in 'gender'.
    """
    builder = SyntheticBuilder.from_env('generator', has_contract=False)
    tools: SyntheticIntentModel = builder.tools

    empty_canonical = tools.action2dict(method='@empty', size=100)
    request = tools.canonical2dict(
        method='model_sample_map',
        canonical=empty_canonical,
        sample_map='us_persona',
        female_bias=0.3,
    )
    persona = tools._get_canonical(data=request)

    self.assertEqual((100, 5), persona.shape)
    gender_counts = persona['gender'].value_counts()
    self.assertEqual(30, gender_counts.loc['F'])
def setUp(self):
    """Prepare the working directories and a fresh 'sample' builder for each test.

    Sets HADRON_PM_PATH and HADRON_DEFAULT_PATH to folders under 'work',
    ensures both folders exist, clears the PropertyManager, then creates and
    bootstraps the builder stored on ``self.builder`` (its tools are stored
    on ``self.tools``).

    Raises:
        OSError: if either working directory cannot be created.
    """
    os.environ['HADRON_PM_PATH'] = os.path.join('work', 'config')
    os.environ['HADRON_DEFAULT_PATH'] = os.path.join('work', 'data')
    # Create each directory independently: with a single try/except around both
    # calls, an already existing config folder skipped creation of the data folder,
    # and the bare except hid every other failure as well.
    for env_key in ('HADRON_PM_PATH', 'HADRON_DEFAULT_PATH'):
        os.makedirs(os.environ[env_key], exist_ok=True)
    PropertyManager._remove_all()
    self.builder: SyntheticBuilder = SyntheticBuilder.from_env('sample', has_contract=False)
    self.builder.setup_bootstrap()
    self.tools: SyntheticIntentModel = self.builder.tools
def test_run_synthetic_pipeline_seed(self):
    """Check that two pipeline runs with the same seed give the same data.

    The pipeline is run twice with ``size=1000`` and ``seed=23``; the gender
    value counts and the mean age of both runs are compared.
    """
    builder = SyntheticBuilder.from_env('tester', has_contract=False)
    builder.set_persist()
    tools: SyntheticIntentModel = builder.tools
    _ = tools.get_category(selection=['M', 'F'], relative_freq=[4, 3], column_name='gender')
    _ = tools.get_number(from_value=18, to_value=80, column_name='age')

    # first run: capture the reference distribution and mean
    builder.run_synthetic_pipeline(size=1000, seed=23)
    first_run = builder.load_synthetic_canonical()
    reference_counts = first_run['gender'].value_counts().values
    reference_mean = first_run['age'].mean()

    # second run with the identical seed
    builder.run_synthetic_pipeline(size=1000, seed=23)
    second_run = builder.load_synthetic_canonical()

    self.assertCountEqual(reference_counts, second_run['gender'].value_counts().values)
    self.assertEqual(reference_mean, second_run['age'].mean())
def test_dict_empty(self):
    """Check the shape produced by the '@empty' canonical dictionary.

    Each case pairs the extra keyword arguments given to ``canonical2dict``
    with the shape expected back from ``_get_canonical``.
    """
    builder = SyntheticBuilder.from_env('generator', has_contract=False)
    tools: SyntheticIntentModel = builder.tools

    # the sized case is listed twice, mirroring the sequence of calls the test has always made
    cases = [
        ({}, (0, 0)),
        ({'size': 100}, (100, 0)),
        ({'size': 100}, (100, 0)),
        ({'size': 100, 'headers': ['A', 'B', 'C']}, (100, 3)),
    ]
    for extra_kwargs, expected_shape in cases:
        action = tools.canonical2dict(method='@empty', **extra_kwargs)
        canonical = tools._get_canonical(data=action)
        self.assertEqual(expected_shape, canonical.shape)
def test_model_iterator(self):
    """Exercise ``model_iterator`` on the titanic connector in five variants.

    Covers: no options, a marker column, a selection, a fixed number of
    iterations, and per-iteration actions.
    """
    builder = SyntheticBuilder.from_env(
        'test',
        default_save=False,
        default_save_intent=False,
        has_contract=False,
    )
    tools: SyntheticIntentModel = builder.tools
    builder.add_connector_uri('titanic', uri="https://raw.githubusercontent.com/mwaskom/seaborn-data/master/titanic.csv")

    # do nothing: the shape must equal that of the loaded connector
    iterated = tools.model_iterator(canonical='titanic')
    source_shape = builder.load_canonical('titanic').shape
    self.assertEqual(source_shape, iterated.shape)

    # add marker: exactly one more column than the loaded connector
    iterated = tools.model_iterator(canonical='titanic', marker_col='marker')
    source_width = builder.load_canonical('titanic').shape[1]
    self.assertEqual(source_width + 1, iterated.shape[1])

    # with selection: same row count as frame_selection with the same selection
    survived = [tools.select2dict(column='survived', condition="==1")]
    control = tools.frame_selection(canonical='titanic', selection=survived)
    iterated = tools.model_iterator(canonical='titanic', marker_col='marker', selection=survived)
    self.assertEqual(control.shape[0], iterated.shape[0])

    # with iteration: marker values 0, 1 and 2 are expected for iter_stop=3
    iterated = tools.model_iterator(canonical='titanic', marker_col='marker', iter_stop=3)
    markers = iterated['marker'].value_counts().index.to_list()
    self.assertCountEqual([0, 1, 2], markers)

    # with actions: iteration 2 uses a get_category action choosing from 4 and 5
    category_action = tools.action2dict(method='get_category', selection=[4, 5])
    per_iteration = {2: category_action}
    iterated = tools.model_iterator(
        canonical='titanic',
        marker_col='marker',
        iter_stop=3,
        iteration_actions=per_iteration,
    )
    markers = iterated['marker'].value_counts().index.to_list()
    self.assertCountEqual([0, 1, 4, 5], markers)
def test_set_report_persist(self):
    """Check the last connector URI file name starts with the project name.

    The builder is bootstrapped with ``project_name='project_name'`` and the
    final entry of the connector report's ``uri`` column is inspected.
    """
    builder = SyntheticBuilder.from_env('tester', default_save=False, has_contract=False)
    builder.setup_bootstrap(domain='domain', project_name='project_name', path=None)

    connectors = builder.report_connectors(stylise=False)
    last_uri = connectors.uri.iloc[-1]
    # only the final path component matters for the prefix check
    file_name = os.path.basename(last_uri)
    self.assertTrue(file_name.startswith('project_name'))
def test_runs(self):
    """Smoke test: ``from_env`` without a contract returns exactly a SyntheticBuilder."""
    instance = SyntheticBuilder.from_env('tester', has_contract=False)
    created_type = type(instance)
    self.assertEqual(SyntheticBuilder, created_type)
def test_runs(self):
    """Smoke test: ``from_env('tester')`` returns exactly a SyntheticBuilder."""
    instance = SyntheticBuilder.from_env('tester')
    created_type = type(instance)
    self.assertEqual(SyntheticBuilder, created_type)
def test_runs(self):
    """Smoke test: a builder can be created for ``self.name`` without raising."""
    component_name = self.name
    # the created instance is not inspected; completing the call is the whole check
    SyntheticBuilder.from_env(component_name)
def builder(self) -> SyntheticBuilder:
    """Return a 'tester' SyntheticBuilder created with ``has_contract=False``."""
    instance = SyntheticBuilder.from_env('tester', has_contract=False)
    return instance
def builder(self) -> SyntheticBuilder:
    """Return a 'tester' SyntheticBuilder created from the environment."""
    instance = SyntheticBuilder.from_env('tester')
    return instance
def test_tools(self):
    """Check the builder's ``tool_dir`` equals ``DataBuilderTools.__dir__()``."""
    fb = SyntheticBuilder.from_env(self.name)
    # read the builder attribute first, then the reference listing, as the test always has
    reported = fb.tool_dir
    reference = DataBuilderTools.__dir__()
    self.assertEqual(reported, reference)