def test_run_workflow(self): op_reg = OP_REGISTRY.add_op(timeseries, fail_if_exists=True) workflow_file = os.path.join(os.path.dirname(__file__), 'timeseries.json') self.assertTrue(os.path.exists(workflow_file), msg='missing test file %s' % workflow_file) try: # Run without --monitor and --write self.assert_main(['run', workflow_file, 'lat=13.2', 'lon=52.9'], expected_stdout=['[0.3, 0.25, 0.05, 0.4, 0.2, 0.1, 0.5]']) # Run with --monitor and without --write self.assert_main(['run', '--monitor', workflow_file, 'lat=13.2', 'lon=52.9'], expected_stdout=['[0.3, 0.25, 0.05, 0.4, 0.2, 0.1, 0.5]']) # Run with --monitor and --write self.assert_main(['run', '--monitor', '--write', 'timeseries_data.json', workflow_file, 'lat=13.2', 'lon=52.9'], expected_stdout=['Writing output to timeseries_data.json using JSON format...']) self.assertTrue(os.path.isfile('timeseries_data.json')) os.remove('timeseries_data.json') finally: OP_REGISTRY.remove_op(op_reg, fail_if_not_exists=True)
def test_run_workflow(self): op_reg = OP_REGISTRY.add_op(timeseries, fail_if_exists=True) workflow_file = os.path.join(os.path.dirname(__file__), 'timeseries.json') self.assertTrue(os.path.exists(workflow_file), msg='missing test file %s' % workflow_file) try: # Run without --monitor and --write self.assert_main( ['run', workflow_file, 'lat=13.2', 'lon=52.9'], expected_stdout=['[0.3, 0.25, 0.05, 0.4, 0.2, 0.1, 0.5]']) # Run with --monitor and without --write self.assert_main( ['run', '--monitor', workflow_file, 'lat=13.2', 'lon=52.9'], expected_stdout=['[0.3, 0.25, 0.05, 0.4, 0.2, 0.1, 0.5]']) # Run with --monitor and --write self.assert_main( [ 'run', '--monitor', '--write', 'timeseries_data.json', workflow_file, 'lat=13.2', 'lon=52.9' ], expected_stdout=[ 'Writing output to timeseries_data.json using JSON format...' ]) self.assertTrue(os.path.isfile('timeseries_data.json')) os.remove('timeseries_data.json') finally: OP_REGISTRY.remove_op(op_reg, fail_if_not_exists=True)
def test_run_op(self): op_reg = OP_REGISTRY.add_op(timeseries, fail_if_exists=True) try: # Run without --monitor and --write self.assert_main(['run', op_reg.op_meta_info.qualified_name, 'lat=13.2', 'lon=52.9'], expected_stdout=['[0.3, 0.25, 0.05, 0.4, 0.2, 0.1, 0.5]']) # Run with --monitor and without --write self.assert_main(['run', '--monitor', op_reg.op_meta_info.qualified_name, 'lat=13.2', 'lon=52.9'], expected_stdout=['[0.3, 0.25, 0.05, 0.4, 0.2, 0.1, 0.5]']) # Run with --monitor and --write self.assert_main(['run', '--monitor', '--write', 'timeseries_data.txt', op_reg.op_meta_info.qualified_name, 'lat=13.2', 'lon=52.9'], expected_stdout=['Writing output to timeseries_data.txt using TEXT format...']) self.assertTrue(os.path.isfile('timeseries_data.txt')) os.remove('timeseries_data.txt') # Run with invalid keyword self.assert_main(['run', op_reg.op_meta_info.qualified_name, 'l*t=13.2', 'lon=52.9'], expected_status=1, expected_stderr=['cate run: error: "l*t" is not a valid input name'], expected_stdout='') finally: OP_REGISTRY.remove_op(op_reg, fail_if_not_exists=True)
def test_workspace_is_part_of_context(self): def some_op(ctx: dict) -> dict: return dict(ctx) from cate.core.op import OP_REGISTRY try: op_reg = OP_REGISTRY.add_op(some_op) op_reg.op_meta_info.inputs['ctx']['context'] = True ws = Workspace( '/path', Workflow( OpMetaInfo('workspace_workflow', header=dict(description='Test!')))) ws.set_resource(op_reg.op_meta_info.qualified_name, {}, res_name='new_ctx') ws.execute_workflow('new_ctx') self.assertTrue('new_ctx' in ws.resource_cache) self.assertTrue('workspace' in ws.resource_cache['new_ctx']) self.assertIs(ws.resource_cache['new_ctx']['workspace'], ws) finally: OP_REGISTRY.remove_op(some_op)
def test_workspace_is_part_of_context(self): def some_op(ctx: dict) -> dict: return dict(ctx) from cate.core.op import OP_REGISTRY try: op_reg = OP_REGISTRY.add_op(some_op) op_reg.op_meta_info.inputs['ctx']['context'] = True ws = Workspace('/path', Workflow(OpMetaInfo('workspace_workflow', header=dict(description='Test!')))) ws.set_resource(op_reg.op_meta_info.qualified_name, {}, res_name='new_ctx') ws.execute_workflow('new_ctx') self.assertTrue('new_ctx' in ws.resource_cache) self.assertTrue('workspace' in ws.resource_cache['new_ctx']) self.assertIs(ws.resource_cache['new_ctx']['workspace'], ws) finally: OP_REGISTRY.remove_op(some_op)
def test_invoke_with_context_inputs(self): def some_op(context, workflow, workflow_id, step, step_id, invalid): return dict(context=context, workflow=workflow, workflow_id=workflow_id, step=step, step_id=step_id, invalid=invalid) from cate.core.op import OP_REGISTRY try: op_reg = OP_REGISTRY.add_op(some_op) op_reg.op_meta_info.inputs['context']['context'] = True op_reg.op_meta_info.inputs['workflow']['context'] = 'workflow' op_reg.op_meta_info.inputs['workflow_id'][ 'context'] = 'workflow.id' op_reg.op_meta_info.inputs['step']['context'] = 'step' op_reg.op_meta_info.inputs['step_id']['context'] = 'step.id' op_reg.op_meta_info.inputs['invalid']['context'] = 'gnarz[8]' step = OpStep(op_reg, node_id='test_step') workflow = Workflow(OpMetaInfo('test_workflow')) workflow.add_step(step) workflow.invoke() output = step.outputs['return'].value self.assertIsInstance(output, dict) self.assertIsInstance(output.get('context'), dict) self.assertIs(output.get('workflow'), workflow) self.assertEqual(output.get('workflow_id'), 'test_workflow') self.assertIs(output.get('step'), step) self.assertEqual(output.get('step_id'), 'test_step') self.assertEqual(output.get('invalid', 1), None) finally: OP_REGISTRY.remove_op(some_op)
def test_invoke_with_context_inputs(self): def some_op(context, workflow, workflow_id, step, step_id, invalid): return dict(context=context, workflow=workflow, workflow_id=workflow_id, step=step, step_id=step_id, invalid=invalid) from cate.core.op import OP_REGISTRY try: op_reg = OP_REGISTRY.add_op(some_op) op_reg.op_meta_info.inputs['context']['context'] = True op_reg.op_meta_info.inputs['workflow']['context'] = 'workflow' op_reg.op_meta_info.inputs['workflow_id']['context'] = 'workflow.id' op_reg.op_meta_info.inputs['step']['context'] = 'step' op_reg.op_meta_info.inputs['step_id']['context'] = 'step.id' op_reg.op_meta_info.inputs['invalid']['context'] = 'gnarz[8]' step = OpStep(op_reg, node_id='test_step') workflow = Workflow(OpMetaInfo('test_workflow')) workflow.add_step(step) workflow.invoke() output = step.outputs['return'].value self.assertIsInstance(output, dict) self.assertIsInstance(output.get('context'), dict) self.assertIs(output.get('workflow'), workflow) self.assertEqual(output.get('workflow_id'), 'test_workflow') self.assertIs(output.get('step'), step) self.assertEqual(output.get('step_id'), 'test_step') self.assertEqual(output.get('invalid', 1), None) finally: OP_REGISTRY.remove_op(some_op)
def test_to_json_dict(self): def dataset_op() -> xr.Dataset: periods = 5 temperature_data = (15 + 8 * np.random.randn(periods, 2, 2)).round(decimals=1) temperature_attrs = {'a': np.array([1, 2, 3]), 'comment': 'hot', '_FillValue': np.nan} precipitation_data = (10 * np.random.rand(periods, 2, 2)).round(decimals=1) precipitation_attrs = {'x': True, 'comment': 'wet', '_FillValue': -1.0} ds = xr.Dataset( data_vars={ 'temperature': (('time', 'lat', 'lon'), temperature_data, temperature_attrs), 'precipitation': (('time', 'lat', 'lon'), precipitation_data, precipitation_attrs) }, coords={ 'lon': np.array([12, 13]), 'lat': np.array([50, 51]), 'time': pd.date_range('2014-09-06', periods=periods) }, attrs={ 'history': 'a b c' }) return ds def scalar_dataset_op() -> xr.Dataset: ds = xr.Dataset( data_vars={ 'temperature': (('time', 'lat', 'lon'), [[[15.2]]]), 'precipitation': (('time', 'lat', 'lon'), [[[10.1]]]) }, coords={ 'lon': [12.], 'lat': [50.], 'time': [pd.to_datetime('2014-09-06')], }, attrs={ 'history': 'a b c' }) return ds def empty_dataset_op() -> xr.Dataset: ds = xr.Dataset( data_vars={ 'temperature': (('time', 'lat', 'lon'), np.ndarray(shape=(0, 0, 0), dtype=np.float32)), 'precipitation': (('time', 'lat', 'lon'), np.ndarray(shape=(0, 0, 0), dtype=np.float32)) }, coords={ 'lon': np.ndarray(shape=(0,), dtype=np.float32), 'lat': np.ndarray(shape=(0,), dtype=np.float32), 'time': np.ndarray(shape=(0,), dtype=np.datetime64), }, attrs={ 'history': 'a b c' }) return ds def data_frame_op() -> pd.DataFrame: data = {'A': [1, 2, 3, np.nan, 4, 9, np.nan, np.nan, 1, 0, 4, 6], 'B': [5, 6, 8, 7, 5, 5, 5, 9, 1, 2, 7, 6]} time = pd.date_range('2000-01-01', freq='MS', periods=12) return pd.DataFrame(data=data, index=time, dtype=float, columns=['A', 'B']) def scalar_data_frame_op() -> pd.DataFrame: data = {'A': [1.3], 'B': [5.9]} return pd.DataFrame(data=data, dtype=float, columns=['A', 'B']) def empty_data_frame_op() -> pd.DataFrame: data = {'A': [], 'B': []} return pd.DataFrame(data=data, dtype=float, columns=['A', 'B']) def geo_data_frame_op() -> gpd.GeoDataFrame: data = {'name': ['A', 'B', 'C'], 'lat': [45, 46, 47.5], 'lon': [-120, -121.2, -122.9]} df = pd.DataFrame(data, columns=['name', 'lat', 'lon']) geometry = [Point(xy) for xy in zip(df['lon'], df['lat'])] return gpd.GeoDataFrame(df, geometry=geometry) def scalar_geo_data_frame_op() -> gpd.GeoDataFrame: data = {'name': [2000 * 'A'], 'lat': [45], 'lon': [-120]} df = pd.DataFrame(data, columns=['name', 'lat', 'lon']) geometry = [Point(xy) for xy in zip(df['lon'], df['lat'])] return gpd.GeoDataFrame(df, geometry=geometry) def int_op() -> int: return 394852 def str_op() -> str: return 'Hi!' from cate.core.op import OP_REGISTRY try: OP_REGISTRY.add_op(dataset_op) OP_REGISTRY.add_op(data_frame_op) OP_REGISTRY.add_op(geo_data_frame_op) OP_REGISTRY.add_op(scalar_dataset_op) OP_REGISTRY.add_op(scalar_data_frame_op) OP_REGISTRY.add_op(scalar_geo_data_frame_op) OP_REGISTRY.add_op(empty_dataset_op) OP_REGISTRY.add_op(empty_data_frame_op) OP_REGISTRY.add_op(int_op) OP_REGISTRY.add_op(str_op) workflow = Workflow(OpMetaInfo('workspace_workflow', header=dict(description='Test!'))) workflow.add_step(OpStep(dataset_op, node_id='ds')) workflow.add_step(OpStep(data_frame_op, node_id='df')) workflow.add_step(OpStep(geo_data_frame_op, node_id='gdf')) workflow.add_step(OpStep(scalar_dataset_op, node_id='scalar_ds')) workflow.add_step(OpStep(scalar_data_frame_op, node_id='scalar_df')) workflow.add_step(OpStep(scalar_geo_data_frame_op, node_id='scalar_gdf')) workflow.add_step(OpStep(empty_dataset_op, node_id='empty_ds')) workflow.add_step(OpStep(empty_data_frame_op, node_id='empty_df')) workflow.add_step(OpStep(int_op, node_id='i')) workflow.add_step(OpStep(str_op, node_id='s')) ws = Workspace('/path', workflow) ws.execute_workflow() d_ws = ws.to_json_dict() # import pprint # pprint.pprint(d_ws) d_wf = d_ws.get('workflow') self.assertIsNotNone(d_wf) l_res = d_ws.get('resources') self.assertIsNotNone(l_res) self.assertEqual(len(l_res), 10) res_ds = l_res[0] self.assertEqual(res_ds.get('name'), 'ds') self.assertEqual(res_ds.get('dataType'), 'xarray.core.dataset.Dataset') self.assertEqual(res_ds.get('dimSizes'), dict(lat=2, lon=2, time=5)) self.assertEqual(res_ds.get('attributes'), {'history': 'a b c'}) res_ds_vars = res_ds.get('variables') self.assertIsNotNone(res_ds_vars) self.assertEqual(len(res_ds_vars), 2) res_ds_var_1 = res_ds_vars[0] self.assertEqual(res_ds_var_1.get('name'), 'precipitation') self.assertEqual(res_ds_var_1.get('dataType'), 'float64') self.assertEqual(res_ds_var_1.get('numDims'), 3) self.assertEqual(res_ds_var_1.get('shape'), (5, 2, 2)) self.assertEqual(res_ds_var_1.get('chunkSizes'), None) self.assertEqual(res_ds_var_1.get('isYFlipped'), True) self.assertEqual(res_ds_var_1.get('isFeatureAttribute'), None) self.assertEqual(res_ds_var_1.get('attributes'), dict(x=True, comment='wet', _FillValue=-1.)) res_ds_var_2 = res_ds_vars[1] self.assertEqual(res_ds_var_2.get('name'), 'temperature') self.assertEqual(res_ds_var_2.get('dataType'), 'float64') self.assertEqual(res_ds_var_2.get('numDims'), 3) self.assertEqual(res_ds_var_2.get('shape'), (5, 2, 2)) self.assertEqual(res_ds_var_2.get('chunkSizes'), None) self.assertEqual(res_ds_var_2.get('isYFlipped'), True) self.assertEqual(res_ds_var_2.get('isFeatureAttribute'), None) self.assertEqual(res_ds_var_2.get('attributes'), dict(a=[1, 2, 3], comment='hot', _FillValue=np.nan)) res_df = l_res[1] self.assertEqual(res_df.get('name'), 'df') self.assertEqual(res_df.get('dataType'), 'pandas.core.frame.DataFrame') self.assertEqual(res_df.get('attributes'), {'num_rows': 12, 'num_columns': 2}) res_df_vars = res_df.get('variables') self.assertIsNotNone(res_df_vars) self.assertEqual(len(res_df_vars), 2) res_df_var_1 = res_df_vars[0] self.assertEqual(res_df_var_1.get('name'), 'A') self.assertEqual(res_df_var_1.get('dataType'), 'float64') self.assertEqual(res_df_var_1.get('numDims'), 1) self.assertEqual(res_df_var_1.get('shape'), (12,)) self.assertEqual(res_df_var_1.get('isYFlipped'), None) self.assertEqual(res_df_var_1.get('isFeatureAttribute'), True) self.assertIsNone(res_df_var_1.get('attributes')) res_df_var_2 = res_df_vars[1] self.assertEqual(res_df_var_2.get('name'), 'B') self.assertEqual(res_df_var_2.get('dataType'), 'float64') self.assertEqual(res_df_var_2.get('numDims'), 1) self.assertEqual(res_df_var_2.get('shape'), (12,)) self.assertEqual(res_df_var_2.get('isYFlipped'), None) self.assertEqual(res_df_var_2.get('isFeatureAttribute'), True) self.assertIsNone(res_df_var_2.get('attributes')) res_gdf = l_res[2] self.assertEqual(res_gdf.get('name'), 'gdf') self.assertEqual(res_gdf.get('dataType'), 'geopandas.geodataframe.GeoDataFrame') self.assertEqual(res_gdf.get('attributes'), {'num_rows': 3, 'num_columns': 4, 'geom_type': 'Point'}) res_gdf_vars = res_gdf.get('variables') self.assertIsNotNone(res_gdf_vars) self.assertEqual(len(res_gdf_vars), 4) res_gdf_var_1 = res_gdf_vars[0] self.assertEqual(res_gdf_var_1.get('name'), 'name') self.assertEqual(res_gdf_var_1.get('dataType'), 'object') self.assertEqual(res_gdf_var_1.get('numDims'), 1) self.assertEqual(res_gdf_var_1.get('shape'), (3,)) self.assertEqual(res_gdf_var_1.get('isYFlipped'), None) self.assertEqual(res_gdf_var_1.get('isFeatureAttribute'), True) self.assertIsNone(res_gdf_var_1.get('attributes')) res_gdf_var_2 = res_gdf_vars[1] self.assertEqual(res_gdf_var_2.get('name'), 'lat') self.assertEqual(res_gdf_var_2.get('dataType'), 'float64') self.assertEqual(res_gdf_var_2.get('numDims'), 1) self.assertEqual(res_gdf_var_2.get('shape'), (3,)) self.assertEqual(res_gdf_var_2.get('isYFlipped'), None) self.assertEqual(res_gdf_var_2.get('isFeatureAttribute'), True) self.assertIsNone(res_gdf_var_2.get('attributes')) res_gdf_var_3 = res_gdf_vars[2] self.assertEqual(res_gdf_var_3.get('name'), 'lon') self.assertEqual(res_gdf_var_3.get('dataType'), 'float64') self.assertEqual(res_gdf_var_3.get('numDims'), 1) self.assertEqual(res_gdf_var_3.get('shape'), (3,)) self.assertEqual(res_gdf_var_3.get('isYFlipped'), None) self.assertEqual(res_gdf_var_3.get('isFeatureAttribute'), True) self.assertIsNone(res_gdf_var_3.get('attributes')) res_gdf_var_4 = res_gdf_vars[3] self.assertEqual(res_gdf_var_4.get('name'), 'geometry') self.assertEqual(res_gdf_var_4.get('dataType'), 'geometry') self.assertEqual(res_gdf_var_4.get('numDims'), 1) self.assertEqual(res_gdf_var_4.get('shape'), (3,)) self.assertEqual(res_gdf_var_4.get('isYFlipped'), None) self.assertEqual(res_gdf_var_4.get('isFeatureAttribute'), True) self.assertIsNone(res_gdf_var_4.get('attributes')) res_scalar_ds = l_res[3] res_scalar_ds_vars = res_scalar_ds.get('variables') self.assertIsNotNone(res_scalar_ds_vars) self.assertEqual(len(res_scalar_ds_vars), 2) scalar_values = {res_scalar_ds_vars[0].get('name'): res_scalar_ds_vars[0].get('value'), res_scalar_ds_vars[1].get('name'): res_scalar_ds_vars[1].get('value')} self.assertEqual(scalar_values, {'temperature': 15.2, 'precipitation': 10.1}) res_scalar_df = l_res[4] res_scalar_df_vars = res_scalar_df.get('variables') self.assertIsNotNone(res_scalar_df_vars) self.assertEqual(len(res_scalar_df_vars), 2) scalar_values = {res_scalar_df_vars[0].get('name'): res_scalar_df_vars[0].get('value'), res_scalar_df_vars[1].get('name'): res_scalar_df_vars[1].get('value')} self.assertEqual(scalar_values, {'A': 1.3, 'B': 5.9}) res_scalar_gdf = l_res[5] res_scalar_gdf_vars = res_scalar_gdf.get('variables') self.assertIsNotNone(res_scalar_gdf_vars) self.assertEqual(len(res_scalar_gdf_vars), 4) scalar_values = {res_scalar_gdf_vars[0].get('name'): res_scalar_gdf_vars[0].get('value'), res_scalar_gdf_vars[1].get('name'): res_scalar_gdf_vars[1].get('value'), res_scalar_gdf_vars[2].get('name'): res_scalar_gdf_vars[2].get('value'), res_scalar_gdf_vars[3].get('name'): res_scalar_gdf_vars[3].get('value')} self.assertEqual(scalar_values, {'name': (1000 * 'A') + '...', 'lat': 45, 'lon': -120, 'geometry': 'POINT (-120 45)'}) res_empty_ds = l_res[6] res_empty_ds_vars = res_empty_ds.get('variables') self.assertIsNotNone(res_empty_ds_vars) self.assertEqual(len(res_empty_ds_vars), 2) scalar_values = {res_empty_ds_vars[0].get('name'): res_empty_ds_vars[0].get('value'), res_empty_ds_vars[1].get('name'): res_empty_ds_vars[1].get('value')} self.assertEqual(scalar_values, {'temperature': None, 'precipitation': None}) res_empty_df = l_res[7] res_empty_df_vars = res_empty_df.get('variables') self.assertIsNotNone(res_empty_df_vars) self.assertEqual(len(res_empty_df_vars), 2) scalar_values = {res_empty_df_vars[0].get('name'): res_empty_df_vars[0].get('value'), res_empty_df_vars[1].get('name'): res_empty_df_vars[1].get('value')} self.assertEqual(scalar_values, {'A': None, 'B': None}) res_int = l_res[8] self.assertEqual(res_int.get('name'), 'i') self.assertEqual(res_int.get('dataType'), 'int') self.assertIsNone(res_int.get('attributes')) self.assertIsNone(res_int.get('variables')) res_str = l_res[9] self.assertEqual(res_str.get('name'), 's') self.assertEqual(res_str.get('dataType'), 'str') self.assertIsNone(res_str.get('attributes')) self.assertIsNone(res_str.get('variables')) finally: OP_REGISTRY.remove_op(dataset_op) OP_REGISTRY.remove_op(data_frame_op) OP_REGISTRY.remove_op(geo_data_frame_op) OP_REGISTRY.remove_op(scalar_dataset_op) OP_REGISTRY.remove_op(scalar_data_frame_op) OP_REGISTRY.remove_op(scalar_geo_data_frame_op) OP_REGISTRY.remove_op(empty_dataset_op) OP_REGISTRY.remove_op(empty_data_frame_op) OP_REGISTRY.remove_op(int_op) OP_REGISTRY.remove_op(str_op)
def test_to_json_dict(self): def dataset_op() -> xr.Dataset: periods = 5 temperature_data = (15 + 8 * np.random.randn(periods, 2, 2)).round( decimals=1) temperature_attrs = { 'a': np.array([1, 2, 3]), 'comment': 'hot', '_FillValue': np.nan } precipitation_data = (10 * np.random.rand(periods, 2, 2)).round( decimals=1) precipitation_attrs = { 'x': True, 'comment': 'wet', '_FillValue': -1.0 } ds = xr.Dataset(data_vars={ 'temperature': (('time', 'lat', 'lon'), temperature_data, temperature_attrs), 'precipitation': (('time', 'lat', 'lon'), precipitation_data, precipitation_attrs) }, coords={ 'lon': np.array([12, 13]), 'lat': np.array([50, 51]), 'time': pd.date_range('2014-09-06', periods=periods) }, attrs={'history': 'a b c'}) return ds def data_frame_op() -> pd.DataFrame: data = { 'A': [1, 2, 3, np.nan, 4, 9, np.nan, np.nan, 1, 0, 4, 6], 'B': [5, 6, 8, 7, 5, 5, 5, 9, 1, 2, 7, 6] } time = pd.date_range('2000-01-01', freq='MS', periods=12) return pd.DataFrame(data=data, index=time, dtype=float) def int_op() -> int: return 394852 def str_op() -> str: return 'Hi!' from cate.core.op import OP_REGISTRY try: OP_REGISTRY.add_op(dataset_op) OP_REGISTRY.add_op(data_frame_op) OP_REGISTRY.add_op(int_op) OP_REGISTRY.add_op(str_op) workflow = Workflow( OpMetaInfo('workspace_workflow', header=dict(description='Test!'))) workflow.add_step(OpStep(dataset_op, node_id='ds')) workflow.add_step(OpStep(data_frame_op, node_id='df')) workflow.add_step(OpStep(int_op, node_id='i')) workflow.add_step(OpStep(str_op, node_id='s')) ws = Workspace('/path', workflow) ws.execute_workflow() d_ws = ws.to_json_dict() # import pprint # pprint.pprint(d_ws) d_wf = d_ws.get('workflow') self.assertIsNotNone(d_wf) l_res = d_ws.get('resources') self.assertIsNotNone(l_res) self.assertEqual(len(l_res), 4) res_1 = l_res[0] self.assertEqual(res_1.get('name'), 'ds') self.assertEqual(res_1.get('dataType'), 'xarray.core.dataset.Dataset') self.assertEqual(res_1.get('dimSizes'), dict(lat=2, lon=2, time=5)) self.assertEqual(res_1.get('attributes'), {'history': 'a b c'}) res_1_vars = res_1.get('variables') self.assertIsNotNone(res_1_vars) self.assertEqual(len(res_1_vars), 2) var_1 = res_1_vars[0] self.assertEqual(var_1.get('name'), 'precipitation') self.assertEqual(var_1.get('dataType'), 'float64') self.assertEqual(var_1.get('numDims'), 3) self.assertEqual(var_1.get('shape'), (5, 2, 2)) self.assertEqual(var_1.get('chunkSizes'), None) self.assertEqual(var_1.get('isYFlipped'), True) self.assertEqual(var_1.get('isFeatureAttribute'), None) self.assertEqual(var_1.get('attributes'), dict(x=True, comment='wet', _FillValue=-1.)) var_2 = res_1_vars[1] self.assertEqual(var_2.get('name'), 'temperature') self.assertEqual(var_2.get('dataType'), 'float64') self.assertEqual(var_2.get('numDims'), 3) self.assertEqual(var_2.get('shape'), (5, 2, 2)) self.assertEqual(var_2.get('chunkSizes'), None) self.assertEqual(var_2.get('isYFlipped'), True) self.assertEqual(var_2.get('isFeatureAttribute'), None) self.assertEqual( var_2.get('attributes'), dict(a=[1, 2, 3], comment='hot', _FillValue=np.nan)) res_2 = l_res[1] self.assertEqual(res_2.get('name'), 'df') self.assertEqual(res_2.get('dataType'), 'pandas.core.frame.DataFrame') self.assertIsNone(res_2.get('attributes')) res_2_vars = res_2.get('variables') self.assertIsNotNone(res_2_vars) self.assertEqual(len(res_2_vars), 2) var_1 = res_2_vars[0] self.assertEqual(var_1.get('name'), 'A') self.assertEqual(var_1.get('dataType'), 'float64') self.assertEqual(var_1.get('numDims'), 1) self.assertEqual(var_1.get('shape'), (12, )) self.assertEqual(var_1.get('isYFlipped'), None) self.assertEqual(var_1.get('isFeatureAttribute'), None) self.assertIsNone(var_1.get('attributes')) var_2 = res_2_vars[1] self.assertEqual(var_2.get('name'), 'B') self.assertEqual(var_2.get('dataType'), 'float64') self.assertEqual(var_2.get('numDims'), 1) self.assertEqual(var_2.get('shape'), (12, )) self.assertEqual(var_2.get('isYFlipped'), None) self.assertEqual(var_2.get('isFeatureAttribute'), None) self.assertIsNone(var_2.get('attributes')) res_3 = l_res[2] self.assertEqual(res_3.get('name'), 'i') self.assertEqual(res_3.get('dataType'), 'int') self.assertIsNone(res_3.get('attributes')) self.assertIsNone(res_3.get('variables')) res_4 = l_res[3] self.assertEqual(res_4.get('name'), 's') self.assertEqual(res_4.get('dataType'), 'str') self.assertIsNone(res_4.get('attrs')) self.assertIsNone(res_4.get('variables')) finally: OP_REGISTRY.remove_op(dataset_op) OP_REGISTRY.remove_op(data_frame_op) OP_REGISTRY.remove_op(int_op) OP_REGISTRY.remove_op(str_op)
def test_to_json_dict(self): def dataset_op() -> xr.Dataset: periods = 5 temperature_data = (15 + 8 * np.random.randn(periods, 2, 2)).round(decimals=1) temperature_attrs = {'a': np.array([1, 2, 3]), 'comment': 'hot', '_FillValue': np.nan} precipitation_data = (10 * np.random.rand(periods, 2, 2)).round(decimals=1) precipitation_attrs = {'x': True, 'comment': 'wet', '_FillValue': -1.0} ds = xr.Dataset( data_vars={ 'temperature': (('time', 'lat', 'lon'), temperature_data, temperature_attrs), 'precipitation': (('time', 'lat', 'lon'), precipitation_data, precipitation_attrs) }, coords={ 'lon': np.array([12, 13]), 'lat': np.array([50, 51]), 'time': pd.date_range('2014-09-06', periods=periods) }, attrs={ 'history': 'a b c' }) return ds def scalar_dataset_op() -> xr.Dataset: ds = xr.Dataset( data_vars={ 'temperature': (('time', 'lat', 'lon'), [[[15.2]]]), 'precipitation': (('time', 'lat', 'lon'), [[[10.1]]]) }, coords={ 'lon': [12.], 'lat': [50.], 'time': [pd.to_datetime('2014-09-06')], }, attrs={ 'history': 'a b c' }) return ds def empty_dataset_op() -> xr.Dataset: ds = xr.Dataset( data_vars={ 'temperature': (('time', 'lat', 'lon'), np.ndarray(shape=(0, 0, 0), dtype=np.float32)), 'precipitation': (('time', 'lat', 'lon'), np.ndarray(shape=(0, 0, 0), dtype=np.float32)) }, coords={ 'lon': np.ndarray(shape=(0,), dtype=np.float32), 'lat': np.ndarray(shape=(0,), dtype=np.float32), 'time': np.ndarray(shape=(0,), dtype=np.datetime64), }, attrs={ 'history': 'a b c' }) return ds def data_frame_op() -> pd.DataFrame: data = {'A': [1, 2, 3, np.nan, 4, 9, np.nan, np.nan, 1, 0, 4, 6], 'B': [5, 6, 8, 7, 5, 5, 5, 9, 1, 2, 7, 6]} time = pd.date_range('2000-01-01', freq='MS', periods=12) return pd.DataFrame(data=data, index=time, dtype=float, columns=['A', 'B']) def scalar_data_frame_op() -> pd.DataFrame: data = {'A': [1.3], 'B': [5.9]} return pd.DataFrame(data=data, dtype=float, columns=['A', 'B']) def empty_data_frame_op() -> pd.DataFrame: data = {'A': [], 'B': []} return pd.DataFrame(data=data, dtype=float, columns=['A', 'B']) def geo_data_frame_op() -> gpd.GeoDataFrame: data = {'name': ['A', 'B', 'C'], 'lat': [45, 46, 47.5], 'lon': [-120, -121.2, -122.9]} df = pd.DataFrame(data, columns=['name', 'lat', 'lon']) geometry = [Point(xy) for xy in zip(df['lon'], df['lat'])] return gpd.GeoDataFrame(df, geometry=geometry) def scalar_geo_data_frame_op() -> gpd.GeoDataFrame: data = {'name': [2000 * 'A'], 'lat': [45], 'lon': [-120]} df = pd.DataFrame(data, columns=['name', 'lat', 'lon']) geometry = [Point(xy) for xy in zip(df['lon'], df['lat'])] return gpd.GeoDataFrame(df, geometry=geometry) def int_op() -> int: return 394852 def str_op() -> str: return 'Hi!' from cate.core.op import OP_REGISTRY try: OP_REGISTRY.add_op(dataset_op) OP_REGISTRY.add_op(data_frame_op) OP_REGISTRY.add_op(geo_data_frame_op) OP_REGISTRY.add_op(scalar_dataset_op) OP_REGISTRY.add_op(scalar_data_frame_op) OP_REGISTRY.add_op(scalar_geo_data_frame_op) OP_REGISTRY.add_op(empty_dataset_op) OP_REGISTRY.add_op(empty_data_frame_op) OP_REGISTRY.add_op(int_op) OP_REGISTRY.add_op(str_op) workflow = Workflow(OpMetaInfo('workspace_workflow', header=dict(description='Test!'))) workflow.add_step(OpStep(dataset_op, node_id='ds')) workflow.add_step(OpStep(data_frame_op, node_id='df')) workflow.add_step(OpStep(geo_data_frame_op, node_id='gdf')) workflow.add_step(OpStep(scalar_dataset_op, node_id='scalar_ds')) workflow.add_step(OpStep(scalar_data_frame_op, node_id='scalar_df')) workflow.add_step(OpStep(scalar_geo_data_frame_op, node_id='scalar_gdf')) workflow.add_step(OpStep(empty_dataset_op, node_id='empty_ds')) workflow.add_step(OpStep(empty_data_frame_op, node_id='empty_df')) workflow.add_step(OpStep(int_op, node_id='i')) workflow.add_step(OpStep(str_op, node_id='s')) ws = Workspace('/path', workflow) ws.execute_workflow() d_ws = ws.to_json_dict() # import pprint # pprint.pprint(d_ws) d_wf = d_ws.get('workflow') self.assertIsNotNone(d_wf) l_res = d_ws.get('resources') self.assertIsNotNone(l_res) self.assertEqual(len(l_res), 10) res_ds = l_res[0] self.assertEqual(res_ds.get('name'), 'ds') self.assertEqual(res_ds.get('dataType'), 'xarray.core.dataset.Dataset') self.assertEqual(res_ds.get('dimSizes'), dict(lat=2, lon=2, time=5)) self.assertEqual(res_ds.get('attributes'), {'history': 'a b c'}) res_ds_vars = res_ds.get('variables') self.assertIsNotNone(res_ds_vars) self.assertEqual(len(res_ds_vars), 2) res_ds_var_1 = res_ds_vars[0] self.assertEqual(res_ds_var_1.get('name'), 'precipitation') self.assertEqual(res_ds_var_1.get('dataType'), 'float64') self.assertEqual(res_ds_var_1.get('numDims'), 3) self.assertEqual(res_ds_var_1.get('shape'), (5, 2, 2)) self.assertEqual(res_ds_var_1.get('chunkSizes'), None) self.assertEqual(res_ds_var_1.get('isYFlipped'), True) self.assertEqual(res_ds_var_1.get('isFeatureAttribute'), None) self.assertEqual(res_ds_var_1.get('attributes'), dict(x=True, comment='wet', _FillValue=-1.)) res_ds_var_2 = res_ds_vars[1] self.assertEqual(res_ds_var_2.get('name'), 'temperature') self.assertEqual(res_ds_var_2.get('dataType'), 'float64') self.assertEqual(res_ds_var_2.get('numDims'), 3) self.assertEqual(res_ds_var_2.get('shape'), (5, 2, 2)) self.assertEqual(res_ds_var_2.get('chunkSizes'), None) self.assertEqual(res_ds_var_2.get('isYFlipped'), True) self.assertEqual(res_ds_var_2.get('isFeatureAttribute'), None) self.assertEqual(res_ds_var_2.get('attributes'), dict(a=[1, 2, 3], comment='hot', _FillValue=np.nan)) res_df = l_res[1] self.assertEqual(res_df.get('name'), 'df') self.assertEqual(res_df.get('dataType'), 'pandas.core.frame.DataFrame') self.assertEqual(res_df.get('attributes'), {'num_rows': 12, 'num_columns': 2}) res_df_vars = res_df.get('variables') self.assertIsNotNone(res_df_vars) self.assertEqual(len(res_df_vars), 2) res_df_var_1 = res_df_vars[0] self.assertEqual(res_df_var_1.get('name'), 'A') self.assertEqual(res_df_var_1.get('dataType'), 'float64') self.assertEqual(res_df_var_1.get('numDims'), 1) self.assertEqual(res_df_var_1.get('shape'), (12,)) self.assertEqual(res_df_var_1.get('isYFlipped'), None) self.assertEqual(res_df_var_1.get('isFeatureAttribute'), True) self.assertIsNone(res_df_var_1.get('attributes')) res_df_var_2 = res_df_vars[1] self.assertEqual(res_df_var_2.get('name'), 'B') self.assertEqual(res_df_var_2.get('dataType'), 'float64') self.assertEqual(res_df_var_2.get('numDims'), 1) self.assertEqual(res_df_var_2.get('shape'), (12,)) self.assertEqual(res_df_var_2.get('isYFlipped'), None) self.assertEqual(res_df_var_2.get('isFeatureAttribute'), True) self.assertIsNone(res_df_var_2.get('attributes')) res_gdf = l_res[2] self.assertEqual(res_gdf.get('name'), 'gdf') self.assertEqual(res_gdf.get('dataType'), 'geopandas.geodataframe.GeoDataFrame') self.assertEqual(res_gdf.get('attributes'), {'num_rows': 3, 'num_columns': 4, 'geom_type': 'Point'}) res_gdf_vars = res_gdf.get('variables') self.assertIsNotNone(res_gdf_vars) self.assertEqual(len(res_gdf_vars), 4) res_gdf_var_1 = res_gdf_vars[0] self.assertEqual(res_gdf_var_1.get('name'), 'name') self.assertEqual(res_gdf_var_1.get('dataType'), 'object') self.assertEqual(res_gdf_var_1.get('numDims'), 1) self.assertEqual(res_gdf_var_1.get('shape'), (3,)) self.assertEqual(res_gdf_var_1.get('isYFlipped'), None) self.assertEqual(res_gdf_var_1.get('isFeatureAttribute'), True) self.assertIsNone(res_gdf_var_1.get('attributes')) res_gdf_var_2 = res_gdf_vars[1] self.assertEqual(res_gdf_var_2.get('name'), 'lat') self.assertEqual(res_gdf_var_2.get('dataType'), 'float64') self.assertEqual(res_gdf_var_2.get('numDims'), 1) self.assertEqual(res_gdf_var_2.get('shape'), (3,)) self.assertEqual(res_gdf_var_2.get('isYFlipped'), None) self.assertEqual(res_gdf_var_2.get('isFeatureAttribute'), True) self.assertIsNone(res_gdf_var_2.get('attributes')) res_gdf_var_3 = res_gdf_vars[2] self.assertEqual(res_gdf_var_3.get('name'), 'lon') self.assertEqual(res_gdf_var_3.get('dataType'), 'float64') self.assertEqual(res_gdf_var_3.get('numDims'), 1) self.assertEqual(res_gdf_var_3.get('shape'), (3,)) self.assertEqual(res_gdf_var_3.get('isYFlipped'), None) self.assertEqual(res_gdf_var_3.get('isFeatureAttribute'), True) self.assertIsNone(res_gdf_var_3.get('attributes')) res_gdf_var_4 = res_gdf_vars[3] self.assertEqual(res_gdf_var_4.get('name'), 'geometry') self.assertEqual(res_gdf_var_4.get('dataType'), 'object') self.assertEqual(res_gdf_var_4.get('numDims'), 1) self.assertEqual(res_gdf_var_4.get('shape'), (3,)) self.assertEqual(res_gdf_var_4.get('isYFlipped'), None) self.assertEqual(res_gdf_var_4.get('isFeatureAttribute'), True) self.assertIsNone(res_gdf_var_4.get('attributes')) res_scalar_ds = l_res[3] res_scalar_ds_vars = res_scalar_ds.get('variables') self.assertIsNotNone(res_scalar_ds_vars) self.assertEqual(len(res_scalar_ds_vars), 2) scalar_values = {res_scalar_ds_vars[0].get('name'): res_scalar_ds_vars[0].get('value'), res_scalar_ds_vars[1].get('name'): res_scalar_ds_vars[1].get('value')} self.assertEqual(scalar_values, {'temperature': 15.2, 'precipitation': 10.1}) res_scalar_df = l_res[4] res_scalar_df_vars = res_scalar_df.get('variables') self.assertIsNotNone(res_scalar_df_vars) self.assertEqual(len(res_scalar_df_vars), 2) scalar_values = {res_scalar_df_vars[0].get('name'): res_scalar_df_vars[0].get('value'), res_scalar_df_vars[1].get('name'): res_scalar_df_vars[1].get('value')} self.assertEqual(scalar_values, {'A': 1.3, 'B': 5.9}) res_scalar_gdf = l_res[5] res_scalar_gdf_vars = res_scalar_gdf.get('variables') self.assertIsNotNone(res_scalar_gdf_vars) self.assertEqual(len(res_scalar_gdf_vars), 4) scalar_values = {res_scalar_gdf_vars[0].get('name'): res_scalar_gdf_vars[0].get('value'), res_scalar_gdf_vars[1].get('name'): res_scalar_gdf_vars[1].get('value'), res_scalar_gdf_vars[2].get('name'): res_scalar_gdf_vars[2].get('value'), res_scalar_gdf_vars[3].get('name'): res_scalar_gdf_vars[3].get('value')} self.assertEqual(scalar_values, {'name': (1000 * 'A') + '...', 'lat': 45, 'lon': -120, 'geometry': 'POINT (-120 45)'}) res_empty_ds = l_res[6] res_empty_ds_vars = res_empty_ds.get('variables') self.assertIsNotNone(res_empty_ds_vars) self.assertEqual(len(res_empty_ds_vars), 2) scalar_values = {res_empty_ds_vars[0].get('name'): res_empty_ds_vars[0].get('value'), res_empty_ds_vars[1].get('name'): res_empty_ds_vars[1].get('value')} self.assertEqual(scalar_values, {'temperature': None, 'precipitation': None}) res_empty_df = l_res[7] res_empty_df_vars = res_empty_df.get('variables') self.assertIsNotNone(res_empty_df_vars) self.assertEqual(len(res_empty_df_vars), 2) scalar_values = {res_empty_df_vars[0].get('name'): res_empty_df_vars[0].get('value'), res_empty_df_vars[1].get('name'): res_empty_df_vars[1].get('value')} self.assertEqual(scalar_values, {'A': None, 'B': None}) res_int = l_res[8] self.assertEqual(res_int.get('name'), 'i') self.assertEqual(res_int.get('dataType'), 'int') self.assertIsNone(res_int.get('attributes')) self.assertIsNone(res_int.get('variables')) res_str = l_res[9] self.assertEqual(res_str.get('name'), 's') self.assertEqual(res_str.get('dataType'), 'str') self.assertIsNone(res_str.get('attributes')) self.assertIsNone(res_str.get('variables')) finally: OP_REGISTRY.remove_op(dataset_op) OP_REGISTRY.remove_op(data_frame_op) OP_REGISTRY.remove_op(geo_data_frame_op) OP_REGISTRY.remove_op(scalar_dataset_op) OP_REGISTRY.remove_op(scalar_data_frame_op) OP_REGISTRY.remove_op(scalar_geo_data_frame_op) OP_REGISTRY.remove_op(empty_dataset_op) OP_REGISTRY.remove_op(empty_data_frame_op) OP_REGISTRY.remove_op(int_op) OP_REGISTRY.remove_op(str_op)