def test_from_json_dict_invalids(self):
    """A Workflow-JSON lacking "qualified_name" is rejected with a ValueError."""
    incomplete_json = json.loads('{"header": {}}')

    with self.assertRaises(ValueError) as caught:
        Workflow.from_json_dict(incomplete_json)

    expected_message = 'missing mandatory property "qualified_name" in Workflow-JSON'
    self.assertEqual(str(caught.exception), expected_message)
def create_example_3_steps_workflow(cls):
    """Build a three-step example workflow.

    Wiring: workflow input ``p`` feeds ``op1.x``; ``op1.y`` feeds both
    ``op2.a`` and ``op3.u``; ``op2.b`` feeds ``op3.v``; ``op3.w`` is the
    workflow output ``q``.

    :return: the tuple ``(step1, step2, step3, workflow)``
    """
    step1 = OpStep(op1, node_id='op1')
    step2 = OpStep(op2, node_id='op2')
    step3 = OpStep(op3, node_id='op3')

    meta_info = OpMetaInfo('myWorkflow',
                           inputs=OrderedDict(p={}),
                           outputs=OrderedDict(q={}))
    workflow = Workflow(meta_info)
    workflow.add_steps(step1, step2, step3)

    # Connect the ports from the workflow input down to the workflow output.
    step1.inputs.x.source = workflow.inputs.p
    step2.inputs.a.source = step1.outputs.y
    step3.inputs.u.source = step1.outputs.y
    step3.inputs.v.source = step2.outputs.b
    workflow.outputs.q.source = step3.outputs.w

    return step1, step2, step3, workflow
def execute_workflow(self, res_name: str = None, monitor: Monitor = Monitor.NONE):
    """Execute the workspace's workflow, entirely or only as far as needed for one resource.

    :param res_name: Name of the resource (workflow node) to compute. If it is
        ``None`` or empty, all of ``self.workflow.sorted_steps`` are executed.
    :param monitor: Progress monitor passed on to ``Workflow.invoke_steps``.
    :return: The output value of the last executed step, or ``None`` if there
        was no step to execute (e.g. an empty workflow).
    :raise WorkspaceError: If *res_name* is given but no such node exists.
    """
    self._assert_open()

    if not res_name:
        steps = self.workflow.sorted_steps
    else:
        res_step = self.workflow.find_node(res_name)
        if res_step is None:
            raise WorkspaceError('Resource "%s" not found' % res_name)
        steps = self.workflow.find_steps_to_compute(res_step.id)

    Workflow.invoke_steps(steps, value_cache=self._resource_cache, monitor=monitor)

    # An empty workflow has no last step; indexing steps[-1] would raise IndexError.
    if len(steps) == 0:
        return None
    return steps[-1].get_output_value()
def test_repr_svg(self):
    """The SVG representation of a wired three-step workflow is not None."""
    step1 = OpStep(Op1, node_id='op1')
    step2 = OpStep(Op2, node_id='op2')
    step3 = OpStep(Op3, node_id='op3')

    meta_info = OpMetaInfo('my_workflow',
                           input_dict=OrderedDict(p={}),
                           output_dict=OrderedDict(q={}))
    workflow = Workflow(meta_info)
    workflow.add_steps(step1, step2, step3)

    # p -> op1 -> (op2, op3) -> q
    step1.input.x.source = workflow.input.p
    step2.input.a.source = step1.output.y
    step3.input.u.source = step1.output.y
    step3.input.v.source = step2.output.b
    workflow.output.q.source = step3.output.w

    svg_text = workflow._repr_svg_()
    self.assertIsNotNone(svg_text)
def test_to_json_dict(self):
    """A WorkflowStep serialises to the expected JSON dictionary."""
    resource = get_resource('workflows/three_ops.json')
    workflow = Workflow.load(resource)
    step = WorkflowStep(workflow, resource, node_id='jojo_87')

    actual_json_dict = step.to_json_dict()

    # json.dumps() emits a correctly quoted and escaped JSON string, so a
    # resource path containing backslashes (Windows) or quotes cannot break
    # the expected JSON text.
    expected_json_text = """
    {
        "id": "jojo_87",
        "workflow": %s,
        "inputs": {
            "p": {}
        },
        "outputs": {
            "q": {}
        }
    }
    """ % json.dumps(resource)

    # Round-trip the actual dict through JSON text so both sides are compared
    # as plain JSON values.
    actual_json_text = json.dumps(actual_json_dict, indent=4)
    expected_json_dict = json.loads(expected_json_text)
    actual_json_dict = json.loads(actual_json_text)

    self.assertEqual(actual_json_dict, expected_json_dict,
                     msg='\n%sexpected:\n%s\n%s\nbut got:\n%s\n' %
                         (120 * '-', expected_json_text,
                          120 * '-', actual_json_text))
def test_to_json_dict(self):
    """Check the JSON dictionary produced by ``WorkflowStep.to_json_dict()``."""
    resource = get_resource('workflows/three_ops.json')
    workflow = Workflow.load(resource)
    step = WorkflowStep(workflow, resource, node_id='jojo_87')

    actual_json_dict = step.to_json_dict()

    # Embed the resource as a JSON string literal via json.dumps(): inserting
    # it raw between quotes yields invalid JSON as soon as the path contains
    # a backslash or a double quote.
    resource_json = json.dumps(resource)
    expected_json_text = """
    {
        "id": "jojo_87",
        "workflow": %s,
        "inputs": {
            "p": {}
        },
        "outputs": {
            "q": {}
        }
    }
    """ % resource_json

    # Normalise both sides to plain JSON values before comparing.
    actual_json_text = json.dumps(actual_json_dict, indent=4)
    expected_json_dict = json.loads(expected_json_text)
    actual_json_dict = json.loads(actual_json_text)

    separator = 120 * '-'
    self.assertEqual(
        actual_json_dict,
        expected_json_dict,
        msg='\n%sexpected:\n%s\n%s\nbut got:\n%s\n' %
            (separator, expected_json_text, separator, actual_json_text))
def test_resolve_source_ref(self):
    """``update_source()`` resolves a ``(node_id, port)`` reference to the actual output port."""
    producer = OpStep(op1, node_id='myop1')
    consumer = OpStep(op2, node_id='myop2')
    consumer.inputs.a._source_ref = ('myop1', 'y')

    meta_info = OpMetaInfo('myWorkflow',
                           has_monitor=True,
                           inputs=OrderedDict(x={}),
                           outputs=OrderedDict(b={}))
    graph = Workflow(meta_info)
    graph.add_steps(producer, consumer)

    port = consumer.inputs.a
    port.update_source()

    # The reference is kept, and the source now points at the producer's port.
    self.assertEqual(consumer.inputs.a._source_ref, ('myop1', 'y'))
    self.assertIs(consumer.inputs.a.source, producer.outputs.y)
    self.assertIs(consumer.inputs.a.value, None)
def test_execute_empty_workflow(self):
    """Executing a workspace whose workflow has no steps must not raise."""
    meta_info = OpMetaInfo('workspace_workflow', header=dict(description='Test!'))
    empty_workflow = Workflow(meta_info)
    ws = Workspace('/path', empty_workflow)
    ws.execute_workflow()
def test_workspace_is_part_of_context(self):
    """The context passed to an operation contains the executing workspace."""

    def some_op(ctx: dict) -> dict:
        return dict(ctx)

    from cate.core.op import OP_REGISTRY

    try:
        op_reg = OP_REGISTRY.add_op(some_op)
        # Mark the 'ctx' input as a context input.
        op_reg.op_meta_info.inputs['ctx']['context'] = True

        meta_info = OpMetaInfo('workspace_workflow', header=dict(description='Test!'))
        ws = Workspace('/path', Workflow(meta_info))
        ws.set_resource(op_reg.op_meta_info.qualified_name, {}, res_name='new_ctx')
        ws.execute_workflow('new_ctx')

        self.assertTrue('new_ctx' in ws.resource_cache)
        self.assertTrue('workspace' in ws.resource_cache['new_ctx'])
        self.assertIs(ws.resource_cache['new_ctx']['workspace'], ws)
    finally:
        OP_REGISTRY.remove_op(some_op)
def test_set_resource_is_reentrant(self):
    """Concurrent ``set_resource()`` calls generate distinct resource names."""
    from concurrent.futures import ThreadPoolExecutor

    meta_info = OpMetaInfo('workspace_workflow', header=dict(description='Test!'))
    ws = Workspace('/path', Workflow(meta_info))

    def set_resource_and_execute():
        op_kwargs = dict(num_steps=dict(value=10),
                         step_duration=dict(value=0.05))
        res_name = ws.set_resource('cate.ops.utility.no_op', op_kwargs=op_kwargs)
        ws.execute_workflow(res_name=res_name)
        return res_name

    num_res = 5
    with ThreadPoolExecutor(max_workers=2 * num_res) as executor:
        futures = [executor.submit(set_resource_and_execute)
                   for _ in range(num_res)]

    actual_res_names = {future.result() for future in futures}
    expected_res_names = {'res_%s' % (index + 1) for index in range(num_res)}
    self.assertEqual(actual_res_names, expected_res_names)
def test_set_and_execute_step(self):
    """Overwriting a resource sets it and its dependants to UNDEFINED in the cache until re-executed."""
    meta_info = OpMetaInfo('workspace_workflow', header=dict(description='Test!'))
    ws = Workspace('/path', Workflow(meta_info))

    def assert_both_cached():
        self.assertIn('X', ws.resource_cache)
        self.assertIn('Y', ws.resource_cache)

    ws.set_resource('cate.ops.io.read_netcdf',
                    mk_op_kwargs(file=NETCDF_TEST_FILE_1),
                    res_name='X')
    ws.set_resource('cate.ops.timeseries.tseries_mean',
                    mk_op_kwargs(ds="@X", var="precipitation"),
                    res_name='Y')
    self.assertEqual(ws.resource_cache, {})

    ws.execute_workflow('Y')
    assert_both_cached()

    # Overwriting Y leaves X alone.
    ws.set_resource('cate.ops.timeseries.tseries_mean',
                    mk_op_kwargs(ds="@X", var="temperature"),
                    res_name='Y',
                    overwrite=True)
    assert_both_cached()
    self.assertIs(ws.resource_cache['Y'], UNDEFINED)

    ws.execute_workflow('Y')
    assert_both_cached()

    # Overwriting X also affects Y, which depends on it.
    ws.set_resource('cate.ops.io.read_netcdf',
                    mk_op_kwargs(file=NETCDF_TEST_FILE_2),
                    res_name='X',
                    overwrite=True)
    self.assertIn('X', ws.resource_cache)
    self.assertIs(ws.resource_cache['X'], UNDEFINED)
    self.assertIn('Y', ws.resource_cache)
    self.assertIs(ws.resource_cache['Y'], UNDEFINED)

    ws.execute_workflow('Y')
    assert_both_cached()
def test_set_step_and_run_op(self):
    """``run_op()`` runs an operation against workspace resources without adding a step."""
    meta_info = OpMetaInfo('workspace_workflow', header=dict(description='Test!'))
    ws = Workspace('/path', Workflow(meta_info))

    ws.set_resource('cate.ops.io.read_netcdf',
                    mk_op_kwargs(file=NETCDF_TEST_FILE_1),
                    res_name='X')
    ws.execute_workflow('X')

    self.assertIsNotNone(ws.workflow)
    self.assertEqual(len(ws.workflow.steps), 1)
    self.assertIn('X', ws.resource_cache)

    op_name = '_extract_point'

    # Asking for return data yields the extracted point values.
    op_args = mk_op_kwargs(ds='@X',
                           point='10.22, 34.52',
                           indexers=dict(time='2014-09-11'),
                           should_return=True)
    op_result = ws.run_op(op_name, op_args)
    self.assertEqual(len(op_result), 4)
    expected_values = (('lat', 34.5),
                       ('lon', 10.2),
                       ('precipitation', 5.5),
                       ('temperature', 32.9))
    for key, expected in expected_values:
        self.assertAlmostEqual(op_result[key], expected)

    # Without asking for return data, nothing is returned.
    op_args = mk_op_kwargs(ds='@X',
                           point='10.22, 34.52',
                           indexers=dict(time='2014-09-11'))
    op_result = ws.run_op(op_name, op_args)
    self.assertIsNone(op_result)

    # An unknown operation name is a validation error.
    with self.assertRaises(ValidationError) as we:
        ws.run_op("not_existing_op", {})
    self.assertEqual('Unknown operation "not_existing_op"', str(we.exception))
def test_example(self):
    """Build a two-step workspace workflow, compare its JSON form and round-trip the workspace."""
    # Backslashes in the file path must be escaped to yield valid JSON.
    escaped_file = NETCDF_TEST_FILE_1.replace('\\', '\\\\')
    expected_json_text = """{
        "schema_version": 1,
        "qualified_name": "workspace_workflow",
        "header": {
            "description": "Test!"
        },
        "inputs": {},
        "outputs": {},
        "steps": [
            {
                "id": "p",
                "op": "cate.ops.io.read_netcdf",
                "inputs": {
                    "file": {
                        "value": "%s"
                    }
                }
            },
            {
                "id": "ts",
                "op": "cate.ops.timeseries.tseries_mean",
                "inputs": {
                    "ds": "p",
                    "var": {
                        "value": "precipitation"
                    }
                }
            }
        ]
    }
    """ % escaped_file
    expected_json_dict = json.loads(expected_json_text)

    meta_info = OpMetaInfo('workspace_workflow', header=dict(description='Test!'))
    ws = Workspace('/path', Workflow(meta_info))
    ws.set_resource('cate.ops.io.read_netcdf',
                    mk_op_kwargs(file=NETCDF_TEST_FILE_1),
                    res_name='p')
    ws.set_resource('cate.ops.timeseries.tseries_mean',
                    mk_op_kwargs(ds="@p", var="precipitation"),
                    res_name='ts')

    self.maxDiff = None
    self.assertEqual(ws.workflow.to_json_dict(), expected_json_dict)

    # With argument validation enabled, an invalid point is rejected.
    with self.assertRaises(ValueError) as e:
        ws.set_resource('cate.ops.timeseries.tseries_point',
                        mk_op_kwargs(ds="@p", point="iih!", var="precipitation"),
                        res_name='ts2',
                        validate_args=True)
    self.assertEqual(str(e.exception),
                     "Input 'point' for operation 'cate.ops.timeseries.tseries_point': "
                     "Value cannot be converted into a 'PointLike': "
                     "Invalid geometry WKT format.")

    # A workspace restored from its JSON form matches the original.
    ws2 = Workspace.from_json_dict(ws.to_json_dict())
    self.assertEqual(ws2.base_dir, ws.base_dir)
    self.assertEqual(ws2.workflow.op_meta_info.qualified_name,
                     ws.workflow.op_meta_info.qualified_name)
    self.assertEqual(len(ws2.workflow.steps), len(ws.workflow.steps))
def test_invoke_as_part_of_workflow(self):
    """A WorkflowStep nested in another workflow computes its value and fills a child cache."""
    resource = get_resource('workflows/three_ops.json')
    inner_workflow = Workflow.load(resource)
    step = WorkflowStep(inner_workflow, resource, node_id='jojo_87')

    meta_info = OpMetaInfo('contains_jojo_87',
                           has_monitor=True,
                           inputs=OrderedDict(x={}),
                           outputs=OrderedDict(y={}))
    workflow = Workflow(meta_info)
    workflow.add_step(step)
    step.inputs.p.source = workflow.inputs.x
    workflow.outputs.y.source = step.outputs.q

    value_cache = ValueCache()
    workflow.inputs.x.value = 4
    workflow.invoke(context=dict(value_cache=value_cache))

    output_value = workflow.outputs.y.value
    self.assertEqual(output_value, 2 * (4 + 1) + 3 * (2 * (4 + 1)))

    expected_child_cache = {
        'op1': {'y': 5},
        'op2': {'b': 10},
        'op3': {'w': 40},
    }
    self.assertEqual(value_cache, {'jojo_87._child': expected_child_cache})
def test_routing(self):
    """Render the four-steps-chain workflow as SVG and pass it to ``_write_svg_html``."""
    from cate.core.workflow import Workflow
    import os.path

    test_dir = os.path.dirname(__file__)
    workflow_path = os.path.join(test_dir, 'workflows', 'four_steps_chain.json')
    workflow = Workflow.load(workflow_path)

    actual_svg = workflow._repr_svg_()
    self._write_svg_html(actual_svg)
def open(cls, base_dir: str) -> 'Workspace':
    """Open the workspace stored in *base_dir*.

    :param base_dir: Base directory of the workspace.
    :return: A ``Workspace`` for *base_dir* with the workflow loaded from
        the file given by ``cls.get_workflow_file(base_dir)``.
    :raise WorkspaceError: If ``cls.get_workspace_dir(base_dir)`` is not a
        directory, or if an I/O error occurs while loading the workflow.
    """
    if not os.path.isdir(cls.get_workspace_dir(base_dir)):
        raise WorkspaceError('Not a valid workspace: %s' % base_dir)
    try:
        workflow_file = cls.get_workflow_file(base_dir)
        workflow = Workflow.load(workflow_file)
        return Workspace(base_dir, workflow)
    except (IOError, OSError) as e:
        # Chain explicitly so the original I/O error is recorded as the cause.
        raise WorkspaceError(e) from e
def test_invoke(self):
    """Invoking a stand-alone WorkflowStep computes its output and caches the step results."""
    resource = get_resource('workflows/three_ops.json')
    step = WorkflowStep(Workflow.load(resource), resource, node_id='jojo_87')

    value_cache = {}
    step.inputs.p.value = 3
    step.invoke(context=dict(value_cache=value_cache))

    self.assertEqual(step.outputs.q.value, 2 * (3 + 1) + 3 * (2 * (3 + 1)))

    expected_cache = {
        'op1': {'y': 4},
        'op2': {'b': 8},
        'op3': {'w': 32},
    }
    self.assertEqual(value_cache, expected_cache)
def test_workspace_can_create_new_res_names(self):
    """Without an explicit name, resources are named ``res_1``, ``res_2``, ... in order."""
    meta_info = OpMetaInfo('workspace_workflow', header=dict(description='Test!'))
    ws = Workspace('/path', Workflow(meta_info))

    res_names = [ws.set_resource('cate.ops.utility.identity', mk_op_kwargs(value=value))
                 for value in ('A', 'B', 'C')]

    for number, res_name in enumerate(res_names, start=1):
        self.assertEqual(res_name, 'res_%d' % number)
    for res_name in res_names:
        self.assertIsNotNone(ws.workflow.find_node(res_name))
def test_init(self):
    """A freshly constructed WorkflowStep exposes id, resource, workflow and text representations."""
    resource = get_resource('workflows/three_ops.json')
    loaded_workflow = Workflow.load(resource)
    step = WorkflowStep(loaded_workflow, resource, node_id='jojo_87')

    self.assertEqual(step.id, 'jojo_87')
    self.assertEqual(step.resource, resource)

    expected_str = 'jojo_87 = cool_workflow(p=None) -> (q) [WorkflowStep]'
    expected_repr = "WorkflowStep(Workflow('cool_workflow'), '%s', node_id='jojo_87')" % resource
    self.assertEqual(str(step), expected_str)
    self.assertEqual(repr(step), expected_repr)

    self.assertIsNotNone(step.workflow)
    self.assertIn('p', step.workflow.inputs)
    self.assertIn('q', step.workflow.outputs)
def test_set_and_rename_and_execute_step(self):
    """Set, overwrite, rename and execute resources while tracking the resource cache."""
    meta_info = OpMetaInfo('workspace_workflow', header=dict(description='Test!'))
    ws = Workspace('/path', Workflow(meta_info))
    self.assertEqual(ws.user_data, {})

    identity_op = 'cate.ops.utility.identity'

    def assert_cached(*name_value_pairs):
        # Check each cache entry in the given order.
        for name, value in name_value_pairs:
            self.assertEqual(ws.resource_cache.get(name), value)

    def assert_x_not_cached():
        self.assertEqual(ws.resource_cache.get('X', '--'), '--')

    ws.set_resource(identity_op, mk_op_kwargs(value=1), res_name='X')
    ws.set_resource(identity_op, mk_op_kwargs(value="@X"), res_name='Y')
    ws.set_resource(identity_op, mk_op_kwargs(value="@X"), res_name='Z')
    self.assertEqual(len(ws.workflow.steps), 3)
    self.assertEqual(ws.resource_cache, {})

    # Executing Y computes X and Y, but not Z.
    value = ws.execute_workflow('Y')
    self.assertEqual(value, 1)
    assert_cached(('X', 1), ('Y', 1), ('Z', None))

    value = ws.execute_workflow('Z')
    self.assertEqual(value, 1)
    assert_cached(('X', 1), ('Y', 1), ('Z', 1))

    # Overwriting X resets X and its dependants.
    ws.set_resource(identity_op, mk_op_kwargs(value=9), res_name='X', overwrite=True)
    self.assertEqual(len(ws.workflow.steps), 3)
    assert_cached(('X', UNDEFINED), ('Y', UNDEFINED), ('Z', UNDEFINED))

    ws.execute_workflow()
    assert_cached(('X', 9), ('Y', 9), ('Z', 9))

    # Renaming X to A moves the workflow node and the cache entry.
    ws.rename_resource('X', 'A')
    self.assertIsNone(ws.workflow.find_node('X'))
    self.assertIsNotNone(ws.workflow.find_node('A'))
    assert_x_not_cached()
    assert_cached(('A', 9), ('Y', 9), ('Z', 9))

    ws.set_resource(identity_op, mk_op_kwargs(value=5), res_name='A', overwrite=True)
    assert_x_not_cached()
    assert_cached(('A', UNDEFINED), ('Y', UNDEFINED), ('Z', UNDEFINED))

    ws.execute_workflow()
    assert_x_not_cached()
    assert_cached(('A', 5), ('Y', 5), ('Z', 5))
def test_invoke_from_workflow(self):
    """The one-expression workflow computes ``x = 1 + 2a`` and ``y = 3b^2 + 4c^3``."""
    workflow = Workflow.load(get_resource('workflows/one_expr.json'))

    a, b, c = 1.5, -2.6, 4.3
    workflow.inputs.a.value = a
    workflow.inputs.b.value = b
    workflow.inputs.c.value = c
    workflow.invoke()

    self.assertEqual(workflow.outputs.x.value, 1 + 2 * a)
    self.assertEqual(workflow.outputs.y.value, 3 * b**2 + 4 * c**3)
def test_invoke_from_workflow(self):
    """Invoke the one-expression workflow and verify both of its outputs."""
    resource = get_resource('workflows/one_expr.json')
    workflow = Workflow.load(resource)

    a, b, c = 1.5, -2.6, 4.3
    inputs = workflow.inputs
    inputs.a.value = a
    inputs.b.value = b
    inputs.c.value = c

    workflow.invoke()

    expected_x = 1 + 2 * a
    expected_y = 3 * b ** 2 + 4 * c ** 3
    self.assertEqual(workflow.outputs.x.value, expected_x)
    self.assertEqual(workflow.outputs.y.value, expected_y)
def test_invoke_with_context_inputs(self):
    """Context inputs are resolved from the invocation context; an unresolvable one becomes None."""

    def some_op(context, workflow, workflow_id, step, step_id, invalid):
        return dict(context=context,
                    workflow=workflow,
                    workflow_id=workflow_id,
                    step=step,
                    step_id=step_id,
                    invalid=invalid)

    from cate.core.op import OP_REGISTRY

    # Value of the 'context' property for each input of some_op.
    context_specs = (('context', True),
                     ('workflow', 'workflow'),
                     ('workflow_id', 'workflow.id'),
                     ('step', 'step'),
                     ('step_id', 'step.id'),
                     ('invalid', 'gnarz[8]'))

    try:
        op_reg = OP_REGISTRY.add_op(some_op)
        for input_name, context_spec in context_specs:
            op_reg.op_meta_info.inputs[input_name]['context'] = context_spec

        step = OpStep(op_reg, node_id='test_step')
        workflow = Workflow(OpMetaInfo('test_workflow'))
        workflow.add_step(step)
        workflow.invoke()

        output = step.outputs['return'].value
        self.assertIsInstance(output, dict)
        self.assertIsInstance(output.get('context'), dict)
        self.assertIs(output.get('workflow'), workflow)
        self.assertEqual(output.get('workflow_id'), 'test_workflow')
        self.assertIs(output.get('step'), step)
        self.assertEqual(output.get('step_id'), 'test_step')
        # The key must be present with value None, hence the non-None default.
        self.assertEqual(output.get('invalid', 1), None)
    finally:
        OP_REGISTRY.remove_op(some_op)
def test_set_and_execute_step(self):
    """Check ``set_resource()`` validation errors, then the cache behaviour on overwrite."""
    meta_info = OpMetaInfo('workspace_workflow', header=dict(description='Test!'))
    ws = Workspace('/path', Workflow(meta_info))

    def assert_both_cached():
        self.assertIn('X', ws.resource_cache)
        self.assertIn('Y', ws.resource_cache)

    # Unknown operation.
    with self.assertRaises(ValidationError) as we:
        ws.set_resource("not_existing_op", {})
    self.assertEqual('Unknown operation "not_existing_op"', str(we.exception))

    # Unknown input name.
    with self.assertRaises(ValidationError) as we:
        ws.set_resource('cate.ops.io.read_netcdf',
                        mk_op_kwargs(location=NETCDF_TEST_FILE_1),
                        res_name='X')
    self.assertEqual('"location" is not an input of operation "cate.ops.io.read_netcdf"',
                     str(we.exception))

    # Malformed argument for a known input.
    with self.assertRaises(ValidationError) as we:
        ws.set_resource('cate.ops.io.read_netcdf',
                        {'file': {'foo': 'bar'}},
                        res_name='X')
    self.assertEqual('Illegal argument for input "file" of operation "cate.ops.io.read_netcdf',
                     str(we.exception))

    ws.set_resource('cate.ops.io.read_netcdf',
                    mk_op_kwargs(file=NETCDF_TEST_FILE_1),
                    res_name='X')
    ws.set_resource('cate.ops.timeseries.tseries_mean',
                    mk_op_kwargs(ds="@X", var="precipitation"),
                    res_name='Y')
    self.assertEqual(ws.resource_cache, {})

    ws.execute_workflow('Y')
    assert_both_cached()

    # Overwriting Y leaves X alone.
    ws.set_resource('cate.ops.timeseries.tseries_mean',
                    mk_op_kwargs(ds="@X", var="temperature"),
                    res_name='Y',
                    overwrite=True)
    assert_both_cached()
    self.assertIs(ws.resource_cache['Y'], UNDEFINED)

    ws.execute_workflow('Y')
    assert_both_cached()

    # Overwriting X also affects Y, which depends on it.
    ws.set_resource('cate.ops.io.read_netcdf',
                    mk_op_kwargs(file=NETCDF_TEST_FILE_2),
                    res_name='X',
                    overwrite=True)
    self.assertIn('X', ws.resource_cache)
    self.assertIs(ws.resource_cache['X'], UNDEFINED)
    self.assertIn('Y', ws.resource_cache)
    self.assertIs(ws.resource_cache['Y'], UNDEFINED)

    ws.execute_workflow('Y')
    assert_both_cached()
def test_set_and_execute_step(self):
    """Overwriting a resource removes it and its dependants from the cache until re-executed."""
    meta_info = OpMetaInfo('workspace_workflow', header_dict=dict(description='Test!'))
    ws = Workspace('/path', Workflow(meta_info))

    def assert_both_cached():
        self.assertIn('X', ws.resource_cache)
        self.assertIn('Y', ws.resource_cache)

    ws.set_resource('X', 'cate.ops.io.read_netcdf',
                    ["file=%s" % NETCDF_TEST_FILE_1])
    ws.set_resource('Y', 'cate.ops.timeseries.tseries_mean',
                    ["ds=X", "var=precipitation"])
    self.assertEqual(ws.resource_cache, {})

    ws.execute_workflow('Y')
    assert_both_cached()

    # Overwriting Y drops only Y from the cache.
    ws.set_resource('Y', 'cate.ops.timeseries.tseries_mean',
                    ["ds=X", "var=temperature"],
                    overwrite=True)
    self.assertIn('X', ws.resource_cache)
    self.assertNotIn('Y', ws.resource_cache)

    ws.execute_workflow('Y')
    assert_both_cached()

    # Overwriting X drops X and the dependent Y.
    ws.set_resource('X', 'cate.ops.io.read_netcdf',
                    ["file=%s" % NETCDF_TEST_FILE_2],
                    overwrite=True)
    self.assertNotIn('X', ws.resource_cache)
    self.assertNotIn('Y', ws.resource_cache)

    ws.execute_workflow('Y')
    assert_both_cached()
def test_invoke_as_part_of_workflow(self):
    """Embed a WorkflowStep in an outer workflow and verify its output and the nested cache."""
    resource = get_resource('workflows/three_ops.json')
    step = WorkflowStep(Workflow.load(resource), resource, node_id='jojo_87')

    outer_meta_info = OpMetaInfo('contains_jojo_87',
                                 has_monitor=True,
                                 inputs=OrderedDict(x={}),
                                 outputs=OrderedDict(y={}))
    workflow = Workflow(outer_meta_info)
    workflow.add_step(step)

    # Route the outer input into the step and the step's output back out.
    step.inputs.p.source = workflow.inputs.x
    workflow.outputs.y.source = step.outputs.q

    value_cache = ValueCache()
    workflow.inputs.x.value = 4
    workflow.invoke(context=dict(value_cache=value_cache))

    self.assertEqual(workflow.outputs.y.value, 2 * (4 + 1) + 3 * (2 * (4 + 1)))

    expected_cache = {
        'jojo_87._child': {
            'op1': {'y': 5},
            'op2': {'b': 10},
            'op3': {'w': 40},
        },
    }
    self.assertEqual(value_cache, expected_cache)
def test_invoke_with_context_inputs(self):
    """Inputs flagged as context inputs receive values from the invocation context."""

    def some_op(context, workflow, workflow_id, step, step_id, invalid):
        return dict(context=context, workflow=workflow, workflow_id=workflow_id,
                    step=step, step_id=step_id, invalid=invalid)

    from cate.core.op import OP_REGISTRY

    try:
        op_reg = OP_REGISTRY.add_op(some_op)

        op_inputs = op_reg.op_meta_info.inputs
        op_inputs['context']['context'] = True
        op_inputs['workflow']['context'] = 'workflow'
        op_inputs['workflow_id']['context'] = 'workflow.id'
        op_inputs['step']['context'] = 'step'
        op_inputs['step_id']['context'] = 'step.id'
        op_inputs['invalid']['context'] = 'gnarz[8]'

        step = OpStep(op_reg, node_id='test_step')
        workflow = Workflow(OpMetaInfo('test_workflow'))
        workflow.add_step(step)
        workflow.invoke()

        output = step.outputs['return'].value
        self.assertIsInstance(output, dict)
        self.assertIsInstance(output.get('context'), dict)
        self.assertIs(output.get('workflow'), workflow)
        self.assertEqual(output.get('workflow_id'), 'test_workflow')
        self.assertIs(output.get('step'), step)
        self.assertEqual(output.get('step_id'), 'test_step')
        # 'invalid' must be present and None; the default 1 would reveal a missing key.
        self.assertEqual(output.get('invalid', 1), None)
    finally:
        OP_REGISTRY.remove_op(some_op)
def test_to_json_dict(self):
    """Execute a workflow producing various result types and check the workspace's JSON resource descriptions."""

    def dataset_op() -> xr.Dataset:
        periods = 5
        temperature_data = (15 + 8 * np.random.randn(periods, 2, 2)).round(decimals=1)
        temperature_attrs = {'a': np.array([1, 2, 3]), 'comment': 'hot', '_FillValue': np.nan}
        precipitation_data = (10 * np.random.rand(periods, 2, 2)).round(decimals=1)
        precipitation_attrs = {'x': True, 'comment': 'wet', '_FillValue': -1.0}
        ds = xr.Dataset(
            data_vars={
                'temperature': (('time', 'lat', 'lon'), temperature_data, temperature_attrs),
                'precipitation': (('time', 'lat', 'lon'), precipitation_data, precipitation_attrs)
            },
            coords={
                'lon': np.array([12, 13]),
                'lat': np.array([50, 51]),
                'time': pd.date_range('2014-09-06', periods=periods)
            },
            attrs={
                'history': 'a b c'
            })
        return ds

    def scalar_dataset_op() -> xr.Dataset:
        ds = xr.Dataset(
            data_vars={
                'temperature': (('time', 'lat', 'lon'), [[[15.2]]]),
                'precipitation': (('time', 'lat', 'lon'), [[[10.1]]])
            },
            coords={
                'lon': [12.],
                'lat': [50.],
                'time': [pd.to_datetime('2014-09-06')],
            },
            attrs={
                'history': 'a b c'
            })
        return ds

    def empty_dataset_op() -> xr.Dataset:
        ds = xr.Dataset(
            data_vars={
                'temperature': (('time', 'lat', 'lon'), np.ndarray(shape=(0, 0, 0), dtype=np.float32)),
                'precipitation': (('time', 'lat', 'lon'), np.ndarray(shape=(0, 0, 0), dtype=np.float32))
            },
            coords={
                'lon': np.ndarray(shape=(0,), dtype=np.float32),
                'lat': np.ndarray(shape=(0,), dtype=np.float32),
                'time': np.ndarray(shape=(0,), dtype=np.datetime64),
            },
            attrs={
                'history': 'a b c'
            })
        return ds

    def data_frame_op() -> pd.DataFrame:
        data = {'A': [1, 2, 3, np.nan, 4, 9, np.nan, np.nan, 1, 0, 4, 6],
                'B': [5, 6, 8, 7, 5, 5, 5, 9, 1, 2, 7, 6]}
        time = pd.date_range('2000-01-01', freq='MS', periods=12)
        return pd.DataFrame(data=data, index=time, dtype=float, columns=['A', 'B'])

    def scalar_data_frame_op() -> pd.DataFrame:
        data = {'A': [1.3], 'B': [5.9]}
        return pd.DataFrame(data=data, dtype=float, columns=['A', 'B'])

    def empty_data_frame_op() -> pd.DataFrame:
        data = {'A': [], 'B': []}
        return pd.DataFrame(data=data, dtype=float, columns=['A', 'B'])

    def geo_data_frame_op() -> gpd.GeoDataFrame:
        data = {'name': ['A', 'B', 'C'],
                'lat': [45, 46, 47.5],
                'lon': [-120, -121.2, -122.9]}
        df = pd.DataFrame(data, columns=['name', 'lat', 'lon'])
        geometry = [Point(xy) for xy in zip(df['lon'], df['lat'])]
        return gpd.GeoDataFrame(df, geometry=geometry)

    def scalar_geo_data_frame_op() -> gpd.GeoDataFrame:
        data = {'name': [2000 * 'A'],
                'lat': [45],
                'lon': [-120]}
        df = pd.DataFrame(data, columns=['name', 'lat', 'lon'])
        geometry = [Point(xy) for xy in zip(df['lon'], df['lat'])]
        return gpd.GeoDataFrame(df, geometry=geometry)

    def int_op() -> int:
        return 394852

    def str_op() -> str:
        return 'Hi!'

    def assert_frame_variable(var, name, data_type, num_rows):
        # Checks shared by all data frame column descriptions.
        self.assertEqual(var.get('name'), name)
        self.assertEqual(var.get('dataType'), data_type)
        self.assertEqual(var.get('numDims'), 1)
        self.assertEqual(var.get('shape'), (num_rows,))
        self.assertEqual(var.get('isYFlipped'), None)
        self.assertEqual(var.get('isFeatureAttribute'), True)
        self.assertIsNone(var.get('attributes'))

    def variable_values(res, num_vars):
        # Map variable name to its 'value' entry after checking the variable count.
        variables = res.get('variables')
        self.assertIsNotNone(variables)
        self.assertEqual(len(variables), num_vars)
        return {var.get('name'): var.get('value') for var in variables}

    # Operations paired with the node IDs of their steps, in workflow order.
    ops_and_ids = ((dataset_op, 'ds'),
                   (data_frame_op, 'df'),
                   (geo_data_frame_op, 'gdf'),
                   (scalar_dataset_op, 'scalar_ds'),
                   (scalar_data_frame_op, 'scalar_df'),
                   (scalar_geo_data_frame_op, 'scalar_gdf'),
                   (empty_dataset_op, 'empty_ds'),
                   (empty_data_frame_op, 'empty_df'),
                   (int_op, 'i'),
                   (str_op, 's'))

    from cate.core.op import OP_REGISTRY

    try:
        for op, _ in ops_and_ids:
            OP_REGISTRY.add_op(op)

        workflow = Workflow(OpMetaInfo('workspace_workflow', header=dict(description='Test!')))
        for op, node_id in ops_and_ids:
            workflow.add_step(OpStep(op, node_id=node_id))

        ws = Workspace('/path', workflow)
        ws.execute_workflow()

        d_ws = ws.to_json_dict()

        d_wf = d_ws.get('workflow')
        self.assertIsNotNone(d_wf)

        l_res = d_ws.get('resources')
        self.assertIsNotNone(l_res)
        self.assertEqual(len(l_res), 10)

        # --- xarray dataset ---
        res_ds = l_res[0]
        self.assertEqual(res_ds.get('name'), 'ds')
        self.assertEqual(res_ds.get('dataType'), 'xarray.core.dataset.Dataset')
        self.assertEqual(res_ds.get('dimSizes'), dict(lat=2, lon=2, time=5))
        self.assertEqual(res_ds.get('attributes'), {'history': 'a b c'})
        res_ds_vars = res_ds.get('variables')
        self.assertIsNotNone(res_ds_vars)
        self.assertEqual(len(res_ds_vars), 2)
        expected_ds_vars = (
            ('precipitation', dict(x=True, comment='wet', _FillValue=-1.)),
            ('temperature', dict(a=[1, 2, 3], comment='hot', _FillValue=np.nan)),
        )
        for res_ds_var, (var_name, var_attrs) in zip(res_ds_vars, expected_ds_vars):
            self.assertEqual(res_ds_var.get('name'), var_name)
            self.assertEqual(res_ds_var.get('dataType'), 'float64')
            self.assertEqual(res_ds_var.get('numDims'), 3)
            self.assertEqual(res_ds_var.get('shape'), (5, 2, 2))
            self.assertEqual(res_ds_var.get('chunkSizes'), None)
            self.assertEqual(res_ds_var.get('isYFlipped'), True)
            self.assertEqual(res_ds_var.get('isFeatureAttribute'), None)
            self.assertEqual(res_ds_var.get('attributes'), var_attrs)

        # --- pandas data frame ---
        res_df = l_res[1]
        self.assertEqual(res_df.get('name'), 'df')
        self.assertEqual(res_df.get('dataType'), 'pandas.core.frame.DataFrame')
        self.assertEqual(res_df.get('attributes'), {'num_rows': 12, 'num_columns': 2})
        res_df_vars = res_df.get('variables')
        self.assertIsNotNone(res_df_vars)
        self.assertEqual(len(res_df_vars), 2)
        assert_frame_variable(res_df_vars[0], 'A', 'float64', 12)
        assert_frame_variable(res_df_vars[1], 'B', 'float64', 12)

        # --- geopandas data frame ---
        res_gdf = l_res[2]
        self.assertEqual(res_gdf.get('name'), 'gdf')
        self.assertEqual(res_gdf.get('dataType'), 'geopandas.geodataframe.GeoDataFrame')
        self.assertEqual(res_gdf.get('attributes'), {'num_rows': 3, 'num_columns': 4, 'geom_type': 'Point'})
        res_gdf_vars = res_gdf.get('variables')
        self.assertIsNotNone(res_gdf_vars)
        self.assertEqual(len(res_gdf_vars), 4)
        assert_frame_variable(res_gdf_vars[0], 'name', 'object', 3)
        assert_frame_variable(res_gdf_vars[1], 'lat', 'float64', 3)
        assert_frame_variable(res_gdf_vars[2], 'lon', 'float64', 3)
        assert_frame_variable(res_gdf_vars[3], 'geometry', 'geometry', 3)

        # --- single-value resources expose their values ---
        scalar_values = variable_values(l_res[3], 2)
        self.assertEqual(scalar_values, {'temperature': 15.2, 'precipitation': 10.1})

        scalar_values = variable_values(l_res[4], 2)
        self.assertEqual(scalar_values, {'A': 1.3, 'B': 5.9})

        scalar_values = variable_values(l_res[5], 4)
        self.assertEqual(scalar_values, {'name': (1000 * 'A') + '...',
                                         'lat': 45,
                                         'lon': -120,
                                         'geometry': 'POINT (-120 45)'})

        # --- empty resources have no values ---
        scalar_values = variable_values(l_res[6], 2)
        self.assertEqual(scalar_values, {'temperature': None, 'precipitation': None})

        scalar_values = variable_values(l_res[7], 2)
        self.assertEqual(scalar_values, {'A': None, 'B': None})

        # --- plain Python values ---
        res_int = l_res[8]
        self.assertEqual(res_int.get('name'), 'i')
        self.assertEqual(res_int.get('dataType'), 'int')
        self.assertIsNone(res_int.get('attributes'))
        self.assertIsNone(res_int.get('variables'))

        res_str = l_res[9]
        self.assertEqual(res_str.get('name'), 's')
        self.assertEqual(res_str.get('dataType'), 'str')
        self.assertIsNone(res_str.get('attributes'))
        self.assertIsNone(res_str.get('variables'))
    finally:
        for op, _ in ops_and_ids:
            OP_REGISTRY.remove_op(op)
def test_to_json_dict(self):
    """Execute a ten-step workspace workflow and verify the resource entries of its JSON form.

    The workflow contains regular, single-valued and empty variants of
    xarray datasets and (geo) data frames plus two plain Python values.
    After ``execute_workflow()``, each entry of ``resources`` returned by
    ``Workspace.to_json_dict()`` is checked in step order.
    """
    from cate.core.op import OP_REGISTRY

    def dataset_op() -> xr.Dataset:
        num_times = 5
        dims = ('time', 'lat', 'lon')
        temperature = (15 + 8 * np.random.randn(num_times, 2, 2)).round(decimals=1)
        temperature_meta = dict(a=np.array([1, 2, 3]), comment='hot', _FillValue=np.nan)
        precipitation = (10 * np.random.rand(num_times, 2, 2)).round(decimals=1)
        precipitation_meta = dict(x=True, comment='wet', _FillValue=-1.0)
        return xr.Dataset(
            data_vars=dict(temperature=(dims, temperature, temperature_meta),
                           precipitation=(dims, precipitation, precipitation_meta)),
            coords=dict(lon=np.array([12, 13]),
                        lat=np.array([50, 51]),
                        time=pd.date_range('2014-09-06', periods=num_times)),
            attrs=dict(history='a b c'))

    def scalar_dataset_op() -> xr.Dataset:
        dims = ('time', 'lat', 'lon')
        return xr.Dataset(
            data_vars=dict(temperature=(dims, [[[15.2]]]),
                           precipitation=(dims, [[[10.1]]])),
            coords=dict(lon=[12.],
                        lat=[50.],
                        time=[pd.to_datetime('2014-09-06')]),
            attrs=dict(history='a b c'))

    def empty_dataset_op() -> xr.Dataset:
        dims = ('time', 'lat', 'lon')
        return xr.Dataset(
            data_vars=dict(temperature=(dims, np.ndarray(shape=(0, 0, 0), dtype=np.float32)),
                           precipitation=(dims, np.ndarray(shape=(0, 0, 0), dtype=np.float32))),
            coords=dict(lon=np.ndarray(shape=(0,), dtype=np.float32),
                        lat=np.ndarray(shape=(0,), dtype=np.float32),
                        time=np.ndarray(shape=(0,), dtype=np.datetime64)),
            attrs=dict(history='a b c'))

    def data_frame_op() -> pd.DataFrame:
        columns = {'A': [1, 2, 3, np.nan, 4, 9, np.nan, np.nan, 1, 0, 4, 6],
                   'B': [5, 6, 8, 7, 5, 5, 5, 9, 1, 2, 7, 6]}
        month_starts = pd.date_range('2000-01-01', freq='MS', periods=12)
        return pd.DataFrame(data=columns, index=month_starts, dtype=float, columns=['A', 'B'])

    def scalar_data_frame_op() -> pd.DataFrame:
        return pd.DataFrame(data={'A': [1.3], 'B': [5.9]}, dtype=float, columns=['A', 'B'])

    def empty_data_frame_op() -> pd.DataFrame:
        return pd.DataFrame(data={'A': [], 'B': []}, dtype=float, columns=['A', 'B'])

    def geo_data_frame_op() -> gpd.GeoDataFrame:
        frame = pd.DataFrame({'name': ['A', 'B', 'C'], 'lat': [45, 46, 47.5], 'lon': [-120, -121.2, -122.9]},
                             columns=['name', 'lat', 'lon'])
        points = [Point(lon_lat) for lon_lat in zip(frame['lon'], frame['lat'])]
        return gpd.GeoDataFrame(frame, geometry=points)

    def scalar_geo_data_frame_op() -> gpd.GeoDataFrame:
        frame = pd.DataFrame({'name': [2000 * 'A'], 'lat': [45], 'lon': [-120]},
                             columns=['name', 'lat', 'lon'])
        points = [Point(lon_lat) for lon_lat in zip(frame['lon'], frame['lat'])]
        return gpd.GeoDataFrame(frame, geometry=points)

    def int_op() -> int:
        return 394852

    def str_op() -> str:
        return 'Hi!'

    # (node id, operation) pairs; this order drives registration, step
    # creation, the expected order of the resources and the clean-up.
    steps = (
        ('ds', dataset_op),
        ('df', data_frame_op),
        ('gdf', geo_data_frame_op),
        ('scalar_ds', scalar_dataset_op),
        ('scalar_df', scalar_data_frame_op),
        ('scalar_gdf', scalar_geo_data_frame_op),
        ('empty_ds', empty_dataset_op),
        ('empty_df', empty_data_frame_op),
        ('i', int_op),
        ('s', str_op),
    )

    def check_grid_var(var, name, attributes):
        # Descriptor of one of the (5, 2, 2) float variables of the 'ds' resource.
        self.assertEqual(var.get('name'), name)
        self.assertEqual(var.get('dataType'), 'float64')
        self.assertEqual(var.get('numDims'), 3)
        self.assertEqual(var.get('shape'), (5, 2, 2))
        self.assertEqual(var.get('chunkSizes'), None)
        self.assertEqual(var.get('isYFlipped'), True)
        self.assertEqual(var.get('isFeatureAttribute'), None)
        self.assertEqual(var.get('attributes'), attributes)

    def check_column_var(var, name, data_type, num_rows):
        # Descriptor of a single (geo) data frame column.
        self.assertEqual(var.get('name'), name)
        self.assertEqual(var.get('dataType'), data_type)
        self.assertEqual(var.get('numDims'), 1)
        self.assertEqual(var.get('shape'), (num_rows,))
        self.assertEqual(var.get('isYFlipped'), None)
        self.assertEqual(var.get('isFeatureAttribute'), True)
        self.assertIsNone(var.get('attributes'))

    def check_values(resource, expected_values):
        # Compare the name -> value mapping of all variables of a resource.
        variables = resource.get('variables')
        self.assertIsNotNone(variables)
        self.assertEqual(len(variables), len(expected_values))
        actual_values = {var.get('name'): var.get('value') for var in variables}
        self.assertEqual(actual_values, expected_values)

    try:
        for _, op in steps:
            OP_REGISTRY.add_op(op)

        workflow = Workflow(OpMetaInfo('workspace_workflow', header=dict(description='Test!')))
        for node_id, op in steps:
            workflow.add_step(OpStep(op, node_id=node_id))

        workspace = Workspace('/path', workflow)
        workspace.execute_workflow()
        workspace_json = workspace.to_json_dict()

        self.assertIsNotNone(workspace_json.get('workflow'))

        resources = workspace_json.get('resources')
        self.assertIsNotNone(resources)
        self.assertEqual(len(resources), 10)
        (ds_json, df_json, gdf_json,
         scalar_ds_json, scalar_df_json, scalar_gdf_json,
         empty_ds_json, empty_df_json,
         int_json, str_json) = resources

        # Dataset resource
        self.assertEqual(ds_json.get('name'), 'ds')
        self.assertEqual(ds_json.get('dataType'), 'xarray.core.dataset.Dataset')
        self.assertEqual(ds_json.get('dimSizes'), dict(lat=2, lon=2, time=5))
        self.assertEqual(ds_json.get('attributes'), {'history': 'a b c'})
        ds_vars = ds_json.get('variables')
        self.assertIsNotNone(ds_vars)
        self.assertEqual(len(ds_vars), 2)
        check_grid_var(ds_vars[0], 'precipitation', dict(x=True, comment='wet', _FillValue=-1.))
        check_grid_var(ds_vars[1], 'temperature', dict(a=[1, 2, 3], comment='hot', _FillValue=np.nan))

        # Data frame resource
        self.assertEqual(df_json.get('name'), 'df')
        self.assertEqual(df_json.get('dataType'), 'pandas.core.frame.DataFrame')
        self.assertEqual(df_json.get('attributes'), {'num_rows': 12, 'num_columns': 2})
        df_vars = df_json.get('variables')
        self.assertIsNotNone(df_vars)
        self.assertEqual(len(df_vars), 2)
        check_column_var(df_vars[0], 'A', 'float64', 12)
        check_column_var(df_vars[1], 'B', 'float64', 12)

        # Geo data frame resource
        self.assertEqual(gdf_json.get('name'), 'gdf')
        self.assertEqual(gdf_json.get('dataType'), 'geopandas.geodataframe.GeoDataFrame')
        self.assertEqual(gdf_json.get('attributes'), {'num_rows': 3, 'num_columns': 4, 'geom_type': 'Point'})
        gdf_vars = gdf_json.get('variables')
        self.assertIsNotNone(gdf_vars)
        self.assertEqual(len(gdf_vars), 4)
        check_column_var(gdf_vars[0], 'name', 'object', 3)
        check_column_var(gdf_vars[1], 'lat', 'float64', 3)
        check_column_var(gdf_vars[2], 'lon', 'float64', 3)
        check_column_var(gdf_vars[3], 'geometry', 'object', 3)

        # Single-valued resources report a 'value' per variable
        check_values(scalar_ds_json, {'temperature': 15.2, 'precipitation': 10.1})
        check_values(scalar_df_json, {'A': 1.3, 'B': 5.9})
        check_values(scalar_gdf_json, {'name': (1000 * 'A') + '...',
                                       'lat': 45,
                                       'lon': -120,
                                       'geometry': 'POINT (-120 45)'})

        # Empty resources report no value for any variable
        check_values(empty_ds_json, {'temperature': None, 'precipitation': None})
        check_values(empty_df_json, {'A': None, 'B': None})

        # Plain Python values have neither attributes nor variables
        for plain_json, name, data_type in ((int_json, 'i', 'int'), (str_json, 's', 'str')):
            self.assertEqual(plain_json.get('name'), name)
            self.assertEqual(plain_json.get('dataType'), data_type)
            self.assertIsNone(plain_json.get('attributes'))
            self.assertIsNone(plain_json.get('variables'))

    finally:
        for _, op in steps:
            OP_REGISTRY.remove_op(op)
def test_from_json_dict(self):
    """Build a three-step workflow from JSON and verify its meta info, ports, wiring and flags."""
    source_text = """
    {
        "qualified_name": "my_workflow",
        "header": {
            "description": "My workflow is not too bad."
        },
        "inputs": {
            "p": {"description": "Input 'p'"}
        },
        "outputs": {
            "q": {"source": "op3.w", "description": "Output 'q'"}
        },
        "steps": [
            {
                "id": "op1",
                "op": "test.core.test_workflow.op1",
                "inputs": {
                    "x": { "source": ".p" }
                }
            },
            {
                "id": "op2",
                "op": "test.core.test_workflow.op2",
                "inputs": {
                    "a": {"source": "op1"}
                }
            },
            {
                "id": "op3",
                "persistent": true,
                "op": "test.core.test_workflow.op3",
                "inputs": {
                    "u": {"source": "op1.y"},
                    "v": {"source": "op2.b"}
                }
            }
        ]
    }
    """

    workflow = Workflow.from_json_dict(json.loads(source_text))
    self.assertIsNotNone(workflow)

    # Identity and meta information
    self.assertEqual(workflow.id, "my_workflow")
    self.assertEqual(workflow.op_meta_info.qualified_name, workflow.id)
    self.assertEqual(workflow.op_meta_info.header, dict(description="My workflow is not too bad."))
    self.assertEqual(len(workflow.op_meta_info.inputs), 1)
    self.assertEqual(len(workflow.op_meta_info.outputs), 1)
    self.assertEqual(workflow.op_meta_info.inputs['p'], dict(description="Input 'p'"))
    self.assertEqual(workflow.op_meta_info.outputs['q'], dict(source="op3.w", description="Output 'q'"))

    # Workflow ports
    self.assertEqual(len(workflow.inputs), 1)
    self.assertEqual(len(workflow.outputs), 1)
    self.assertIn('p', workflow.inputs)
    self.assertIn('q', workflow.outputs)

    # Steps keep the order given in the JSON
    self.assertEqual(len(workflow.steps), 3)
    first = workflow.steps[0]
    second = workflow.steps[1]
    third = workflow.steps[2]
    for step, expected_id in ((first, 'op1'), (second, 'op2'), (third, 'op3')):
        self.assertEqual(step.id, expected_id)

    # Source references must resolve to the very same port objects
    self.assertIs(first.inputs.x.source, workflow.inputs.p)
    self.assertIs(second.inputs.a.source, first.outputs.y)
    self.assertIs(third.inputs.u.source, first.outputs.y)
    self.assertIs(third.inputs.v.source, second.outputs.b)
    self.assertIs(workflow.outputs.q.source, third.outputs.w)

    # Only "op3" is flagged persistent in the JSON
    for step, expected_flag in ((first, False), (second, False), (third, True)):
        self.assertEqual(step.persistent, expected_flag)
def test_from_json_dict_empty(self):
    """A JSON object holding only "qualified_name" yields a workflow with that id."""
    minimal_json = json.loads('{"qualified_name": "hello"}')
    self.assertEqual(Workflow.from_json_dict(minimal_json).id, 'hello')
def test_from_json_dict_invalids(self):
    """A JSON object without "qualified_name" is rejected with a descriptive ValueError."""
    incomplete_json = json.loads('{"header": {}}')
    with self.assertRaises(ValueError) as raised:
        Workflow.from_json_dict(incomplete_json)
    # Checked after the ``with`` block: only then is the caught exception available.
    expected_message = 'missing mandatory property "qualified_name" in Workflow-JSON'
    self.assertEqual(str(raised.exception), expected_message)
def test_to_json_dict(self):
    """Serialise a hand-wired three-step workflow and compare it with the expected JSON."""
    first = OpStep(op1, node_id='op1')
    second = OpStep(op2, node_id='op2')
    third = OpStep(op3, node_id='op3')

    meta_info = OpMetaInfo('my_workflow', inputs=OrderedDict(p={}), outputs=OrderedDict(q={}))
    workflow = Workflow(meta_info)
    workflow.add_steps(first, second, third)

    # Wiring: p -> op1 -> (op2, op3), op2 -> op3 -> q
    first.inputs.x.source = workflow.inputs.p
    second.inputs.a.source = first.outputs.y
    third.inputs.u.source = first.outputs.y
    third.inputs.v.source = second.outputs.b
    workflow.outputs.q.source = third.outputs.w

    second.persistent = True

    serialised = workflow.to_json_dict()

    expected_json_text = """
    {
        "schema_version": 1,
        "qualified_name": "my_workflow",
        "header": {},
        "inputs": {
            "p": {}
        },
        "outputs": {
            "q": {
                "source": "op3.w"
            }
        },
        "steps": [
            {
                "id": "op1",
                "op": "test.core.test_workflow.op1",
                "inputs": {
                    "x": "my_workflow.p"
                }
            },
            {
                "id": "op2",
                "persistent": true,
                "op": "test.core.test_workflow.op2",
                "inputs": {
                    "a": "op1.y"
                }
            },
            {
                "id": "op3",
                "op": "test.core.test_workflow.op3",
                "inputs": {
                    "v": "op2.b",
                    "u": "op1.y"
                }
            }
        ]
    }
    """

    # Round-trip the actual dictionary through JSON text so that both sides
    # of the comparison consist of plain JSON-decoded objects.
    actual_json_text = json.dumps(serialised, indent=4)
    rule = 120 * '-'
    failure_message = '\nexpected:\n%s\n%s\nbut got:\n%s\n%s\n' % (rule, expected_json_text,
                                                                 rule, actual_json_text)
    self.assertEqual(json.loads(actual_json_text), json.loads(expected_json_text), msg=failure_message)
def test_to_json_dict(self):
    """Execute a four-step workspace workflow and verify the resource entries of its JSON form.

    The steps produce an xarray dataset, a pandas data frame, an ``int`` and
    a ``str``. After ``execute_workflow()``, each entry of ``resources``
    returned by ``Workspace.to_json_dict()`` is checked in step order.
    """

    def dataset_op() -> xr.Dataset:
        periods = 5
        temperature_data = (15 + 8 * np.random.randn(periods, 2, 2)).round(decimals=1)
        temperature_attrs = {'a': np.array([1, 2, 3]), 'comment': 'hot', '_FillValue': np.nan}
        precipitation_data = (10 * np.random.rand(periods, 2, 2)).round(decimals=1)
        precipitation_attrs = {'x': True, 'comment': 'wet', '_FillValue': -1.0}
        ds = xr.Dataset(
            data_vars={
                'temperature': (('time', 'lat', 'lon'), temperature_data, temperature_attrs),
                'precipitation': (('time', 'lat', 'lon'), precipitation_data, precipitation_attrs)
            },
            coords={
                'lon': np.array([12, 13]),
                'lat': np.array([50, 51]),
                'time': pd.date_range('2014-09-06', periods=periods)
            },
            attrs={'history': 'a b c'})
        return ds

    def data_frame_op() -> pd.DataFrame:
        data = {'A': [1, 2, 3, np.nan, 4, 9, np.nan, np.nan, 1, 0, 4, 6],
                'B': [5, 6, 8, 7, 5, 5, 5, 9, 1, 2, 7, 6]}
        time = pd.date_range('2000-01-01', freq='MS', periods=12)
        return pd.DataFrame(data=data, index=time, dtype=float)

    def int_op() -> int:
        return 394852

    def str_op() -> str:
        return 'Hi!'

    from cate.core.op import OP_REGISTRY

    try:
        OP_REGISTRY.add_op(dataset_op)
        OP_REGISTRY.add_op(data_frame_op)
        OP_REGISTRY.add_op(int_op)
        OP_REGISTRY.add_op(str_op)

        workflow = Workflow(OpMetaInfo('workspace_workflow', header=dict(description='Test!')))
        workflow.add_step(OpStep(dataset_op, node_id='ds'))
        workflow.add_step(OpStep(data_frame_op, node_id='df'))
        workflow.add_step(OpStep(int_op, node_id='i'))
        workflow.add_step(OpStep(str_op, node_id='s'))

        ws = Workspace('/path', workflow)
        ws.execute_workflow()

        d_ws = ws.to_json_dict()

        d_wf = d_ws.get('workflow')
        self.assertIsNotNone(d_wf)

        l_res = d_ws.get('resources')
        self.assertIsNotNone(l_res)
        self.assertEqual(len(l_res), 4)

        # Resource 1: the dataset
        res_1 = l_res[0]
        self.assertEqual(res_1.get('name'), 'ds')
        self.assertEqual(res_1.get('dataType'), 'xarray.core.dataset.Dataset')
        self.assertEqual(res_1.get('dimSizes'), dict(lat=2, lon=2, time=5))
        self.assertEqual(res_1.get('attributes'), {'history': 'a b c'})
        res_1_vars = res_1.get('variables')
        self.assertIsNotNone(res_1_vars)
        self.assertEqual(len(res_1_vars), 2)

        var_1 = res_1_vars[0]
        self.assertEqual(var_1.get('name'), 'precipitation')
        self.assertEqual(var_1.get('dataType'), 'float64')
        self.assertEqual(var_1.get('numDims'), 3)
        self.assertEqual(var_1.get('shape'), (5, 2, 2))
        self.assertEqual(var_1.get('chunkSizes'), None)
        self.assertEqual(var_1.get('isYFlipped'), True)
        self.assertEqual(var_1.get('isFeatureAttribute'), None)
        self.assertEqual(var_1.get('attributes'), dict(x=True, comment='wet', _FillValue=-1.))

        var_2 = res_1_vars[1]
        self.assertEqual(var_2.get('name'), 'temperature')
        self.assertEqual(var_2.get('dataType'), 'float64')
        self.assertEqual(var_2.get('numDims'), 3)
        self.assertEqual(var_2.get('shape'), (5, 2, 2))
        self.assertEqual(var_2.get('chunkSizes'), None)
        self.assertEqual(var_2.get('isYFlipped'), True)
        self.assertEqual(var_2.get('isFeatureAttribute'), None)
        self.assertEqual(var_2.get('attributes'), dict(a=[1, 2, 3], comment='hot', _FillValue=np.nan))

        # Resource 2: the data frame
        res_2 = l_res[1]
        self.assertEqual(res_2.get('name'), 'df')
        self.assertEqual(res_2.get('dataType'), 'pandas.core.frame.DataFrame')
        self.assertIsNone(res_2.get('attributes'))
        res_2_vars = res_2.get('variables')
        self.assertIsNotNone(res_2_vars)
        self.assertEqual(len(res_2_vars), 2)

        var_1 = res_2_vars[0]
        self.assertEqual(var_1.get('name'), 'A')
        self.assertEqual(var_1.get('dataType'), 'float64')
        self.assertEqual(var_1.get('numDims'), 1)
        self.assertEqual(var_1.get('shape'), (12,))
        self.assertEqual(var_1.get('isYFlipped'), None)
        self.assertEqual(var_1.get('isFeatureAttribute'), None)
        self.assertIsNone(var_1.get('attributes'))

        var_2 = res_2_vars[1]
        self.assertEqual(var_2.get('name'), 'B')
        self.assertEqual(var_2.get('dataType'), 'float64')
        self.assertEqual(var_2.get('numDims'), 1)
        self.assertEqual(var_2.get('shape'), (12,))
        self.assertEqual(var_2.get('isYFlipped'), None)
        self.assertEqual(var_2.get('isFeatureAttribute'), None)
        self.assertIsNone(var_2.get('attributes'))

        # Resource 3: the int value
        res_3 = l_res[2]
        self.assertEqual(res_3.get('name'), 'i')
        self.assertEqual(res_3.get('dataType'), 'int')
        self.assertIsNone(res_3.get('attributes'))
        self.assertIsNone(res_3.get('variables'))

        # Resource 4: the str value
        res_4 = l_res[3]
        self.assertEqual(res_4.get('name'), 's')
        self.assertEqual(res_4.get('dataType'), 'str')
        # Use the same 'attributes' key as for every other resource; a lookup
        # of a key that is never emitted would make this assertion vacuous.
        self.assertIsNone(res_4.get('attributes'))
        self.assertIsNone(res_4.get('variables'))

    finally:
        OP_REGISTRY.remove_op(dataset_op)
        OP_REGISTRY.remove_op(data_frame_op)
        OP_REGISTRY.remove_op(int_op)
        OP_REGISTRY.remove_op(str_op)
def test_sort_steps(self):
    """``Workflow.sort_steps`` orders any subset or permutation of the example steps by dependency."""
    s1, s2, s3, _ = self.create_example_3_steps_workflow()

    # (steps as given, steps as expected after sorting)
    cases = [
        ([], []),
        ([s1], [s1]),
        ([s1, s2], [s1, s2]),
        ([s1, s3], [s1, s3]),
        ([s2, s1], [s1, s2]),
        ([s3, s1], [s1, s3]),
        ([s2, s3], [s2, s3]),
        ([s3, s2], [s2, s3]),
        ([s1, s2, s3], [s1, s2, s3]),
        ([s2, s1, s3], [s1, s2, s3]),
        ([s3, s2, s1], [s1, s2, s3]),
        ([s1, s3, s2], [s1, s2, s3]),
    ]
    for given_steps, expected_steps in cases:
        self.assertEqual(Workflow.sort_steps(given_steps), expected_steps)
def from_json_dict(cls, json_dict):
    """Create a ``Workspace`` from its JSON dictionary representation.

    :param json_dict: mapping that may provide the keys ``'base_dir'``
        (defaults to ``None``), ``'workflow'`` (defaults to an empty dict)
        and ``'is_modified'`` (defaults to ``False``).
    :return: a new ``Workspace`` wrapping the workflow restored through
        ``Workflow.from_json_dict``.
    """
    lookup = json_dict.get
    workspace_dir = lookup('base_dir', None)
    workflow_dict = lookup('workflow', {})
    modified = lookup('is_modified', False)
    return Workspace(workspace_dir, Workflow.from_json_dict(workflow_dict), is_modified=modified)
def test_from_json_dict(self):
    """Build a three-step workflow from JSON and verify its meta info, ports, wiring and flags."""
    source_text = """
    {
        "qualified_name": "my_workflow",
        "header": {
            "description": "My workflow is not too bad."
        },
        "inputs": {
            "p": {"description": "Input 'p'"}
        },
        "outputs": {
            "q": {"source": "op3.w", "description": "Output 'q'"}
        },
        "steps": [
            {
                "id": "op1",
                "op": "tests.core.test_workflow.op1",
                "inputs": {
                    "x": { "source": ".p" }
                }
            },
            {
                "id": "op2",
                "op": "tests.core.test_workflow.op2",
                "inputs": {
                    "a": {"source": "op1"}
                }
            },
            {
                "id": "op3",
                "persistent": true,
                "op": "tests.core.test_workflow.op3",
                "inputs": {
                    "u": {"source": "op1.y"},
                    "v": {"source": "op2.b"}
                }
            }
        ]
    }
    """

    workflow = Workflow.from_json_dict(json.loads(source_text))
    self.assertIsNotNone(workflow)

    # Identity and meta information
    self.assertEqual(workflow.id, "my_workflow")
    self.assertEqual(workflow.op_meta_info.qualified_name, workflow.id)
    self.assertEqual(workflow.op_meta_info.header, dict(description="My workflow is not too bad."))
    self.assertEqual(len(workflow.op_meta_info.inputs), 1)
    self.assertEqual(len(workflow.op_meta_info.outputs), 1)
    self.assertEqual(workflow.op_meta_info.inputs['p'], dict(description="Input 'p'"))
    self.assertEqual(workflow.op_meta_info.outputs['q'], dict(source="op3.w", description="Output 'q'"))

    # Workflow ports
    self.assertEqual(len(workflow.inputs), 1)
    self.assertEqual(len(workflow.outputs), 1)
    self.assertIn('p', workflow.inputs)
    self.assertIn('q', workflow.outputs)

    # Steps keep the order given in the JSON
    self.assertEqual(len(workflow.steps), 3)
    first = workflow.steps[0]
    second = workflow.steps[1]
    third = workflow.steps[2]
    for step, expected_id in ((first, 'op1'), (second, 'op2'), (third, 'op3')):
        self.assertEqual(step.id, expected_id)

    # Source references must resolve to the very same port objects
    self.assertIs(first.inputs.x.source, workflow.inputs.p)
    self.assertIs(second.inputs.a.source, first.outputs.y)
    self.assertIs(third.inputs.u.source, first.outputs.y)
    self.assertIs(third.inputs.v.source, second.outputs.b)
    self.assertIs(workflow.outputs.q.source, third.outputs.w)

    # Only "op3" is flagged persistent in the JSON
    for step, expected_flag in ((first, False), (second, False), (third, True)):
        self.assertEqual(step.persistent, expected_flag)
def test_to_json_dict(self):
    """Serialise a hand-wired three-step workflow and compare it with the expected JSON."""
    first = OpStep(op1, node_id='op1')
    second = OpStep(op2, node_id='op2')
    third = OpStep(op3, node_id='op3')

    meta_info = OpMetaInfo('my_workflow', inputs=OrderedDict(p={}), outputs=OrderedDict(q={}))
    workflow = Workflow(meta_info)
    workflow.add_steps(first, second, third)

    # Wiring: p -> op1 -> (op2, op3), op2 -> op3 -> q
    first.inputs.x.source = workflow.inputs.p
    second.inputs.a.source = first.outputs.y
    third.inputs.u.source = first.outputs.y
    third.inputs.v.source = second.outputs.b
    workflow.outputs.q.source = third.outputs.w

    second.persistent = True

    serialised = workflow.to_json_dict()

    expected_json_text = """
    {
        "schema_version": 1,
        "qualified_name": "my_workflow",
        "header": {},
        "inputs": {
            "p": {}
        },
        "outputs": {
            "q": {
                "source": "op3.w"
            }
        },
        "steps": [
            {
                "id": "op1",
                "op": "tests.core.test_workflow.op1",
                "inputs": {
                    "x": "my_workflow.p"
                }
            },
            {
                "id": "op2",
                "persistent": true,
                "op": "tests.core.test_workflow.op2",
                "inputs": {
                    "a": "op1.y"
                }
            },
            {
                "id": "op3",
                "op": "tests.core.test_workflow.op3",
                "inputs": {
                    "v": "op2.b",
                    "u": "op1.y"
                }
            }
        ]
    }
    """

    # Round-trip the actual dictionary through JSON text so that both sides
    # of the comparison consist of plain JSON-decoded objects.
    actual_json_text = json.dumps(serialised, indent=4)
    rule = 120 * '-'
    failure_message = '\nexpected:\n%s\n%s\nbut got:\n%s\n%s\n' % (rule, expected_json_text,
                                                                 rule, actual_json_text)
    self.assertEqual(json.loads(actual_json_text), json.loads(expected_json_text), msg=failure_message)