def test_init_operation_and_name_are_equivalent(self):
    """An OpStep built from the callable and one built from its qualified
    name must resolve to the very same op and meta-info objects."""
    from_callable = OpStep(op3)
    self.assertIsNotNone(from_callable.op)
    self.assertIsNotNone(from_callable.op_meta_info)
    from_name = OpStep(object_to_qualified_name(op3))
    self.assertIs(from_name.op, from_callable.op)
    self.assertIs(from_name.op_meta_info, from_callable.op_meta_info)
def test_to_json_dict(self):
    # Serialize a step with constant input values and compare against the
    # expected JSON (comparison is on parsed objects, so key order and
    # whitespace do not matter).
    step3 = OpStep(op3, node_id='op3')
    step3.inputs.u.value = 2.8
    step3.inputs.c.value = 1
    step3_dict = step3.to_json_dict()
    expected_json_text = """ { "id": "op3", "op": "test.core.test_workflow.op3", "inputs": { "u": {"value": 2.8}, "c": {"value": 1} } } """
    actual_json_text = json.dumps(step3_dict)
    expected_json_obj = json.loads(expected_json_text)
    actual_json_obj = json.loads(actual_json_text)
    self.assertEqual(actual_json_obj, expected_json_obj,
                     msg='\n%sexpected:\n%s\n%s\nbut got:\n%s\n' % (120 * '-', expected_json_text, 120 * '-', actual_json_text))
    # Invoke OpStep, and assert that output values are NOT serialized to JSON
    step3.inputs.u.value = 2.8
    step3.inputs.v.value = 1.2
    step3.invoke()
    step3_dict = step3.to_json_dict()
    expected_json_text = """ { "id": "op3", "op": "test.core.test_workflow.op3", "inputs": { "v": {"value": 1.2}, "u": {"value": 2.8}, "c": {"value": 1} } } """
    actual_json_text = json.dumps(step3_dict)
    expected_json_obj = json.loads(expected_json_text)
    actual_json_obj = json.loads(actual_json_text)
    self.assertEqual(actual_json_obj, expected_json_obj,
                     msg='\n%sexpected:\n%s\n%s\nbut got:\n%s\n' % (120 * '-', expected_json_text, 120 * '-', actual_json_text))
def test_connect_source(self):
    """Wiring input ports to upstream output ports works; addressing a
    port the node does not have raises AttributeError."""
    producer = OpStep(op1)
    middle = OpStep(op2)
    consumer = OpStep(op3)
    middle.inputs.a.source = producer.outputs.y
    consumer.inputs.u.source = producer.outputs.y
    consumer.inputs.v.source = middle.outputs.b
    self.assertConnectionsAreOk(producer, middle, consumer)
    # op1 has no input named 'a' — the attribute access must fail.
    with self.assertRaises(AttributeError) as cm:
        producer.inputs.a.source = consumer.inputs.u
    self.assertEqual(str(cm.exception), "attribute 'a' not found")
def create_example_3_steps_workflow(cls):
    """Build the canonical three-step example workflow.

    Wiring: workflow.p -> op1.x; op1.y -> op2.a and op3.u;
    op2.b -> op3.v; op3.w -> workflow.q.

    Returns:
        The tuple ``(step1, step2, step3, workflow)``.
    """
    meta_info = OpMetaInfo('myWorkflow',
                           inputs=OrderedDict(p={}),
                           outputs=OrderedDict(q={}))
    workflow = Workflow(meta_info)
    step1 = OpStep(op1, node_id='op1')
    step2 = OpStep(op2, node_id='op2')
    step3 = OpStep(op3, node_id='op3')
    workflow.add_steps(step1, step2, step3)
    step1.inputs.x.source = workflow.inputs.p
    step2.inputs.a.source = step1.outputs.y
    step3.inputs.u.source = step1.outputs.y
    step3.inputs.v.source = step2.outputs.b
    workflow.outputs.q.source = step3.outputs.w
    return step1, step2, step3, workflow
def test_call(self):
    """Calling a step like a function returns a dict of output values."""
    adder = OpStep(op1)
    adder.inputs.x.value = 3
    self.assertEqual(adder(x=3), dict(y=3 + 1))
    doubler = OpStep(op2)
    self.assertEqual(doubler(a=3), dict(b=2 * 3))
    combiner = OpStep(op3)
    self.assertEqual(combiner(u=4, v=5), dict(w=2 * 4 + 3 * 5))
def test_to_json_dict(self):
    """Port serialization (old 'input'/'output' API): an unset port is an
    empty dict, a constant becomes {'value': ...}, a connection becomes
    {'source': 'node.port'}, and clearing the source empties it again."""
    upstream = OpStep(Op1, node_id='myop1')
    downstream = OpStep(Op2, node_id='myop2')
    self.assertEqual(downstream.input.a.to_json_dict(), dict())
    downstream.input.a.value = 982
    self.assertEqual(downstream.input.a.to_json_dict(), dict(value=982))
    downstream.input.a.source = upstream.output.y
    self.assertEqual(downstream.input.a.to_json_dict(), dict(source='myop1.y'))
    downstream.input.a.source = None
    self.assertEqual(downstream.input.a.to_json_dict(), dict())
def test_repr_svg(self):
    # Build the example 3-step workflow (old 'input'/'output' API with the
    # input_dict/output_dict constructor kwargs) and check that the SVG
    # representation used by notebooks is produced without error.
    step1 = OpStep(Op1, node_id='op1')
    step2 = OpStep(Op2, node_id='op2')
    step3 = OpStep(Op3, node_id='op3')
    workflow = Workflow(OpMetaInfo('my_workflow',
                                   input_dict=OrderedDict(p={}),
                                   output_dict=OrderedDict(q={})))
    workflow.add_steps(step1, step2, step3)
    step1.input.x.source = workflow.input.p
    step2.input.a.source = step1.output.y
    step3.input.u.source = step1.output.y
    step3.input.v.source = step2.output.b
    workflow.output.q.source = step3.output.w
    workflow_json = workflow._repr_svg_()
    # print('\n\n%s\n\n' % workflow_json)
    # Only a smoke check: the SVG content itself is not validated here.
    self.assertIsNotNone(workflow_json)
def test_init(self):
    """A default-constructed OpStep gets a generated id and ports that
    mirror op3's meta-info (inputs u, v, c; output w)."""
    step = OpStep(op3)
    # Auto-generated node id: 'opstep_' followed by a hex suffix.
    self.assertRegex(step.id, '^opstep_[0-9a-f]+$')
    # Was assertTrue(len(...), 2): assertTrue never compares — its second
    # argument is only the failure message, so the check passed for any
    # non-empty namespace. Assert the real port counts instead, which the
    # str() assertion below confirms (u, v, c in; w out).
    self.assertEqual(len(step.inputs), 3)
    self.assertEqual(len(step.outputs), 1)
    self.assertTrue(hasattr(step.inputs, 'u'))
    self.assertIs(step.inputs.u.node, step)
    self.assertEqual(step.inputs.u.name, 'u')
    self.assertTrue(hasattr(step.inputs, 'v'))
    self.assertIs(step.inputs.v.node, step)
    self.assertEqual(step.inputs.v.name, 'v')
    self.assertTrue(hasattr(step.outputs, 'w'))
    self.assertIs(step.outputs.w.node, step)
    self.assertEqual(step.outputs.w.name, 'w')
    self.assertEqual(
        str(step),
        step.id + ' = tests.core.test_workflow.op3(u=None, v=None, c=0) -> (w) [OpStep]'
    )
    self.assertEqual(
        repr(step),
        "OpStep('tests.core.test_workflow.op3', node_id='%s')" % step.id)
def test_from_json_dict_source(self):
    # Deserializing a step whose inputs use 'source' refs: the textual
    # refs are parsed into _source_ref tuples but not resolved to live
    # ports yet (source stays None until resolution happens later).
    json_text = """ { "id": "op3", "op": "test.core.test_workflow.op3", "inputs": { "u": {"source": "stat_op.stats"}, "v": {"source": ".latitude"} } } """
    json_dict = json.loads(json_text)
    step3 = OpStep.from_json_dict(json_dict)
    self.assertIsInstance(step3, OpStep)
    self.assertEqual(step3.id, "op3")
    self.assertIsInstance(step3.op, Operation)
    self.assertIn('u', step3.inputs)
    self.assertIn('v', step3.inputs)
    self.assertIn('w', step3.outputs)
    # 'stat_op.stats' -> ('stat_op', 'stats'); '.latitude' -> node-less ref.
    self.assertEqual(step3.inputs.u._source_ref, ('stat_op', 'stats'))
    self.assertEqual(step3.inputs.u.source, None)
    self.assertEqual(step3.inputs.v._source_ref, (None, 'latitude'))
    self.assertEqual(step3.inputs.v.source, None)
def test_from_json_dict_value(self):
    # Deserializing a step whose inputs carry constant 'value' entries:
    # the values land directly on the input ports.
    json_text = """ { "id": "op3", "op": "test.core.test_workflow.op3", "inputs": { "u": {"value": 647}, "v": {"value": 2.9} } } """
    json_dict = json.loads(json_text)
    step3 = OpStep.from_json_dict(json_dict)
    self.assertIsInstance(step3, OpStep)
    self.assertEqual(step3.id, "op3")
    self.assertIsInstance(step3.op, Operation)
    self.assertIn('u', step3.inputs)
    self.assertIn('v', step3.inputs)
    self.assertIn('w', step3.outputs)
    self.assertEqual(step3.inputs.u.value, 647)
    self.assertEqual(step3.inputs.v.value, 2.9)
def test_init_failures(self):
    """OpStep rejects unregistered operations and a missing op argument,
    raising ValueError in every case."""
    # Expected messages, in order:
    #   "operation with name 'test_node.NodeTest' not registered"
    #   "operation with name 'X' not registered"
    #   "operation with name 'X.Y' not registered"
    #   "operation must be given"
    for bad_op in (OpStepTest, 'X', 'X.Y', None):
        with self.assertRaises(ValueError):
            OpStep(bad_op)
def test_resolve_source_ref(self):
    """update_source() resolves a textual source reference against the
    steps registered in the enclosing workflow."""
    src_step = OpStep(op1, node_id='myop1')
    dst_step = OpStep(op2, node_id='myop2')
    dst_step.inputs.a._source_ref = ('myop1', 'y')
    meta_info = OpMetaInfo('myWorkflow',
                           has_monitor=True,
                           inputs=OrderedDict(x={}),
                           outputs=OrderedDict(b={}))
    g = Workflow(meta_info)
    g.add_steps(src_step, dst_step)
    dst_step.inputs.a.update_source()
    # The textual ref is preserved, the live port is now connected,
    # and no constant value is present.
    self.assertEqual(dst_step.inputs.a._source_ref, ('myop1', 'y'))
    self.assertIs(dst_step.inputs.a.source, src_step.outputs.y)
    self.assertIs(dst_step.inputs.a.value, None)
def test_from_json_dict_value(self):
    # Same as the 'test.core' variant elsewhere in this file, but with the
    # 'tests.core' module path: constant 'value' entries land on the ports.
    json_text = """ { "id": "op3", "op": "tests.core.test_workflow.op3", "inputs": { "u": {"value": 647}, "v": {"value": 2.9} } } """
    json_dict = json.loads(json_text)
    step3 = OpStep.from_json_dict(json_dict)
    self.assertIsInstance(step3, OpStep)
    self.assertEqual(step3.id, "op3")
    self.assertIsInstance(step3.op, Operation)
    self.assertIn('u', step3.inputs)
    self.assertIn('v', step3.inputs)
    self.assertIn('w', step3.outputs)
    self.assertEqual(step3.inputs.u.value, 647)
    self.assertEqual(step3.inputs.v.value, 2.9)
def test_from_json_dict_source(self):
    # 'tests.core' variant: textual 'source' refs are parsed into
    # _source_ref tuples but not yet resolved to live ports.
    json_text = """ { "id": "op3", "op": "tests.core.test_workflow.op3", "inputs": { "u": {"source": "stat_op.stats"}, "v": {"source": ".latitude"} } } """
    json_dict = json.loads(json_text)
    step3 = OpStep.from_json_dict(json_dict)
    self.assertIsInstance(step3, OpStep)
    self.assertEqual(step3.id, "op3")
    self.assertIsInstance(step3.op, Operation)
    self.assertIn('u', step3.inputs)
    self.assertIn('v', step3.inputs)
    self.assertIn('w', step3.outputs)
    # 'stat_op.stats' -> ('stat_op', 'stats'); '.latitude' -> node-less ref.
    self.assertEqual(step3.inputs.u._source_ref, ('stat_op', 'stats'))
    self.assertEqual(step3.inputs.u.source, None)
    self.assertEqual(step3.inputs.v._source_ref, (None, 'latitude'))
    self.assertEqual(step3.inputs.v.source, None)
def test_source_and_value(self):
    """A NodePort holds either a constant value or a source connection;
    setting a source clears the value, and self-connection is rejected."""
    step1 = OpStep(op1, node_id='op1')
    step2 = OpStep(op2, node_id='op2')
    x_port = NodePort(step1, 'x')
    b_port = NodePort(step2, 'b')
    # Fresh port: no source, no value.
    self.assertEqual(x_port.is_source, False)
    self.assertEqual(x_port.source, None)
    self.assertEqual(x_port.source_ref, None)
    self.assertEqual(x_port.is_value, False)
    self.assertEqual(x_port.has_value, False)
    self.assertEqual(x_port.value, None)
    # Assigning a constant value.
    x_port.value = 11
    self.assertEqual(x_port.is_source, False)
    self.assertEqual(x_port.source, None)
    self.assertEqual(x_port.source_ref, None)
    self.assertEqual(x_port.is_value, True)
    self.assertEqual(x_port.has_value, True)
    self.assertEqual(x_port.value, 11)
    # Connecting to another port: the constant value is dropped.
    x_port.source = b_port
    self.assertEqual(x_port.is_source, True)
    self.assertEqual(x_port.source, b_port)
    self.assertEqual(x_port.source_ref, SourceRef(node_id='op2', port_name='b'))
    self.assertEqual(x_port.is_value, False)
    self.assertEqual(x_port.has_value, False)
    self.assertEqual(x_port.value, None)
    # A value set on the source becomes visible through the connection.
    b_port.value = 67382
    self.assertEqual(x_port.is_source, True)
    self.assertEqual(x_port.source, b_port)
    self.assertEqual(x_port.source_ref, SourceRef(node_id='op2', port_name='b'))
    self.assertEqual(x_port.is_value, False)
    self.assertEqual(x_port.has_value, True)
    self.assertEqual(x_port.value, 67382)
    # Self-connection is rejected with a descriptive error.
    with self.assertRaises(ValueError) as cm:
        x_port.source = x_port
    self.assertEqual(str(cm.exception), "cannot connect 'op1.x' with itself")
def test_init(self):
    """A freshly created NodePort knows its node, node id and name, and
    starts with neither a source nor a value."""
    node = OpStep(Op1, node_id='myop')
    port = NodePort(node, 'x')
    self.assertIs(port.node, node)
    self.assertEqual(port.node_id, 'myop')
    self.assertEqual(port.name, 'x')
    self.assertEqual(port.source, None)
    self.assertEqual(port.value, None)
    self.assertEqual(str(port), 'myop.x')
    self.assertEqual(repr(port), "NodePort('myop', 'x')")
def test_disconnect_source(self):
    # Wire the 3-step graph, then disconnect sources one by one and verify
    # the remaining connections stay intact.
    step1 = OpStep(op1)
    step2 = OpStep(op2)
    step3 = OpStep(op3)
    step2.inputs.a.source = step1.outputs.y
    step3.inputs.u.source = step1.outputs.y
    step3.inputs.v.source = step2.outputs.b
    self.assertConnectionsAreOk(step1, step2, step3)
    step3.inputs.v.source = None
    self.assertIs(step2.inputs.a.source, step1.outputs.y)
    self.assertIs(step3.inputs.u.source, step1.outputs.y)
    step2.inputs.a.source = None
    self.assertIs(step3.inputs.u.source, step1.outputs.y)
    # NOTE(review): the next line duplicates the previous assertion —
    # it was possibly meant to check step2.inputs.a.source (now None)
    # after the disconnect above; confirm intent.
    self.assertIs(step3.inputs.u.source, step1.outputs.y)
    step3.inputs.u.source = None
def test_to_json_dict(self):
    # Serialize a step (old 'input'/'output' JSON layout) and compare as
    # parsed JSON objects, so whitespace/key order do not matter.
    step3 = OpStep(Op3, node_id='op3')
    step3.input.u.value = 2.8
    step3_dict = step3.to_json_dict()
    expected_json_text = """ { "id": "op3", "op": "test.core.test_workflow.Op3", "input": { "v": {}, "u": {"value": 2.8} }, "output": { "w": {} } } """
    actual_json_text = json.dumps(step3_dict)
    expected_json_obj = json.loads(expected_json_text)
    actual_json_obj = json.loads(actual_json_text)
    self.assertEqual(actual_json_obj, expected_json_obj,
                     msg='\n%sexpected:\n%s\n%s\nbut got:\n%s\n' % (120 * '-', expected_json_text, 120 * '-', actual_json_text))
    # Invoke OpStep, and assert that output values are NOT serialized to JSON
    step3.input.u.value = 2.8
    step3.input.v.value = 1.2
    step3.invoke()
    step3_dict = step3.to_json_dict()
    expected_json_text = """ { "id": "op3", "op": "test.core.test_workflow.Op3", "input": { "v": {"value": 1.2}, "u": {"value": 2.8} }, "output": { "w": {} } } """
    actual_json_text = json.dumps(step3_dict)
    expected_json_obj = json.loads(expected_json_text)
    actual_json_obj = json.loads(actual_json_text)
    self.assertEqual(actual_json_obj, expected_json_obj,
                     msg='\n%sexpected:\n%s\n%s\nbut got:\n%s\n' % (120 * '-', expected_json_text, 120 * '-', actual_json_text))
def test_invoke(self):
    """invoke() evaluates each op against its constant inputs and stores
    the result on the corresponding output port."""
    cases = [
        (op1, dict(x=3), 'y', 3 + 1),
        (op2, dict(a=3), 'b', 2 * 3),
        (op3, dict(u=4, v=5), 'w', 2 * 4 + 3 * 5),
    ]
    for op, input_values, output_name, expected in cases:
        step = OpStep(op)
        for name, value in input_values.items():
            step.inputs[name].value = value
        step.invoke()
        self.assertEqual(step.outputs[output_name].value, expected)
def test_init(self):
    """A freshly constructed NodePort exposes its node, node id and name,
    and starts with neither a source nor a value."""
    step = OpStep(op1, node_id='myop')
    x_port = NodePort(step, 'x')
    self.assertIs(x_port.node, step)
    self.assertEqual(x_port.node_id, 'myop')
    self.assertEqual(x_port.name, 'x')
    # Initial state: not connected, no constant value.
    self.assertEqual(x_port.is_source, False)
    self.assertEqual(x_port.source, None)
    self.assertEqual(x_port.source_ref, None)
    self.assertEqual(x_port.is_value, False)
    self.assertEqual(x_port.has_value, False)
    self.assertEqual(x_port.value, None)
    self.assertEqual(str(x_port), 'myop.x')
    self.assertEqual(repr(x_port), "NodePort('myop', 'x')")
def test_init(self):
    """A default-constructed OpStep (old 'input'/'output' API) gets a
    generated id and ports mirroring Op3's meta-info (inputs u, v; output w)."""
    step = OpStep(Op3)
    # Auto-generated node id: 'op_step_' followed by a hex suffix.
    self.assertRegex(step.id, '^op_step_[0-9a-f]+$')
    # Was assertTrue(len(...), 2): assertTrue never compares — its second
    # argument is only the failure message, so the check passed for any
    # non-empty namespace. Assert the real port counts instead, which the
    # str() assertion below confirms (u, v in; w out).
    self.assertEqual(len(step.input), 2)
    self.assertEqual(len(step.output), 1)
    self.assertTrue(hasattr(step.input, 'u'))
    self.assertIs(step.input.u.node, step)
    self.assertEqual(step.input.u.name, 'u')
    self.assertTrue(hasattr(step.input, 'v'))
    self.assertIs(step.input.v.node, step)
    self.assertEqual(step.input.v.name, 'v')
    self.assertTrue(hasattr(step.output, 'w'))
    self.assertIs(step.output.w.node, step)
    self.assertEqual(step.output.w.name, 'w')
    self.assertEqual(str(step), step.id + ' = test.core.test_workflow.Op3(u=None, v=None) -> (w) [OpStep]')
    self.assertEqual(repr(step), "OpStep(test.core.test_workflow.Op3, node_id='%s')" % step.id)
def test_invoke_with_context_inputs(self):
    """Inputs flagged with 'context' meta-data are filled from the
    execution context (workflow, step, their ids) rather than from ports."""

    def some_op(context, workflow, workflow_id, step, step_id, invalid):
        # Echo back every context-provided value for inspection.
        return dict(context=context,
                    workflow=workflow,
                    workflow_id=workflow_id,
                    step=step,
                    step_id=step_id,
                    invalid=invalid)

    from cate.core.op import OP_REGISTRY
    try:
        op_reg = OP_REGISTRY.add_op(some_op)
        # 'context' == True injects the whole context dict; string values
        # are expressions evaluated against the context.
        op_reg.op_meta_info.inputs['context']['context'] = True
        op_reg.op_meta_info.inputs['workflow']['context'] = 'workflow'
        op_reg.op_meta_info.inputs['workflow_id']['context'] = 'workflow.id'
        op_reg.op_meta_info.inputs['step']['context'] = 'step'
        op_reg.op_meta_info.inputs['step_id']['context'] = 'step.id'
        op_reg.op_meta_info.inputs['invalid']['context'] = 'gnarz[8]'
        step = OpStep(op_reg, node_id='test_step')
        workflow = Workflow(OpMetaInfo('test_workflow'))
        workflow.add_step(step)
        workflow.invoke()
        output = step.outputs['return'].value
        self.assertIsInstance(output, dict)
        self.assertIsInstance(output.get('context'), dict)
        self.assertIs(output.get('workflow'), workflow)
        self.assertEqual(output.get('workflow_id'), 'test_workflow')
        self.assertIs(output.get('step'), step)
        self.assertEqual(output.get('step_id'), 'test_step')
        # The unresolvable expression 'gnarz[8]' is expected to yield None
        # (the key exists, so the default 1 must not be returned).
        self.assertEqual(output.get('invalid', 1), None)
    finally:
        # Keep the global registry clean for other tests.
        OP_REGISTRY.remove_op(some_op)
def test_to_json_dict(self):
    """Workspace.to_json_dict() must describe every resource produced by
    the workflow: a dataset, data frames, a geo data frame, their scalar
    and empty variants, and plain int/str values (10 resources total)."""

    # --- helper ops producing the various resource kinds ---------------

    def dataset_op() -> xr.Dataset:
        periods = 5
        temperature_data = (15 + 8 * np.random.randn(periods, 2, 2)).round(decimals=1)
        temperature_attrs = {'a': np.array([1, 2, 3]), 'comment': 'hot', '_FillValue': np.nan}
        precipitation_data = (10 * np.random.rand(periods, 2, 2)).round(decimals=1)
        precipitation_attrs = {'x': True, 'comment': 'wet', '_FillValue': -1.0}
        ds = xr.Dataset(
            data_vars={
                'temperature': (('time', 'lat', 'lon'), temperature_data, temperature_attrs),
                'precipitation': (('time', 'lat', 'lon'), precipitation_data, precipitation_attrs)
            },
            coords={
                'lon': np.array([12, 13]),
                'lat': np.array([50, 51]),
                'time': pd.date_range('2014-09-06', periods=periods)
            },
            attrs={
                'history': 'a b c'
            })
        return ds

    def scalar_dataset_op() -> xr.Dataset:
        # Single-cell dataset: variables should be reported as scalars.
        ds = xr.Dataset(
            data_vars={
                'temperature': (('time', 'lat', 'lon'), [[[15.2]]]),
                'precipitation': (('time', 'lat', 'lon'), [[[10.1]]])
            },
            coords={
                'lon': [12.],
                'lat': [50.],
                'time': [pd.to_datetime('2014-09-06')],
            },
            attrs={
                'history': 'a b c'
            })
        return ds

    def empty_dataset_op() -> xr.Dataset:
        # Zero-size dataset: variable values should serialize as None.
        ds = xr.Dataset(
            data_vars={
                'temperature': (('time', 'lat', 'lon'), np.ndarray(shape=(0, 0, 0), dtype=np.float32)),
                'precipitation': (('time', 'lat', 'lon'), np.ndarray(shape=(0, 0, 0), dtype=np.float32))
            },
            coords={
                'lon': np.ndarray(shape=(0,), dtype=np.float32),
                'lat': np.ndarray(shape=(0,), dtype=np.float32),
                'time': np.ndarray(shape=(0,), dtype=np.datetime64),
            },
            attrs={
                'history': 'a b c'
            })
        return ds

    def data_frame_op() -> pd.DataFrame:
        data = {'A': [1, 2, 3, np.nan, 4, 9, np.nan, np.nan, 1, 0, 4, 6],
                'B': [5, 6, 8, 7, 5, 5, 5, 9, 1, 2, 7, 6]}
        time = pd.date_range('2000-01-01', freq='MS', periods=12)
        return pd.DataFrame(data=data, index=time, dtype=float, columns=['A', 'B'])

    def scalar_data_frame_op() -> pd.DataFrame:
        data = {'A': [1.3], 'B': [5.9]}
        return pd.DataFrame(data=data, dtype=float, columns=['A', 'B'])

    def empty_data_frame_op() -> pd.DataFrame:
        data = {'A': [], 'B': []}
        return pd.DataFrame(data=data, dtype=float, columns=['A', 'B'])

    def geo_data_frame_op() -> gpd.GeoDataFrame:
        data = {'name': ['A', 'B', 'C'],
                'lat': [45, 46, 47.5],
                'lon': [-120, -121.2, -122.9]}
        df = pd.DataFrame(data, columns=['name', 'lat', 'lon'])
        geometry = [Point(xy) for xy in zip(df['lon'], df['lat'])]
        return gpd.GeoDataFrame(df, geometry=geometry)

    def scalar_geo_data_frame_op() -> gpd.GeoDataFrame:
        # 'name' is deliberately 2000 chars long to exercise truncation.
        data = {'name': [2000 * 'A'], 'lat': [45], 'lon': [-120]}
        df = pd.DataFrame(data, columns=['name', 'lat', 'lon'])
        geometry = [Point(xy) for xy in zip(df['lon'], df['lat'])]
        return gpd.GeoDataFrame(df, geometry=geometry)

    def int_op() -> int:
        return 394852

    def str_op() -> str:
        return 'Hi!'

    from cate.core.op import OP_REGISTRY
    try:
        # Register the helper ops for the duration of this test only.
        OP_REGISTRY.add_op(dataset_op)
        OP_REGISTRY.add_op(data_frame_op)
        OP_REGISTRY.add_op(geo_data_frame_op)
        OP_REGISTRY.add_op(scalar_dataset_op)
        OP_REGISTRY.add_op(scalar_data_frame_op)
        OP_REGISTRY.add_op(scalar_geo_data_frame_op)
        OP_REGISTRY.add_op(empty_dataset_op)
        OP_REGISTRY.add_op(empty_data_frame_op)
        OP_REGISTRY.add_op(int_op)
        OP_REGISTRY.add_op(str_op)
        workflow = Workflow(OpMetaInfo('workspace_workflow', header=dict(description='Test!')))
        workflow.add_step(OpStep(dataset_op, node_id='ds'))
        workflow.add_step(OpStep(data_frame_op, node_id='df'))
        workflow.add_step(OpStep(geo_data_frame_op, node_id='gdf'))
        workflow.add_step(OpStep(scalar_dataset_op, node_id='scalar_ds'))
        workflow.add_step(OpStep(scalar_data_frame_op, node_id='scalar_df'))
        workflow.add_step(OpStep(scalar_geo_data_frame_op, node_id='scalar_gdf'))
        workflow.add_step(OpStep(empty_dataset_op, node_id='empty_ds'))
        workflow.add_step(OpStep(empty_data_frame_op, node_id='empty_df'))
        workflow.add_step(OpStep(int_op, node_id='i'))
        workflow.add_step(OpStep(str_op, node_id='s'))
        ws = Workspace('/path', workflow)
        ws.execute_workflow()
        d_ws = ws.to_json_dict()
        # import pprint
        # pprint.pprint(d_ws)
        d_wf = d_ws.get('workflow')
        self.assertIsNotNone(d_wf)
        l_res = d_ws.get('resources')
        self.assertIsNotNone(l_res)
        self.assertEqual(len(l_res), 10)
        # Resource 0: the full xarray dataset.
        res_ds = l_res[0]
        self.assertEqual(res_ds.get('name'), 'ds')
        self.assertEqual(res_ds.get('dataType'), 'xarray.core.dataset.Dataset')
        self.assertEqual(res_ds.get('dimSizes'), dict(lat=2, lon=2, time=5))
        self.assertEqual(res_ds.get('attributes'), {'history': 'a b c'})
        res_ds_vars = res_ds.get('variables')
        self.assertIsNotNone(res_ds_vars)
        self.assertEqual(len(res_ds_vars), 2)
        res_ds_var_1 = res_ds_vars[0]
        self.assertEqual(res_ds_var_1.get('name'), 'precipitation')
        self.assertEqual(res_ds_var_1.get('dataType'), 'float64')
        self.assertEqual(res_ds_var_1.get('numDims'), 3)
        self.assertEqual(res_ds_var_1.get('shape'), (5, 2, 2))
        self.assertEqual(res_ds_var_1.get('chunkSizes'), None)
        self.assertEqual(res_ds_var_1.get('isYFlipped'), True)
        self.assertEqual(res_ds_var_1.get('isFeatureAttribute'), None)
        self.assertEqual(res_ds_var_1.get('attributes'), dict(x=True, comment='wet', _FillValue=-1.))
        res_ds_var_2 = res_ds_vars[1]
        self.assertEqual(res_ds_var_2.get('name'), 'temperature')
        self.assertEqual(res_ds_var_2.get('dataType'), 'float64')
        self.assertEqual(res_ds_var_2.get('numDims'), 3)
        self.assertEqual(res_ds_var_2.get('shape'), (5, 2, 2))
        self.assertEqual(res_ds_var_2.get('chunkSizes'), None)
        self.assertEqual(res_ds_var_2.get('isYFlipped'), True)
        self.assertEqual(res_ds_var_2.get('isFeatureAttribute'), None)
        self.assertEqual(res_ds_var_2.get('attributes'), dict(a=[1, 2, 3], comment='hot', _FillValue=np.nan))
        # Resource 1: the pandas data frame.
        res_df = l_res[1]
        self.assertEqual(res_df.get('name'), 'df')
        self.assertEqual(res_df.get('dataType'), 'pandas.core.frame.DataFrame')
        self.assertEqual(res_df.get('attributes'), {'num_rows': 12, 'num_columns': 2})
        res_df_vars = res_df.get('variables')
        self.assertIsNotNone(res_df_vars)
        self.assertEqual(len(res_df_vars), 2)
        res_df_var_1 = res_df_vars[0]
        self.assertEqual(res_df_var_1.get('name'), 'A')
        self.assertEqual(res_df_var_1.get('dataType'), 'float64')
        self.assertEqual(res_df_var_1.get('numDims'), 1)
        self.assertEqual(res_df_var_1.get('shape'), (12,))
        self.assertEqual(res_df_var_1.get('isYFlipped'), None)
        self.assertEqual(res_df_var_1.get('isFeatureAttribute'), True)
        self.assertIsNone(res_df_var_1.get('attributes'))
        res_df_var_2 = res_df_vars[1]
        self.assertEqual(res_df_var_2.get('name'), 'B')
        self.assertEqual(res_df_var_2.get('dataType'), 'float64')
        self.assertEqual(res_df_var_2.get('numDims'), 1)
        self.assertEqual(res_df_var_2.get('shape'), (12,))
        self.assertEqual(res_df_var_2.get('isYFlipped'), None)
        self.assertEqual(res_df_var_2.get('isFeatureAttribute'), True)
        self.assertIsNone(res_df_var_2.get('attributes'))
        # Resource 2: the geo data frame (3 attribute columns + geometry).
        res_gdf = l_res[2]
        self.assertEqual(res_gdf.get('name'), 'gdf')
        self.assertEqual(res_gdf.get('dataType'), 'geopandas.geodataframe.GeoDataFrame')
        self.assertEqual(res_gdf.get('attributes'), {'num_rows': 3, 'num_columns': 4, 'geom_type': 'Point'})
        res_gdf_vars = res_gdf.get('variables')
        self.assertIsNotNone(res_gdf_vars)
        self.assertEqual(len(res_gdf_vars), 4)
        res_gdf_var_1 = res_gdf_vars[0]
        self.assertEqual(res_gdf_var_1.get('name'), 'name')
        self.assertEqual(res_gdf_var_1.get('dataType'), 'object')
        self.assertEqual(res_gdf_var_1.get('numDims'), 1)
        self.assertEqual(res_gdf_var_1.get('shape'), (3,))
        self.assertEqual(res_gdf_var_1.get('isYFlipped'), None)
        self.assertEqual(res_gdf_var_1.get('isFeatureAttribute'), True)
        self.assertIsNone(res_gdf_var_1.get('attributes'))
        res_gdf_var_2 = res_gdf_vars[1]
        self.assertEqual(res_gdf_var_2.get('name'), 'lat')
        self.assertEqual(res_gdf_var_2.get('dataType'), 'float64')
        self.assertEqual(res_gdf_var_2.get('numDims'), 1)
        self.assertEqual(res_gdf_var_2.get('shape'), (3,))
        self.assertEqual(res_gdf_var_2.get('isYFlipped'), None)
        self.assertEqual(res_gdf_var_2.get('isFeatureAttribute'), True)
        self.assertIsNone(res_gdf_var_2.get('attributes'))
        res_gdf_var_3 = res_gdf_vars[2]
        self.assertEqual(res_gdf_var_3.get('name'), 'lon')
        self.assertEqual(res_gdf_var_3.get('dataType'), 'float64')
        self.assertEqual(res_gdf_var_3.get('numDims'), 1)
        self.assertEqual(res_gdf_var_3.get('shape'), (3,))
        self.assertEqual(res_gdf_var_3.get('isYFlipped'), None)
        self.assertEqual(res_gdf_var_3.get('isFeatureAttribute'), True)
        self.assertIsNone(res_gdf_var_3.get('attributes'))
        res_gdf_var_4 = res_gdf_vars[3]
        self.assertEqual(res_gdf_var_4.get('name'), 'geometry')
        self.assertEqual(res_gdf_var_4.get('dataType'), 'geometry')
        self.assertEqual(res_gdf_var_4.get('numDims'), 1)
        self.assertEqual(res_gdf_var_4.get('shape'), (3,))
        self.assertEqual(res_gdf_var_4.get('isYFlipped'), None)
        self.assertEqual(res_gdf_var_4.get('isFeatureAttribute'), True)
        self.assertIsNone(res_gdf_var_4.get('attributes'))
        # Resource 3: scalar dataset — variables carry their single value.
        res_scalar_ds = l_res[3]
        res_scalar_ds_vars = res_scalar_ds.get('variables')
        self.assertIsNotNone(res_scalar_ds_vars)
        self.assertEqual(len(res_scalar_ds_vars), 2)
        scalar_values = {res_scalar_ds_vars[0].get('name'): res_scalar_ds_vars[0].get('value'),
                         res_scalar_ds_vars[1].get('name'): res_scalar_ds_vars[1].get('value')}
        self.assertEqual(scalar_values, {'temperature': 15.2, 'precipitation': 10.1})
        # Resource 4: scalar data frame.
        res_scalar_df = l_res[4]
        res_scalar_df_vars = res_scalar_df.get('variables')
        self.assertIsNotNone(res_scalar_df_vars)
        self.assertEqual(len(res_scalar_df_vars), 2)
        scalar_values = {res_scalar_df_vars[0].get('name'): res_scalar_df_vars[0].get('value'),
                         res_scalar_df_vars[1].get('name'): res_scalar_df_vars[1].get('value')}
        self.assertEqual(scalar_values, {'A': 1.3, 'B': 5.9})
        # Resource 5: scalar geo data frame — long strings are truncated
        # to 1000 chars plus an ellipsis; geometry is rendered as WKT.
        res_scalar_gdf = l_res[5]
        res_scalar_gdf_vars = res_scalar_gdf.get('variables')
        self.assertIsNotNone(res_scalar_gdf_vars)
        self.assertEqual(len(res_scalar_gdf_vars), 4)
        scalar_values = {res_scalar_gdf_vars[0].get('name'): res_scalar_gdf_vars[0].get('value'),
                         res_scalar_gdf_vars[1].get('name'): res_scalar_gdf_vars[1].get('value'),
                         res_scalar_gdf_vars[2].get('name'): res_scalar_gdf_vars[2].get('value'),
                         res_scalar_gdf_vars[3].get('name'): res_scalar_gdf_vars[3].get('value')}
        self.assertEqual(scalar_values, {'name': (1000 * 'A') + '...',
                                         'lat': 45,
                                         'lon': -120,
                                         'geometry': 'POINT (-120 45)'})
        # Resource 6: empty dataset — values serialize as None.
        res_empty_ds = l_res[6]
        res_empty_ds_vars = res_empty_ds.get('variables')
        self.assertIsNotNone(res_empty_ds_vars)
        self.assertEqual(len(res_empty_ds_vars), 2)
        scalar_values = {res_empty_ds_vars[0].get('name'): res_empty_ds_vars[0].get('value'),
                         res_empty_ds_vars[1].get('name'): res_empty_ds_vars[1].get('value')}
        self.assertEqual(scalar_values, {'temperature': None, 'precipitation': None})
        # Resource 7: empty data frame — values serialize as None.
        res_empty_df = l_res[7]
        res_empty_df_vars = res_empty_df.get('variables')
        self.assertIsNotNone(res_empty_df_vars)
        self.assertEqual(len(res_empty_df_vars), 2)
        scalar_values = {res_empty_df_vars[0].get('name'): res_empty_df_vars[0].get('value'),
                         res_empty_df_vars[1].get('name'): res_empty_df_vars[1].get('value')}
        self.assertEqual(scalar_values, {'A': None, 'B': None})
        # Resources 8 and 9: plain int and str values.
        res_int = l_res[8]
        self.assertEqual(res_int.get('name'), 'i')
        self.assertEqual(res_int.get('dataType'), 'int')
        self.assertIsNone(res_int.get('attributes'))
        self.assertIsNone(res_int.get('variables'))
        res_str = l_res[9]
        self.assertEqual(res_str.get('name'), 's')
        self.assertEqual(res_str.get('dataType'), 'str')
        self.assertIsNone(res_str.get('attributes'))
        self.assertIsNone(res_str.get('variables'))
    finally:
        # Always de-register the helper ops to keep the registry clean.
        OP_REGISTRY.remove_op(dataset_op)
        OP_REGISTRY.remove_op(data_frame_op)
        OP_REGISTRY.remove_op(geo_data_frame_op)
        OP_REGISTRY.remove_op(scalar_dataset_op)
        OP_REGISTRY.remove_op(scalar_data_frame_op)
        OP_REGISTRY.remove_op(scalar_geo_data_frame_op)
        OP_REGISTRY.remove_op(empty_dataset_op)
        OP_REGISTRY.remove_op(empty_data_frame_op)
        OP_REGISTRY.remove_op(int_op)
        OP_REGISTRY.remove_op(str_op)
def test_to_json_dict(self):
    """Workspace.to_json_dict() for a workflow producing a dataset,
    a data frame, an int and a str resource (4 resources total)."""

    def dataset_op() -> xr.Dataset:
        periods = 5
        temperature_data = (15 + 8 * np.random.randn(periods, 2, 2)).round(
            decimals=1)
        temperature_attrs = {
            'a': np.array([1, 2, 3]),
            'comment': 'hot',
            '_FillValue': np.nan
        }
        precipitation_data = (10 * np.random.rand(periods, 2, 2)).round(
            decimals=1)
        precipitation_attrs = {
            'x': True,
            'comment': 'wet',
            '_FillValue': -1.0
        }
        ds = xr.Dataset(data_vars={
            'temperature': (('time', 'lat', 'lon'), temperature_data, temperature_attrs),
            'precipitation': (('time', 'lat', 'lon'), precipitation_data, precipitation_attrs)
        },
            coords={
                'lon': np.array([12, 13]),
                'lat': np.array([50, 51]),
                'time': pd.date_range('2014-09-06', periods=periods)
            },
            attrs={'history': 'a b c'})
        return ds

    def data_frame_op() -> pd.DataFrame:
        data = {
            'A': [1, 2, 3, np.nan, 4, 9, np.nan, np.nan, 1, 0, 4, 6],
            'B': [5, 6, 8, 7, 5, 5, 5, 9, 1, 2, 7, 6]
        }
        time = pd.date_range('2000-01-01', freq='MS', periods=12)
        return pd.DataFrame(data=data, index=time, dtype=float)

    def int_op() -> int:
        return 394852

    def str_op() -> str:
        return 'Hi!'

    from cate.core.op import OP_REGISTRY
    try:
        # Register the helper ops for the duration of this test only.
        OP_REGISTRY.add_op(dataset_op)
        OP_REGISTRY.add_op(data_frame_op)
        OP_REGISTRY.add_op(int_op)
        OP_REGISTRY.add_op(str_op)
        workflow = Workflow(
            OpMetaInfo('workspace_workflow', header=dict(description='Test!')))
        workflow.add_step(OpStep(dataset_op, node_id='ds'))
        workflow.add_step(OpStep(data_frame_op, node_id='df'))
        workflow.add_step(OpStep(int_op, node_id='i'))
        workflow.add_step(OpStep(str_op, node_id='s'))
        ws = Workspace('/path', workflow)
        ws.execute_workflow()
        d_ws = ws.to_json_dict()
        # import pprint
        # pprint.pprint(d_ws)
        d_wf = d_ws.get('workflow')
        self.assertIsNotNone(d_wf)
        l_res = d_ws.get('resources')
        self.assertIsNotNone(l_res)
        self.assertEqual(len(l_res), 4)
        # Resource 0: the xarray dataset.
        res_1 = l_res[0]
        self.assertEqual(res_1.get('name'), 'ds')
        self.assertEqual(res_1.get('dataType'), 'xarray.core.dataset.Dataset')
        self.assertEqual(res_1.get('dimSizes'), dict(lat=2, lon=2, time=5))
        self.assertEqual(res_1.get('attributes'), {'history': 'a b c'})
        res_1_vars = res_1.get('variables')
        self.assertIsNotNone(res_1_vars)
        self.assertEqual(len(res_1_vars), 2)
        var_1 = res_1_vars[0]
        self.assertEqual(var_1.get('name'), 'precipitation')
        self.assertEqual(var_1.get('dataType'), 'float64')
        self.assertEqual(var_1.get('numDims'), 3)
        self.assertEqual(var_1.get('shape'), (5, 2, 2))
        self.assertEqual(var_1.get('chunkSizes'), None)
        self.assertEqual(var_1.get('isYFlipped'), True)
        self.assertEqual(var_1.get('isFeatureAttribute'), None)
        self.assertEqual(var_1.get('attributes'),
                         dict(x=True, comment='wet', _FillValue=-1.))
        var_2 = res_1_vars[1]
        self.assertEqual(var_2.get('name'), 'temperature')
        self.assertEqual(var_2.get('dataType'), 'float64')
        self.assertEqual(var_2.get('numDims'), 3)
        self.assertEqual(var_2.get('shape'), (5, 2, 2))
        self.assertEqual(var_2.get('chunkSizes'), None)
        self.assertEqual(var_2.get('isYFlipped'), True)
        self.assertEqual(var_2.get('isFeatureAttribute'), None)
        self.assertEqual(
            var_2.get('attributes'),
            dict(a=[1, 2, 3], comment='hot', _FillValue=np.nan))
        # Resource 1: the pandas data frame.
        res_2 = l_res[1]
        self.assertEqual(res_2.get('name'), 'df')
        self.assertEqual(res_2.get('dataType'), 'pandas.core.frame.DataFrame')
        self.assertIsNone(res_2.get('attributes'))
        res_2_vars = res_2.get('variables')
        self.assertIsNotNone(res_2_vars)
        self.assertEqual(len(res_2_vars), 2)
        var_1 = res_2_vars[0]
        self.assertEqual(var_1.get('name'), 'A')
        self.assertEqual(var_1.get('dataType'), 'float64')
        self.assertEqual(var_1.get('numDims'), 1)
        self.assertEqual(var_1.get('shape'), (12, ))
        self.assertEqual(var_1.get('isYFlipped'), None)
        self.assertEqual(var_1.get('isFeatureAttribute'), None)
        self.assertIsNone(var_1.get('attributes'))
        var_2 = res_2_vars[1]
        self.assertEqual(var_2.get('name'), 'B')
        self.assertEqual(var_2.get('dataType'), 'float64')
        self.assertEqual(var_2.get('numDims'), 1)
        self.assertEqual(var_2.get('shape'), (12, ))
        self.assertEqual(var_2.get('isYFlipped'), None)
        self.assertEqual(var_2.get('isFeatureAttribute'), None)
        self.assertIsNone(var_2.get('attributes'))
        # Resources 2 and 3: plain int and str values.
        res_3 = l_res[2]
        self.assertEqual(res_3.get('name'), 'i')
        self.assertEqual(res_3.get('dataType'), 'int')
        self.assertIsNone(res_3.get('attributes'))
        self.assertIsNone(res_3.get('variables'))
        res_4 = l_res[3]
        self.assertEqual(res_4.get('name'), 's')
        self.assertEqual(res_4.get('dataType'), 'str')
        # NOTE(review): the other resources check the 'attributes' key;
        # 'attrs' never exists in this dict, so the assertion below passes
        # vacuously — confirm whether 'attributes' was intended.
        self.assertIsNone(res_4.get('attrs'))
        self.assertIsNone(res_4.get('variables'))
    finally:
        # Always de-register the helper ops to keep the registry clean.
        OP_REGISTRY.remove_op(dataset_op)
        OP_REGISTRY.remove_op(data_frame_op)
        OP_REGISTRY.remove_op(int_op)
        OP_REGISTRY.remove_op(str_op)
def test_to_json_dict(self):
    # Workflow serialization: step wiring and the 'persistent' step flag
    # must all appear in the JSON form; comparison is done on parsed JSON
    # objects, so whitespace/key order do not matter.
    step1 = OpStep(op1, node_id='op1')
    step2 = OpStep(op2, node_id='op2')
    step3 = OpStep(op3, node_id='op3')
    workflow = Workflow(OpMetaInfo('my_workflow',
                                   inputs=OrderedDict(p={}),
                                   outputs=OrderedDict(q={})))
    workflow.add_steps(step1, step2, step3)
    step1.inputs.x.source = workflow.inputs.p
    step2.inputs.a.source = step1.outputs.y
    step3.inputs.u.source = step1.outputs.y
    step3.inputs.v.source = step2.outputs.b
    workflow.outputs.q.source = step3.outputs.w
    # Only step2 is marked persistent; the flag must be serialized.
    step2.persistent = True
    workflow_dict = workflow.to_json_dict()
    expected_json_text = """ { "schema_version": 1, "qualified_name": "my_workflow", "header": {}, "inputs": { "p": {} }, "outputs": { "q": { "source": "op3.w" } }, "steps": [ { "id": "op1", "op": "test.core.test_workflow.op1", "inputs": { "x": "my_workflow.p" } }, { "id": "op2", "persistent": true, "op": "test.core.test_workflow.op2", "inputs": { "a": "op1.y" } }, { "id": "op3", "op": "test.core.test_workflow.op3", "inputs": { "v": "op2.b", "u": "op1.y" } } ] } """
    actual_json_text = json.dumps(workflow_dict, indent=4)
    expected_json_obj = json.loads(expected_json_text)
    actual_json_obj = json.loads(actual_json_text)
    self.assertEqual(actual_json_obj, expected_json_obj,
                     msg='\nexpected:\n%s\n%s\nbut got:\n%s\n%s\n' % (120 * '-', expected_json_text, 120 * '-', actual_json_text))
def set_resource(self, res_name: str, op_name: str, op_args: List[str],
                 overwrite: bool = False, validate_args: bool = False) -> None:
    """Create or replace the workspace resource *res_name* by a new workflow step.

    The new step invokes the registered operation *op_name* with the given
    *op_args*. Argument values may reference outputs of existing steps, which
    wires the new step into the workflow. Cached values of a replaced resource
    and of all steps that depend on it are dropped.

    :param res_name: Name of the target resource; also becomes the step's ID.
    :param op_name: Name of a registered operation.
    :param op_args: Raw operation arguments (``name=value`` strings).
    :param overwrite: If ``True``, an existing resource may be replaced.
    :param validate_args: If ``True``, parsed argument values are validated.
    :raises WorkspaceError: If the operation is unknown, the resource already
        exists and *overwrite* is not set, or an argument does not match an
        input of the operation.
    """
    assert res_name
    assert op_name
    assert op_args

    op = OP_REGISTRY.get_op(op_name)
    if not op:
        raise WorkspaceError('Unknown operation "%s"' % op_name)

    new_step = OpStep(op, node_id=res_name)

    workflow = self.workflow

    # This namespace will allow us to wire the new resource with existing workflow steps.
    # We only add step outputs, so we cannot reference another step's input either.
    # Note that workspace workflows never have any inputs to be referenced anyway.
    namespace = dict()
    for step in workflow.steps:
        namespace[step.id] = step.output

    does_exist = res_name in namespace
    if not overwrite and does_exist:
        raise WorkspaceError('Resource "%s" already exists' % res_name)
    if does_exist:
        # Prevent the resource from referencing itself
        namespace.pop(res_name, None)

    op_kwargs = self._parse_op_args(op, op_args, namespace, validate_args)

    return_output_name = OpMetaInfo.RETURN_OUTPUT_NAME

    # Wire the new step's inputs to outputs of existing steps, or set constants
    for input_name, input_value in op_kwargs.items():
        if input_name not in new_step.input:
            raise WorkspaceError('"%s" is not an input of operation "%s"' % (input_name, op_name))
        input_port = new_step.input[input_name]
        if isinstance(input_value, NodePort):
            # input_value is an output NodePort of another step
            input_port.source = input_value
        elif isinstance(input_value, Namespace):
            # input_value is the output namespace of another step; only its
            # single default output can be referenced
            if return_output_name not in input_value:
                raise WorkspaceError('Illegal value for input "%s"' % input_name)
            # Use the symbolic constant here (was the literal 'return'), so the
            # lookup always agrees with the membership check above.
            input_port.source = input_value[return_output_name]
        else:
            # Neither a Namespace nor a NodePort, so it must be a constant value
            input_port.value = input_value

    old_step = workflow.find_node(res_name)

    # Collect keys of invalidated cache entries; res_name itself is always invalidated
    ids_of_invalidated_steps = {res_name}
    if old_step is not None:
        # Collect all IDs of steps that depend on the replaced step, if any
        for step in workflow.steps:
            if step.requires(old_step):
                ids_of_invalidated_steps.add(step.id)

    # NOTE(review): presumably self.workflow returns self._workflow; keeping the
    # original re-binding to preserve behavior — confirm and simplify if so.
    workflow = self._workflow
    workflow.add_step(new_step, can_exist=True)
    self._is_modified = True

    # Remove any cached resource values whose steps became invalidated
    for key in ids_of_invalidated_steps:
        if key in self._resource_cache:
            del self._resource_cache[key]
def test_to_json_dict(self):
    # Build a workflow of three wired steps and verify its JSON serialization.
    step1 = OpStep(op1, node_id='op1')
    step2 = OpStep(op2, node_id='op2')
    step3 = OpStep(op3, node_id='op3')
    workflow = Workflow(
        OpMetaInfo('my_workflow',
                   inputs=OrderedDict(p={}),
                   outputs=OrderedDict(q={})))
    workflow.add_steps(step1, step2, step3)
    # Wiring: workflow.p -> op1.x; op1.y -> op2.a and op3.u; op2.b -> op3.v;
    # op3.w -> workflow.q
    step1.inputs.x.source = workflow.inputs.p
    step2.inputs.a.source = step1.outputs.y
    step3.inputs.u.source = step1.outputs.y
    step3.inputs.v.source = step2.outputs.b
    workflow.outputs.q.source = step3.outputs.w
    # Persistent flag on one step must appear in the serialized form
    step2.persistent = True
    workflow_dict = workflow.to_json_dict()
    expected_json_text = """
    {
        "schema_version": 1,
        "qualified_name": "my_workflow",
        "header": {},
        "inputs": {
            "p": {}
        },
        "outputs": {
            "q": {
                "source": "op3.w"
            }
        },
        "steps": [
            {
                "id": "op1",
                "op": "tests.core.test_workflow.op1",
                "inputs": {
                    "x": "my_workflow.p"
                }
            },
            {
                "id": "op2",
                "persistent": true,
                "op": "tests.core.test_workflow.op2",
                "inputs": {
                    "a": "op1.y"
                }
            },
            {
                "id": "op3",
                "op": "tests.core.test_workflow.op3",
                "inputs": {
                    "v": "op2.b",
                    "u": "op1.y"
                }
            }
        ]
    }
    """
    actual_json_text = json.dumps(workflow_dict, indent=4)
    # Compare parsed JSON objects so text formatting differences don't matter
    expected_json_obj = json.loads(expected_json_text)
    actual_json_obj = json.loads(actual_json_text)
    self.assertEqual(
        actual_json_obj, expected_json_obj,
        msg='\nexpected:\n%s\n%s\nbut got:\n%s\n%s\n' %
            (120 * '-', expected_json_text, 120 * '-', actual_json_text))
def test_from_json_dict(self):
    """Deserialize NodePort state from JSON, covering shorthand forms and errors."""
    step2 = OpStep(op2, node_id='myop2')
    port2 = NodePort(step2, 'a')

    def assert_port_state(source_ref, source, value):
        # Inspect the port's private state after deserialization.
        self.assertEqual(port2._source_ref, source_ref)
        self.assertEqual(port2._source, source)
        self.assertEqual(port2._value, value)

    # A plain value
    port2.from_json(json.loads('{"value": 2.6}'))
    assert_port_state(None, None, 2.6)

    # A full source reference
    port2.from_json(json.loads('{"source": "myop1.y"}'))
    assert_port_state(('myop1', 'y'), None, UNDEFINED)

    # "myop1.y" is a shorthand for {"source": "myop1.y"}
    port2.from_json(json.loads('"myop1.y"'))
    assert_port_state(('myop1', 'y'), None, UNDEFINED)

    port2.from_json(json.loads('{"source": ".y"}'))
    assert_port_state((None, 'y'), None, UNDEFINED)

    # ".x" is a shorthand for {"source": ".x"}
    port2.from_json(json.loads('".y"'))
    assert_port_state((None, 'y'), None, UNDEFINED)

    # "myop1" is a shorthand for {"source": "myop1"}
    port2.from_json(json.loads('"myop1"'))
    assert_port_state(('myop1', None), None, UNDEFINED)

    # If "a" is defined but neither "source" nor "value" is given,
    # the port has neither a source nor a value
    port2.from_json(json.loads('{}'))
    assert_port_state(None, None, UNDEFINED)

    port2.from_json(json.loads('null'))
    assert_port_state(None, None, UNDEFINED)

    # Same when "a" is not defined at all
    port2.from_json(json.loads('{}'))
    assert_port_state(None, None, UNDEFINED)

    # "source" and "value" must not be combined
    with self.assertRaises(ValueError) as cm:
        port2.from_json(json.loads('{"value": 2.6, "source": "y"}'))
    self.assertEqual(
        str(cm.exception),
        "error decoding 'myop2.a' because \"source\" and \"value\" are mutually exclusive"
    )

    # All malformed "source" strings raise the same error message
    expected_msg = "error decoding 'myop2.a' because the \"source\" value format is " \
                   "neither \"<node-id>.<name>\", \"<node-id>\", nor \".<name>\""
    for bad_json in ('{"source": ""}', '{"source": "."}', '{"source": "var."}'):
        with self.assertRaises(ValueError) as cm:
            port2.from_json(json.loads(bad_json))
        self.assertEqual(str(cm.exception), expected_msg)