def test_run_workflow(self):
    """Run the 'timeseries.json' test workflow via the CLI 'run' command.

    Covers plain execution, execution with --monitor, and execution with
    --monitor plus --write (JSON output file, which is removed afterwards).
    """
    op_reg = OP_REGISTRY.add_op(timeseries, fail_if_exists=True)
    workflow_file = os.path.join(os.path.dirname(__file__), 'timeseries.json')
    self.assertTrue(os.path.exists(workflow_file),
                    msg='missing test file %s' % workflow_file)
    try:
        # Run without --monitor and --write
        self.assert_main(
            ['run', workflow_file, 'lat=13.2', 'lon=52.9'],
            expected_stdout=['[0.3, 0.25, 0.05, 0.4, 0.2, 0.1, 0.5]'])
        # Run with --monitor and without --write
        self.assert_main(
            ['run', '--monitor', workflow_file, 'lat=13.2', 'lon=52.9'],
            expected_stdout=['[0.3, 0.25, 0.05, 0.4, 0.2, 0.1, 0.5]'])
        # Run with --monitor and --write
        self.assert_main(
            ['run', '--monitor', '--write', 'timeseries_data.json',
             workflow_file, 'lat=13.2', 'lon=52.9'],
            expected_stdout=[
                'Writing output to timeseries_data.json using JSON format...'])
        self.assertTrue(os.path.isfile('timeseries_data.json'))
        os.remove('timeseries_data.json')
    finally:
        # Always unregister the test op, even if an assertion failed.
        OP_REGISTRY.remove_op(op_reg, fail_if_not_exists=True)
def test_run_op(self):
    """Run the registered 'timeseries' operation via the CLI 'run' command."""
    op_reg = OP_REGISTRY.add_op(timeseries, fail_if_exists=True)
    try:
        op_name = op_reg.op_meta_info.qualified_name
        # Run without --monitor and --write
        self.assert_main(
            ['run', op_name, 'lat=13.2', 'lon=52.9'],
            expected_stdout=['[0.3, 0.25, 0.05, 0.4, 0.2, 0.1, 0.5]'])
        # Run with --monitor and without --write
        self.assert_main(
            ['run', '--monitor', op_name, 'lat=13.2', 'lon=52.9'],
            expected_stdout=['[0.3, 0.25, 0.05, 0.4, 0.2, 0.1, 0.5]'])
        # Run with --monitor and --write
        self.assert_main(
            ['run', '--monitor', '--write', 'timeseries_data.txt',
             op_name, 'lat=13.2', 'lon=52.9'],
            expected_stdout=[
                'Writing output to timeseries_data.txt using TEXT format...'])
        self.assertTrue(os.path.isfile('timeseries_data.txt'))
        os.remove('timeseries_data.txt')
        # Run with invalid keyword
        self.assert_main(
            ['run', op_name, 'l*t=13.2', 'lon=52.9'],
            expected_status=1,
            expected_stderr=['cate run: error: "l*t" is not a valid input name'],
            expected_stdout='')
    finally:
        # Always unregister the test op, even if an assertion failed.
        OP_REGISTRY.remove_op(op_reg, fail_if_not_exists=True)
def test_registered(self):
    """
    Test nominal execution of anomaly_internal through the operations registry.
    """
    reg_op = OP_REGISTRY.get_op(
        object_to_qualified_name(anomaly.anomaly_internal))
    months = [datetime(2000, m, 1) for m in range(1, 13)]
    ds = xr.Dataset({
        'first': (['lat', 'lon', 'time'], np.ones([45, 90, 12])),
        'second': (['lat', 'lon', 'time'], np.ones([45, 90, 12])),
        'lat': np.linspace(-88, 88, 45),
        'lon': np.linspace(-178, 178, 90),
        'time': months})
    # Constant input: the anomaly w.r.t. its own climatology is all zeros.
    expected = xr.Dataset({
        'first': (['lat', 'lon', 'time'], np.zeros([45, 90, 12])),
        'second': (['lat', 'lon', 'time'], np.zeros([45, 90, 12])),
        'lat': np.linspace(-88, 88, 45),
        'lon': np.linspace(-178, 178, 90),
        'time': months})
    actual = reg_op(ds=ds,
                    time_range='2000-01-01, 2000-04-01',
                    region='-50, -50, 50, 50')
    assert_dataset_equal(expected, actual)
def test_registered(self):
    """Test ENSO Nino3.4 index calculation as a registered operation."""
    reg_op = OP_REGISTRY.get_op(object_to_qualified_name(index.enso_nino34))
    dataset = xr.Dataset({
        'first': (['lat', 'lon', 'time'], np.ones([45, 90, 24])),
        'second': (['lat', 'lon', 'time'], np.ones([45, 90, 24])),
        'lat': np.linspace(-88, 88, 45),
        'lon': np.linspace(-178, 178, 90),
        'time': ([datetime(2001, m, 1) for m in range(1, 13)]
                 + [datetime(2002, m, 1) for m in range(1, 13)])})
    # Long-term average reference: constant 2.0, one step per calendar month.
    lta = 2 * xr.Dataset({
        'first': (['lat', 'lon', 'time'], np.ones([45, 90, 12])),
        'second': (['lat', 'lon', 'time'], np.ones([45, 90, 12])),
        'lat': np.linspace(-88, 88, 45),
        'lon': np.linspace(-178, 178, 90),
        'time': list(range(1, 13))})
    # The expected index drops two months at either end of the 24-month record.
    expected_time = ([datetime(2001, m, 1) for m in range(3, 13)]
                     + [datetime(2002, m, 1) for m in range(1, 11)])
    expected = pd.DataFrame(data=(np.ones([20]) * -1),
                            columns=['ENSO N3.4 Index'],
                            index=expected_time)
    with create_tmp_file() as tmp_file:
        lta.to_netcdf(tmp_file)
        actual = reg_op(ds=dataset, var='first', file=tmp_file)
        self.assertTrue(expected.equals(actual))
def test_registered(self):
    """
    Test temporal_aggregation execution through the operations registry.
    """
    reg_op = OP_REGISTRY.get_op(
        object_to_qualified_name(temporal_aggregation))
    ds = adjust_temporal_attrs(xr.Dataset({
        'first': (['lat', 'lon', 'time'], np.ones([45, 90, 366])),
        'second': (['lat', 'lon', 'time'], np.ones([45, 90, 366])),
        'lat': np.linspace(-88, 88, 45),
        'lon': np.linspace(-178, 178, 90),
        'time': pd.date_range('2000-01-01', '2000-12-31')}))
    ex = xr.Dataset({
        'first': (['lat', 'lon', 'time'], np.ones([45, 90, 12])),
        'second': (['lat', 'lon', 'time'], np.ones([45, 90, 12])),
        'lat': np.linspace(-88, 88, 45),
        'lon': np.linspace(-178, 178, 90),
        'time': pd.date_range('2000-01-01', freq='MS', periods=12)})
    # Aggregation annotates the variables with a CF 'cell_methods' attribute.
    for var in ('first', 'second'):
        ex[var].attrs['cell_methods'] = 'time: mean within years'
    actual = reg_op(ds=ds)
    self.assertTrue(actual.broadcast_equals(ex))
def test_registered(self):
    """
    Test subset_temporal as an operation registered in the op registry.
    """
    reg_op = OP_REGISTRY.get_op(
        object_to_qualified_name(subset.subset_temporal))
    dataset = xr.Dataset({
        'first': (['lat', 'lon', 'time'], np.ones([180, 360, 6])),
        'second': (['lat', 'lon', 'time'], np.ones([180, 360, 6])),
        'lat': np.linspace(-89.5, 89.5, 180),
        'lon': np.linspace(-179.5, 179.5, 360),
        'time': [datetime(2000, m, 1) for m in range(1, 7)]})
    actual = reg_op(ds=dataset, time_range='2000-01-10, 2000-04-01')
    # Only the Feb, Mar and Apr time steps fall inside the requested range.
    expected = xr.Dataset({
        'first': (['lat', 'lon', 'time'], np.ones([180, 360, 3])),
        'second': (['lat', 'lon', 'time'], np.ones([180, 360, 3])),
        'lat': np.linspace(-89.5, 89.5, 180),
        'lon': np.linspace(-179.5, 179.5, 360),
        'time': [datetime(2000, m, 1) for m in range(2, 5)]})
    assert_dataset_equal(expected, actual)
def test_registered_compute_with_context(self):
    """Test arithmetics.compute with a value-cache context via the registry."""
    reg_op = OP_REGISTRY.get_op(
        object_to_qualified_name(arithmetics.compute))
    first = np.ones([45, 90, 3])
    second = np.ones([45, 90, 3])
    lon = np.linspace(-178, 178, 90)
    lat = np.linspace(-88, 88, 45)
    res_1 = xr.Dataset({'first': (['lat', 'lon', 'time'], first),
                        'lat': lat, 'lon': lon})
    res_2 = xr.Dataset({'second': (['lat', 'lon', 'time'], second),
                        'lat': lat, 'lon': lon})
    # Note, if executed from a workflow, _ctx will be set by the framework
    _ctx = dict(value_cache=dict(res_1=res_1, res_2=res_2))
    actual = reg_op(ds=None,
                    script="third = 6 * res_1.first - 3 * res_2.second",
                    _ctx=_ctx)
    expected = xr.Dataset({
        'third': (['lat', 'lon', 'time'], 6 * first - 3 * second),
        'lat': lat, 'lon': lon})
    assert_dataset_equal(expected, actual)
def test_registered(self):
    """
    Test ds_arithmetics when invoked through the OP_REGISTRY.
    """
    reg_op = OP_REGISTRY.get_op(
        object_to_qualified_name(arithmetics.ds_arithmetics))
    dataset = xr.Dataset({
        'first': (['lat', 'lon', 'time'], np.ones([45, 90, 3])),
        'second': (['lat', 'lon', 'time'], np.ones([45, 90, 3])),
        'lat': np.linspace(-88, 88, 45),
        'lon': np.linspace(-178, 178, 90)})
    expected = xr.Dataset({
        'first': (['lat', 'lon', 'time'], np.ones([45, 90, 3])),
        'second': (['lat', 'lon', 'time'], np.ones([45, 90, 3])),
        'lat': np.linspace(-88, 88, 45),
        'lon': np.linspace(-178, 178, 90)})
    # (((1 + 2) - 2) * 3) / 3 == 1, then the final '*4' scales everything.
    actual = reg_op(ds=dataset, op='+2, -2, *3, /3, *4')
    assert_dataset_equal(expected * 4, actual)
def test_registered_compute(self):
    """Test arithmetics.compute via the registry, with and without copy."""
    reg_op = OP_REGISTRY.get_op(object_to_qualified_name(arithmetics.compute))
    first = np.ones([45, 90, 3])
    second = np.ones([45, 90, 3])
    lat = np.linspace(-88, 88, 45)
    lon = np.linspace(-178, 178, 90)
    dataset = xr.Dataset({
        'first': (['lat', 'lon', 'time'], first),
        'second': (['lat', 'lon', 'time'], second),
        'lat': lat, 'lon': lon})
    # Without copy: the result holds only the computed variable.
    actual = reg_op(ds=dataset, expr="6 * first - 3 * second", var="third")
    expected = xr.Dataset({
        'third': (['lat', 'lon', 'time'], 6 * first - 3 * second),
        'lat': lat, 'lon': lon})
    assert_dataset_equal(expected, actual)
    # With copy=True: the input variables are carried over as well.
    actual = reg_op(ds=dataset, expr="6 * first - 3 * second", var="third",
                    copy=True)
    expected = xr.Dataset({
        'first': (['lat', 'lon', 'time'], first),
        'second': (['lat', 'lon', 'time'], second),
        'third': (['lat', 'lon', 'time'], 6 * first - 3 * second),
        'lat': lat, 'lon': lon})
    assert_dataset_equal(expected, actual)
def test_registered(self):
    """
    Test subset_temporal_index as an operation registered in the op registry.
    """
    reg_op = OP_REGISTRY.get_op(
        object_to_qualified_name(subset.subset_temporal_index))
    dataset = xr.Dataset({
        'first': (['lat', 'lon', 'time'], np.ones([180, 360, 6])),
        'second': (['lat', 'lon', 'time'], np.ones([180, 360, 6])),
        'lat': np.linspace(-89.5, 89.5, 180),
        'lon': np.linspace(-179.5, 179.5, 360),
        'time': ['2000-01-01', '2000-02-01', '2000-03-01',
                 '2000-04-01', '2000-05-01', '2000-06-01']})
    actual = reg_op(ds=dataset, time_ind_min=2, time_ind_max=4)
    # Both index bounds are inclusive: time steps 2..4 survive.
    expected = xr.Dataset({
        'first': (['lat', 'lon', 'time'], np.ones([180, 360, 3])),
        'second': (['lat', 'lon', 'time'], np.ones([180, 360, 3])),
        'lat': np.linspace(-89.5, 89.5, 180),
        'lon': np.linspace(-179.5, 179.5, 360),
        'time': ['2000-03-01', '2000-04-01', '2000-05-01']})
    assert_dataset_equal(expected, actual)
def test_registered(self):
    """
    Test nominal execution of ONI Index calculation, as a registered operation.
    """
    reg_op = OP_REGISTRY.get_op(object_to_qualified_name(index.oni))
    dataset = xr.Dataset({
        'first': (['lat', 'lon', 'time'], np.ones([45, 90, 24])),
        'second': (['lat', 'lon', 'time'], np.ones([45, 90, 24])),
        'lat': np.linspace(-88, 88, 45),
        'lon': np.linspace(-178, 178, 90),
        'time': ([datetime(2001, m, 1) for m in range(1, 13)]
                 + [datetime(2002, m, 1) for m in range(1, 13)])})
    # Long-term average reference: constant 2.0, one step per calendar month.
    lta = 2 * xr.Dataset({
        'first': (['lat', 'lon', 'time'], np.ones([45, 90, 12])),
        'second': (['lat', 'lon', 'time'], np.ones([45, 90, 12])),
        'lat': np.linspace(-88, 88, 45),
        'lon': np.linspace(-178, 178, 90),
        'time': list(range(1, 13))})
    # The expected index drops one month at either end of the 24-month record.
    expected_time = ([datetime(2001, m, 1) for m in range(2, 13)]
                     + [datetime(2002, m, 1) for m in range(1, 12)])
    expected = pd.DataFrame(data=(np.ones([22]) * -1),
                            columns=['ONI Index'],
                            index=expected_time)
    with create_tmp_file() as tmp_file:
        lta.to_netcdf(tmp_file)
        actual = reg_op(ds=dataset, var='first', file=tmp_file)
        self.assertTrue(expected.equals(actual))
def test_run_op(self):
    """Exercise the CLI 'run' command against the registered 'timeseries' op."""
    op_reg = OP_REGISTRY.add_op(timeseries, fail_if_exists=True)
    try:
        qname = op_reg.op_meta_info.qualified_name
        # Run without --monitor and --write
        self.assert_main(
            ['run', qname, 'lat=13.2', 'lon=52.9'],
            expected_stdout=['[0.3, 0.25, 0.05, 0.4, 0.2, 0.1, 0.5]'])
        # Run with --monitor and without --write
        self.assert_main(
            ['run', '--monitor', qname, 'lat=13.2', 'lon=52.9'],
            expected_stdout=['[0.3, 0.25, 0.05, 0.4, 0.2, 0.1, 0.5]'])
        # Run with --monitor and --write
        self.assert_main(
            ['run', '--monitor', '--write', 'timeseries_data.txt',
             qname, 'lat=13.2', 'lon=52.9'],
            expected_stdout=[
                'Writing output to timeseries_data.txt using TEXT format...'])
        self.assertTrue(os.path.isfile('timeseries_data.txt'))
        os.remove('timeseries_data.txt')
        # Run with invalid keyword
        self.assert_main(
            ['run', qname, 'l*t=13.2', 'lon=52.9'],
            expected_status=1,
            expected_stderr=[
                'cate run: error: "l*t" is not a valid input name'],
            expected_stdout='')
    finally:
        # Always unregister the test op, even if an assertion failed.
        OP_REGISTRY.remove_op(op_reg, fail_if_not_exists=True)
def test_existing_method(self):
    """Parse op args where 'method' is a declared input of the operation."""
    op = OP_REGISTRY.get_op('cate.ops.timeseries.tseries_point', True)
    raw_args = ['ds=@ds', 'point=12.2,54.3', 'var=temperature', 'method=bfill']
    op_args, op_kwargs = main._parse_op_args(
        raw_args, input_props=op.op_meta_info.inputs)
    # Everything is keyword-style, so positional args must be empty.
    self.assertEqual(op_args, [])
    self.assertEqual(op_kwargs,
                     OrderedDict([('ds', dict(source='ds')),
                                  ('point', dict(value=(12.2, 54.3))),
                                  ('var', dict(value='temperature')),
                                  ('method', dict(value='bfill'))]))
def test_registered(self):
    """
    Test nominal execution of 'literal' as a registered operation.
    """
    op = OP_REGISTRY.get_op(object_to_qualified_name(literal))
    # Each input string is parsed into the corresponding Python literal.
    self.assertEqual(op(value='True'), True)
    self.assertEqual(op(value='42'), 42)
    self.assertEqual(op(value='3.14'), 3.14)
    self.assertEqual(op(value='"ha"'), 'ha')
    self.assertEqual(op(value='[3,4,5]'), [3, 4, 5])
def test_registered(self):
    """
    Test nominal execution of 'identity' as a registered operation.
    """
    op = OP_REGISTRY.get_op(object_to_qualified_name(identity))
    # Identity must return every value unchanged.
    self.assertEqual(op(value=True), True)
    self.assertEqual(op(value=42), 42)
    self.assertEqual(op(value=3.14), 3.14)
    self.assertEqual(op(value='ha'), 'ha')
    self.assertEqual(op(value=[3, 4, 5]), [3, 4, 5])
def test_registered(self):
    """
    Test nominal execution of 'plot' as a registered operation.
    """
    reg_op = OP_REGISTRY.get_op(object_to_qualified_name(plot))
    # Test plot
    dataset = xr.Dataset({'first': np.random.rand(10)})
    with create_tmp_file('remove_me', 'jpg') as tmp_file:
        reg_op(ds=dataset, var='first', file=tmp_file)
        # The op is expected to have written the plot image file.
        self.assertTrue(os.path.isfile(tmp_file))
def test_registered(self):
    """
    Test 'harmonize' as a registered operation.
    """
    reg_op = OP_REGISTRY.get_op(object_to_qualified_name(harmonize))
    values = [[1, 2, 3], [2, 3, 4]]
    dataset = xr.Dataset({'first': (['latitude', 'longitude'], values)})
    # Harmonization renames the spatial dimensions to 'lat'/'lon'.
    expected = xr.Dataset({'first': (['lat', 'lon'], values)})
    actual = reg_op(ds=dataset)
    assertDatasetEqual(actual, expected)
def test_registered(self):
    """
    Test nominal execution of the 'plot' function as a registered operation.
    """
    reg_op = OP_REGISTRY.get_op(object_to_qualified_name(plot))
    # Test plot
    ds = xr.Dataset({'first': np.random.rand(10)})
    with create_tmp_file('remove_me', 'jpg') as tmp_file:
        reg_op(ds=ds, var='first', file=tmp_file)
        # The op is expected to have written the plot image file.
        self.assertTrue(os.path.isfile(tmp_file))
def test_workspace_is_part_of_context(self):
    """Verify the executing Workspace is injected into an op's context dict."""

    def some_op(ctx: dict) -> dict:
        return dict(ctx)

    from cate.core.op import OP_REGISTRY

    try:
        op_reg = OP_REGISTRY.add_op(some_op)
        # Mark the 'ctx' input so the framework fills it with the context.
        op_reg.op_meta_info.inputs['ctx']['context'] = True
        ws = Workspace('/path',
                       Workflow(OpMetaInfo('workspace_workflow',
                                           header=dict(description='Test!'))))
        ws.set_resource(op_reg.op_meta_info.qualified_name, {},
                        res_name='new_ctx')
        ws.execute_workflow('new_ctx')
        self.assertTrue('new_ctx' in ws.resource_cache)
        self.assertTrue('workspace' in ws.resource_cache['new_ctx'])
        # The very workspace instance must be reachable from the context.
        self.assertIs(ws.resource_cache['new_ctx']['workspace'], ws)
    finally:
        OP_REGISTRY.remove_op(some_op)
def test_registered(self):
    """
    Test 'normalize' as a registered operation.
    """
    reg_op = OP_REGISTRY.get_op(object_to_qualified_name(normalize))
    values = [[1, 2, 3], [2, 3, 4]]
    dataset = xr.Dataset({'first': (['latitude', 'longitude'], values)})
    # Normalization renames the spatial dimensions to 'lat'/'lon'.
    expected = xr.Dataset({'first': (['lat', 'lon'], values)})
    actual = reg_op(ds=dataset)
    assertDatasetEqual(actual, expected)
def test_invoke_with_context_inputs(self):
    """Verify that 'context'-marked inputs are filled from the workflow context."""

    def some_op(context, workflow, workflow_id, step, step_id, invalid):
        return dict(context=context, workflow=workflow,
                    workflow_id=workflow_id, step=step,
                    step_id=step_id, invalid=invalid)

    from cate.core.op import OP_REGISTRY

    try:
        op_reg = OP_REGISTRY.add_op(some_op)
        inputs = op_reg.op_meta_info.inputs
        inputs['context']['context'] = True
        inputs['workflow']['context'] = 'workflow'
        inputs['workflow_id']['context'] = 'workflow.id'
        inputs['step']['context'] = 'step'
        inputs['step_id']['context'] = 'step.id'
        # An unresolvable context expression should yield None.
        inputs['invalid']['context'] = 'gnarz[8]'
        step = OpStep(op_reg, node_id='test_step')
        workflow = Workflow(OpMetaInfo('test_workflow'))
        workflow.add_step(step)
        workflow.invoke()
        output = step.outputs['return'].value
        self.assertIsInstance(output, dict)
        self.assertIsInstance(output.get('context'), dict)
        self.assertIs(output.get('workflow'), workflow)
        self.assertEqual(output.get('workflow_id'), 'test_workflow')
        self.assertIs(output.get('step'), step)
        self.assertEqual(output.get('step_id'), 'test_step')
        self.assertEqual(output.get('invalid', 1), None)
    finally:
        OP_REGISTRY.remove_op(some_op)
def test_registered(self):
    """
    Test nominal execution of 'plot_line' as a registered operation.
    """
    reg_op = OP_REGISTRY.get_op(object_to_qualified_name(plot_line))
    # Test plot
    dataset = xr.Dataset({
        'first': (['time'], np.random.rand(10)),
        'second': (['time'], np.random.rand(10)),
        'time': pd.date_range('2000-01-01', periods=10)})
    with create_tmp_file('remove_me', 'jpg') as tmp_file:
        reg_op(ds=dataset, var_names=['first', 'second'], file=tmp_file)
        # The op is expected to have written the plot image file.
        self.assertTrue(os.path.isfile(tmp_file))
def test_registered(self):
    """
    Test plot_data_frame when run as a registered operation.
    """
    reg_op = OP_REGISTRY.get_op(object_to_qualified_name(plot_data_frame))
    values = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    df = pd.DataFrame(data={'A': values, 'B': values},
                      index=pd.date_range('2000-01-01', periods=10))
    with create_tmp_file('remove_me', 'png') as tmp_file:
        reg_op(df=df, file=tmp_file)
        # The op is expected to have written the plot image file.
        self.assertTrue(os.path.isfile(tmp_file))
def test_invoke_with_context_inputs(self):
    """Check that inputs marked with 'context' receive workflow context values."""

    def some_op(context, workflow, workflow_id, step, step_id, invalid):
        return dict(context=context, workflow=workflow,
                    workflow_id=workflow_id, step=step,
                    step_id=step_id, invalid=invalid)

    from cate.core.op import OP_REGISTRY

    try:
        op_reg = OP_REGISTRY.add_op(some_op)
        meta_inputs = op_reg.op_meta_info.inputs
        meta_inputs['context']['context'] = True
        meta_inputs['workflow']['context'] = 'workflow'
        meta_inputs['workflow_id']['context'] = 'workflow.id'
        meta_inputs['step']['context'] = 'step'
        meta_inputs['step_id']['context'] = 'step.id'
        # An unresolvable context expression should yield None.
        meta_inputs['invalid']['context'] = 'gnarz[8]'
        step = OpStep(op_reg, node_id='test_step')
        workflow = Workflow(OpMetaInfo('test_workflow'))
        workflow.add_step(step)
        workflow.invoke()
        output = step.outputs['return'].value
        self.assertIsInstance(output, dict)
        self.assertIsInstance(output.get('context'), dict)
        self.assertIs(output.get('workflow'), workflow)
        self.assertEqual(output.get('workflow_id'), 'test_workflow')
        self.assertIs(output.get('step'), step)
        self.assertEqual(output.get('step_id'), 'test_step')
        self.assertEqual(output.get('invalid', 1), None)
    finally:
        OP_REGISTRY.remove_op(some_op)
def test_registered(self):
    """
    Test long_term_average execution through the registry (smoke test).
    """
    reg_op = OP_REGISTRY.get_op(object_to_qualified_name(long_term_average))
    ds = adjust_temporal_attrs(xr.Dataset({
        'first': (['lat', 'lon', 'time'], np.ones([45, 90, 24])),
        'second': (['lat', 'lon', 'time'], np.ones([45, 90, 24])),
        'lat': np.linspace(-88, 88, 45),
        'lon': np.linspace(-178, 178, 90),
        'time': pd.date_range('2000-01-01', freq='MS', periods=24)}))
    # Only verifies that the registered op runs without raising.
    reg_op(ds=ds)
def test_registered(self):
    """
    Test nominal execution of the 'plot_line' function as a registered op.
    """
    reg_op = OP_REGISTRY.get_op(object_to_qualified_name(plot_line))
    # Test plot
    ds = xr.Dataset({
        'first': (['time'], np.random.rand(10)),
        'second': (['time'], np.random.rand(10)),
        'time': pd.date_range('2000-01-01', periods=10)})
    with create_tmp_file('remove_me', 'jpg') as tmp_file:
        reg_op(ds=ds, var_names=['first', 'second'], file=tmp_file)
        # The op is expected to have written the plot image file.
        self.assertTrue(os.path.isfile(tmp_file))
def test_registered(self):
    """
    Test nominal execution of 'plot_map' as a registered operation.
    """
    reg_op = OP_REGISTRY.get_op(object_to_qualified_name(plot_map))
    dataset = xr.Dataset({
        'first': (['lat', 'lon', 'time'], np.random.rand(5, 10, 2)),
        'second': (['lat', 'lon', 'time'], np.random.rand(5, 10, 2)),
        'lat': np.linspace(-89.5, 89.5, 5),
        'lon': np.linspace(-179.5, 179.5, 10),
        'time': pd.date_range('2000-01-01', periods=2)})
    with create_tmp_file('remove_me', 'png') as tmp_file:
        reg_op(ds=dataset, file=tmp_file)
        # The op is expected to have written the plot image file.
        self.assertTrue(os.path.isfile(tmp_file))
def test_registered(self):
    """
    Test subset_spatial as an operation registered in the op registry.
    """
    reg_op = OP_REGISTRY.get_op(
        object_to_qualified_name(subset.subset_spatial))
    dataset = xr.Dataset({
        'first': (['lat', 'lon', 'time'], np.ones([180, 360, 6])),
        'second': (['lat', 'lon', 'time'], np.ones([180, 360, 6])),
        'lat': np.linspace(-89.5, 89.5, 180),
        'lon': np.linspace(-179.5, 179.5, 360)})
    actual = reg_op(ds=dataset, region="-20, -10, 20, 10")
    # The subset keeps one extra grid cell beyond the region on each side.
    expected = xr.Dataset({
        'first': (['lat', 'lon', 'time'], np.ones([22, 42, 6])),
        'second': (['lat', 'lon', 'time'], np.ones([22, 42, 6])),
        'lat': np.linspace(-10.5, 10.5, 22),
        'lon': np.linspace(-20.5, 20.5, 42)})
    assert_dataset_equal(expected, actual)
def test_registered(self):
    """
    Test pandas_fillna (method='ffill') when run as a registered operation.
    """
    reg_op = OP_REGISTRY.get_op(object_to_qualified_name(pandas_fillna))
    # Test na filling using a given method
    data = {'A': [1, 2, 3, np.nan, 4, 9, np.nan, np.nan, 1, 0, 4, 6],
            'B': [5, 6, 8, 7, 5, np.nan, np.nan, np.nan, 1, 2, 7, 6]}
    # Forward-fill: each NaN takes the last preceding valid value.
    filled = {'A': [1, 2, 3, 3, 4, 9, 9, 9, 1, 0, 4, 6],
              'B': [5, 6, 8, 7, 5, 5, 5, 5, 1, 2, 7, 6]}
    time = pd.date_range('2000-01-01', freq='MS', periods=12)
    expected = pd.DataFrame(data=filled, index=time, dtype=float)
    df = pd.DataFrame(data=data, index=time, dtype=float)
    actual = reg_op(df=df, method='ffill')
    self.assertTrue(actual.equals(expected))
def test_registered(self):
    """
    Test pandas_fillna as a registered op with a timezone-aware time index.
    """
    reg_op = OP_REGISTRY.get_op(object_to_qualified_name(pandas_fillna))
    # Test na filling using a given method
    data = {'A': [1, 2, 3, np.nan, 4, 9, np.nan, np.nan, 1, 0, 4, 6],
            'B': [5, 6, 8, 7, 5, np.nan, np.nan, np.nan, 1, 2, 7, 6]}
    # Forward-fill: each NaN takes the last preceding valid value.
    filled = {'A': [1, 2, 3, 3, 4, 9, 9, 9, 1, 0, 4, 6],
              'B': [5, 6, 8, 7, 5, 5, 5, 5, 1, 2, 7, 6]}
    time = pd.date_range('2000-01-01', freq='MS', periods=12,
                         tz=timezone.utc)
    expected = pd.DataFrame(data=filled, index=time, dtype=float)
    df = pd.DataFrame(data=data, index=time, dtype=float)
    actual = reg_op(df=df, method='ffill')
    self.assertTrue(actual.equals(expected))
def run_op(self, op_name: str, op_args: List[str], validate_args=False,
           monitor=Monitor.NONE):
    """Execute a registered operation against this workspace.

    :param op_name: qualified name of the operation to run; must be non-empty
    :param op_args: raw "name=value" argument strings for the operation;
                    may be empty for operations that take no arguments
    :param validate_args: whether to validate the parsed arguments
    :param monitor: progress monitor receiving two units of work
    :raises WorkspaceError: if *op_name* is empty or names no registered op
    """
    # Explicit validation instead of 'assert': asserts are stripped when
    # Python runs with -O, which would silently disable these checks.
    # Also, an empty op_args list is legitimate (ops without arguments),
    # so it is no longer rejected.
    if not op_name:
        raise WorkspaceError('Missing operation name')
    op = OP_REGISTRY.get_op(op_name)
    if not op:
        raise WorkspaceError('Unknown operation "%s"' % op_name)
    with monitor.starting("Running operation '%s'" % op_name, 2):
        # Bring all workflow resources up to date before invoking the op.
        self.workflow.invoke(self.resource_cache,
                             monitor=monitor.child(work=1))
        op_kwargs = self._parse_op_args(op, op_args, self.resource_cache,
                                        validate_args)
        op(monitor=monitor.child(work=1), **op_kwargs)
def test_registered(self):
    """
    Test arithmetics.diff when invoked from the OP_REGISTRY.
    """
    reg_op = OP_REGISTRY.get_op(object_to_qualified_name(arithmetics.diff))
    dataset = xr.Dataset({
        'first': (['lat', 'lon', 'time'], np.ones([45, 90, 3])),
        'second': (['lat', 'lon', 'time'], np.ones([45, 90, 3])),
        'lat': np.linspace(-88, 88, 45),
        'lon': np.linspace(-178, 178, 90)})
    expected = xr.Dataset({
        'first': (['lat', 'lon', 'time'], np.ones([45, 90, 3])),
        'second': (['lat', 'lon', 'time'], np.ones([45, 90, 3])),
        'lat': np.linspace(-88, 88, 45),
        'lon': np.linspace(-178, 178, 90)})
    # ds - 2*ds == -ds, hence the expected result is 'expected * -1'.
    actual = reg_op(ds=dataset, ds2=dataset * 2)
    assert_dataset_equal(expected * -1, actual)
def test_registered(self):
    """
    Test subset_temporal running as a registered op-registry operation.
    """
    reg_op = OP_REGISTRY.get_op(
        object_to_qualified_name(subset.subset_temporal))
    ds = xr.Dataset({
        'first': (['lat', 'lon', 'time'], np.ones([180, 360, 6])),
        'second': (['lat', 'lon', 'time'], np.ones([180, 360, 6])),
        'lat': np.linspace(-89.5, 89.5, 180),
        'lon': np.linspace(-179.5, 179.5, 360),
        'time': [datetime(2000, m, 1) for m in range(1, 7)]})
    actual = reg_op(ds=ds, time_range='2000-01-10, 2000-04-01')
    # Only the Feb, Mar and Apr time steps fall inside the requested range.
    expected = xr.Dataset({
        'first': (['lat', 'lon', 'time'], np.ones([180, 360, 3])),
        'second': (['lat', 'lon', 'time'], np.ones([180, 360, 3])),
        'lat': np.linspace(-89.5, 89.5, 180),
        'lon': np.linspace(-179.5, 179.5, 360),
        'time': [datetime(2000, m, 1) for m in range(2, 5)]})
    assert_dataset_equal(expected, actual)
def test_registered(self):
    """
    Test 'sel' execution as a registered operation.
    """
    reg_op = OP_REGISTRY.get_op(object_to_qualified_name(sel))
    ds = new_ds()
    # Selecting a single time step drops the 'time' dimension.
    sel_ds = reg_op(ds=ds, time='2014-09-06')
    self.assertEqual(set(sel_ds.coords.keys()),
                     {'lon', 'lat', 'time', 'reference_time'})
    self.assertEqual(sel_ds.dims['lon'], 4)
    self.assertEqual(sel_ds.dims['lat'], 2)
    self.assertNotIn('time', sel_ds.dims)
    # Selecting a single point drops both spatial dimensions.
    sel_ds = reg_op(ds=ds, point=(34.51, 10.25))
    self.assertEqual(set(sel_ds.coords.keys()),
                     {'lon', 'lat', 'time', 'reference_time'})
    self.assertNotIn('lon', sel_ds.dims)
    self.assertNotIn('lat', sel_ds.dims)
    self.assertEqual(sel_ds.dims['time'], 10)
def test_registered(self):
    """
    Test nominal execution of from_dataframe as a registered operation.
    """
    reg_op = OP_REGISTRY.get_op(object_to_qualified_name(from_dataframe))
    time = pd.date_range('2000-01-01', periods=10)
    values = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    df = pd.DataFrame({'A': values, 'B': values, 'time': time})
    df = df.set_index('time')
    # Columns become variables over the 'time' dimension.
    expected = xr.Dataset({
        'A': (['time'], values),
        'B': (['time'], values),
        'time': time})
    actual = reg_op(df=df)
    assert_dataset_equal(expected, actual)
def test_registered(self):
    """
    Test from_dataframe as a registered op with a timezone-aware time index.
    """
    reg_op = OP_REGISTRY.get_op(object_to_qualified_name(from_dataframe))
    time = pd.date_range('2000-01-01', periods=10, tz=timezone.utc)
    values = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    df = pd.DataFrame({'A': values, 'B': values, 'time': time})
    df = df.set_index('time')
    # Columns become variables over the 'time' dimension.
    expected = xr.Dataset({
        'A': (['time'], values),
        'B': (['time'], values),
        'time': time})
    actual = reg_op(df=df)
    assert_dataset_equal(expected, actual)
def test_registered(self):
    """
    Test anomaly_external when invoked through the operation registry.
    """
    reg_op = OP_REGISTRY.get_op(
        object_to_qualified_name(anomaly.anomaly_external))
    ref = xr.Dataset({
        'first': (['lat', 'lon', 'time'], np.ones([45, 90, 12])),
        'second': (['lat', 'lon', 'time'], np.ones([45, 90, 12])),
        'lat': np.linspace(-88, 88, 45),
        'lon': np.linspace(-178, 178, 90)})
    two_years = ([datetime(2000, m, 1) for m in range(1, 13)]
                 + [datetime(2001, m, 1) for m in range(1, 13)])
    ds = xr.Dataset({
        'first': (['lat', 'lon', 'time'], np.ones([45, 90, 24])),
        'second': (['lat', 'lon', 'time'], np.ones([45, 90, 24])),
        'lat': np.linspace(-88, 88, 45),
        'lon': np.linspace(-178, 178, 90),
        'time': two_years})
    # The data equals the reference everywhere, so the anomaly is all zeros.
    expected = xr.Dataset({
        'first': (['lat', 'lon', 'time'], np.zeros([45, 90, 24])),
        'second': (['lat', 'lon', 'time'], np.zeros([45, 90, 24])),
        'lat': np.linspace(-88, 88, 45),
        'lon': np.linspace(-178, 178, 90),
        'time': two_years})
    with create_tmp_file() as tmp_file:
        ref.to_netcdf(tmp_file, 'w')
        actual = reg_op(ds=ds, file=tmp_file)
        assert_dataset_equal(actual, expected)
def test_registered(self):
    """Test tseries_mean as a registered operation.

    NOTE(review): this method was named 'registered' (missing the 'test_'
    prefix), so the unittest runner never discovered or executed it — it was
    dead code. Renamed so the test actually runs. The old docstring also
    said 'tseries_point' although the code exercises tseries_mean.
    """
    reg_op = OP_REGISTRY.get_op(object_to_qualified_name(tseries_mean))
    months = ['2000-01-01', '2000-02-01', '2000-03-01',
              '2000-04-01', '2000-05-01', '2000-06-01']
    dataset = xr.Dataset({
        'abs': (['lat', 'lon', 'time'], np.ones([4, 8, 6])),
        'bbs': (['lat', 'lon', 'time'], np.ones([4, 8, 6])),
        'lat': np.linspace(-67.5, 67.5, 4),
        'lon': np.linspace(-157.5, 157.5, 8),
        'time': months})
    # '*bs' matches both variables; spatial means are 1, std deviations 0.
    actual = reg_op(ds=dataset, var='*bs')
    expected = xr.Dataset({
        'abs': (['time'], np.ones([6])),
        'bbs': (['time'], np.ones([6])),
        'abs_std': (['time'], np.zeros([6])),
        'bbs_std': (['time'], np.zeros([6])),
        'lat': np.linspace(-67.5, 67.5, 4),
        'lon': np.linspace(-157.5, 157.5, 8),
        'time': months})
    assertDatasetEqual(expected, actual)
def test_registered(self):
    """
    Test subset_temporal_index running as a registered op-registry operation.
    """
    reg_op = OP_REGISTRY.get_op(
        object_to_qualified_name(subset.subset_temporal_index))
    ds = xr.Dataset({
        'first': (['lat', 'lon', 'time'], np.ones([180, 360, 6])),
        'second': (['lat', 'lon', 'time'], np.ones([180, 360, 6])),
        'lat': np.linspace(-89.5, 89.5, 180),
        'lon': np.linspace(-179.5, 179.5, 360),
        'time': ['2000-01-01', '2000-02-01', '2000-03-01',
                 '2000-04-01', '2000-05-01', '2000-06-01']})
    actual = reg_op(ds=ds, time_ind_min=2, time_ind_max=4)
    # Both index bounds are inclusive: time steps 2..4 survive.
    expected = xr.Dataset({
        'first': (['lat', 'lon', 'time'], np.ones([180, 360, 3])),
        'second': (['lat', 'lon', 'time'], np.ones([180, 360, 3])),
        'lat': np.linspace(-89.5, 89.5, 180),
        'lon': np.linspace(-179.5, 179.5, 360),
        'time': ['2000-03-01', '2000-04-01', '2000-05-01']})
    assert_dataset_equal(expected, actual)
def test_registered(self):
    """
    Test subset_spatial running as a registered op-registry operation.
    """
    reg_op = OP_REGISTRY.get_op(
        object_to_qualified_name(subset.subset_spatial))
    ds = xr.Dataset({
        'first': (['lat', 'lon', 'time'], np.ones([180, 360, 6])),
        'second': (['lat', 'lon', 'time'], np.ones([180, 360, 6])),
        'lat': np.linspace(-89.5, 89.5, 180),
        'lon': np.linspace(-179.5, 179.5, 360)})
    actual = reg_op(ds=ds, region="-20, -10, 20, 10")
    # Cells whose centers lie within the region are retained.
    expected = xr.Dataset({
        'first': (['lat', 'lon', 'time'], np.ones([20, 40, 6])),
        'second': (['lat', 'lon', 'time'], np.ones([20, 40, 6])),
        'lat': np.linspace(-9.5, 9.5, 20),
        'lon': np.linspace(-19.5, 19.5, 40)})
    assert_dataset_equal(expected, actual)
def test_registered(self):
    """
    Test temporal_aggregation running as a registered operation.
    """
    reg_op = OP_REGISTRY.get_op(
        object_to_qualified_name(temporal_aggregation))
    ds = adjust_temporal_attrs(xr.Dataset({
        'first': (['lat', 'lon', 'time'], np.ones([45, 90, 366])),
        'second': (['lat', 'lon', 'time'], np.ones([45, 90, 366])),
        'lat': np.linspace(-88, 88, 45),
        'lon': np.linspace(-178, 178, 90),
        'time': pd.date_range('2000-01-01', '2000-12-31')}))
    ex = xr.Dataset({
        'first': (['lat', 'lon', 'time'], np.ones([45, 90, 12])),
        'second': (['lat', 'lon', 'time'], np.ones([45, 90, 12])),
        'lat': np.linspace(-88, 88, 45),
        'lon': np.linspace(-178, 178, 90),
        'time': pd.date_range('2000-01-01', freq='MS', periods=12)})
    # Aggregation annotates the variables with a CF 'cell_methods' attribute.
    for var in ('first', 'second'):
        ex[var].attrs['cell_methods'] = 'time: mean within years'
    actual = reg_op(ds=ds)
    self.assertTrue(actual.broadcast_equals(ex))
def test_registered(self):
    """
    Test 'coregister' execution through the operation registry.
    """
    reg_op = OP_REGISTRY.get_op(object_to_qualified_name(coregister))
    ds_fine = xr.Dataset({
        'first': (['time', 'lat', 'lon'],
                  np.array([np.eye(4, 8), np.eye(4, 8)])),
        'second': (['time', 'lat', 'lon'],
                   np.array([np.eye(4, 8), np.eye(4, 8)])),
        'lat': np.linspace(-67.5, 67.5, 4),
        'lon': np.linspace(-157.5, 157.5, 8),
        'time': np.array([1, 2])})
    ds_coarse = xr.Dataset({
        'first': (['time', 'lat', 'lon'],
                  np.array([np.eye(3, 6), np.eye(3, 6)])),
        'second': (['time', 'lat', 'lon'],
                   np.array([np.eye(3, 6), np.eye(3, 6)])),
        'lat': np.linspace(-60, 60, 3),
        'lon': np.linspace(-150, 150, 6),
        'time': np.array([1, 2])})

    # Test that the coarse dataset has been resampled onto the grid
    # of the finer dataset. Both variables and both time steps share the
    # same expected 2D slab, so it is defined once and stacked.
    ds_coarse_resampled = reg_op(ds_master=ds_fine, ds_replica=ds_coarse)
    fine_slab = np.array(
        [[1., 0.28571429, 0., 0., 0., 0., 0., 0.],
         [0.33333333, 0.57142857, 0.38095238, 0., 0., 0., 0., 0.],
         [0., 0.47619048, 0.52380952, 0.28571429, 0.04761905, 0., 0., 0.],
         [0., 0., 0.42857143, 0.85714286, 0.14285714, 0., 0., 0.]])
    expected = xr.Dataset({
        'first': (['time', 'lat', 'lon'], np.array([fine_slab, fine_slab])),
        'second': (['time', 'lat', 'lon'], np.array([fine_slab, fine_slab])),
        'lat': np.linspace(-67.5, 67.5, 4),
        'lon': np.linspace(-157.5, 157.5, 8),
        'time': np.array([1, 2])})
    assert_almost_equal(ds_coarse_resampled['first'].values,
                        expected['first'].values)

    # Test that the fine dataset has been resampled (aggregated)
    # onto the grid of the coarse dataset.
    ds_fine_resampled = reg_op(ds_master=ds_coarse, ds_replica=ds_fine)
    coarse_slab = np.array(
        [[0.625, 0.125, 0., 0., 0., 0.],
         [0.125, 0.5, 0.125, 0., 0., 0.],
         [0., 0.125, 0.625, 0., 0., 0.]])
    expected = xr.Dataset({
        'first': (['time', 'lat', 'lon'],
                  np.array([coarse_slab, coarse_slab])),
        'second': (['time', 'lat', 'lon'],
                   np.array([coarse_slab, coarse_slab])),
        'lat': np.linspace(-60, 60, 3),
        'lon': np.linspace(-150, 150, 6),
        'time': np.array([1, 2])})
    assert_almost_equal(ds_fine_resampled['first'].values,
                        expected['first'].values)
def test_to_json_dict(self):
    """Execute a workflow producing many resource types and verify the
    JSON-serializable form (``Workspace.to_json_dict``) of each resource.

    Covers regular/scalar/empty xarray datasets, regular/scalar/empty
    pandas data frames, regular/scalar geopandas GeoDataFrames, and
    plain ``int``/``str`` results.

    NOTE(review): a near-identical method with the same name appears
    later in this file (differing only in the expected 'geometry'
    dataType); if both live in the same class the later one shadows
    this one — TODO confirm.
    """

    # --- operations producing the resources under test ----------------

    def dataset_op() -> xr.Dataset:
        # Regular dataset: 5 time steps on a 2x2 lat/lon grid; variable
        # attrs include a non-JSON-native ndarray and NaN fill value.
        periods = 5
        temperature_data = (15 + 8 * np.random.randn(periods, 2, 2)).round(decimals=1)
        temperature_attrs = {'a': np.array([1, 2, 3]), 'comment': 'hot', '_FillValue': np.nan}
        precipitation_data = (10 * np.random.rand(periods, 2, 2)).round(decimals=1)
        precipitation_attrs = {'x': True, 'comment': 'wet', '_FillValue': -1.0}
        ds = xr.Dataset(
            data_vars={
                'temperature': (('time', 'lat', 'lon'), temperature_data, temperature_attrs),
                'precipitation': (('time', 'lat', 'lon'), precipitation_data, precipitation_attrs)
            },
            coords={
                'lon': np.array([12, 13]),
                'lat': np.array([50, 51]),
                'time': pd.date_range('2014-09-06', periods=periods)
            },
            attrs={
                'history': 'a b c'
            })
        return ds

    def scalar_dataset_op() -> xr.Dataset:
        # All dims size 1 -> variables are expected to serialize as
        # scalar 'value' entries.
        ds = xr.Dataset(
            data_vars={
                'temperature': (('time', 'lat', 'lon'), [[[15.2]]]),
                'precipitation': (('time', 'lat', 'lon'), [[[10.1]]])
            },
            coords={
                'lon': [12.],
                'lat': [50.],
                'time': [pd.to_datetime('2014-09-06')],
            },
            attrs={
                'history': 'a b c'
            })
        return ds

    def empty_dataset_op() -> xr.Dataset:
        # Zero-sized dims -> variables are expected to serialize with
        # value None.
        ds = xr.Dataset(
            data_vars={
                'temperature': (('time', 'lat', 'lon'), np.ndarray(shape=(0, 0, 0), dtype=np.float32)),
                'precipitation': (('time', 'lat', 'lon'), np.ndarray(shape=(0, 0, 0), dtype=np.float32))
            },
            coords={
                'lon': np.ndarray(shape=(0,), dtype=np.float32),
                'lat': np.ndarray(shape=(0,), dtype=np.float32),
                'time': np.ndarray(shape=(0,), dtype=np.datetime64),
            },
            attrs={
                'history': 'a b c'
            })
        return ds

    def data_frame_op() -> pd.DataFrame:
        # 12-row time-indexed frame with NaNs in column 'A'.
        data = {'A': [1, 2, 3, np.nan, 4, 9, np.nan, np.nan, 1, 0, 4, 6],
                'B': [5, 6, 8, 7, 5, 5, 5, 9, 1, 2, 7, 6]}
        time = pd.date_range('2000-01-01', freq='MS', periods=12)
        return pd.DataFrame(data=data, index=time, dtype=float, columns=['A', 'B'])

    def scalar_data_frame_op() -> pd.DataFrame:
        # Single row -> columns expected to serialize as scalar values.
        data = {'A': [1.3], 'B': [5.9]}
        return pd.DataFrame(data=data, dtype=float, columns=['A', 'B'])

    def empty_data_frame_op() -> pd.DataFrame:
        # Zero rows -> columns expected to serialize with value None.
        data = {'A': [], 'B': []}
        return pd.DataFrame(data=data, dtype=float, columns=['A', 'B'])

    def geo_data_frame_op() -> gpd.GeoDataFrame:
        # Three point features with attribute columns.
        data = {'name': ['A', 'B', 'C'],
                'lat': [45, 46, 47.5],
                'lon': [-120, -121.2, -122.9]}
        df = pd.DataFrame(data, columns=['name', 'lat', 'lon'])
        geometry = [Point(xy) for xy in zip(df['lon'], df['lat'])]
        return gpd.GeoDataFrame(df, geometry=geometry)

    def scalar_geo_data_frame_op() -> gpd.GeoDataFrame:
        # Single feature; the 2000-char 'name' exercises scalar string
        # truncation (asserted below as 1000 chars + '...').
        data = {'name': [2000 * 'A'],
                'lat': [45],
                'lon': [-120]}
        df = pd.DataFrame(data, columns=['name', 'lat', 'lon'])
        geometry = [Point(xy) for xy in zip(df['lon'], df['lat'])]
        return gpd.GeoDataFrame(df, geometry=geometry)

    def int_op() -> int:
        return 394852

    def str_op() -> str:
        return 'Hi!'

    from cate.core.op import OP_REGISTRY
    try:
        OP_REGISTRY.add_op(dataset_op)
        OP_REGISTRY.add_op(data_frame_op)
        OP_REGISTRY.add_op(geo_data_frame_op)
        OP_REGISTRY.add_op(scalar_dataset_op)
        OP_REGISTRY.add_op(scalar_data_frame_op)
        OP_REGISTRY.add_op(scalar_geo_data_frame_op)
        OP_REGISTRY.add_op(empty_dataset_op)
        OP_REGISTRY.add_op(empty_data_frame_op)
        OP_REGISTRY.add_op(int_op)
        OP_REGISTRY.add_op(str_op)

        # Build a workflow with one step per op; the node_id becomes the
        # resource name asserted below.
        workflow = Workflow(OpMetaInfo('workspace_workflow', header=dict(description='Test!')))
        workflow.add_step(OpStep(dataset_op, node_id='ds'))
        workflow.add_step(OpStep(data_frame_op, node_id='df'))
        workflow.add_step(OpStep(geo_data_frame_op, node_id='gdf'))
        workflow.add_step(OpStep(scalar_dataset_op, node_id='scalar_ds'))
        workflow.add_step(OpStep(scalar_data_frame_op, node_id='scalar_df'))
        workflow.add_step(OpStep(scalar_geo_data_frame_op, node_id='scalar_gdf'))
        workflow.add_step(OpStep(empty_dataset_op, node_id='empty_ds'))
        workflow.add_step(OpStep(empty_data_frame_op, node_id='empty_df'))
        workflow.add_step(OpStep(int_op, node_id='i'))
        workflow.add_step(OpStep(str_op, node_id='s'))
        ws = Workspace('/path', workflow)
        ws.execute_workflow()
        d_ws = ws.to_json_dict()
        # import pprint
        # pprint.pprint(d_ws)
        d_wf = d_ws.get('workflow')
        self.assertIsNotNone(d_wf)
        l_res = d_ws.get('resources')
        self.assertIsNotNone(l_res)
        self.assertEqual(len(l_res), 10)

        # --- resource 'ds': regular dataset ---------------------------
        res_ds = l_res[0]
        self.assertEqual(res_ds.get('name'), 'ds')
        self.assertEqual(res_ds.get('dataType'), 'xarray.core.dataset.Dataset')
        self.assertEqual(res_ds.get('dimSizes'), dict(lat=2, lon=2, time=5))
        self.assertEqual(res_ds.get('attributes'), {'history': 'a b c'})
        res_ds_vars = res_ds.get('variables')
        self.assertIsNotNone(res_ds_vars)
        self.assertEqual(len(res_ds_vars), 2)
        # 'precipitation' is expected first, 'temperature' second.
        res_ds_var_1 = res_ds_vars[0]
        self.assertEqual(res_ds_var_1.get('name'), 'precipitation')
        self.assertEqual(res_ds_var_1.get('dataType'), 'float64')
        self.assertEqual(res_ds_var_1.get('numDims'), 3)
        self.assertEqual(res_ds_var_1.get('shape'), (5, 2, 2))
        self.assertEqual(res_ds_var_1.get('chunkSizes'), None)
        self.assertEqual(res_ds_var_1.get('isYFlipped'), True)
        self.assertEqual(res_ds_var_1.get('isFeatureAttribute'), None)
        self.assertEqual(res_ds_var_1.get('attributes'), dict(x=True, comment='wet', _FillValue=-1.))
        res_ds_var_2 = res_ds_vars[1]
        self.assertEqual(res_ds_var_2.get('name'), 'temperature')
        self.assertEqual(res_ds_var_2.get('dataType'), 'float64')
        self.assertEqual(res_ds_var_2.get('numDims'), 3)
        self.assertEqual(res_ds_var_2.get('shape'), (5, 2, 2))
        self.assertEqual(res_ds_var_2.get('chunkSizes'), None)
        self.assertEqual(res_ds_var_2.get('isYFlipped'), True)
        self.assertEqual(res_ds_var_2.get('isFeatureAttribute'), None)
        self.assertEqual(res_ds_var_2.get('attributes'), dict(a=[1, 2, 3], comment='hot', _FillValue=np.nan))

        # --- resource 'df': regular data frame ------------------------
        res_df = l_res[1]
        self.assertEqual(res_df.get('name'), 'df')
        self.assertEqual(res_df.get('dataType'), 'pandas.core.frame.DataFrame')
        self.assertEqual(res_df.get('attributes'), {'num_rows': 12, 'num_columns': 2})
        res_df_vars = res_df.get('variables')
        self.assertIsNotNone(res_df_vars)
        self.assertEqual(len(res_df_vars), 2)
        res_df_var_1 = res_df_vars[0]
        self.assertEqual(res_df_var_1.get('name'), 'A')
        self.assertEqual(res_df_var_1.get('dataType'), 'float64')
        self.assertEqual(res_df_var_1.get('numDims'), 1)
        self.assertEqual(res_df_var_1.get('shape'), (12,))
        self.assertEqual(res_df_var_1.get('isYFlipped'), None)
        self.assertEqual(res_df_var_1.get('isFeatureAttribute'), True)
        self.assertIsNone(res_df_var_1.get('attributes'))
        res_df_var_2 = res_df_vars[1]
        self.assertEqual(res_df_var_2.get('name'), 'B')
        self.assertEqual(res_df_var_2.get('dataType'), 'float64')
        self.assertEqual(res_df_var_2.get('numDims'), 1)
        self.assertEqual(res_df_var_2.get('shape'), (12,))
        self.assertEqual(res_df_var_2.get('isYFlipped'), None)
        self.assertEqual(res_df_var_2.get('isFeatureAttribute'), True)
        self.assertIsNone(res_df_var_2.get('attributes'))

        # --- resource 'gdf': regular GeoDataFrame ---------------------
        res_gdf = l_res[2]
        self.assertEqual(res_gdf.get('name'), 'gdf')
        self.assertEqual(res_gdf.get('dataType'), 'geopandas.geodataframe.GeoDataFrame')
        self.assertEqual(res_gdf.get('attributes'), {'num_rows': 3, 'num_columns': 4, 'geom_type': 'Point'})
        res_gdf_vars = res_gdf.get('variables')
        self.assertIsNotNone(res_gdf_vars)
        self.assertEqual(len(res_gdf_vars), 4)
        res_gdf_var_1 = res_gdf_vars[0]
        self.assertEqual(res_gdf_var_1.get('name'), 'name')
        self.assertEqual(res_gdf_var_1.get('dataType'), 'object')
        self.assertEqual(res_gdf_var_1.get('numDims'), 1)
        self.assertEqual(res_gdf_var_1.get('shape'), (3,))
        self.assertEqual(res_gdf_var_1.get('isYFlipped'), None)
        self.assertEqual(res_gdf_var_1.get('isFeatureAttribute'), True)
        self.assertIsNone(res_gdf_var_1.get('attributes'))
        res_gdf_var_2 = res_gdf_vars[1]
        self.assertEqual(res_gdf_var_2.get('name'), 'lat')
        self.assertEqual(res_gdf_var_2.get('dataType'), 'float64')
        self.assertEqual(res_gdf_var_2.get('numDims'), 1)
        self.assertEqual(res_gdf_var_2.get('shape'), (3,))
        self.assertEqual(res_gdf_var_2.get('isYFlipped'), None)
        self.assertEqual(res_gdf_var_2.get('isFeatureAttribute'), True)
        self.assertIsNone(res_gdf_var_2.get('attributes'))
        res_gdf_var_3 = res_gdf_vars[2]
        self.assertEqual(res_gdf_var_3.get('name'), 'lon')
        self.assertEqual(res_gdf_var_3.get('dataType'), 'float64')
        self.assertEqual(res_gdf_var_3.get('numDims'), 1)
        self.assertEqual(res_gdf_var_3.get('shape'), (3,))
        self.assertEqual(res_gdf_var_3.get('isYFlipped'), None)
        self.assertEqual(res_gdf_var_3.get('isFeatureAttribute'), True)
        self.assertIsNone(res_gdf_var_3.get('attributes'))
        res_gdf_var_4 = res_gdf_vars[3]
        self.assertEqual(res_gdf_var_4.get('name'), 'geometry')
        self.assertEqual(res_gdf_var_4.get('dataType'), 'geometry')
        self.assertEqual(res_gdf_var_4.get('numDims'), 1)
        self.assertEqual(res_gdf_var_4.get('shape'), (3,))
        self.assertEqual(res_gdf_var_4.get('isYFlipped'), None)
        self.assertEqual(res_gdf_var_4.get('isFeatureAttribute'), True)
        self.assertIsNone(res_gdf_var_4.get('attributes'))

        # --- scalar resources serialize as per-variable 'value' -------
        res_scalar_ds = l_res[3]
        res_scalar_ds_vars = res_scalar_ds.get('variables')
        self.assertIsNotNone(res_scalar_ds_vars)
        self.assertEqual(len(res_scalar_ds_vars), 2)
        scalar_values = {res_scalar_ds_vars[0].get('name'): res_scalar_ds_vars[0].get('value'),
                         res_scalar_ds_vars[1].get('name'): res_scalar_ds_vars[1].get('value')}
        self.assertEqual(scalar_values, {'temperature': 15.2, 'precipitation': 10.1})
        res_scalar_df = l_res[4]
        res_scalar_df_vars = res_scalar_df.get('variables')
        self.assertIsNotNone(res_scalar_df_vars)
        self.assertEqual(len(res_scalar_df_vars), 2)
        scalar_values = {res_scalar_df_vars[0].get('name'): res_scalar_df_vars[0].get('value'),
                         res_scalar_df_vars[1].get('name'): res_scalar_df_vars[1].get('value')}
        self.assertEqual(scalar_values, {'A': 1.3, 'B': 5.9})
        res_scalar_gdf = l_res[5]
        res_scalar_gdf_vars = res_scalar_gdf.get('variables')
        self.assertIsNotNone(res_scalar_gdf_vars)
        self.assertEqual(len(res_scalar_gdf_vars), 4)
        scalar_values = {res_scalar_gdf_vars[0].get('name'): res_scalar_gdf_vars[0].get('value'),
                         res_scalar_gdf_vars[1].get('name'): res_scalar_gdf_vars[1].get('value'),
                         res_scalar_gdf_vars[2].get('name'): res_scalar_gdf_vars[2].get('value'),
                         res_scalar_gdf_vars[3].get('name'): res_scalar_gdf_vars[3].get('value')}
        # Long strings are truncated to 1000 chars + ellipsis; geometry
        # is rendered as WKT.
        self.assertEqual(scalar_values, {'name': (1000 * 'A') + '...',
                                         'lat': 45,
                                         'lon': -120,
                                         'geometry': 'POINT (-120 45)'})

        # --- empty resources serialize with value None -----------------
        res_empty_ds = l_res[6]
        res_empty_ds_vars = res_empty_ds.get('variables')
        self.assertIsNotNone(res_empty_ds_vars)
        self.assertEqual(len(res_empty_ds_vars), 2)
        scalar_values = {res_empty_ds_vars[0].get('name'): res_empty_ds_vars[0].get('value'),
                         res_empty_ds_vars[1].get('name'): res_empty_ds_vars[1].get('value')}
        self.assertEqual(scalar_values, {'temperature': None, 'precipitation': None})
        res_empty_df = l_res[7]
        res_empty_df_vars = res_empty_df.get('variables')
        self.assertIsNotNone(res_empty_df_vars)
        self.assertEqual(len(res_empty_df_vars), 2)
        scalar_values = {res_empty_df_vars[0].get('name'): res_empty_df_vars[0].get('value'),
                         res_empty_df_vars[1].get('name'): res_empty_df_vars[1].get('value')}
        self.assertEqual(scalar_values, {'A': None, 'B': None})

        # --- plain int/str resources have no attributes/variables ------
        res_int = l_res[8]
        self.assertEqual(res_int.get('name'), 'i')
        self.assertEqual(res_int.get('dataType'), 'int')
        self.assertIsNone(res_int.get('attributes'))
        self.assertIsNone(res_int.get('variables'))
        res_str = l_res[9]
        self.assertEqual(res_str.get('name'), 's')
        self.assertEqual(res_str.get('dataType'), 'str')
        self.assertIsNone(res_str.get('attributes'))
        self.assertIsNone(res_str.get('variables'))
    finally:
        # Deregister all test ops so the global registry is left clean
        # even if assertions fail.
        OP_REGISTRY.remove_op(dataset_op)
        OP_REGISTRY.remove_op(data_frame_op)
        OP_REGISTRY.remove_op(geo_data_frame_op)
        OP_REGISTRY.remove_op(scalar_dataset_op)
        OP_REGISTRY.remove_op(scalar_data_frame_op)
        OP_REGISTRY.remove_op(scalar_geo_data_frame_op)
        OP_REGISTRY.remove_op(empty_dataset_op)
        OP_REGISTRY.remove_op(empty_data_frame_op)
        OP_REGISTRY.remove_op(int_op)
        OP_REGISTRY.remove_op(str_op)
def write_op(self):
    """Look up the registered ``write_netcdf4`` operation and return it."""
    registered_op = OP_REGISTRY.get_op('write_netcdf4')
    return registered_op
def test_to_json_dict(self):
    """Execute a workflow producing many resource types and verify the
    JSON-serializable form (``Workspace.to_json_dict``) of each resource.

    Covers regular/scalar/empty xarray datasets, regular/scalar/empty
    pandas data frames, regular/scalar geopandas GeoDataFrames, and
    plain ``int``/``str`` results.

    NOTE(review): a near-identical method with the same name appears
    earlier in this file (differing only in the expected 'geometry'
    dataType); if both live in the same class this one shadows the
    earlier one — TODO confirm.
    """

    # --- operations producing the resources under test ----------------

    def dataset_op() -> xr.Dataset:
        # Regular dataset: 5 time steps on a 2x2 lat/lon grid; variable
        # attrs include a non-JSON-native ndarray and NaN fill value.
        periods = 5
        temperature_data = (15 + 8 * np.random.randn(periods, 2, 2)).round(decimals=1)
        temperature_attrs = {'a': np.array([1, 2, 3]), 'comment': 'hot', '_FillValue': np.nan}
        precipitation_data = (10 * np.random.rand(periods, 2, 2)).round(decimals=1)
        precipitation_attrs = {'x': True, 'comment': 'wet', '_FillValue': -1.0}
        ds = xr.Dataset(
            data_vars={
                'temperature': (('time', 'lat', 'lon'), temperature_data, temperature_attrs),
                'precipitation': (('time', 'lat', 'lon'), precipitation_data, precipitation_attrs)
            },
            coords={
                'lon': np.array([12, 13]),
                'lat': np.array([50, 51]),
                'time': pd.date_range('2014-09-06', periods=periods)
            },
            attrs={
                'history': 'a b c'
            })
        return ds

    def scalar_dataset_op() -> xr.Dataset:
        # All dims size 1 -> variables are expected to serialize as
        # scalar 'value' entries.
        ds = xr.Dataset(
            data_vars={
                'temperature': (('time', 'lat', 'lon'), [[[15.2]]]),
                'precipitation': (('time', 'lat', 'lon'), [[[10.1]]])
            },
            coords={
                'lon': [12.],
                'lat': [50.],
                'time': [pd.to_datetime('2014-09-06')],
            },
            attrs={
                'history': 'a b c'
            })
        return ds

    def empty_dataset_op() -> xr.Dataset:
        # Zero-sized dims -> variables are expected to serialize with
        # value None.
        ds = xr.Dataset(
            data_vars={
                'temperature': (('time', 'lat', 'lon'), np.ndarray(shape=(0, 0, 0), dtype=np.float32)),
                'precipitation': (('time', 'lat', 'lon'), np.ndarray(shape=(0, 0, 0), dtype=np.float32))
            },
            coords={
                'lon': np.ndarray(shape=(0,), dtype=np.float32),
                'lat': np.ndarray(shape=(0,), dtype=np.float32),
                'time': np.ndarray(shape=(0,), dtype=np.datetime64),
            },
            attrs={
                'history': 'a b c'
            })
        return ds

    def data_frame_op() -> pd.DataFrame:
        # 12-row time-indexed frame with NaNs in column 'A'.
        data = {'A': [1, 2, 3, np.nan, 4, 9, np.nan, np.nan, 1, 0, 4, 6],
                'B': [5, 6, 8, 7, 5, 5, 5, 9, 1, 2, 7, 6]}
        time = pd.date_range('2000-01-01', freq='MS', periods=12)
        return pd.DataFrame(data=data, index=time, dtype=float, columns=['A', 'B'])

    def scalar_data_frame_op() -> pd.DataFrame:
        # Single row -> columns expected to serialize as scalar values.
        data = {'A': [1.3], 'B': [5.9]}
        return pd.DataFrame(data=data, dtype=float, columns=['A', 'B'])

    def empty_data_frame_op() -> pd.DataFrame:
        # Zero rows -> columns expected to serialize with value None.
        data = {'A': [], 'B': []}
        return pd.DataFrame(data=data, dtype=float, columns=['A', 'B'])

    def geo_data_frame_op() -> gpd.GeoDataFrame:
        # Three point features with attribute columns.
        data = {'name': ['A', 'B', 'C'],
                'lat': [45, 46, 47.5],
                'lon': [-120, -121.2, -122.9]}
        df = pd.DataFrame(data, columns=['name', 'lat', 'lon'])
        geometry = [Point(xy) for xy in zip(df['lon'], df['lat'])]
        return gpd.GeoDataFrame(df, geometry=geometry)

    def scalar_geo_data_frame_op() -> gpd.GeoDataFrame:
        # Single feature; the 2000-char 'name' exercises scalar string
        # truncation (asserted below as 1000 chars + '...').
        data = {'name': [2000 * 'A'],
                'lat': [45],
                'lon': [-120]}
        df = pd.DataFrame(data, columns=['name', 'lat', 'lon'])
        geometry = [Point(xy) for xy in zip(df['lon'], df['lat'])]
        return gpd.GeoDataFrame(df, geometry=geometry)

    def int_op() -> int:
        return 394852

    def str_op() -> str:
        return 'Hi!'

    from cate.core.op import OP_REGISTRY
    try:
        OP_REGISTRY.add_op(dataset_op)
        OP_REGISTRY.add_op(data_frame_op)
        OP_REGISTRY.add_op(geo_data_frame_op)
        OP_REGISTRY.add_op(scalar_dataset_op)
        OP_REGISTRY.add_op(scalar_data_frame_op)
        OP_REGISTRY.add_op(scalar_geo_data_frame_op)
        OP_REGISTRY.add_op(empty_dataset_op)
        OP_REGISTRY.add_op(empty_data_frame_op)
        OP_REGISTRY.add_op(int_op)
        OP_REGISTRY.add_op(str_op)

        # Build a workflow with one step per op; the node_id becomes the
        # resource name asserted below.
        workflow = Workflow(OpMetaInfo('workspace_workflow', header=dict(description='Test!')))
        workflow.add_step(OpStep(dataset_op, node_id='ds'))
        workflow.add_step(OpStep(data_frame_op, node_id='df'))
        workflow.add_step(OpStep(geo_data_frame_op, node_id='gdf'))
        workflow.add_step(OpStep(scalar_dataset_op, node_id='scalar_ds'))
        workflow.add_step(OpStep(scalar_data_frame_op, node_id='scalar_df'))
        workflow.add_step(OpStep(scalar_geo_data_frame_op, node_id='scalar_gdf'))
        workflow.add_step(OpStep(empty_dataset_op, node_id='empty_ds'))
        workflow.add_step(OpStep(empty_data_frame_op, node_id='empty_df'))
        workflow.add_step(OpStep(int_op, node_id='i'))
        workflow.add_step(OpStep(str_op, node_id='s'))
        ws = Workspace('/path', workflow)
        ws.execute_workflow()
        d_ws = ws.to_json_dict()
        # import pprint
        # pprint.pprint(d_ws)
        d_wf = d_ws.get('workflow')
        self.assertIsNotNone(d_wf)
        l_res = d_ws.get('resources')
        self.assertIsNotNone(l_res)
        self.assertEqual(len(l_res), 10)

        # --- resource 'ds': regular dataset ---------------------------
        res_ds = l_res[0]
        self.assertEqual(res_ds.get('name'), 'ds')
        self.assertEqual(res_ds.get('dataType'), 'xarray.core.dataset.Dataset')
        self.assertEqual(res_ds.get('dimSizes'), dict(lat=2, lon=2, time=5))
        self.assertEqual(res_ds.get('attributes'), {'history': 'a b c'})
        res_ds_vars = res_ds.get('variables')
        self.assertIsNotNone(res_ds_vars)
        self.assertEqual(len(res_ds_vars), 2)
        # 'precipitation' is expected first, 'temperature' second.
        res_ds_var_1 = res_ds_vars[0]
        self.assertEqual(res_ds_var_1.get('name'), 'precipitation')
        self.assertEqual(res_ds_var_1.get('dataType'), 'float64')
        self.assertEqual(res_ds_var_1.get('numDims'), 3)
        self.assertEqual(res_ds_var_1.get('shape'), (5, 2, 2))
        self.assertEqual(res_ds_var_1.get('chunkSizes'), None)
        self.assertEqual(res_ds_var_1.get('isYFlipped'), True)
        self.assertEqual(res_ds_var_1.get('isFeatureAttribute'), None)
        self.assertEqual(res_ds_var_1.get('attributes'), dict(x=True, comment='wet', _FillValue=-1.))
        res_ds_var_2 = res_ds_vars[1]
        self.assertEqual(res_ds_var_2.get('name'), 'temperature')
        self.assertEqual(res_ds_var_2.get('dataType'), 'float64')
        self.assertEqual(res_ds_var_2.get('numDims'), 3)
        self.assertEqual(res_ds_var_2.get('shape'), (5, 2, 2))
        self.assertEqual(res_ds_var_2.get('chunkSizes'), None)
        self.assertEqual(res_ds_var_2.get('isYFlipped'), True)
        self.assertEqual(res_ds_var_2.get('isFeatureAttribute'), None)
        self.assertEqual(res_ds_var_2.get('attributes'), dict(a=[1, 2, 3], comment='hot', _FillValue=np.nan))

        # --- resource 'df': regular data frame ------------------------
        res_df = l_res[1]
        self.assertEqual(res_df.get('name'), 'df')
        self.assertEqual(res_df.get('dataType'), 'pandas.core.frame.DataFrame')
        self.assertEqual(res_df.get('attributes'), {'num_rows': 12, 'num_columns': 2})
        res_df_vars = res_df.get('variables')
        self.assertIsNotNone(res_df_vars)
        self.assertEqual(len(res_df_vars), 2)
        res_df_var_1 = res_df_vars[0]
        self.assertEqual(res_df_var_1.get('name'), 'A')
        self.assertEqual(res_df_var_1.get('dataType'), 'float64')
        self.assertEqual(res_df_var_1.get('numDims'), 1)
        self.assertEqual(res_df_var_1.get('shape'), (12,))
        self.assertEqual(res_df_var_1.get('isYFlipped'), None)
        self.assertEqual(res_df_var_1.get('isFeatureAttribute'), True)
        self.assertIsNone(res_df_var_1.get('attributes'))
        res_df_var_2 = res_df_vars[1]
        self.assertEqual(res_df_var_2.get('name'), 'B')
        self.assertEqual(res_df_var_2.get('dataType'), 'float64')
        self.assertEqual(res_df_var_2.get('numDims'), 1)
        self.assertEqual(res_df_var_2.get('shape'), (12,))
        self.assertEqual(res_df_var_2.get('isYFlipped'), None)
        self.assertEqual(res_df_var_2.get('isFeatureAttribute'), True)
        self.assertIsNone(res_df_var_2.get('attributes'))

        # --- resource 'gdf': regular GeoDataFrame ---------------------
        res_gdf = l_res[2]
        self.assertEqual(res_gdf.get('name'), 'gdf')
        self.assertEqual(res_gdf.get('dataType'), 'geopandas.geodataframe.GeoDataFrame')
        self.assertEqual(res_gdf.get('attributes'), {'num_rows': 3, 'num_columns': 4, 'geom_type': 'Point'})
        res_gdf_vars = res_gdf.get('variables')
        self.assertIsNotNone(res_gdf_vars)
        self.assertEqual(len(res_gdf_vars), 4)
        res_gdf_var_1 = res_gdf_vars[0]
        self.assertEqual(res_gdf_var_1.get('name'), 'name')
        self.assertEqual(res_gdf_var_1.get('dataType'), 'object')
        self.assertEqual(res_gdf_var_1.get('numDims'), 1)
        self.assertEqual(res_gdf_var_1.get('shape'), (3,))
        self.assertEqual(res_gdf_var_1.get('isYFlipped'), None)
        self.assertEqual(res_gdf_var_1.get('isFeatureAttribute'), True)
        self.assertIsNone(res_gdf_var_1.get('attributes'))
        res_gdf_var_2 = res_gdf_vars[1]
        self.assertEqual(res_gdf_var_2.get('name'), 'lat')
        self.assertEqual(res_gdf_var_2.get('dataType'), 'float64')
        self.assertEqual(res_gdf_var_2.get('numDims'), 1)
        self.assertEqual(res_gdf_var_2.get('shape'), (3,))
        self.assertEqual(res_gdf_var_2.get('isYFlipped'), None)
        self.assertEqual(res_gdf_var_2.get('isFeatureAttribute'), True)
        self.assertIsNone(res_gdf_var_2.get('attributes'))
        res_gdf_var_3 = res_gdf_vars[2]
        self.assertEqual(res_gdf_var_3.get('name'), 'lon')
        self.assertEqual(res_gdf_var_3.get('dataType'), 'float64')
        self.assertEqual(res_gdf_var_3.get('numDims'), 1)
        self.assertEqual(res_gdf_var_3.get('shape'), (3,))
        self.assertEqual(res_gdf_var_3.get('isYFlipped'), None)
        self.assertEqual(res_gdf_var_3.get('isFeatureAttribute'), True)
        self.assertIsNone(res_gdf_var_3.get('attributes'))
        # In this variant the geometry column reports dtype 'object'.
        res_gdf_var_4 = res_gdf_vars[3]
        self.assertEqual(res_gdf_var_4.get('name'), 'geometry')
        self.assertEqual(res_gdf_var_4.get('dataType'), 'object')
        self.assertEqual(res_gdf_var_4.get('numDims'), 1)
        self.assertEqual(res_gdf_var_4.get('shape'), (3,))
        self.assertEqual(res_gdf_var_4.get('isYFlipped'), None)
        self.assertEqual(res_gdf_var_4.get('isFeatureAttribute'), True)
        self.assertIsNone(res_gdf_var_4.get('attributes'))

        # --- scalar resources serialize as per-variable 'value' -------
        res_scalar_ds = l_res[3]
        res_scalar_ds_vars = res_scalar_ds.get('variables')
        self.assertIsNotNone(res_scalar_ds_vars)
        self.assertEqual(len(res_scalar_ds_vars), 2)
        scalar_values = {res_scalar_ds_vars[0].get('name'): res_scalar_ds_vars[0].get('value'),
                         res_scalar_ds_vars[1].get('name'): res_scalar_ds_vars[1].get('value')}
        self.assertEqual(scalar_values, {'temperature': 15.2, 'precipitation': 10.1})
        res_scalar_df = l_res[4]
        res_scalar_df_vars = res_scalar_df.get('variables')
        self.assertIsNotNone(res_scalar_df_vars)
        self.assertEqual(len(res_scalar_df_vars), 2)
        scalar_values = {res_scalar_df_vars[0].get('name'): res_scalar_df_vars[0].get('value'),
                         res_scalar_df_vars[1].get('name'): res_scalar_df_vars[1].get('value')}
        self.assertEqual(scalar_values, {'A': 1.3, 'B': 5.9})
        res_scalar_gdf = l_res[5]
        res_scalar_gdf_vars = res_scalar_gdf.get('variables')
        self.assertIsNotNone(res_scalar_gdf_vars)
        self.assertEqual(len(res_scalar_gdf_vars), 4)
        scalar_values = {res_scalar_gdf_vars[0].get('name'): res_scalar_gdf_vars[0].get('value'),
                         res_scalar_gdf_vars[1].get('name'): res_scalar_gdf_vars[1].get('value'),
                         res_scalar_gdf_vars[2].get('name'): res_scalar_gdf_vars[2].get('value'),
                         res_scalar_gdf_vars[3].get('name'): res_scalar_gdf_vars[3].get('value')}
        # Long strings are truncated to 1000 chars + ellipsis; geometry
        # is rendered as WKT.
        self.assertEqual(scalar_values, {'name': (1000 * 'A') + '...',
                                         'lat': 45,
                                         'lon': -120,
                                         'geometry': 'POINT (-120 45)'})

        # --- empty resources serialize with value None -----------------
        res_empty_ds = l_res[6]
        res_empty_ds_vars = res_empty_ds.get('variables')
        self.assertIsNotNone(res_empty_ds_vars)
        self.assertEqual(len(res_empty_ds_vars), 2)
        scalar_values = {res_empty_ds_vars[0].get('name'): res_empty_ds_vars[0].get('value'),
                         res_empty_ds_vars[1].get('name'): res_empty_ds_vars[1].get('value')}
        self.assertEqual(scalar_values, {'temperature': None, 'precipitation': None})
        res_empty_df = l_res[7]
        res_empty_df_vars = res_empty_df.get('variables')
        self.assertIsNotNone(res_empty_df_vars)
        self.assertEqual(len(res_empty_df_vars), 2)
        scalar_values = {res_empty_df_vars[0].get('name'): res_empty_df_vars[0].get('value'),
                         res_empty_df_vars[1].get('name'): res_empty_df_vars[1].get('value')}
        self.assertEqual(scalar_values, {'A': None, 'B': None})

        # --- plain int/str resources have no attributes/variables ------
        res_int = l_res[8]
        self.assertEqual(res_int.get('name'), 'i')
        self.assertEqual(res_int.get('dataType'), 'int')
        self.assertIsNone(res_int.get('attributes'))
        self.assertIsNone(res_int.get('variables'))
        res_str = l_res[9]
        self.assertEqual(res_str.get('name'), 's')
        self.assertEqual(res_str.get('dataType'), 'str')
        self.assertIsNone(res_str.get('attributes'))
        self.assertIsNone(res_str.get('variables'))
    finally:
        # Deregister all test ops so the global registry is left clean
        # even if assertions fail.
        OP_REGISTRY.remove_op(dataset_op)
        OP_REGISTRY.remove_op(data_frame_op)
        OP_REGISTRY.remove_op(geo_data_frame_op)
        OP_REGISTRY.remove_op(scalar_dataset_op)
        OP_REGISTRY.remove_op(scalar_data_frame_op)
        OP_REGISTRY.remove_op(scalar_geo_data_frame_op)
        OP_REGISTRY.remove_op(empty_dataset_op)
        OP_REGISTRY.remove_op(empty_data_frame_op)
        OP_REGISTRY.remove_op(int_op)
        OP_REGISTRY.remove_op(str_op)