def test_multi_column_assignment_failure(na, nt):
    # Test multi-column assignment failure with too few/many columns.
    # Both keys name three columns; `na` is the second dimension of the 2d
    # array given to 'a,b,c' and `nt` is the length of the tuple of 1d arrays
    # given to 'd,e,f' (values presumably parametrised outside this function).
    n_rows = 7
    from_2d_array = Call(lambda nrows, ncols: np.ones((nrows, ncols)),
                         [n_rows, na])
    from_tuple = Call(lambda nrows, ncols: (np.ones(nrows),) * ncols,
                      [n_rows, nt])
    table = {'a,b,c': from_2d_array, 'd,e,f': from_tuple}
    config = {'tables': {'multi_column_test_table': table}}

    # Build the pipeline outside the context manager so that only execute()
    # is allowed to raise the expected error.
    pipeline = Pipeline(config)
    with pytest.raises(ValueError):
        pipeline.execute()
def test_multi_column_assignment():
    # Test multi-column assignment from 2d arrays and tuples of 1d arrays.
    # The comma-separated keys deliberately contain irregular whitespace, and
    # columns 'h' and 'i' reference individual columns ('a' and 'f') created
    # by the multi-column assignments.
    n_rows = 7
    table = {
        'a,b ,c, d': Call(lambda nrows, ncols: np.ones((nrows, ncols)),
                          [n_rows, 4]),
        'e , f, g': Call(lambda nrows, ncols: (np.ones(nrows),) * ncols,
                         [n_rows, 3]),
    }
    table['h'] = Call(list, [Ref('multi_column_test_table.a')])
    table['i'] = Call(list, [Ref('multi_column_test_table.f')])
    config = {'tables': {'multi_column_test_table': table}}

    # The test passes if execution completes without raising.
    pipeline = Pipeline(config)
    pipeline.execute()
def test_hdf5():
    # Write a three column table to an HDF5 file and check that the table
    # read back from 'tables/test_table' equals the one held by the pipeline.
    # The file is written inside a temporary directory so that the test does
    # not leave 'output.hdf5' in the working directory and does not depend on
    # a file left behind by an earlier run.
    import os
    import tempfile

    size = 100
    string = size*'a'
    config = {'tables': {
                'test_table': {
                  'column1': Call(np.random.uniform, [], {
                    'size': size}),
                  'column2': Call(np.random.uniform, [], {
                    'low': Ref('test_table.column1')}),
                  'column3': Call(list, [string], {})}}}
    pipeline = Pipeline(config)
    pipeline.execute()

    with tempfile.TemporaryDirectory() as tmpdir:
        output_filename = os.path.join(tmpdir, 'output.hdf5')
        pipeline.write(output_filename)
        hdf_table = read_table_hdf5(output_filename, 'tables/test_table',
                                    character_as_bytes=False)
        assert np.all(hdf_table == pipeline['test_table'])
def test_column_quantity():
    # Regression test for pull request #356
    # Pipeline.__getitem__ used to hand back table columns as
    # astropy.table.Column objects, whereas most functions expect a
    # numpy.ndarray or an astropy.units.Quantity. As of astropy version 4.1.0
    # Column lacks some Quantity methods, e.g. to_value. This test checks that
    # column data in a Pipeline is accessed as an ndarray or a Quantity
    # (depending on units) and that a function relying on a Quantity-only
    # method can be called on column data inside a Pipeline.

    def value_in_cm(q):
        return q.to_value(unit='cm')

    lengths = Quantity(np.random.uniform(size=50), unit='m')
    table = {
        'lengths': lengths,
        'lengths_in_cm': Call(value_in_cm, [Ref('test_table.lengths')]),
    }
    pipeline = Pipeline({'tables': {'test_table': table}})
    pipeline.execute()

    # Column with units comes back as a Quantity, the converted one as ndarray
    assert isinstance(pipeline['test_table.lengths'], Quantity)
    assert isinstance(pipeline['test_table.lengths_in_cm'], np.ndarray)

    # Uniform samples in metres, expressed in cm, must lie between 0 and 100
    lower, upper = 0, 100
    np.testing.assert_array_less(lower, pipeline['test_table.lengths_in_cm'])
    np.testing.assert_array_less(pipeline['test_table.lengths_in_cm'], upper)
def test_logging(capsys, tmp_path):
    # Check what skypy writes to stderr at default, increased and decreased
    # verbosity, using an output file inside pytest's tmp_path.

    config_filename = get_pkg_data_filename('data/test_config.yml')
    output_filename = str(tmp_path / 'logging.fits')

    # Default verbosity: nothing should be logged
    skypy.main([config_filename, output_filename])
    _, err = capsys.readouterr()
    assert (not err)

    # Increased verbosity: capture the log. The output file now exists and
    # "--overwrite" is not given, which forces an exception.
    with pytest.raises(SystemExit):
        skypy.main([config_filename, output_filename, '--verbose'])
    _, err = capsys.readouterr()

    # Work out all DAG jobs and function calls from the config. The '.init'
    # entries are popped from the tables before the column names are listed.
    config = load_skypy_yaml(config_filename)
    cosmology = config.pop('cosmology', None)
    tables = config.pop('tables', {})
    initialisers = {}
    for table_name, table in tables.items():
        initialisers[table_name] = table.pop('.init', Call(Table))
    config.update(initialisers)

    columns = []
    for table_name, table in tables.items():
        for column_name in table:
            columns.append(f'{table_name}.{column_name}')

    functions = [item for item in config.values() if isinstance(item, Call)]
    for table in tables.values():
        for item in table.values():
            if isinstance(item, Call):
                functions.append(item)

    # Every job must be reported in the log
    for job in list(config) + list(tables) + columns:
        assert (f"[INFO] skypy.pipeline: Generating {job}" in err)

    # Every function call must be reported in the log
    for call in functions:
        expected = f"[INFO] skypy.pipeline: Calling {call.function.__name__}"
        assert (expected in err)

    # Cosmology is reported if the config defines one
    if cosmology:
        assert ("[INFO] skypy.pipeline: Setting cosmology" in err)

    # Writing the output file is reported
    assert (f"[INFO] skypy: Writing {output_filename}" in err)

    # The error for the existing output file is reported
    try:
        # New error message introduced in astropy PR #12179
        from astropy.utils.misc import NOT_OVERWRITING_MSG
    except ImportError:
        # Fallback on old error message from astropy v4.x
        error_string = f"[ERROR] skypy: File {output_filename!r} already exists."
    else:
        error_string = NOT_OVERWRITING_MSG.format(output_filename)
    assert (error_string in err)

    # Decreased verbosity: the run still fails but nothing should be logged
    with pytest.raises(SystemExit):
        skypy.main([config_filename, output_filename, '-qq'])
    _, err = capsys.readouterr()
    assert (not err)
def test_pipeline_cosmology():
    # Check that the cosmology built by the pipeline is the object handed to
    # a function that takes a `cosmology` argument, both on the first run and
    # after re-running with new parameters.

    def return_cosmology(cosmology):
        return cosmology

    # Test pipeline correctly sets default cosmology from parameters
    # N.B. astropy cosmology class has not implemented __eq__ for comparison,
    # hence the identity checks below.
    H0, Om0 = 70, 0.3
    config = {}
    config['parameters'] = {'H0': H0, 'Om0': Om0}
    config['cosmology'] = Call(FlatLambdaCDM, [Ref('H0'), Ref('Om0')])
    config['test'] = Call(return_cosmology)
    pipeline = Pipeline(config)
    pipeline.execute()
    assert pipeline['test'] is pipeline['cosmology']

    # Test pipeline correctly updates cosmology from new parameters
    H0_new, Om0_new = 75, 0.25
    new_parameters = {'H0': H0_new, 'Om0': Om0_new}
    pipeline.execute(new_parameters)
    assert pipeline['test'] is pipeline['cosmology']
def test_depends():
    # Regression test for GitHub Issue #464
    # The .depends keyword used to be forwarded to functions as a keyword
    # argument, because Pipeline ran Item.infer (which fills in additional
    # function arguments from context) before it handled the extra
    # dependencies given via .depends. The .depends keyword is now handled
    # first.

    table_1 = {'column1': Call(np.random.uniform, [0, 1, 10])}

    # table_2 is initialised by stacking table_1 once table_1 is complete
    init_kwargs = {
        'tables': [Ref('table_1')],
        '.depends': ['table_1.complete'],
    }
    table_2 = {'.init': Call(vstack, [], init_kwargs)}

    config = {'tables': {'table_1': table_1, 'table_2': table_2}}
    pipeline = Pipeline(config)
    pipeline.execute()

    assert np.all(pipeline['table_1'] == pipeline['table_2'])
def test_logging(capsys):
    # Check what skypy writes to stderr at default, increased and decreased
    # verbosity. The output file lives in a temporary directory: the first
    # run needs the file to be absent, so writing 'logging.fits' into the
    # working directory made the test fail when it was run a second time.
    import os
    import tempfile

    filename = get_pkg_data_filename('data/test_config.yml')

    with tempfile.TemporaryDirectory() as tmpdir:
        output_file = os.path.join(tmpdir, 'logging.fits')

        # Run skypy with default verbosity and check log is empty
        skypy.main([filename, output_file])
        out, err = capsys.readouterr()
        assert (not err)

        # Run again with increased verbosity and capture log. Force an
        # exception by not using the "--overwrite" flag when the output file
        # already exists.
        with pytest.raises(SystemExit):
            skypy.main([filename, output_file, '--verbose'])
        out, err = capsys.readouterr()

        # Determine all DAG jobs and function calls from config. The '.init'
        # entries are popped from the tables before columns are listed.
        config = load_skypy_yaml(filename)
        cosmology = config.pop('cosmology', None)
        tables = config.pop('tables', {})
        config.update({k: v.pop('.init', Call(Table)) for k, v in tables.items()})
        columns = [f'{t}.{c}' for t, cols in tables.items() for c in cols]
        functions = [f for f in config.values() if isinstance(f, Call)]
        functions += [f for cols in tables.values() for f in cols.values()
                      if isinstance(f, Call)]

        # Check all jobs appear in the log
        for job in list(config) + list(tables) + columns:
            log_string = f"[INFO] skypy.pipeline: Generating {job}"
            assert (log_string in err)

        # Check all functions appear in the log
        for f in functions:
            log_string = f"[INFO] skypy.pipeline: Calling {f.function.__name__}"
            assert (log_string in err)

        # Check cosmology appears in the log
        if cosmology:
            assert ("[INFO] skypy.pipeline: Setting cosmology" in err)

        # Check writing output file is in the log
        assert (f"[INFO] skypy: Writing {output_file}" in err)

        # Check error for existing output file is in the log. The wording
        # depends on the installed astropy version, so try the newer message
        # first and fall back on the older one.
        try:
            from astropy.utils.misc import NOT_OVERWRITING_MSG
        except ImportError:
            error_string = f"[ERROR] skypy: File {output_file!r} already exists."
        else:
            error_string = NOT_OVERWRITING_MSG.format(output_file)
        assert (error_string in err)

        # Run again with decreased verbosity and check the log is empty
        with pytest.raises(SystemExit):
            skypy.main([filename, output_file, '-qq'])
        out, err = capsys.readouterr()
        assert (not err)
def test_pipeline():
    # End-to-end checks of Pipeline: evaluating calls, writing tables to FITS,
    # overwrite handling, dependency errors, plain-value variables, nested
    # calls/references and parameters. Output files are written inside a
    # temporary directory so the test neither leaves files in the working
    # directory nor fails because of a file left by an earlier run.
    import os
    import tempfile

    # Evaluate and store the default astropy cosmology.
    config = {'test_cosmology': Call(default_cosmology.get)}
    pipeline = Pipeline(config)
    pipeline.execute()
    assert pipeline['test_cosmology'] == default_cosmology.get()

    # Generate a simple two column table with a dependency. Also write the
    # table to a fits file and check its contents.
    size = 100
    string = size*'a'
    config = {'tables': {
                'test_table': {
                  'column1': Call(np.random.uniform, [], {
                    'size': size}),
                  'column2': Call(np.random.uniform, [], {
                    'low': Ref('test_table.column1')}),
                  'column3': Call(list, [string], {})}}}
    pipeline = Pipeline(config)
    pipeline.execute()
    assert len(pipeline['test_table']) == size
    assert np.all(pipeline['test_table.column1'] < pipeline['test_table.column2'])

    with tempfile.TemporaryDirectory() as tmpdir:
        output_filename = os.path.join(tmpdir, 'output.fits')
        pipeline.write(output_filename)
        with fits.open(output_filename) as hdu:
            assert np.all(Table(hdu['test_table'].data) == pipeline['test_table'])

        # Test invalid file extension
        with pytest.raises(ValueError):
            pipeline.write(os.path.join(tmpdir, 'output.invalid'))

        # Check for failure if output files already exist and overwrite is
        # False
        pipeline = Pipeline(config)
        pipeline.execute()
        with pytest.raises(OSError):
            pipeline.write(output_filename, overwrite=False)

        # Check that the existing output files are modified if overwrite is
        # True
        new_size = 2 * size
        new_string = new_size*'a'
        config['tables']['test_table']['column1'].kwargs = {'size': new_size}
        config['tables']['test_table']['column3'].args = [new_string]
        pipeline = Pipeline(config)
        pipeline.execute()
        pipeline.write(output_filename, overwrite=True)
        with fits.open(output_filename) as hdu:
            assert len(hdu[1].data) == new_size

    # Check for failure if 'column1' requires itself creating a cyclic
    # dependency graph
    config['tables']['test_table']['column1'] = Call(list, [Ref('test_table.column1')])
    with pytest.raises(networkx.NetworkXUnfeasible):
        Pipeline(config).execute()

    # Check for failure if 'column1' and 'column2' both require each other
    # creating a cyclic dependency graph
    config['tables']['test_table']['column1'] = Call(list, [Ref('test_table.column2')])
    with pytest.raises(networkx.NetworkXUnfeasible):
        Pipeline(config).execute()

    # Check for failure if 'column1' is removed from the config so that the
    # requirements for 'column2' are not satisfied.
    del config['tables']['test_table']['column1']
    with pytest.raises(KeyError):
        Pipeline(config).execute()

    # Check variables initialised by value
    config = {'test_int': 1,
              'test_float': 1.0,
              'test_string': 'hello world',
              'test_list': [0, 'one', 2.],
              'test_dict': {'a': 'b'}}
    pipeline = Pipeline(config)
    pipeline.execute()
    assert isinstance(pipeline['test_int'], int)
    assert isinstance(pipeline['test_float'], float)
    assert isinstance(pipeline['test_string'], str)
    assert isinstance(pipeline['test_list'], list)
    assert isinstance(pipeline['test_dict'], dict)
    assert pipeline['test_int'] == 1
    assert pipeline['test_float'] == 1.0
    assert pipeline['test_string'] == 'hello world'
    assert pipeline['test_list'] == [0, 'one', 2.]
    assert pipeline['test_dict'] == {'a': 'b'}

    # Check variables initialised by function
    config = {'test_func': Call(list, ['hello world']),
              'len_of_test_func': Call(len, [Ref('test_func')]),
              'nested_references': Call(sum, [
                [Ref('test_func'), [' '], Ref('test_func')], []]),
              'nested_functions': Call(list, [Call(range, [Call(len, [Ref('test_func')])])])}
    pipeline = Pipeline(config)
    pipeline.execute()
    assert pipeline['test_func'] == list('hello world')
    assert pipeline['len_of_test_func'] == len('hello world')
    assert pipeline['nested_references'] == list('hello world hello world')
    assert pipeline['nested_functions'] == list(range(len('hello world')))

    # Check parameter initialisation
    config = {'parameters': {
                'param1': 1.0}}
    pipeline = Pipeline(config)
    pipeline.execute()
    assert pipeline['param1'] == 1.0

    # Update parameter and re-run
    new_parameters = {'param1': 5.0}
    pipeline.execute(parameters=new_parameters)
    assert pipeline['param1'] == new_parameters['param1']
def test_call():
    # Exercise Call directly: argument validation at construction, evaluation
    # with incomplete arguments, inferring missing arguments from a context
    # and resolving references against a pipeline.

    from skypy.pipeline import Pipeline
    from skypy.pipeline._items import Call, Ref

    # function we will call; its name and parameter names are relied on by
    # the error-message patterns and by the inference step below
    def tester(arg1, arg2, *, kwarg1, kwarg2):
        return arg1, arg2, kwarg1, kwarg2

    # set up a mock pipeline
    empty_pipeline = Pipeline({})

    # invalid construction: one bad argument at a time
    invalid_constructions = [
        ((None, [], {}), 'function is not callable'),
        ((tester, None, {}), 'args is not a sequence'),
        ((tester, [], None), 'kwargs is not a mapping'),
    ]
    for bad_arguments, message in invalid_constructions:
        with pytest.raises(TypeError, match=message):
            Call(*bad_arguments)

    # good construction with no args or kwargs, but the call has incomplete
    # args so evaluation fails
    bare_call = Call(tester, [], {})
    with pytest.raises(TypeError, match=r'tester\(\)'):
        bare_call.evaluate(empty_pipeline)

    # good construction with arg1 and kwarg1; the call still has incomplete
    # args
    partial_call = Call(tester, [1], {'kwarg1': 3})
    with pytest.raises(TypeError, match=r'tester\(\)'):
        partial_call.evaluate(empty_pipeline)

    # infer required arg2 and kwarg2 from context; the call should be
    # evaluatable afterwards
    partial_call.infer({'arg2': 2, 'kwarg2': 4})
    assert partial_call.evaluate(empty_pipeline) == (1, 2, 3, 4)

    # set up a call with references and a pipeline holding the referenced
    # variables
    referencing_call = Call(tester, [Ref('var1'), 2],
                            {'kwarg1': Ref('var3'), 'kwarg2': 4})
    pipeline = Pipeline({'var1': 1, 'var3': 3})

    # check dependencies are resolved
    assert referencing_call.depend(pipeline) == ['var1', 'var3']

    # execute the pipeline (sets state) and evaluate the call
    pipeline.execute()
    assert referencing_call.evaluate(pipeline) == (1, 2, 3, 4)