Example #1
def test_multi_column_assignment_failure(na, nt):

    # Test multi-column assignment failure with too few/many columns. The
    # mismatched column counts na and nt come from a pytest parametrisation
    # that is not shown in this excerpt.
    config = {'tables': {
                'multi_column_test_table': {
                  'a,b,c': Call(lambda nrows, ncols: np.ones((nrows, ncols)), [7, na]),
                  'd,e,f': Call(lambda nrows, ncols: (np.ones(nrows),) * ncols, [7, nt])}}}

    pipeline = Pipeline(config)
    with pytest.raises(ValueError):
        pipeline.execute()
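
These excerpts omit their module-level imports. Example #10 below imports Pipeline, Call and Ref explicitly; a minimal sketch of the imports the examples on this page assume (module paths as in Example #10; further astropy imports are noted where they first appear):

import numpy as np
import pytest

from skypy.pipeline import Pipeline
from skypy.pipeline._items import Call, Ref
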
Example #2
def test_multi_column_assignment():

    # Test multi-column assignment from 2d arrays and tuples of 1d arrays
    config = {'tables': {
                'multi_column_test_table': {
                  'a,b ,c, d': Call(lambda nrows, ncols: np.ones((nrows, ncols)), [7, 4]),
                  'e , f,  g': Call(lambda nrows, ncols: (np.ones(nrows),) * ncols, [7, 3]),
                  'h': Call(list, [Ref('multi_column_test_table.a')]),
                  'i': Call(list, [Ref('multi_column_test_table.f')])}}}

    pipeline = Pipeline(config)
    pipeline.execute()
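
The ragged spacing in the keys above is the point of the test: a comma-separated key defines several columns at once, and whitespace around each name is evidently stripped. A hedged check of the resulting layout (a sketch, assuming columns keep the config's insertion order):

t = pipeline['multi_column_test_table']
# one column per comma-separated name, whitespace stripped
assert t.colnames == ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i']
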
Example #3
def test_hdf5():
    size = 100
    string = size*'a'
    config = {'tables': {
              'test_table': {
                'column1': Call(np.random.uniform, [], {
                  'size': size}),
                'column2': Call(np.random.uniform, [], {
                  'low': Ref('test_table.column1')}),
                'column3': Call(list, [string], {})}}}

    pipeline = Pipeline(config)
    pipeline.execute()
    pipeline.write('output.hdf5')
    hdf_table = read_table_hdf5('output.hdf5', 'tables/test_table', character_as_bytes=False)
    assert np.all(hdf_table == pipeline['test_table'])
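
read_table_hdf5 lives in astropy.io.misc.hdf5 and needs the optional h5py dependency. A minimal standalone round trip mirroring the test above (a sketch; the file name is illustrative):

from astropy.io.misc.hdf5 import read_table_hdf5
from astropy.table import Table

t = Table({'x': [1, 2, 3]})
t.write('roundtrip.hdf5', path='tables/test_table', overwrite=True)
assert np.all(read_table_hdf5('roundtrip.hdf5', 'tables/test_table') == t)
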
Example #4
def test_column_quantity():

    # Regression test for pull request #356
    # Previously Pipeline.__getitem__ would return column data from tables as
    # an astropy.table.Column object. However, most functions take either
    # numpy.ndarray or astropy.units.Quantity objects as arguments. As of
    # astropy version 4.1.0 Column does not support all of the same class
    # methods as Quantity, e.g. to_value. This test ensures that column data in
    # a Pipeline is accessed as either an ndarray or Quantity (depending on
    # units). It also checks that functions using methods not supported by
    # Column can be called on column data inside a Pipeline.

    def value_in_cm(q):
        return q.to_value(unit='cm')

    config = {
        'tables': {
            'test_table': {
                'lengths': Quantity(np.random.uniform(size=50), unit='m'),
                'lengths_in_cm': Call(value_in_cm, [Ref('test_table.lengths')])}}}

    pipeline = Pipeline(config)
    pipeline.execute()

    assert isinstance(pipeline['test_table.lengths'], Quantity)
    assert isinstance(pipeline['test_table.lengths_in_cm'], np.ndarray)
    np.testing.assert_array_less(0, pipeline['test_table.lengths_in_cm'])
    np.testing.assert_array_less(pipeline['test_table.lengths_in_cm'], 100)
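
A minimal standalone sketch of the distinction the comment describes, outside the pipeline:

from astropy.table import Column
from astropy.units import Quantity

q = Quantity([1.0, 2.0], unit='m')
assert list(q.to_value(unit='cm')) == [100.0, 200.0]

# A bare Column carries the unit but, at least on astropy 4.1.0, lacks
# Quantity methods such as to_value; converting explicitly works everywhere.
c = Column([1.0, 2.0], unit='m')
assert list(Quantity(c).to_value(unit='cm')) == [100.0, 200.0]
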
Example #5
def test_logging(capsys, tmp_path):

    # Run skypy with default verbosity and check log is empty
    config_filename = get_pkg_data_filename('data/test_config.yml')
    output_filename = str(tmp_path / 'logging.fits')
    skypy.main([config_filename, output_filename])
    out, err = capsys.readouterr()
    assert (not err)

    # Run again with increased verbosity and capture log. Force an exception by
    # not using the "--overwrite" flag when the output file already exists.
    with pytest.raises(SystemExit):
        skypy.main([config_filename, output_filename, '--verbose'])
    out, err = capsys.readouterr()

    # Determine all DAG jobs and function calls from config
    config = load_skypy_yaml(config_filename)
    cosmology = config.pop('cosmology', None)
    tables = config.pop('tables', {})
    config.update({k: v.pop('.init', Call(Table)) for k, v in tables.items()})
    columns = [f'{t}.{c}' for t, cols in tables.items() for c in cols]
    functions = [f for f in config.values() if isinstance(f, Call)]
    functions += [
        f for t, cols in tables.items() for f in cols.values()
        if isinstance(f, Call)
    ]

    # Check all jobs appear in the log
    for job in list(config) + list(tables) + columns:
        log_string = f"[INFO] skypy.pipeline: Generating {job}"
        assert (log_string in err)

    # Check all functions appear in the log
    for f in functions:
        log_string = f"[INFO] skypy.pipeline: Calling {f.function.__name__}"
        assert (log_string in err)

    # Check cosmology appears in the log
    if cosmology:
        assert ("[INFO] skypy.pipeline: Setting cosmology" in err)

    # Check writing output file is in the log
    assert (f"[INFO] skypy: Writing {output_filename}" in err)

    # Check error for existing output file is in the log
    try:
        # New error message introduced in astropy PR #12179
        from astropy.utils.misc import NOT_OVERWRITING_MSG
        error_string = NOT_OVERWRITING_MSG.format(output_filename)
    except ImportError:
        # Fallback on old error message from astropy v4.x
        error_string = f"[ERROR] skypy: File {output_filename!r} already exists."
    assert (error_string in err)

    # Run again with decreased verbosity and check the log is empty
    with pytest.raises(SystemExit):
        skypy.main([config_filename, output_filename, '-qq'])
    out, err = capsys.readouterr()
    assert (not err)
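
The three runs exercise skypy's verbosity flags: the default level writes nothing to stderr, '--verbose' admits the INFO records, and '-qq' suppresses even the error. An illustrative mapping of such flags onto logging levels (a sketch of the convention, not skypy's actual implementation):

import logging

def level_from_flags(verbose=0, quiet=0):
    # Each -v lowers the threshold by one step, each -q raises it;
    # WARNING is the default, matching the empty log in the first run.
    levels = [logging.DEBUG, logging.INFO, logging.WARNING,
              logging.ERROR, logging.CRITICAL]
    default = levels.index(logging.WARNING)
    return levels[min(max(default - verbose + quiet, 0), len(levels) - 1)]

assert level_from_flags() == logging.WARNING          # default: empty log
assert level_from_flags(verbose=1) == logging.INFO    # --verbose
assert level_from_flags(quiet=2) == logging.CRITICAL  # -qq
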
Example #6
def test_pipeline_cosmology():

    def return_cosmology(cosmology):
        return cosmology

    # Test pipeline correctly sets default cosmology from parameters
    # N.B. the astropy cosmology class has not implemented __eq__, so the
    # assertions below compare identity instead
    H0, Om0 = 70, 0.3
    config = {'parameters': {'H0': H0, 'Om0': Om0},
              'cosmology': Call(FlatLambdaCDM, [Ref('H0'), Ref('Om0')]),
              'test': Call(return_cosmology),
              }
    pipeline = Pipeline(config)
    pipeline.execute()
    assert pipeline['test'] is pipeline['cosmology']

    # Test pipeline correctly updates cosmology from new parameters
    H0_new, Om0_new = 75, 0.25
    pipeline.execute({'H0': H0_new, 'Om0': Om0_new})
    assert pipeline['test'] is pipeline['cosmology']
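
Since equality cannot be tested, the assertions compare identity. Under the same assumptions one could additionally check that the rebuilt cosmology picked up the new parameter values (H0 is a Quantity on FlatLambdaCDM, Om0 a float):

assert pipeline['cosmology'].H0.value == H0_new
assert pipeline['cosmology'].Om0 == Om0_new
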
Example #7
def test_depends():

    # Regression test for GitHub Issue #464
    # Previously the .depends keyword was also being passed to functions as a
    # keyword argument. This was because Pipeline was executing Item.infer to
    # handle additional function arguments from context before handling
    # additional dependencies specified using the .depends keyword. The
    # .depends keyword is now handled first.

    config = {'tables': {
                'table_1': {
                  'column1': Call(np.random.uniform, [0, 1, 10])},
                'table_2': {
                    '.init': Call(vstack, [], {
                      'tables': [Ref('table_1')],
                      '.depends': ['table_1.complete']})}}}

    pipeline = Pipeline(config)
    pipeline.execute()
    assert np.all(pipeline['table_1'] == pipeline['table_2'])
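
A hedged illustration of the bug this guards against: had '.depends' still been forwarded as a keyword argument, the call itself would have failed, since vstack() accepts no such keyword:

from astropy.table import Table, vstack  # imports assumed by the test above

# forwarding '.depends' to the function raises an unexpected-keyword error
with pytest.raises(TypeError):
    vstack(tables=[Table({'a': [1]})], **{'.depends': ['table_1.complete']})
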
Example #8
def test_logging(capsys):

    # Run skypy with default verbosity and check log is empty
    filename = get_pkg_data_filename('data/test_config.yml')
    output_file = 'logging.fits'
    skypy.main([filename, output_file])
    out, err = capsys.readouterr()
    assert (not err)

    # Run again with increased verbosity and capture log. Force an exception by
    # not using the "--overwrite" flag when the output file already exists.
    with pytest.raises(SystemExit):
        skypy.main([filename, output_file, '--verbose'])
    out, err = capsys.readouterr()

    # Determine all DAG jobs and function calls from config
    config = load_skypy_yaml(filename)
    cosmology = config.pop('cosmology', None)
    tables = config.pop('tables', {})
    config.update({k: v.pop('.init', Call(Table)) for k, v in tables.items()})
    columns = [f'{t}.{c}' for t, cols in tables.items() for c in cols]
    functions = [f for f in config.values() if isinstance(f, Call)]
    functions += [
        f for t, cols in tables.items() for f in cols.values()
        if isinstance(f, Call)
    ]

    # Check all jobs appear in the log
    for job in list(config) + list(tables) + columns:
        log_string = f"[INFO] skypy.pipeline: Generating {job}"
        assert (log_string in err)

    # Check all functions appear in the log
    for f in functions:
        log_string = f"[INFO] skypy.pipeline: Calling {f.function.__name__}"
        assert (log_string in err)

    # Check cosmology appears in the log
    if cosmology:
        assert ("[INFO] skypy.pipeline: Setting cosmology" in err)

    # Check writing output file is in the log
    assert (f"[INFO] skypy: Writing {output_file}" in err)

    # Check error for existing output file is in the log
    assert (f"[ERROR] skypy: File '{output_file}' already exists." in err)

    # Run again with decreased verbosity and check the log is empty
    with pytest.raises(SystemExit):
        skypy.main([filename, output_file, '-qq'])
    out, err = capsys.readouterr()
    assert (not err)
Example #9
def test_pipeline():

    # Evaluate and store the default astropy cosmology.
    config = {'test_cosmology': Call(default_cosmology.get)}

    pipeline = Pipeline(config)
    pipeline.execute()
    assert pipeline['test_cosmology'] == default_cosmology.get()

    # Generate a simple table with a dependency between columns. Also write
    # the table to a fits file and check its contents.
    size = 100
    string = size*'a'
    config = {'tables': {
                'test_table': {
                  'column1': Call(np.random.uniform, [], {
                      'size': size}),
                  'column2': Call(np.random.uniform, [], {
                      'low': Ref('test_table.column1')}),
                  'column3': Call(list, [string], {})}}}

    pipeline = Pipeline(config)
    pipeline.execute()
    output_filename = 'output.fits'
    pipeline.write(output_filename)
    assert len(pipeline['test_table']) == size
    assert np.all(pipeline['test_table.column1'] < pipeline['test_table.column2'])
    with fits.open(output_filename) as hdu:
        assert np.all(Table(hdu['test_table'].data) == pipeline['test_table'])

    # Test invalid file extension
    with pytest.raises(ValueError):
        pipeline.write('output.invalid')

    # Check for failure if output files already exist and overwrite is False
    pipeline = Pipeline(config)
    pipeline.execute()
    with pytest.raises(OSError):
        pipeline.write(output_filename, overwrite=False)

    # Check that the existing output files are modified if overwrite is True
    new_size = 2 * size
    new_string = new_size*'a'
    config['tables']['test_table']['column1'].kwargs = {'size': new_size}
    config['tables']['test_table']['column3'].args = [new_string]
    pipeline = Pipeline(config)
    pipeline.execute()
    pipeline.write(output_filename, overwrite=True)
    with fits.open(output_filename) as hdu:
        assert len(hdu[1].data) == new_size

    # Check for failure if 'column1' requires itself creating a cyclic
    # dependency graph
    config['tables']['test_table']['column1'] = Call(list, [Ref('test_table.column1')])
    with pytest.raises(networkx.NetworkXUnfeasible):
        Pipeline(config).execute()

    # Check for failure if 'column1' and 'column2' both require each other
    # creating a cyclic dependency graph
    config['tables']['test_table']['column1'] = Call(list, [Ref('test_table.column2')])
    with pytest.raises(networkx.NetworkXUnfeasible):
        Pipeline(config).execute()

    # Check for failure if 'column1' is removed from the config so that the
    # requirements for 'column2' are not satisfied.
    del config['tables']['test_table']['column1']
    with pytest.raises(KeyError):
        Pipeline(config).execute()

    # Check variables initialised by value
    config = {'test_int': 1,
              'test_float': 1.0,
              'test_string': 'hello world',
              'test_list': [0, 'one', 2.],
              'test_dict': {'a': 'b'}}
    pipeline = Pipeline(config)
    pipeline.execute()
    assert isinstance(pipeline['test_int'], int)
    assert isinstance(pipeline['test_float'], float)
    assert isinstance(pipeline['test_string'], str)
    assert isinstance(pipeline['test_list'], list)
    assert isinstance(pipeline['test_dict'], dict)
    assert pipeline['test_int'] == 1
    assert pipeline['test_float'] == 1.0
    assert pipeline['test_string'] == 'hello world'
    assert pipeline['test_list'] == [0, 'one', 2.]
    assert pipeline['test_dict'] == {'a': 'b'}

    # Check variables initialised by function
    config = {'test_func': Call(list, ['hello world']),
              'len_of_test_func': Call(len, [Ref('test_func')]),
              'nested_references': Call(sum, [
                [Ref('test_func'), [' '], Ref('test_func')], []]),
              'nested_functions': Call(list, [Call(range, [Call(len, [Ref('test_func')])])])}
    pipeline = Pipeline(config)
    pipeline.execute()
    assert pipeline['test_func'] == list('hello world')
    assert pipeline['len_of_test_func'] == len('hello world')
    assert pipeline['nested_references'] == list('hello world hello world')
    assert pipeline['nested_functions'] == list(range(len('hello world')))

    # Check parameter initialisation
    config = {'parameters': {
                'param1': 1.0}}
    pipeline = Pipeline(config)
    pipeline.execute()
    assert pipeline['param1'] == 1.0

    # Update parameter and re-run
    new_parameters = {'param1': 5.0}
    pipeline.execute(parameters=new_parameters)
    assert pipeline['param1'] == new_parameters['param1']
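
The NetworkXUnfeasible failures above come out of the topological sort of the dependency graph, as the exception type suggests. A standalone sketch of that mechanism, independent of skypy:

import networkx

dag = networkx.DiGraph()
dag.add_edge('test_table.column1', 'test_table.column2')  # column2 needs column1
dag.add_edge('test_table.column2', 'test_table.column1')  # column1 needs column2
# sorting a cyclic graph raises NetworkXUnfeasible
with pytest.raises(networkx.NetworkXUnfeasible):
    list(networkx.topological_sort(dag))
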
Example #10
def test_call():
    from skypy.pipeline import Pipeline
    from skypy.pipeline._items import Call, Ref

    # set up a mock pipeline
    pipeline = Pipeline({})

    # function we will call
    def tester(arg1, arg2, *, kwarg1, kwarg2):
        return arg1, arg2, kwarg1, kwarg2

    # invalid construction
    with pytest.raises(TypeError, match='function is not callable'):
        Call(None, [], {})
    with pytest.raises(TypeError, match='args is not a sequence'):
        Call(tester, None, {})
    with pytest.raises(TypeError, match='kwargs is not a mapping'):
        Call(tester, [], None)

    # good construction with no args or kwargs
    call = Call(tester, [], {})

    # call has incomplete args
    with pytest.raises(TypeError, match=r'tester\(\)'):
        call.evaluate(pipeline)

    # good construction with arg1 and kwarg1
    call = Call(tester, [1], {'kwarg1': 3})

    # call still has incomplete args
    with pytest.raises(TypeError, match=r'tester\(\)'):
        call.evaluate(pipeline)

    # infer required arg2 and kwarg2 from context
    context = {
        'arg2': 2,
        'kwarg2': 4,
    }
    call.infer(context)

    # call should be evaluatable now
    result = call.evaluate(pipeline)
    assert result == (1, 2, 3, 4)

    # set up a call with references
    call = Call(tester, [Ref('var1'), 2], {'kwarg1': Ref('var3'), 'kwarg2': 4})

    # set up a pipeline with variables and a call that references them
    pipeline = Pipeline({'var1': 1, 'var3': 3})

    # check dependencies are resolved
    deps = call.depend(pipeline)
    assert deps == ['var1', 'var3']

    # execute the pipeline (sets state) and evaluate the call
    pipeline.execute()
    result = call.evaluate(pipeline)
    assert result == (1, 2, 3, 4)
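
A closing note: Refs are also resolved inside containers, as the 'nested_references' entry in Example #9 relies on. A sketch under that assumption, reusing the pipeline state set up above:

# the Ref inside the list argument is replaced by the pipeline value
call = Call(tester, [[Ref('var1')], 2], {'kwarg1': Ref('var3'), 'kwarg2': 4})
assert call.evaluate(pipeline) == ([1], 2, 3, 4)
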