Exemple #1
0
def test_merge(builder):
    """Smoke-test merging flows into a builder, with and without ``keep``.

    This is only a basic check; test_merge.py holds the thorough suite.
    """

    builder.assign("a", 1)
    builder.declare("b")

    @builder
    def h(a, b):
        return a + b

    # First merge: the incoming flow assigns a value to `b`, which was only
    # declared on the original builder.
    flow_with_b = bn.FlowBuilder("flow2")
    flow_with_b.assign("b", 2)
    builder.merge(flow_with_b.build())

    merged_flow = builder.build()
    assert merged_flow.get("h") == 3

    # Second merge: the incoming flow carries its own definition of `h`.
    replacement = bn.FlowBuilder("flow3")
    replacement.declare("a")
    replacement.declare("b")

    @replacement  # noqa: F811
    def h(a, b):
        return a * b

    builder.merge(replacement.build(), keep="new")

    # The new definition of `h` must be used here, not the value that was
    # computed (and cached) before the merge.
    remerged_flow = builder.build()
    assert remerged_flow.get("h") == 2
Exemple #2
0
def test_cache_dir_not_set():
    """When neither flow sets a cache dir, every merge reports 'bndata'."""
    base_flow = bn.FlowBuilder("old").build()
    incoming_flow = bn.FlowBuilder("new").build()

    for keep in ALL_KEEP_VALUES:
        merged = base_flow.merging(incoming_flow, keep=keep)
        assert merged.get(CACHE_DIR_ENT) == "bndata"
Exemple #3
0
def test_cache_dir_set_on_incoming():
    """A cache dir set only on the incoming flow is used for every ``keep``."""
    existing = bn.FlowBuilder("old").build()
    incoming = bn.FlowBuilder("new").build().setting(CACHE_DIR_ENT, "new_dir")

    for keep in ALL_KEEP_VALUES:
        merged = existing.merging(incoming, keep=keep)
        assert merged.get(CACHE_DIR_ENT) == "new_dir"
Exemple #4
0
def test_merge(builder):
    """Check the basic merge path: fill in a declared entity, then override one.

    Only a sanity check -- the more thorough cases live in test_merge.py.
    """

    builder.assign("a", 1)
    builder.declare("b")

    @builder
    def h(a, b):
        return a + b

    # The second flow assigns the value that `b` was only declared with.
    second = bn.FlowBuilder("flow2")
    second.assign("b", 2)
    second_flow = second.build()
    builder.merge(second_flow)

    assert builder.build().get("h") == 3

    # The third flow declares the same inputs but defines `h` differently.
    third = bn.FlowBuilder("flow3")
    for entity_name in ("a", "b"):
        third.declare(entity_name)

    @third  # noqa: F811
    def h(a, b):
        return a * b

    third_flow = third.build()
    builder.merge(third_flow, keep="new")

    # `h` must now come from the merged-in definition rather than from the
    # previously cached result.
    assert builder.build().get("h") == 2
Exemple #5
0
def test_cache_dir_set_on_incoming():
    """Only the incoming flow sets a cache dir; it must win for every ``keep``."""
    base_flow = bn.FlowBuilder("old").build()

    plain_incoming = bn.FlowBuilder("new").build()
    incoming_flow = plain_incoming.setting(CACHE_DIR_ENT, "new_dir")

    for keep in ALL_KEEP_VALUES:
        merged_flow = base_flow.merging(incoming_flow, keep=keep)
        assert merged_flow.get(CACHE_DIR_ENT) == "new_dir"
Exemple #6
0
def test_cache_dir_conflicts():
    """Both flows set a cache dir: ``keep`` decides which wins, or errors."""
    old_flow = bn.FlowBuilder("old").build().setting(CACHE_DIR_ENT, "old_dir")
    new_flow = bn.FlowBuilder("new").build().setting(CACHE_DIR_ENT, "new_dir")

    with pytest.raises(AlreadyDefinedEntityError):
        old_flow.merging(new_flow, keep="error")

    # Each non-error mode must pick the corresponding side's directory.
    for keep, expected_dir in (("old", "old_dir"), ("new", "new_dir")):
        merged = old_flow.merging(new_flow, keep=keep)
        assert merged.get(CACHE_DIR_ENT) == expected_dir
Exemple #7
0
def test_cache_dir_conflicts():
    """Merging two flows with different cache dirs honours the ``keep`` mode."""

    def flow_with_cache_dir(flow_name, cache_dir):
        # Build an otherwise empty flow whose cache dir entity is set.
        return bn.FlowBuilder(flow_name).build().setting(
            CACHE_DIR_ENT, cache_dir
        )

    old_flow = flow_with_cache_dir("old", "old_dir")
    new_flow = flow_with_cache_dir("new", "new_dir")

    with pytest.raises(AlreadyDefinedEntityError):
        old_flow.merging(new_flow, keep="error")

    kept_old = old_flow.merging(new_flow, keep="old")
    assert kept_old.get(CACHE_DIR_ENT) == "old_dir"

    kept_new = old_flow.merging(new_flow, keep="new")
    assert kept_new.get(CACHE_DIR_ENT) == "new_dir"
Exemple #8
0
def builder(parallel_execution_enabled, tmp_path):
    """Return a "test" FlowBuilder configured for the current test.

    The persistent cache directory is placed under ``tmp_path`` and the
    parallel-execution setting is taken from ``parallel_execution_enabled``.
    """
    flow_builder = bn.FlowBuilder("test")

    cache_dir = str(tmp_path / "BNTESTDATA")
    flow_builder.set("core__persistent_cache__flow_dir", cache_dir)
    flow_builder.set(
        "core__parallel_execution__enabled", parallel_execution_enabled
    )

    return flow_builder
Exemple #9
0
    def __init__(self, cache_dir, make_counter):
        """Build a small flow whose derived entities are call-counted.

        ``cache_dir`` is used as the flow's persistent cache directory and
        ``make_counter`` is called once per derived entity to create the
        counter passed to ``count_calls``.  The built flow and the three
        counters are stored on the instance.
        """
        flow_builder = bn.FlowBuilder("test")

        flow_builder.set("core__persistent_cache__flow_dir", cache_dir)
        for entity_name, entity_value in (("x", 2), ("y", 3), ("z", 4)):
            flow_builder.assign(entity_name, entity_value)

        counter_for_xy = make_counter()

        @flow_builder
        @count_calls(counter_for_xy)
        def xy(x, y):
            return x * y

        counter_for_yz = make_counter()

        @flow_builder
        @count_calls(counter_for_yz)
        def yz(y, z):
            return y * z

        counter_for_sum = make_counter()

        @flow_builder
        @count_calls(counter_for_sum)
        def xy_plus_yz(xy, yz):
            return xy + yz

        self.flow = flow_builder.build()
        self.xy_counter = counter_for_xy
        self.yz_counter = counter_for_yz
        self.xy_plus_yz_counter = counter_for_sum
Exemple #10
0
    def __init__(self, cache_dir):
        """Build a small flow of call-counted entities cached in ``cache_dir``.

        The built flow and the three decorated functions are stored on the
        instance as ``flow``, ``xy``, ``yz`` and ``xy_plus_yz``.
        """
        flow_builder = bn.FlowBuilder("test")

        flow_builder.set("core__persistent_cache__flow_dir", cache_dir)
        for entity_name, entity_value in (("x", 2), ("y", 3), ("z", 4)):
            flow_builder.assign(entity_name, entity_value)

        @flow_builder
        @count_calls
        def xy(x, y):
            return x * y

        @flow_builder
        @count_calls
        def yz(y, z):
            return y * z

        @flow_builder
        @count_calls
        def xy_plus_yz(xy, yz):
            return xy + yz

        self.flow = flow_builder.build()

        # Keep the decorated functions on the instance.
        self.xy, self.yz, self.xy_plus_yz = xy, yz, xy_plus_yz
Exemple #11
0
def test_protocol_is_overwritten(builder):
    """Merging with keep="new" replaces the protocol declared for `x`."""
    builder.declare("x", protocol=bn.protocol.type(int))

    incoming = bn.FlowBuilder("new_name")
    incoming.assign("x", "blue", protocol=bn.protocol.type(str))
    incoming_flow = incoming.build()

    builder.merge(incoming_flow, keep="new")

    # After the merge an int value for `x` is rejected.
    with pytest.raises(AssertionError):
        builder.set("x", 3)

    merged_flow = builder.build()
    assert merged_flow.get("x") == "blue"
Exemple #12
0
def test_protocol_is_overwritten(builder):
    """An incoming str-typed `x` replaces the int-typed declaration on merge."""
    int_protocol = bn.protocol.type(int)
    builder.declare("x", protocol=int_protocol)

    incoming_builder = bn.FlowBuilder("new_name")
    str_protocol = bn.protocol.type(str)
    incoming_builder.assign("x", "blue", protocol=str_protocol)

    builder.merge(incoming_builder.build(), keep="new")

    # Setting an int now fails, and the merged-in value is what `x` returns.
    with pytest.raises(AssertionError):
        builder.set("x", 3)

    assert builder.build().get("x") == "blue"
Exemple #13
0
def test_merging(preset_flow):
    """Check `Flow.merging` against a flow that assigns `x` and `y`.

    Without ``keep`` the merge must raise AlreadyDefinedEntityError.  With
    ``keep`` set, `f` is checked for each accepted value; "self" is expected
    to give the same result as "old", and "arg" the same as "new".
    """
    flow = preset_flow

    new_flow = (
        bn.FlowBuilder("new_flow").build().assigning("x", 5).assigning("y", 6)
    )

    assert flow.get("f", set) == set()

    # Only the call belongs inside the block: when merging() raises there is
    # no value to assert on, and when it does not, pytest.raises itself
    # reports the failure.
    with pytest.raises(AlreadyDefinedEntityError):
        flow.merging(new_flow)

    assert flow.merging(new_flow, keep="old").get("f") == 6
    assert flow.merging(new_flow, keep="self").get("f") == 6
    assert flow.merging(new_flow, keep="new").get("f") == 11
    assert flow.merging(new_flow, keep="arg").get("f") == 11
Exemple #14
0
def test_merging(preset_flow):
    """Check `Flow.merging` against a flow that assigns `x` and `y`.

    Without ``keep`` the merge must raise AlreadyDefinedEntityError.  With
    ``keep`` set, `f` is checked for each accepted value; 'self' is expected
    to give the same result as 'old', and 'arg' the same as 'new'.
    """
    flow = preset_flow

    new_flow = (bn.FlowBuilder('new_flow').build().assigning('x', 5).assigning(
        'y', 6))

    assert flow.get('f', set) == set()

    # Only the call belongs inside the block: when merging() raises there is
    # no value to assert on, and when it does not, pytest.raises itself
    # reports the failure.
    with pytest.raises(AlreadyDefinedEntityError):
        flow.merging(new_flow)

    assert flow.merging(new_flow, keep='old').get('f') == 6
    assert flow.merging(new_flow, keep='self').get('f') == 6
    assert flow.merging(new_flow, keep='new').get('f') == 11
    assert flow.merging(new_flow, keep='arg').get('f') == 11
Exemple #15
0
def test_protocols_conflict(builder):
    """Conflicting protocols on `x` error by default; ``keep`` picks a side."""
    builder.declare("x", protocol=bn.protocol.type(int))

    incoming = bn.FlowBuilder("new_name")
    incoming.declare("x", protocol=bn.protocol.type(str))

    # No `keep` given: the conflicting declaration is rejected.
    with pytest.raises(AlreadyDefinedEntityError):
        builder.merge(incoming.build())

    # keep="old": ints are still accepted for `x`, strings are not.
    builder.merge(incoming.build(), keep="old")
    builder.set("x", 1)
    with pytest.raises(AssertionError):
        builder.set("x", "blue")

    # keep="new": strings are now accepted for `x`, ints are not.
    builder.merge(incoming.build(), keep="new")
    builder.set("x", "blue")
    with pytest.raises(AssertionError):
        builder.set("x", 1)
Exemple #16
0
def test_protocols_conflict(builder):
    """Merging a flow with a different protocol for `x` follows ``keep``."""
    builder.declare("x", protocol=bn.protocol.type(int))

    other_builder = bn.FlowBuilder("new_name")
    other_builder.declare("x", protocol=bn.protocol.type(str))

    def build_incoming():
        # A fresh build of the incoming flow for each merge attempt.
        return other_builder.build()

    with pytest.raises(AlreadyDefinedEntityError):
        builder.merge(build_incoming())

    # Keeping the old side: an int is accepted, a str is rejected.
    builder.merge(build_incoming(), keep="old")
    builder.set("x", 1)
    with pytest.raises(AssertionError):
        builder.set("x", "blue")

    # Keeping the new side: a str is accepted, an int is rejected.
    builder.merge(build_incoming(), keep="new")
    builder.set("x", "blue")
    with pytest.raises(AssertionError):
        builder.set("x", 1)
Exemple #17
0
def builder(process_executor, process_manager, tmp_path):
    """Return a "test" FlowBuilder that uses the given executor and manager.

    The persistent cache directory is placed under ``tmp_path``.
    """
    flow_builder = bn.FlowBuilder("test")

    cache_dir = str(tmp_path / "BNTESTDATA")
    flow_builder.set("core__persistent_cache__flow_dir", cache_dir)

    # flow_builder.set() is not usable for the two entities below: it goes
    # through ValueProvider, which tries to tokenize the value by writing /
    # pickling it. Defining them as functions makes them use FunctionProvider
    # instead, which sidesteps that issue.
    @flow_builder
    @persist(False)
    def core__process_executor():
        return process_executor

    @flow_builder
    @persist(False)
    def core__process_manager():
        return process_manager

    return flow_builder
    def __init__(self, cache_dir, make_counter):
        """Build two flows over set-valued entities, with call counters.

        ``manual_flow`` is built first; ``auto_flow`` is built from the same
        builder after ``core__versioning_mode`` is set to "auto".  Each derived
        entity marks its own counter (created via ``make_counter``) when run.
        """
        lowercase_sum_counter = make_counter()
        uppercase_sum_counter = make_counter()
        total_sum_counter = make_counter()

        self.lowercase_sum_counter = lowercase_sum_counter
        self.uppercase_sum_counter = uppercase_sum_counter
        self.total_sum_counter = total_sum_counter

        flow_builder = bn.FlowBuilder("test")
        flow_builder.set("core__persistent_cache__flow_dir", cache_dir)

        # Sets are used on purpose: the point is to check that they are hashed
        # deterministically. Set iteration order is non-deterministic, but it
        # stays the same within a single Python process, so simply running a
        # flow several times in one process would not exercise this.
        flow_builder.assign("lowercase_chars", set("abcdef"))
        flow_builder.assign("uppercase_chars", frozenset("ABCDEF"))

        @flow_builder
        @bn.version_no_warnings
        def lowercase_sum(lowercase_chars):
            lowercase_sum_counter.mark()
            return sum(ord(char) for char in lowercase_chars)

        @flow_builder
        @bn.version_no_warnings
        def uppercase_sum(uppercase_chars):
            uppercase_sum_counter.mark()
            return sum(ord(char) for char in uppercase_chars)

        @flow_builder
        @bn.version_no_warnings
        def total_sum(lowercase_sum, uppercase_sum):
            total_sum_counter.mark()
            return lowercase_sum + uppercase_sum

        # Build once with the versioning mode untouched, then again with it
        # switched to "auto".
        self.manual_flow = flow_builder.build()
        flow_builder.set("core__versioning_mode", "auto")
        self.auto_flow = flow_builder.build()
Exemple #19
0
def builder(tmp_path):
    """Return a "test" FlowBuilder whose persistent cache is under tmp_path."""
    flow_builder = bn.FlowBuilder("test")
    cache_dir = str(tmp_path / "BNTESTDATA")
    flow_builder.set("core__persistent_cache__flow_dir", cache_dir)
    return flow_builder
Exemple #20
0
import bionic as bn

# Builder for a minimal flow: two inputs and their sum.
builder = bn.FlowBuilder("basic_workflow")

# `x` and `y` are each assigned a list of values rather than a single value.
builder.assign("x", values=[2, 3])
builder.assign("y", values=[5, 7])


# Derived entity: the sum of `x` and `y`.
@builder
def x_plus_y(x, y):
    return x + y


flow = builder.build()

if __name__ == "__main__":
    bn.utils.misc.init_basic_logging()

    # Fetch `x_plus_y` as a series and move its index into columns, so that
    # each row exposes `x`, `y` and `x_plus_y` by name.
    for _, row in flow.get("x_plus_y", "series").reset_index().iterrows():
        x_value, y_value, total = row["x"], row["y"], row["x_plus_y"]
        print(f"{x_value} + {y_value} = {total}")
Exemple #21
0
def flow():
    """Return a built "hello_world" flow with one documented `greeting` entity.

    Used as a fixture for testing.
    """
    hello_builder = bionic.FlowBuilder("hello_world")
    hello_builder.assign("greeting", "hello world", doc="a friendly greeting")
    hello_flow = hello_builder.build()
    return hello_flow
Exemple #22
0
def test_old_name_is_kept_even_on_explicit_rename():
    """The merged flow keeps the old name even if the new flow was renamed."""
    base_flow = bn.FlowBuilder("old").build()
    renamed_flow = bn.FlowBuilder("new").build().setting("core__flow_name", "NEW")

    for keep in ALL_KEEP_VALUES:
        merged = base_flow.merging(renamed_flow, keep=keep)
        assert merged.name == "old"
Exemple #23
0
def test_old_name_is_kept_even_on_explicit_rename():
    """Explicitly setting the incoming flow's name does not rename the result."""
    old_flow = bn.FlowBuilder("old").build()

    incoming = bn.FlowBuilder("new").build()
    incoming = incoming.setting("core__flow_name", "NEW")

    for keep in ALL_KEEP_VALUES:
        merged_flow = old_flow.merging(incoming, keep=keep)
        assert merged_flow.name == "old"
Exemple #24
0
def test_old_name_is_kept():
    """For every ``keep`` mode the merged flow is still named 'old'."""
    base_flow = bn.FlowBuilder("old").build()
    incoming_flow = bn.FlowBuilder("new").build()

    for keep in ALL_KEEP_VALUES:
        merged = base_flow.merging(incoming_flow, keep=keep)
        assert merged.name == "old"
Exemple #25
0
def merge_tester(builder):
    """Return a MergeTester loaded with variants of `x` from two flows.

    The first group (long labels such as "FixedSingle") is derived from the
    flow built by ``builder``; the second group (short labels such as "FS")
    is derived from a separate flow named "new_flow".
    """
    base_flow = builder.build()

    tester = MergeTester()

    tester.add("Missing", base_flow)
    tester.add("Declared", base_flow.declaring("x"))
    tester.add("FixedSingle", base_flow.assigning("x", 2))
    tester.add("FixedMulti", base_flow.assigning("x", values=[3, 4]))

    single_builder = base_flow.to_builder()
    single_builder.assign("root_x", 3)

    @single_builder
    def x(root_x):
        return root_x ** 2

    tester.add("DerivedSingle", single_builder.build())

    joint_builder = base_flow.to_builder()
    joint_builder.assign("x_y", (5, 6))

    @joint_builder  # noqa: F811
    @bn.outputs("x", "y")
    def x(x_y):
        return x_y

    tester.add("DerivedJoint", joint_builder.build())

    fixed_joint_flow = (
        base_flow.declaring("x").declaring("y").adding_case("x", 7, "y", 8)
    )
    tester.add("FixedJoint", fixed_joint_flow)

    # The flow below uses the default cache directory. That is undesirable,
    # since it could pick up data left by earlier test runs, but giving it
    # its own cache directory is tricky: merging the two flows would then
    # conflict (even if both directories were the same). As a workaround,
    # persistence is disabled for every derived entity in this flow, so the
    # cache shouldn't be used at all. Longer-term we may want a smarter merge
    # that either recognizes when two values are equal or treats
    # "infrastructure" entities like this one differently -- or a way to
    # simply run a flow without caching.
    fresh_flow = bn.FlowBuilder("new_flow").build()
    tester.add("M", fresh_flow)
    tester.add("D", fresh_flow.declaring("x"))
    tester.add("FS", fresh_flow.assigning("x", 12))
    tester.add("FM", fresh_flow.assigning("x", values=[13, 14]))

    fresh_single_builder = fresh_flow.to_builder()
    fresh_single_builder.assign("root_x", 3)

    @fresh_single_builder  # noqa: F811
    @bn.persist(False)
    def x(root_x):
        return root_x ** 2

    tester.add("DS", fresh_single_builder.build())

    fresh_joint_builder = fresh_flow.to_builder()
    fresh_joint_builder.assign("x_y", (5, 6))

    @fresh_joint_builder  # noqa: F811
    @bn.outputs("x", "y")
    @bn.persist(False)
    def x(x_y):
        return x_y

    tester.add("DJ", fresh_joint_builder.build())

    fresh_fixed_joint_flow = (
        fresh_flow.declaring("x").declaring("y").adding_case("x", 17, "y", 18)
    )
    tester.add("FJ", fresh_fixed_joint_flow)

    return tester
Exemple #26
0
'''
A toy ML workflow intended to demonstrate basic Bionic features.  Trains a
logistic regression model on the UCI ML Breast Cancer Wisconsin (Diagnostic)
dataset.
'''

import re

from sklearn import datasets, model_selection, linear_model, metrics
import pandas as pd

import bionic as bn

# Initialize the builder that the workflow's entities are registered on.
builder = bn.FlowBuilder('ml_workflow')

# Define the workflow's basic parameters as fixed-value entities:
# a random seed, the test split fraction, a dict of hyperparameters, and a
# regex for feature inclusion ('.*' matches any string).
builder.assign('random_seed', 0)
builder.assign('test_split_fraction', 0.3)
builder.assign('hyperparams_dict', {'C': 1})
builder.assign('feature_inclusion_regex', '.*')


# Load the raw data.
@builder
def raw_frame():
    dataset = datasets.load_breast_cancer()
    df = pd.DataFrame(
        data=dataset.data,
        columns=dataset.feature_names,
    )
Exemple #27
0
def builder(tmp_path):
    builder = bn.FlowBuilder("test")
    builder.set("core__persistent_cache__flow_dir",
                str(tmp_path / "BNTESTDATA"))
    return builder
Exemple #28
0
from __future__ import print_function

import bionic as bn

# Create the builder that accumulates the definitions making up our flow.
builder = bn.FlowBuilder("hello_world")

# "greeting" and "subject" are entities with fixed values.
builder.assign("greeting", "Hello")
builder.assign("subject", "world")


# "message" is a derived entity built by combining the values of "greeting"
# and "subject" into a sentence.
# Applying the `@builder` decorator tells Bionic to define a new derived
# entity; Bionic infers the entity's name ("message") and the names of its
# dependencies ("greeting" and "subject").
@builder
def message(greeting, subject):
    template = "{0} {1}!"
    return template.format(greeting, subject)


# Assemble the flow object, which can compute any of the entities defined
# above.
flow = builder.build()

if __name__ == "__main__":
    # Compute and print the message "Hello world!".
    print(flow.get("message"))
Exemple #29
0
import numpy as np
import pandas as pd
from scipy.stats import multivariate_normal
from sklearn.linear_model import LinearRegression

import bionic as bn

builder = bn.FlowBuilder("intro")

# Fixed parameters controlling the random data generated below.
builder.assign("random_seed", 0)
builder.assign("variance", 2)
builder.assign("correlation", 0.5)
builder.assign("n_samples", 1000)


# Draws `n_samples` points from a zero-mean two-dimensional normal
# distribution and returns them as a DataFrame with columns "x" and "y".
@builder
def my_random_df(random_seed, variance, correlation, n_samples):
    covariance = correlation * variance
    distribution = multivariate_normal(
        mean=[0, 0],
        cov=[[variance, covariance], [covariance, variance]],
    )
    samples = distribution.rvs(size=n_samples, random_state=random_seed)
    return pd.DataFrame(columns=["x", "y"], data=samples)


# Fits a linear regression predicting "y" from "x" on the random data.
@builder
def my_model(my_random_df):
    features = my_random_df[["x"]]
    target = my_random_df["y"]
    model = LinearRegression()
    model.fit(features, target)
    return model

Exemple #30
0
from __future__ import print_function

import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from scipy.stats import multivariate_normal

import bionic as bn

builder = bn.FlowBuilder("intro")

# Fixed parameters controlling the random data generated below.
builder.assign("random_seed", 0)
builder.assign("variance", 2)
builder.assign("correlation", 0.5)
builder.assign("n_samples", 1000)


# Draws `n_samples` points from a zero-mean two-dimensional normal
# distribution and returns them as a DataFrame with columns "x" and "y".
@builder
def my_random_df(random_seed, variance, correlation, n_samples):
    covariance = correlation * variance
    covariance_matrix = [
        [variance, covariance],
        [covariance, variance],
    ]
    distribution = multivariate_normal(mean=[0, 0], cov=covariance_matrix)
    samples = distribution.rvs(size=n_samples, random_state=random_seed)
    return pd.DataFrame(columns=["x", "y"], data=samples)