def test_merge(builder):
    # This is just a basic test; there's a more thorough test suite in
    # test_merge.py.

    builder.assign("a", 1)
    builder.declare("b")

    @builder
    def h(a, b):
        return a + b

    builder2 = bn.FlowBuilder("flow2")
    builder2.assign("b", 2)

    builder.merge(builder2.build())

    assert builder.build().get("h") == 3

    builder3 = bn.FlowBuilder("flow3")
    builder3.declare("a")
    builder3.declare("b")

    @builder3  # noqa: F811
    def h(a, b):
        return a * b

    builder.merge(builder3.build(), keep="new")

    # Notice: we correctly find the new value for `h`, rather than the cached
    # version.
    assert builder.build().get("h") == 2

def test_cache_dir_not_set():
    old_flow = bn.FlowBuilder('old').build()
    new_flow = bn.FlowBuilder('new').build()

    for keep in ALL_KEEP_VALUES:
        assert old_flow.merging(new_flow, keep=keep)\
            .get(CACHE_DIR_ENT) == 'bndata'

def test_cache_dir_set_on_incoming():
    old_flow = bn.FlowBuilder("old").build()
    new_flow = bn.FlowBuilder("new").build().setting(CACHE_DIR_ENT, "new_dir")

    for keep in ALL_KEEP_VALUES:
        assert old_flow.merging(new_flow, keep=keep).get(CACHE_DIR_ENT) == "new_dir"

def test_cache_dir_conflicts():
    old_flow = bn.FlowBuilder("old").build().setting(CACHE_DIR_ENT, "old_dir")
    new_flow = bn.FlowBuilder("new").build().setting(CACHE_DIR_ENT, "new_dir")

    with pytest.raises(AlreadyDefinedEntityError):
        old_flow.merging(new_flow, keep="error")

    assert old_flow.merging(new_flow, keep="old").get(CACHE_DIR_ENT) == "old_dir"
    assert old_flow.merging(new_flow, keep="new").get(CACHE_DIR_ENT) == "new_dir"

def builder(parallel_execution_enabled, tmp_path):
    builder = bn.FlowBuilder("test")
    builder.set("core__persistent_cache__flow_dir", str(tmp_path / "BNTESTDATA"))
    builder.set("core__parallel_execution__enabled", parallel_execution_enabled)
    return builder

def __init__(self, cache_dir, make_counter):
    builder = bn.FlowBuilder("test")
    builder.set("core__persistent_cache__flow_dir", cache_dir)

    builder.assign("x", 2)
    builder.assign("y", 3)
    builder.assign("z", 4)

    xy_counter = make_counter()

    @builder
    @count_calls(xy_counter)
    def xy(x, y):
        return x * y

    yz_counter = make_counter()

    @builder
    @count_calls(yz_counter)
    def yz(y, z):
        return y * z

    xy_plus_yz_counter = make_counter()

    @builder
    @count_calls(xy_plus_yz_counter)
    def xy_plus_yz(xy, yz):
        return xy + yz

    self.flow = builder.build()

    self.xy_counter = xy_counter
    self.yz_counter = yz_counter
    self.xy_plus_yz_counter = xy_plus_yz_counter

def __init__(self, cache_dir):
    builder = bn.FlowBuilder('test')
    builder.set('core__persistent_cache__flow_dir', cache_dir)

    builder.assign('x', 2)
    builder.assign('y', 3)
    builder.assign('z', 4)

    @builder
    @count_calls
    def xy(x, y):
        return x * y

    @builder
    @count_calls
    def yz(y, z):
        return y * z

    @builder
    @count_calls
    def xy_plus_yz(xy, yz):
        return xy + yz

    self.flow = builder.build()

    self.xy = xy
    self.yz = yz
    self.xy_plus_yz = xy_plus_yz

def test_protocol_is_overwritten(builder):
    builder.declare("x", protocol=bn.protocol.type(int))

    incoming_builder = bn.FlowBuilder("new_name")
    incoming_builder.assign("x", "blue", protocol=bn.protocol.type(str))

    builder.merge(incoming_builder.build(), keep="new")

    with pytest.raises(AssertionError):
        builder.set("x", 3)

    assert builder.build().get("x") == "blue"

def test_merging(preset_flow):
    flow = preset_flow

    new_flow = bn.FlowBuilder("new_flow").build().assigning("x", 5).assigning("y", 6)

    assert flow.get("f", set) == set()

    with pytest.raises(AlreadyDefinedEntityError):
        assert flow.merging(new_flow)

    assert flow.merging(new_flow, keep="old").get("f") == 6
    assert flow.merging(new_flow, keep="self").get("f") == 6
    assert flow.merging(new_flow, keep="new").get("f") == 11
    assert flow.merging(new_flow, keep="arg").get("f") == 11

def test_protocols_conflict(builder):
    builder.declare("x", protocol=bn.protocol.type(int))

    incoming_builder = bn.FlowBuilder("new_name")
    incoming_builder.declare("x", protocol=bn.protocol.type(str))

    with pytest.raises(AlreadyDefinedEntityError):
        builder.merge(incoming_builder.build())

    builder.merge(incoming_builder.build(), keep="old")
    builder.set("x", 1)
    with pytest.raises(AssertionError):
        builder.set("x", "blue")

    builder.merge(incoming_builder.build(), keep="new")
    builder.set("x", "blue")
    with pytest.raises(AssertionError):
        builder.set("x", 1)

def builder(process_executor, process_manager, tmp_path):
    builder = bn.FlowBuilder("test")
    builder.set("core__persistent_cache__flow_dir", str(tmp_path / "BNTESTDATA"))

    # We can't use builder.set here, because that uses ValueProvider, which
    # tries to tokenize the value by writing / pickling it. We get around the
    # issue by defining these entities with FunctionProvider instead.
    @builder
    @persist(False)
    def core__process_executor():
        return process_executor

    @builder
    @persist(False)
    def core__process_manager():
        return process_manager

    return builder

def __init__(self, cache_dir, make_counter):
    lowercase_sum_counter = make_counter()
    uppercase_sum_counter = make_counter()
    total_sum_counter = make_counter()

    builder = bn.FlowBuilder("test")
    builder.set("core__persistent_cache__flow_dir", cache_dir)

    # It's important that this test uses sets, because we want to check that
    # sets are hashed deterministically. (Set iteration is non-deterministic,
    # but it's always the same within one Python process, so a simpler test
    # where we just run a flow multiple times won't work for this.)
    builder.assign("lowercase_chars", set("abcdef"))
    builder.assign("uppercase_chars", frozenset("ABCDEF"))

    @builder
    @bn.version_no_warnings
    def lowercase_sum(lowercase_chars):
        lowercase_sum_counter.mark()
        return sum(ord(char) for char in lowercase_chars)

    @builder
    @bn.version_no_warnings
    def uppercase_sum(uppercase_chars):
        uppercase_sum_counter.mark()
        return sum(ord(char) for char in uppercase_chars)

    @builder
    @bn.version_no_warnings
    def total_sum(lowercase_sum, uppercase_sum):
        total_sum_counter.mark()
        return lowercase_sum + uppercase_sum

    self.lowercase_sum_counter = lowercase_sum_counter
    self.uppercase_sum_counter = uppercase_sum_counter
    self.total_sum_counter = total_sum_counter

    self.manual_flow = builder.build()

    builder.set("core__versioning_mode", "auto")
    self.auto_flow = builder.build()

import bionic as bn

builder = bn.FlowBuilder("basic_workflow")

builder.assign("x", values=[2, 3])
builder.assign("y", values=[5, 7])


@builder
def x_plus_y(x, y):
    return x + y


flow = builder.build()

if __name__ == "__main__":
    bn.utils.misc.init_basic_logging()

    for _, row in flow.get("x_plus_y", "series").reset_index().iterrows():
        print(f"{row['x']} + {row['y']} = {row['x_plus_y']}")
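
# A note on the script above: because "x" and "y" were each assigned two
# values, Bionic computes "x_plus_y" for every combination, so running this
# module should print four lines (in some order):
#
#   2 + 5 = 7
#   2 + 7 = 9
#   3 + 5 = 8
#   3 + 7 = 10
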
def flow():
    """Create a FlowImage fixture for testing."""
    builder = bionic.FlowBuilder("hello_world")
    builder.assign("greeting", "hello world", doc="a friendly greeting")
    return builder.build()

def test_old_name_is_kept_even_on_explicit_rename():
    old_flow = bn.FlowBuilder("old").build()
    new_flow = bn.FlowBuilder("new").build().setting("core__flow_name", "NEW")

    for keep in ALL_KEEP_VALUES:
        assert old_flow.merging(new_flow, keep=keep).name == "old"

def test_old_name_is_kept():
    old_flow = bn.FlowBuilder('old').build()
    new_flow = bn.FlowBuilder('new').build()

    for keep in ALL_KEEP_VALUES:
        assert old_flow.merging(new_flow, keep=keep).name == 'old'

def merge_tester(builder):
    f = builder.build()

    tester = MergeTester()

    tester.add('Missing', f)
    tester.add('Declared', f.declaring('x'))
    tester.add('FixedSingle', f.assigning('x', 2))
    tester.add('FixedMulti', f.assigning('x', values=[3, 4]))

    fb = f.to_builder()
    fb.assign('root_x', 3)

    @fb
    def x(root_x):
        return root_x ** 2

    tester.add('DerivedSingle', fb.build())

    fb = f.to_builder()
    fb.assign('x_y', (5, 6))

    @fb  # noqa: F811
    @bn.outputs('x', 'y')
    def x(x_y):
        return x_y

    tester.add('DerivedJoint', fb.build())

    tester.add(
        'FixedJoint',
        f.declaring('x').declaring('y').adding_case('x', 7, 'y', 8))

    # This new flow will use the default cache directory, which is bad, because
    # it could pick up data from previous test runs. Unfortunately it's tricky
    # to set another cache directory on this flow, because it will cause a
    # conflict when merging the two flows (even if the two cache directories
    # are the same). To work around this, we disable persistence for all
    # derived entities in this flow, so the cache shouldn't be used at all.
    # Longer-term, we may want a smarter way of merging that either recognizes
    # when two values are the same, or handles "infrastructure" entities like
    # this differently. Or a way to just run a flow without caching.
    f = bn.FlowBuilder('new_flow').build()

    tester.add('M', f)
    tester.add('D', f.declaring('x'))
    tester.add('FS', f.assigning('x', 12))
    tester.add('FM', f.assigning('x', values=[13, 14]))

    fb = f.to_builder()
    fb.assign('root_x', 3)

    @fb  # noqa: F811
    @bn.persist(False)
    def x(root_x):
        return root_x ** 2

    tester.add('DS', fb.build())

    fb = f.to_builder()
    fb.assign('x_y', (5, 6))

    @fb  # noqa: F811
    @bn.outputs('x', 'y')
    @bn.persist(False)
    def x(x_y):
        return x_y

    tester.add('DJ', fb.build())

    tester.add(
        'FJ',
        f.declaring('x').declaring('y').adding_case('x', 17, 'y', 18))

    return tester

'''
A toy ML workflow intended to demonstrate basic Bionic features. Trains a
logistic regression model on the UCI ML Breast Cancer Wisconsin (Diagnostic)
dataset.
'''

import re

from sklearn import datasets, model_selection, linear_model, metrics
import pandas as pd

import bionic as bn

# Initialize our builder.
builder = bn.FlowBuilder('ml_workflow')

# Define some basic parameters.
builder.assign('random_seed', 0)
builder.assign('test_split_fraction', 0.3)
builder.assign('hyperparams_dict', {'C': 1})
builder.assign('feature_inclusion_regex', '.*')


# Load the raw data, attaching the target labels so downstream steps can train
# against them.
@builder
def raw_frame():
    dataset = datasets.load_breast_cancer()
    df = pd.DataFrame(
        data=dataset.data,
        columns=dataset.feature_names,
    )
    df['target'] = dataset.target
    return df
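
# The original file continues past this excerpt. As a minimal sketch (the
# entity names and the solver choice below are illustrative assumptions, not
# taken verbatim from the original), the remaining steps would filter the
# features, split the data, fit the logistic regression, and score it:


# Select the feature columns matching `feature_inclusion_regex`, keeping the
# label column.
@builder
def features_frame(raw_frame, feature_inclusion_regex):
    included_cols = [
        col for col in raw_frame.columns
        if re.match(feature_inclusion_regex, col) or col == 'target'
    ]
    return raw_frame[included_cols]


# Split into train and test frames; `bn.outputs` maps the returned pair onto
# two separate entities.
@builder
@bn.outputs('train_frame', 'test_frame')
def split_frames(features_frame, test_split_fraction, random_seed):
    return model_selection.train_test_split(
        features_frame, test_size=test_split_fraction, random_state=random_seed)


# Fit a logistic regression model using the configured hyperparameters.
@builder
def model(train_frame, random_seed, hyperparams_dict):
    m = linear_model.LogisticRegression(
        solver='liblinear', random_state=random_seed, **hyperparams_dict)
    m.fit(train_frame.drop(columns='target'), train_frame['target'])
    return m


# Score the model on the held-out test frame.
@builder
def accuracy(model, test_frame):
    predictions = model.predict(test_frame.drop(columns='target'))
    return metrics.accuracy_score(test_frame['target'], predictions)
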
def builder(tmp_path):
    builder = bn.FlowBuilder("test")
    builder.set("core__persistent_cache__flow_dir", str(tmp_path / "BNTESTDATA"))
    return builder

from __future__ import print_function

import bionic as bn

# Initialize the builder object we'll use to construct our flow.
builder = bn.FlowBuilder('hello_world')

# Define new entities "greeting" and "subject" with fixed values.
builder.assign('greeting', 'Hello')
builder.assign('subject', 'world')


# Define a "message" entity, constructed by taking the values of "greeting"
# and "subject" and combining them in a sentence.
# The `@builder` decorator tells Bionic to define a new derived entity; Bionic
# infers the name of the new entity ("message") and the names of its
# dependencies ("greeting" and "subject").
@builder
def message(greeting, subject):
    return '{0} {1}!'.format(greeting, subject)


# Assemble the flow object, which is capable of computing any of the entities
# we've defined.
flow = builder.build()

if __name__ == '__main__':
    # Use our flow to compute the message "Hello world!"
    print(flow.get('message'))
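
# Flows are immutable, so deriving a variant doesn't modify the original flow.
# For example (an illustrative follow-on, not part of the original script):
#
#   flow.setting('subject', 'universe').get('message')  # 'Hello universe!'
#   flow.get('message')                                 # still 'Hello world!'
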
import numpy as np
import pandas as pd
from scipy.stats import multivariate_normal
from sklearn.linear_model import LinearRegression

import bionic as bn

builder = bn.FlowBuilder("intro")

builder.assign("random_seed", 0)
builder.assign("variance", 2)
builder.assign("correlation", 0.5)
builder.assign("n_samples", 1000)


@builder
def my_random_df(random_seed, variance, correlation, n_samples):
    data = multivariate_normal(
        mean=[0, 0],
        cov=[[variance, correlation * variance], [correlation * variance, variance]],
    ).rvs(size=n_samples, random_state=random_seed)
    return pd.DataFrame(columns=["x", "y"], data=data)


@builder
def my_model(my_random_df):
    model = LinearRegression()
    model.fit(my_random_df[["x"]], my_random_df["y"])
    return model
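
flow = builder.build()

if __name__ == "__main__":
    # A minimal usage sketch; this block is an illustrative addition, not part
    # of the original excerpt. Since both variables share the same variance,
    # the fitted slope of y on x should be close to the configured correlation
    # of 0.5.
    print(flow.get("my_model").coef_)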