Beispiel #1
0
def test_filesets_dont_work_for_sequences():
    """Chaining steps through a fileset tag must raise BakerUnsupportedError.

    ``One`` emits ``fileset("files")`` and ``Two`` declares it as input.
    Building the sequence from the two steps and running it is expected to
    raise ``BakerUnsupportedError``.
    """

    class One(Transform):
        # Emits the fileset tag that the following step consumes.
        input_tags = {"foo"}
        output_tags = {fileset("files")}

        def script(self):
            """No-op body."""

    class Two(Transform):
        # Declares the fileset emitted by ``One`` as its input.
        input_tags = {fileset("files")}
        output_tags = {"bar"}

        def script(self):
            """No-op body."""

    source_paths = {"foo": "./tests/__data__/foo.txt"}
    target_paths = {"bar": "./tests/__data__/bar.txt"}

    # Construction, instantiation and run() all stay inside the raises
    # block: any one of them may be the call that raises.
    with pytest.raises(BakerUnsupportedError):
        chained = sequence([One, Two])
        job = chained(
            input_paths=source_paths,
            output_paths=target_paths,
            overwrite=True,
        )
        job.run()
def test_gap_in_output_and_input():
    """An output consumed two steps later does not leak out of the sequence.

    ``StepOne`` emits ``bingo``; ``StepTwo`` does not use it, and
    ``StepThree`` consumes it. The sequence is expected to expose only
    ``foo`` as input and only ``bop`` as output.
    """

    class StepOne(Transform):
        input_tags = {"foo"}
        # "bingo" skips the next step and is consumed by StepThree.
        output_tags = {"bar", "bingo"}

        def script(self):
            """No-op body."""

    class StepTwo(Transform):
        input_tags = {"bar"}
        output_tags = {"baz"}

        def script(self):
            """No-op body."""

    class StepThree(Transform):
        input_tags = {"baz", "bingo"}
        output_tags = {"bop"}

        def script(self):
            """No-op body."""

    chain = sequence([StepOne, StepTwo, StepThree])
    expected_inputs = {"foo"}
    expected_outputs = {"bop"}
    assert chain.input_tags == expected_inputs
    assert chain.output_tags == expected_outputs
def test_exposed_intermediates():
    """Tags named in ``exposed_intermediates`` show up as sequence outputs.

    The four steps form a straight chain foo -> bar -> baz -> bop -> bip.
    With ``bar`` and ``baz`` exposed, the sequence's output tags are
    expected to be the final ``bip`` plus those two intermediates.
    """

    class StepOne(Transform):
        input_tags = {"foo"}
        output_tags = {"bar"}

        def script(self):
            """No-op body."""

    class StepTwo(Transform):
        input_tags = {"bar"}
        output_tags = {"baz"}

        def script(self):
            """No-op body."""

    class StepThree(Transform):
        input_tags = {"baz"}
        output_tags = {"bop"}

        def script(self):
            """No-op body."""

    class StepFour(Transform):
        input_tags = {"bop"}
        output_tags = {"bip"}

        def script(self):
            """No-op body."""

    steps = [StepOne, StepTwo, StepThree, StepFour]
    chain = sequence(steps, exposed_intermediates={"bar", "baz"})
    # "bop" is not exposed, so it is expected to stay internal.
    assert chain.output_tags == {"bip", "bar", "baz"}
def test_complicated_dep_path():
    """Inputs that no earlier step produces become inputs of the sequence.

    ``StepTwo`` needs ``bongo`` and ``StepThree`` needs ``bingo``; neither
    is produced inside the chain, so both are expected alongside ``foo``
    in the sequence's input tags. Only ``bop`` is expected as output.
    """

    class StepOne(Transform):
        input_tags = {"foo"}
        output_tags = {"bar"}

        def script(self):
            """No-op body."""

    class StepTwo(Transform):
        # "bar" comes from StepOne; "bongo" has no producer in the chain.
        input_tags = {"bar", "bongo"}
        output_tags = {"baz"}

        def script(self):
            """No-op body."""

    class StepThree(Transform):
        # "baz" comes from StepTwo; "bingo" has no producer in the chain.
        input_tags = {"baz", "bingo"}
        output_tags = {"bop"}

        def script(self):
            """No-op body."""

    chain = sequence([StepOne, StepTwo, StepThree])
    expected_inputs = {"foo", "bingo", "bongo"}
    expected_outputs = {"bop"}
    assert chain.input_tags == expected_inputs
    assert chain.output_tags == expected_outputs
Beispiel #5
0
def test_gap_in_output_and_input():
    """An output consumed two steps later does not leak out of the sequence.

    ``StepOne`` emits ``bingo``; ``StepTwo`` does not use it, and
    ``StepThree`` consumes it. The sequence is expected to expose only
    ``foo`` as input and only ``bop`` as output.
    """

    class StepOne(Transform):
        foo = InputTag("foo")
        bar = OutputTag("bar")
        # Skips the next step and is consumed by StepThree.
        bingo = OutputTag("bingo")

        def script(self):
            """No-op body."""

    class StepTwo(Transform):
        bar = InputTag("bar")
        baz = OutputTag("baz")

        def script(self):
            """No-op body."""

    class StepThree(Transform):
        baz = InputTag("baz")
        bingo = InputTag("bingo")
        bop = OutputTag("bop")

        def script(self):
            """No-op body."""

    chain = sequence([StepOne, StepTwo, StepThree])
    expected_inputs = {"foo"}
    expected_outputs = {"bop"}
    assert chain.input_tags == expected_inputs
    assert chain.output_tags == expected_outputs
Beispiel #6
0
def test_exposed_intermediates():
    """Tags named in ``exposed_intermediates`` show up as sequence outputs.

    The four steps form a straight chain foo -> bar -> baz -> bop -> bip.
    With ``bar`` and ``baz`` exposed, the sequence's output tags are
    expected to be the final ``bip`` plus those two intermediates.
    """

    class StepOne(Transform):
        foo = InputTag("foo")
        bar = OutputTag("bar")

        def script(self):
            """No-op body."""

    class StepTwo(Transform):
        bar = InputTag("bar")
        baz = OutputTag("baz")

        def script(self):
            """No-op body."""

    class StepThree(Transform):
        baz = InputTag("baz")
        bop = OutputTag("bop")

        def script(self):
            """No-op body."""

    class StepFour(Transform):
        bop = InputTag("bop")
        bip = OutputTag("bip")

        def script(self):
            """No-op body."""

    steps = [StepOne, StepTwo, StepThree, StepFour]
    chain = sequence(steps, exposed_intermediates={"bar", "baz"})
    # "bop" is not exposed, so it is expected to stay internal.
    assert chain.output_tags == {"bip", "bar", "baz"}
Beispiel #7
0
def test_complicated_dep_path():
    """Inputs that no earlier step produces become inputs of the sequence.

    ``StepTwo`` needs ``bongo`` and ``StepThree`` needs ``bingo``; neither
    is produced inside the chain, so both are expected alongside ``foo``
    in the sequence's input tags. Only ``bop`` is expected as output.
    """

    class StepOne(Transform):
        foo = InputTag("foo")
        bar = OutputTag("bar")

        def script(self):
            """No-op body."""

    class StepTwo(Transform):
        bar = InputTag("bar")
        bongo = InputTag("bongo")
        baz = OutputTag("baz")

        def script(self):
            """No-op body."""

    class StepThree(Transform):
        baz = InputTag("baz")
        # The attribute names below differ from the tag strings they wrap;
        # the assertions at the end use the tag strings ("bingo", "bop").
        bleep = InputTag("bingo")
        boppo = OutputTag("bop")

        def script(self):
            """No-op body."""

    chain = sequence([StepOne, StepTwo, StepThree])
    expected_inputs = {"foo", "bingo", "bongo"}
    expected_outputs = {"bop"}
    assert chain.input_tags == expected_inputs
    assert chain.output_tags == expected_outputs
Beispiel #8
0
def test_real_world():
    """Run the train/predict/evaluate pipeline and check the accuracy file.

    The training branch (``BuildTrainDf`` then ``Train``) is merged with
    ``BuildTestDf``; ``Predict`` and ``EvaluateResults`` follow. After the
    run, the file written for the ``accuracy`` tag must contain exactly
    ``"0.9032"``.
    """
    train_branch = sequence([BuildTrainDf, Train])
    prepared = merge([train_branch, BuildTestDf])
    pipeline_cls = sequence([prepared, Predict, EvaluateResults])

    accuracy_path = "/tmp/accuracy"
    raw_inputs = {
        "raw_train_images": "./tests/__data__/mnist/optdigits.tra",
        "raw_test_images": "./tests/__data__/mnist/optdigits.tes",
    }

    job = pipeline_cls(
        input_paths=raw_inputs,
        output_paths={"accuracy": accuracy_path},
        overwrite=True,
    )
    job.run()

    with open(accuracy_path, "r") as report:
        recorded = report.read()
    assert recorded == "0.9032"
def test_in_memory_intermediates():
    """Run a three-step sequence under a ``fs_for_intermediates="mem"`` context.

    Each step copies or extends text from its input files into its output
    file. The final ``boppo`` file must read
    ``"foo contents processed bleep contents"``.
    """
    # NOTE(review): "mem" presumably selects an in-memory filesystem for the
    # intermediate tags ("bar", "baz") -- confirm against BakerContext.
    in_memory_context = BakerContext(fs_for_intermediates="mem")

    class StepOne(Transform):
        input_tags = {"foo"}
        output_tags = {"bar"}

        def script(self):
            # Plain copy: foo -> bar.
            with self.input_files["foo"].open() as src:
                payload = src.read()
            with self.output_files["bar"].open() as dst:
                dst.write(payload)

    class StepTwo(Transform):
        input_tags = {"bar"}
        output_tags = {"baz"}

        def script(self):
            # Copy bar -> baz with a " processed" suffix appended.
            with self.input_files["bar"].open() as src:
                payload = src.read()
            with self.output_files["baz"].open() as dst:
                dst.write(payload + " processed")

    class StepThree(Transform):
        input_tags = {"baz", "bleep"}
        output_tags = {"boppo"}

        def script(self):
            # Join baz and bleep with a single space into boppo.
            with self.input_files["baz"].open() as first_src:
                first = first_src.read()
            with self.input_files["bleep"].open() as second_src:
                second = second_src.read()
            with self.output_files["boppo"].open() as dst:
                dst.write(first + " " + second)

    chain = sequence([StepOne, StepTwo, StepThree])

    result_path = "/tmp/boppo"
    job = chain(
        input_paths={
            "foo": "./tests/__data__/foo.txt",
            "bleep": "./tests/__data__/bleep.txt",
        },
        output_paths={"boppo": result_path},
        context=in_memory_context,
        overwrite=True,
    )
    job.run()

    with open(result_path, "r") as produced:
        contents = produced.read()
    assert contents == "foo contents processed bleep contents"
Beispiel #10
0
def test_in_memory_intermediates():
    """Run a three-step sequence through a ``BakerDriverContext``.

    The context is created with ``fs_for_intermediates="memory"`` and is
    handed the instantiated sequence via ``run``. The final ``boppo`` file
    must read ``"foo contents processed bleep contents"``.
    """
    # NOTE(review): "memory" presumably selects an in-memory filesystem for
    # the intermediate tags ("bar", "baz") -- confirm against
    # BakerDriverContext.
    in_memory_context = BakerDriverContext(fs_for_intermediates="memory")

    class StepOne(Transform):
        foo = InputTag("foo")
        bar = OutputTag("bar")

        def script(self):
            # Plain copy: foo -> bar.
            with self.foo.open() as src:
                payload = src.read()
            with self.bar.open() as dst:
                dst.write(payload)

    class StepTwo(Transform):
        bar = InputTag("bar")
        baz = OutputTag("baz")

        def script(self):
            # Copy bar -> baz with a " processed" suffix appended.
            with self.bar.open() as src:
                payload = src.read()
            with self.baz.open() as dst:
                dst.write(payload + " processed")

    class StepThree(Transform):
        baz = InputTag("baz")
        bleep = InputTag("bleep")
        boppo = OutputTag("boppo")

        def script(self):
            # Join baz and bleep with a single space into boppo.
            with self.baz.open() as first_src:
                first = first_src.read()
            with self.bleep.open() as second_src:
                second = second_src.read()
            with self.boppo.open() as dst:
                dst.write(first + " " + second)

    chain = sequence([StepOne, StepTwo, StepThree])

    result_path = "/tmp/boppo"
    job = chain(
        input_paths={
            "foo": "./tests/__data__/foo.txt",
            "bleep": "./tests/__data__/bleep.txt",
        },
        output_paths={"boppo": result_path},
        overwrite=True,
    )
    in_memory_context.run(job)

    with open(result_path, "r") as produced:
        contents = produced.read()
    assert contents == "foo contents processed bleep contents"
Beispiel #11
0
def test_basic_sequence():
    """``structure()`` of a named two-step sequence matches the expected dict.

    ``Foo`` produces ``hi``, which ``Bar`` consumes. The expected structure
    lists ``there`` and ``yo`` as the sequence's inputs, ``dude`` and
    ``sup`` as its outputs, and one ``leaf`` entry per step.
    """

    class Foo(Transform):
        input_tags = {"yo"}
        output_tags = {"hi"}

        # ``script`` is looked up on instances, so it must accept ``self``;
        # a zero-argument definition raises TypeError when called as a
        # bound method.
        def script(self):
            pass

    class Bar(Transform):
        input_tags = {"hi", "there"}
        output_tags = {"sup", "dude"}

        # Same signature requirement as ``Foo.script``.
        def script(self):
            pass

    expected_structure = {
        "type": "sequence",
        "name": "Baz",
        "input_tags": ["there", "yo"],
        "output_tags": ["dude", "sup"],
        "steps": [
            {
                "input_tags": ["yo"],
                "name": "Foo",
                "output_tags": ["hi"],
                "type": "leaf",
            },
            {
                "input_tags": ["hi", "there"],
                "name": "Bar",
                "output_tags": ["dude", "sup"],
                "type": "leaf",
            },
        ],
    }

    assert sequence([Foo, Bar], name="Baz").structure() == expected_structure
Beispiel #12
0
class StepThree(Transform):
    # Reads the "baz" and "bleep" inputs and writes them, separated by a
    # single space, to the "boppo" output.
    baz = InputTag("baz")
    bleep = InputTag("bleep")
    boppo = OutputTag("boppo")

    def script(self):
        with self.baz.open() as first_src:
            first = first_src.read()
        with self.bleep.open() as second_src:
            second = second_src.read()
        with self.boppo.open() as dst:
            dst.write(first + " " + second)


# Nested sequence: StepOne followed by an inner (StepTwo, StepThree)
# sequence. test_pickle_nested_sequence round-trips it through pickle.
BaseSeq = sequence([
    StepOne,
    sequence([StepTwo, StepThree]),
])


def test_pickle_nested_sequence():
    """A nested sequence can be pickled, unpickled and then run.

    The test makes no assertion on the output; it passes when the round
    trip and the run complete without raising.
    """
    serialized = pickle.dumps(BaseSeq)
    restored = pickle.loads(serialized)

    job = restored(
        input_paths={
            "foo": "./tests/__data__/foo.txt",
            "bleep": "./tests/__data__/bleep.txt",
        },
        output_paths={"boppo": "/tmp/boppo"},
        overwrite=True,
    )
    job.run()
Beispiel #13
0
    },
)
# BuildDf with its tags renamed for the test split: the "raw_images" input
# becomes "raw_test_images", and the "df" / "labels" outputs become
# "to_predict_on" / "test_labels".
BuildTestDf = map_tags(
    BuildDf,
    input_mapping=dict(raw_images="raw_test_images"),
    output_mapping=dict(df="to_predict_on", labels="test_labels"),
)

# "MNISTPipeline": the training branch (BuildTrainDf -> Train) is merged
# with BuildTestDf, then Predict and EvaluateResults follow. The "model"
# intermediate is exposed.
Pipeline = sequence(
    [
        merge([
            sequence([BuildTrainDf, Train]),
            BuildTestDf,
        ]),
        Predict,
        EvaluateResults,
    ],
    name="MNISTPipeline",
    exposed_intermediates={"model"},
)


def test_real_world():
    # Try pickling and unpickling first -- ensure these things work.
    PickledPipeline = pickle.loads(pickle.dumps(Pipeline))
    PickledPipeline(
        input_paths={
            "raw_train_images": "./tests/__data__/mnist/optdigits.tra",
            "raw_test_images": "./tests/__data__/mnist/optdigits.tes",
        },
        output_paths={
Beispiel #14
0
def test_sequence_defined_name():
    """An explicit ``name=`` argument becomes the sequence's ``name``."""

    class Lol(Transform):
        def script(self):
            """No-op body."""

    named = sequence([Lol, Lol], name="TootToot")
    assert named.name == "TootToot"
Beispiel #15
0
def test_sequence_generated_name():
    """Without ``name=``, the sequence name is built from the step names."""

    # The class name feeds the expected generated name below.
    class Lol(Transform):
        def script(self):
            """No-op body."""

    unnamed = sequence([Lol, Lol])
    assert unnamed.name == "Seq(Lol,Lol)"