Example #1
def test_detour_stop_flow(memory_jobstore, clean_dir, detour_stop_flow):
    from jobflow import run_locally

    flow = detour_stop_flow()
    uuid1 = flow.jobs[0].uuid
    uuid3 = flow.jobs[1].uuid

    # run with log
    responses = run_locally(flow, store=memory_jobstore)
    uuid2 = [u for u in responses.keys() if u != uuid1 and u != uuid3][0]

    # check responses has been filled
    assert len(responses) == 2
    assert responses[uuid1][1].output == 11
    assert responses[uuid1][1].detour is not None
    assert responses[uuid2][1].output == "1234"

    # check store has the activity output
    result1 = memory_jobstore.query_one({"uuid": uuid1})
    result2 = memory_jobstore.query_one({"uuid": uuid2})
    result3 = memory_jobstore.query_one({"uuid": uuid3})

    assert result1["output"] == 11
    assert result2["output"] == "1234"
    assert result3 is None
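
These tests rely on pytest fixtures (memory_jobstore, clean_dir and the various *_flow factories) that are defined in the test suite's conftest and not shown here. A minimal sketch of what memory_jobstore might look like, assuming maggma's MemoryStore backing a plain JobStore:

import pytest
from maggma.stores import MemoryStore

from jobflow import JobStore


@pytest.fixture
def memory_jobstore():
    # in-memory JobStore; connect() prepares the underlying store for queries
    store = JobStore(MemoryStore())
    store.connect()
    return store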
Example #2
def test_nested_flow(memory_jobstore, clean_dir, nested_flow):
    from jobflow import run_locally

    flow = nested_flow()
    uuid1 = flow.jobs[0].jobs[0].uuid
    uuid2 = flow.jobs[0].jobs[1].uuid
    uuid3 = flow.jobs[1].jobs[0].uuid
    uuid4 = flow.jobs[1].jobs[1].uuid

    # run with log
    responses = run_locally(flow, store=memory_jobstore)

    # check responses has been filled
    assert len(responses) == 4
    assert responses[uuid1][1].output == "12345_end"
    assert responses[uuid2][1].output == "12345_end_end"
    assert responses[uuid3][1].output == "12345_end_end_end"
    assert responses[uuid4][1].output == "12345_end_end_end_end"

    # check store has the activity output
    result1 = memory_jobstore.query_one({"uuid": uuid1})
    result2 = memory_jobstore.query_one({"uuid": uuid2})
    result3 = memory_jobstore.query_one({"uuid": uuid3})
    result4 = memory_jobstore.query_one({"uuid": uuid4})

    assert result1["output"] == "12345_end"
    assert result2["output"] == "12345_end_end"
    assert result3["output"] == "12345_end_end_end"
    assert result4["output"] == "12345_end_end_end_end"
Example #3
def test_error_flow(memory_jobstore, clean_dir, error_flow, capsys):
    import pytest

    from jobflow import run_locally

    flow = error_flow()

    # run with log
    responses = run_locally(flow, store=memory_jobstore)

    # check responses has been filled
    assert len(responses) == 0

    captured = capsys.readouterr()
    assert "error_func failed with exception" in captured.out

    with pytest.raises(RuntimeError):
        run_locally(flow, store=memory_jobstore, ensure_success=True)
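
The error_flow fixture is likewise defined elsewhere. Judging from the captured output, it wraps a single job whose function is named error_func and which simply raises; a hypothetical reconstruction:

from jobflow import Flow, job


@job
def error_func():
    # any uncaught exception marks the job as failed
    raise ValueError("errored")


def error_flow():
    return Flow([error_func()])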
Example #4
def test_stop_children_flow(memory_jobstore, clean_dir, stop_children_flow):
    from jobflow import run_locally

    flow = stop_children_flow()
    uuid1 = flow.jobs[0].uuid
    uuid2 = flow.jobs[1].uuid
    uuid3 = flow.jobs[2].uuid

    # run with log
    responses = run_locally(flow, store=memory_jobstore)

    # check responses has been filled
    assert len(responses) == 2
    assert len(responses[uuid1]) == 1
    assert uuid2 not in responses
    assert responses[uuid1][1].output == "1234"
    assert responses[uuid1][1].stop_children is True
    assert responses[uuid3][1].output == "12345_end"

    # check store has the activity output
    result1 = memory_jobstore.query_one({"uuid": uuid1})
    result2 = memory_jobstore.query_one({"uuid": uuid2})
    result3 = memory_jobstore.query_one({"uuid": uuid3})

    assert result1["output"] == "1234"
    assert result2 is None
    assert result3["output"] == "12345_end"
Example #5
def test_replace_flow_nested(memory_jobstore, clean_dir, replace_flow_nested):
    from jobflow import run_locally

    flow = replace_flow_nested()
    uuid1 = flow.jobs[0].uuid
    uuid2 = flow.jobs[1].uuid

    # run with log
    responses = run_locally(flow, store=memory_jobstore)

    # check responses has been filled
    assert len(responses) == 4
    assert len(responses[uuid1]) == 2
    assert responses[uuid1][1].output == 11
    assert responses[uuid1][1].replace is not None
    assert responses[uuid1][2].output["first"].__class__.__name__ == "OutputReference"
    assert responses[uuid2][1].output == "12345_end"

    # check store has the activity output
    result1 = memory_jobstore.query_one({"uuid": uuid1, "index": 1})
    result2 = memory_jobstore.query_one({"uuid": uuid1, "index": 2})
    result3 = memory_jobstore.query_one({"uuid": uuid2, "index": 1})

    assert result1["output"] == 11
    assert result2["output"]["first"]["@class"] == "OutputReference"
    assert result3["output"] == "12345_end"

    # assert job2 (replaced job) ran before job3
    assert result2["completed_at"] < result3["completed_at"]
Example #6
def test_simple_job(memory_jobstore, clean_dir, simple_job):
    from jobflow import run_locally

    # run with log
    job = simple_job("12345")
    uuid = job.uuid
    responses = run_locally(job, store=memory_jobstore)

    # check responses has been filled
    assert responses[uuid][1].output == "12345_end"

    # check store has the activity output
    result = memory_jobstore.query_one({"uuid": uuid})
    assert result["output"] == "12345_end"

    # test run no store
    job = simple_job("12345")
    uuid = job.uuid
    responses = run_locally(job)
    assert responses[uuid][1].output == "12345_end"
Example #7
def test_stored_data_flow(memory_jobstore, clean_dir, stored_data_flow,
                          capsys):
    from jobflow import run_locally

    flow = stored_data_flow()

    responses = run_locally(flow, store=memory_jobstore)
    captured = capsys.readouterr()

    # check responses has been filled
    assert len(responses) == 1
    assert "Response.stored_data is not supported" in captured.out
Example #8
def test_simple_flow(memory_jobstore, clean_dir, simple_flow, capsys):
    from pathlib import Path

    from jobflow import run_locally

    flow = simple_flow()
    uuid = flow.jobs[0].uuid

    # run without log
    run_locally(flow, store=memory_jobstore, log=False)
    captured = capsys.readouterr()
    assert "INFO Started executing jobs locally" not in captured.out
    assert "INFO Finished executing jobs locally" not in captured.out

    # run with log
    responses = run_locally(flow, store=memory_jobstore)

    # check responses has been filled
    assert responses[uuid][1].output == "12345_end"

    # check store has the activity output
    result = memory_jobstore.query_one({"uuid": uuid})
    assert result["output"] == "12345_end"

    # check no folders were written
    folders = list(Path(".").glob("job_*/"))
    assert len(folders) == 0

    # check logs printed
    captured = capsys.readouterr()
    assert "INFO Started executing jobs locally" in captured.out
    assert "INFO Finished executing jobs locally" in captured.out

    # run with folders
    responses = run_locally(flow, store=memory_jobstore, create_folders=True)
    assert responses[uuid][1].output == "12345_end"
    folders = list(Path(".").glob("job_*/"))
    assert len(folders) == 1
Example #9
def test_stop_jobflow_job(memory_jobstore, clean_dir, stop_jobflow_job):
    from jobflow import run_locally

    job = stop_jobflow_job()
    uuid1 = job.uuid

    # run with log
    responses = run_locally(job, store=memory_jobstore)

    # check responses has been filled
    assert len(responses) == 1
    assert len(responses[uuid1]) == 1
    assert responses[uuid1][1].output == "1234"
    assert responses[uuid1][1].stop_jobflow is True

    # check store has the activity output
    result1 = memory_jobstore.query_one({"uuid": uuid1})

    assert result1["output"] == "1234"
Example #10
def test_addition_flow(memory_jobstore, clean_dir, addition_flow):
    from jobflow import run_locally

    flow = addition_flow()
    uuid1 = flow.jobs[0].uuid

    # run with log
    responses = run_locally(flow, store=memory_jobstore)
    uuid2 = [u for u in responses.keys() if u != uuid1][0]

    # check responses has been filled
    assert len(responses) == 2
    assert responses[uuid1][1].output == 11
    assert responses[uuid1][1].addition is not None
    assert responses[uuid2][1].output == "11_end"

    # check store has the activity output
    result1 = memory_jobstore.query_one({"uuid": uuid1})
    result2 = memory_jobstore.query_one({"uuid": uuid2})

    assert result1["output"] == 11
    assert result2["output"] == "11_end"
Example #11
from jobflow import Flow, job, run_locally


# The original snippet omits the first- and second-name job definitions;
# these are minimal assumed stubs.
@job
def generate_first_name():
    return "Victor"


@job
def generate_second_name():
    return "Frankenstein"


@job
def connect_name(first_name, second_name):
    return f"{first_name} {second_name}"


@job
def print_inputs(inputs):
    print(inputs)


def get_name_flow():
    first_name = generate_first_name()
    second_name = generate_second_name()
    full_name = connect_name(first_name.output, second_name.output)
    return Flow([first_name, second_name, full_name],
                full_name.output,
                name="Get Name")


name_flow_a = get_name_flow()
name_flow_b = get_name_flow()
print_job = print_inputs([name_flow_a.output, name_flow_b.output])

# create a new flow to contain the nested flow
outer_flow = Flow([name_flow_a, name_flow_b, print_job])

# draw the flow graph
outer_flow.draw_graph().show()

# run the flow
run_locally(outer_flow)
Example #12
from jobflow import Flow, job, run_locally


@job
def encode_message(message):
    """Encode a message using base64."""
    from base64 import b64encode

    return b64encode(message.encode()).decode()


@job
def decode_message(message):
    """Decode a message from base64."""
    from base64 import b64decode

    return b64decode(message.encode()).decode()


# Create two jobs, the first to encode a message and the second to decode it.
encode = encode_message("Lo, a shadow of horror is risen")
decode = decode_message(encode.output)

# Create a flow containing the jobs. The order of the jobs doesn't matter and will be
# determined by the connectivity of the jobs.
flow = Flow([encode, decode])

# draw the flow graph
flow.draw_graph().show()

# run the flow, "output" contains the output of all jobs
output = run_locally(flow)
print(output)
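
Because execution order is inferred from output references rather than from the list order, declaring the jobs in reverse produces the same result; a quick sketch with fresh job objects:

encode2 = encode_message("Lo, a shadow of horror is risen")
decode2 = decode_message(encode2.output)

# decode2 is listed first, but jobflow still runs encode2 before it
# because decode2 consumes encode2.output
flow_reversed = Flow([decode2, encode2])
run_locally(flow_reversed)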
Example #13
from typing import List

from jobflow import Flow, Response, job, run_locally


# The original snippet is truncated: the decorated header of start_timing_jobs
# is reconstructed from how it is called below, and read_websites / time_website
# are minimal assumed stubs.
@job
def read_websites():
    # assumed stub: load a list of website URLs from a file
    from pathlib import Path

    return Path("websites.txt").read_text().split()


@job
def time_website(website: str):
    # assumed stub: time how long a website takes to respond
    from time import perf_counter
    from urllib.request import urlopen

    start = perf_counter()
    urlopen(website).read()
    return perf_counter() - start


@job
def start_timing_jobs(websites: List[str]):
    jobs = []
    for website in websites:
        time_job = time_website(website)
        time_job.name = f"time {website}"
        jobs.append(time_job)

    output = [j.output for j in jobs]
    return Response(replace=Flow(jobs, output))


@job
def sum_times(times: List[float]):
    return sum(times)


# create a flow that will:
# 1. load a list of websites from a file
# 2. generate one new job for each website to time the website loading
# 3. sum all the times together
read_websites_job = read_websites()
timings_job = start_timing_jobs(read_websites_job.output)
sum_job = sum_times(timings_job.output)
flow = Flow([read_websites_job, timings_job, sum_job])

# draw the flow graph
flow.draw_graph().show()

# run the flow, "responses" contains the output of all jobs
responses = run_locally(flow)
print(responses)
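
Response(replace=...) swaps the current job for the generated flow, so downstream jobs such as sum_times see the replacement flow's output. Compare Example #1, which exercises Response(detour=...), and Example #15, which grows the workflow with Response(addition=...).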
Example #14
from maggma.stores import MemoryStore

from jobflow import JobStore, job, run_locally


@job(data=True)
def generate_big_data():
    """
    Generate some data.

    The data=True in the job decorator tells jobflow to store all outputs in the "data"
    additional store.
    """
    mydata = list(range(1000))
    return mydata


big_data_job = generate_big_data()

# in this example, we use different memory stores for the documents and "data"
# additional store. In practice, any Maggma Store subclass can be used for either store.
docs_store = MemoryStore()
data_store = MemoryStore()
store = JobStore(docs_store, additional_stores={"data": data_store})

# Because our job requires an additional store named "data" we have to use our
# custom store when running the job.
output = run_locally(big_data_job, store=store)

print(output)
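
The same pattern applies to persistent backends; a sketch assuming a local MongoDB (the database and collection names here are illustrative):

from maggma.stores import MongoStore

docs_store = MongoStore("jobflow_db", "outputs")
data_store = MongoStore("jobflow_db", "big_data")
store = JobStore(docs_store, additional_stores={"data": data_store})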
Example #15
"""A dynamic workflow that calculates the Fibonacci sequence."""
from jobflow import Response, job, run_locally


@job
def fibonacci(smaller, larger, stop_point=1000):
    """Calculate the next number in the Fibonacci sequence.

    If the number is larger than stop_point, the job will stop the workflow
    execution, otherwise, a new job will be submitted to calculate the next number.
    """
    total = smaller + larger

    if total > stop_point:
        return total

    new_job = fibonacci(larger, total, stop_point=stop_point)
    return Response(output=total, addition=new_job)


fibonacci_job = fibonacci(1, 1)

# run the job; responses will contain the output from all jobs
responses = run_locally(fibonacci_job)
print(responses)
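
responses maps each job's uuid to a {index: Response} dict, so the numbers computed along the way can be pulled out directly; a small sketch (not part of the original example):

# each job's Response carries its running total as the output
totals = sorted(resp[1].output for resp in responses.values())
print(totals)  # ends with the first value above stop_point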