Beispiel #1
0
    def setUp(self):
        """Create the fixtures shared by the tests in this case.

        Builds a ComponentRun named "mock_component_run", the dictionary
        stored as ``mock_component_run_dict``, and two input and two output
        IOPointers.
        """
        self.mock_component_run = ComponentRun("mock_component_run")

        # Dictionary form of a run on which only the name has been set.
        expected_dict = {
            "component_name": "mock_component_run",
            "notes": "",
            "inputs": [],
            "outputs": [],
            "git_hash": None,
            "git_tags": None,
            "code_snapshot": None,
            "start_timestamp": None,
            "end_timestamp": None,
            "dependencies": [],
            "id": None,
            "stale": [],
            "test_result": None,
            "mlflow_run_id": None,
            "mlflow_run_params": None,
            "mlflow_run_metrics": None,
        }
        self.mock_component_run_dict = expected_dict

        input_names = ("mock_input_1", "mock_input_2")
        output_names = ("mock_output_1", "mock_output_2")
        self.mock_inputs = [IOPointer(name) for name in input_names]
        self.mock_outputs = [IOPointer(name) for name in output_names]
Beispiel #2
0
def get_history(component_name: str,
                limit: int = 10) -> typing.List[ComponentRun]:
    """Fetch a component's history as client-facing ComponentRuns.

    The store is queried with ``component_name`` and ``limit``; every
    record it returns is converted into a ComponentRun via
    ``ComponentRun.from_dictionary``.
    """
    store = Store(_db_uri)

    def _pointer_dicts(pointers):
        # Round-trip each stored pointer through the client-facing
        # IOPointer and back to a dictionary.
        return [
            IOPointer.from_dictionary(pointer.__dict__).to_dictionary()
            for pointer in pointers
        ]

    client_runs = []
    for record in store.get_history(component_name, limit):
        input_dicts = _pointer_dicts(record.inputs)
        output_dicts = _pointer_dicts(record.outputs)
        upstream_names = [dep.component_name for dep in record.dependencies]

        # Copy the record's attributes, then overwrite the relationship
        # fields with their client-facing representations.
        fields = copy.deepcopy(record.__dict__)
        fields["inputs"] = input_dicts
        fields["outputs"] = output_dicts
        fields["dependencies"] = upstream_names
        client_runs.append(ComponentRun.from_dictionary(fields))

    return client_runs
Beispiel #3
0
    def setUp(self):
        """Create the fixtures shared by the tests in this case.

        Builds a ComponentRun named "mock_component_run", the dictionary
        stored as ``mock_component_run_dict``, and two input and two output
        IOPointers.
        """
        self.mock_component_run = ComponentRun("mock_component_run")

        # Dictionary form of a run on which only the name has been set.
        expected_dict = {
            "component_name": "mock_component_run",
            "inputs": [],
            "outputs": [],
            "git_hash": None,
            "code_snapshot": None,
            "start_timestamp": None,
            "end_timestamp": None,
            "dependencies": [],
            "id": None,
            "stale": [],
        }
        self.mock_component_run_dict = expected_dict

        input_names = ("mock_input_1", "mock_input_2")
        output_names = ("mock_output_1", "mock_output_2")
        self.mock_inputs = [IOPointer(name) for name in input_names]
        self.mock_outputs = [IOPointer(name) for name in output_names]
Beispiel #4
0
def log_component_run(
        component_run: ComponentRun,
        set_dependencies_from_inputs=True,
        staleness_threshold: int = (60 * 60 * 24 * 30),
):
    """Log a client-facing ComponentRun to the DB through the Store.

    Args:
        component_run: the run to log; its fields are read from
            ``component_run.to_dictionary()``.
        set_dependencies_from_inputs: when truthy, the store is asked to
            set the run's dependencies from its inputs.
        staleness_threshold: forwarded unchanged to
            ``Store.commit_component_run``. The default is
            60 * 60 * 24 * 30 (presumably seconds, i.e. 30 days --
            confirm against the Store).
    """
    store = Store(_db_uri)
    fields = component_run.to_dictionary()
    sql_run = store.initialize_empty_component_run(
        component_run.component_name)

    # Timestamps and notes are copied over only when they are truthy.
    start_ts = fields["start_timestamp"]
    if start_ts:
        sql_run.set_start_timestamp(start_ts)

    end_ts = fields["end_timestamp"]
    if end_ts:
        sql_run.set_end_timestamp(end_ts)

    notes = fields["notes"]
    if notes:
        sql_run.add_notes(notes)

    # Git and code metadata are set unconditionally.
    sql_run.set_git_hash(fields["git_hash"])
    sql_run.set_git_tags(fields["git_tags"])
    sql_run.set_code_snapshot(fields["code_snapshot"])

    def _stored_pointers(pointers):
        # Look up each client pointer in the store by name, value and type.
        resolved = []
        for pointer in pointers:
            resolved.append(
                store.get_io_pointer(pointer.name,
                                     pointer.value,
                                     pointer_type=pointer.pointer_type))
        return resolved

    sql_run.add_inputs(_stored_pointers(fields["inputs"]))
    sql_run.add_outputs(_stored_pointers(fields["outputs"]))

    # Create the component (with empty description and owner) if needed.
    create_component(component_run.component_name, "", "")

    if set_dependencies_from_inputs:
        store.set_dependencies_from_inputs(sql_run)

    # Each explicitly listed dependency is linked to the first entry of
    # that component's history.
    for dependency_name in fields["dependencies"]:
        upstream_run = store.get_history(dependency_name, 1)[0]
        sql_run.set_upstream(upstream_run)

    store.commit_component_run(sql_run,
                               staleness_threshold=staleness_threshold)
Beispiel #5
0
    def testLogEmptyComponentRun(self):
        """Logging a run built from only a component name raises RuntimeError."""
        create_component("test_component", "test_description", "shreya")

        # Nothing besides the component name is set on this run.
        empty_run = ComponentRun("test_component")

        self.assertRaises(RuntimeError, log_component_run, empty_run)
Beispiel #6
0
    def testLogKVComponentRun(self):
        """Log a run whose input and output IOPointers carry list values."""
        create_component(
            name="valtest",
            description="Tests implementation of values in iopointer.",
            owner="me",
        )

        first_value = ["this", "is", "the", "first"]
        second_value = ["this", "is", "the", "second"]

        # Wrap each value in a named IOPointer.
        input_pointer = IOPointer(name="iop1", value=first_value)
        output_pointer = IOPointer(name="iop2", value=second_value)

        run = ComponentRun("valtest")
        run.set_start_timestamp()
        run.set_end_timestamp()
        run.add_input(input_pointer)
        run.add_output(output_pointer)
        log_component_run(run)
Beispiel #7
0
def backtrace(output_pointer: str):
    """Trace the component runs behind an output pointer.

    Returns a list of tuples (level, ComponentRun), where level is how
    many hops away the node is from the node that produced the output.
    """
    store = Store(_db_uri)

    def _to_client_run(sql_run):
        # Convert one stored run into an entities.ComponentRun.
        input_pointers = [
            IOPointer.from_dictionary(pointer.__dict__)
            for pointer in sql_run.inputs
        ]
        output_pointers = [
            IOPointer.from_dictionary(pointer.__dict__)
            for pointer in sql_run.outputs
        ]
        upstream_names = [dep.component_name for dep in sql_run.dependencies]

        fields = copy.deepcopy(sql_run.__dict__)
        fields["inputs"] = input_pointers
        fields["outputs"] = output_pointers
        fields["dependencies"] = upstream_names
        return ComponentRun.from_dictionary(fields)

    hops = store.trace(output_pointer)
    return [(depth, _to_client_run(sql_run)) for depth, sql_run in hops]
Beispiel #8
0
def get_component_run_information(component_run_id: str) -> ComponentRun:
    """Return the client-facing ComponentRun for a stored run id.

    Args:
        component_run_id: identifier passed to ``Store.get_component_run``.

    Returns:
        A ComponentRun built with ``ComponentRun.from_dictionary`` from the
        stored record's attributes, with inputs/outputs converted to
        IOPointer dictionaries, dependencies reduced to component names and
        a truthy code snapshot decoded from UTF-8 bytes to ``str``.

    Raises:
        RuntimeError: if the store returns a falsy result for the id.
    """
    store = Store(_db_uri)
    cr = store.get_component_run(component_run_id)
    if not cr:
        # Report the id that was requested; the bare name ``id`` would
        # interpolate the builtin function instead.
        raise RuntimeError(
            f"Component run with id {component_run_id} not found.")
    inputs = [
        IOPointer.from_dictionary(iop.__dict__).to_dictionary() for iop in cr.inputs
    ]
    outputs = [
        IOPointer.from_dictionary(iop.__dict__).to_dictionary() for iop in cr.outputs
    ]
    dependencies = [dep.component_name for dep in cr.dependencies]
    d = copy.deepcopy(cr.__dict__)
    if cr.code_snapshot:
        # bytes.decode already returns a str, so no extra str() is needed.
        d.update({"code_snapshot": cr.code_snapshot.decode("utf-8")})
    d.update({"inputs": inputs, "outputs": outputs, "dependencies": dependencies})
    return ComponentRun.from_dictionary(d)
Beispiel #9
0
def get_history(
    component_name: str,
    limit: int = 10,
    date_lower: typing.Union[datetime, str] = datetime.min,
    date_upper: typing.Union[datetime, str] = datetime.max,
) -> typing.List[ComponentRun]:
    """Fetch a component's history as client-facing ComponentRuns.

    ``component_name``, ``limit`` and the two date bounds are forwarded to
    ``Store.get_history``. A falsy ``date_lower`` is replaced with
    ``datetime.min`` and a falsy ``date_upper`` with ``datetime.max``
    before the query. Every record returned is converted into a
    ComponentRun via ``ComponentRun.from_dictionary``.
    """
    store = Store(_db_uri)

    # Fall back to the widest possible bounds when none are given.
    date_lower = date_lower or datetime.min
    date_upper = date_upper or datetime.max

    def _pointer_dicts(pointers):
        # Round-trip each stored pointer through the client-facing
        # IOPointer and back to a dictionary.
        return [
            IOPointer.from_dictionary(pointer.__dict__).to_dictionary()
            for pointer in pointers
        ]

    client_runs = []
    for record in store.get_history(component_name, limit, date_lower,
                                    date_upper):
        input_dicts = _pointer_dicts(record.inputs)
        output_dicts = _pointer_dicts(record.outputs)
        upstream_names = [dep.component_name for dep in record.dependencies]

        # Copy the record's attributes, then overwrite the relationship
        # fields with their client-facing representations.
        fields = copy.deepcopy(record.__dict__)
        fields["inputs"] = input_dicts
        fields["outputs"] = output_dicts
        fields["dependencies"] = upstream_names
        client_runs.append(ComponentRun.from_dictionary(fields))

    return client_runs
Beispiel #10
0
class TestComponentRun(unittest.TestCase):
    """Unit tests for ComponentRun serialization, timestamps, I/O and notes."""

    def setUp(self):
        """
        Create a bare ComponentRun, the dictionary it is expected to
        serialize to, and mock input/output IOPointers.
        """
        self.mock_component_run = ComponentRun("mock_component_run")
        self.mock_component_run_dict = {
            "component_name": "mock_component_run",
            "notes": "",
            "inputs": [],
            "outputs": [],
            "git_hash": None,
            "git_tags": None,
            "code_snapshot": None,
            "start_timestamp": None,
            "end_timestamp": None,
            "dependencies": [],
            "id": None,
            "stale": [],
            "test_result": None,
            "mlflow_run_id": None,
            "mlflow_run_params": None,
            "mlflow_run_metrics": None,
        }

        self.mock_inputs = [
            IOPointer("mock_input_1"),
            IOPointer("mock_input_2"),
        ]
        self.mock_outputs = [
            IOPointer("mock_output_1"),
            IOPointer("mock_output_2"),
        ]

    def testSerialize(self):
        """
        Test the serialization functionality.
        """
        self.assertEqual(
            self.mock_component_run.to_dictionary(),
            self.mock_component_run_dict,
        )

    def testSetStartEndError(self):
        """
        Test that setting start and end ts as non
        datetime types throws an error.
        """

        with self.assertRaises(TypeError):
            self.mock_component_run.set_start_timestamp("incorrect_type")

        with self.assertRaises(TypeError):
            self.mock_component_run.set_end_timestamp("incorrect_type")

    def testAddInputOutput(self):
        """
        Test adding inputs and outputs one at a time.
        """
        cr = copy.deepcopy(self.mock_component_run)
        for inp in self.mock_inputs:
            cr.add_input(inp)
        for out in self.mock_outputs:
            cr.add_output(out)

        # The expected lists are derived from a set, whose iteration order
        # is arbitrary, so compare contents (with multiplicity) rather than
        # element positions.
        self.assertCountEqual(cr.inputs, list(set(self.mock_inputs)))
        self.assertCountEqual(cr.outputs, list(set(self.mock_outputs)))

    def testAddInputsOutputs(self):
        """
        Test adding inputs and outputs as whole lists.
        """
        cr = copy.deepcopy(self.mock_component_run)
        cr.add_inputs(self.mock_inputs)
        cr.add_outputs(self.mock_outputs)

        # Order-insensitive comparison; see testAddInputOutput.
        self.assertCountEqual(cr.inputs, list(set(self.mock_inputs)))
        self.assertCountEqual(cr.outputs, list(set(self.mock_outputs)))

    def testAddDuplicateInputs(self):
        """
        Test that adding the same inputs twice does not duplicate them.
        """
        cr = copy.deepcopy(self.mock_component_run)
        cr.add_inputs(self.mock_inputs)
        cr.add_inputs(self.mock_inputs)

        # assertCountEqual still checks multiplicity, so a duplicated
        # input would fail here.
        self.assertCountEqual(cr.inputs, list(set(self.mock_inputs)))

    def testAddNotes(self):
        """
        Test that a string assigned to notes is read back unchanged.
        """
        cr = copy.deepcopy(self.mock_component_run)
        expected_output = "this is a test note"
        cr.notes = "this is a test note"

        self.assertEqual(cr.notes, expected_output)

    def testAddNotesError(self):
        """
        Test that adding non-str input to the notes attribute
        gives a TypeError
        """
        with self.assertRaises(TypeError):
            self.mock_component_run.notes = ["incorrect_type"]
Beispiel #11
0
def inference(model_files) -> str:
    """Return a random identifier of 10 lowercase ASCII letters.

    ``model_files`` is accepted but not read by this function.
    """
    letters = []
    for _ in range(10):
        letters.append(random.choice(string.ascii_lowercase))
    return "".join(letters)


if __name__ == "__main__":
    # Local import: only this script section needs timedelta.
    from datetime import timedelta

    # Run training once
    version = "0"
    first_model_file = training(version)

    # Fake a component run that started about 2 months (60 days) ago.
    # Subtracting a timedelta is used instead of
    # now.replace(month=now.month - 2), which raises ValueError in
    # January and February (month would be < 1) and on days that do not
    # exist in the target month.
    now = datetime.utcnow()
    cr = ComponentRun(
        "some_old_component",
        start_timestamp=now - timedelta(days=60),
        end_timestamp=now,
    )
    second_model_file = "model_1"
    cr.add_input("1")
    cr.add_output(second_model_file)
    log_component_run(cr)

    # Run training again
    version = "2"
    third_model_file = training(version)

    # Run inference on old model file. This should be stale!
    first_identifier = inference([first_model_file, second_model_file])
    print(first_identifier)
Beispiel #12
0
class TestComponentRun(unittest.TestCase):
    """Unit tests for ComponentRun serialization, timestamps and I/O."""

    def setUp(self):
        """
        Create a bare ComponentRun, the dictionary it is expected to
        serialize to, and mock input/output IOPointers.
        """
        self.mock_component_run = ComponentRun("mock_component_run")
        self.mock_component_run_dict = {
            "component_name": "mock_component_run",
            "inputs": [],
            "outputs": [],
            "git_hash": None,
            "code_snapshot": None,
            "start_timestamp": None,
            "end_timestamp": None,
            "dependencies": [],
            "id": None,
            "stale": [],
        }

        self.mock_inputs = [IOPointer("mock_input_1"), IOPointer("mock_input_2")]
        self.mock_outputs = [IOPointer("mock_output_1"), IOPointer("mock_output_2")]

    def testSerialize(self):
        """
        Test the serialization functionality.
        """
        self.assertEqual(
            self.mock_component_run.to_dictionary(), self.mock_component_run_dict
        )

    def testSetStartEndError(self):
        """
        Test that setting start and end ts as non datetime types throws an error.
        """

        with self.assertRaises(TypeError):
            self.mock_component_run.set_start_timestamp("incorrect_type")

        with self.assertRaises(TypeError):
            self.mock_component_run.set_end_timestamp("incorrect_type")

    def testAddInputOutput(self):
        """
        Test adding inputs and outputs one at a time.
        """
        cr = copy.deepcopy(self.mock_component_run)
        for inp in self.mock_inputs:
            cr.add_input(inp)
        for out in self.mock_outputs:
            cr.add_output(out)

        # The expected lists are derived from a set, whose iteration order
        # is arbitrary, so compare contents (with multiplicity) rather than
        # element positions.
        self.assertCountEqual(cr.inputs, list(set(self.mock_inputs)))
        self.assertCountEqual(cr.outputs, list(set(self.mock_outputs)))

    def testAddInputsOutputs(self):
        """
        Test adding inputs and outputs as whole lists.
        """
        cr = copy.deepcopy(self.mock_component_run)
        cr.add_inputs(self.mock_inputs)
        cr.add_outputs(self.mock_outputs)

        # Order-insensitive comparison; see testAddInputOutput.
        self.assertCountEqual(cr.inputs, list(set(self.mock_inputs)))
        self.assertCountEqual(cr.outputs, list(set(self.mock_outputs)))

    def testAddDuplicateInputs(self):
        """
        Test that adding the same inputs twice does not duplicate them.
        """
        cr = copy.deepcopy(self.mock_component_run)
        cr.add_inputs(self.mock_inputs)
        cr.add_inputs(self.mock_inputs)

        # assertCountEqual still checks multiplicity, so a duplicated
        # input would fail here.
        self.assertCountEqual(cr.inputs, list(set(self.mock_inputs)))
Beispiel #13
0
    def testLogBasicComponentRun(self):
        """Log a run with timestamps, a code snapshot and duplicated I/O names."""
        create_component("test_component", "test_description", "shreya")

        run = ComponentRun(component_name="test_component")
        run.set_start_timestamp()
        run.code_snapshot = b"def main(): return"

        # Each name is deliberately passed twice.
        run.add_inputs(["duplicate_input"] * 2)
        run.add_outputs(["duplicate_output"] * 2)
        run.set_end_timestamp()

        log_component_run(run)