Beispiel #1
0
def log_component_run(
        component_run: ComponentRun,
        set_dependencies_from_inputs=True,
        staleness_threshold: int = (60 * 60 * 24 * 30),
):
    """Takes client-facing ComponentRun object and logs it to the DB.

    The run is flattened with ``to_dictionary()`` and copied field by field
    onto a fresh store-side run, which is then committed.

    Args:
        component_run: Client-side run to record.
        set_dependencies_from_inputs: When true, the store is asked to set
            dependencies from the run's inputs before the explicitly listed
            dependencies are attached.
        staleness_threshold: Forwarded unchanged to
            ``Store.commit_component_run``. The default equals 30 days if
            the unit is seconds (unit presumed; confirm against Store).

    Raises:
        IndexError: If an explicitly listed dependency has no recorded run.
    """
    store = Store(_db_uri)

    # Flatten the client object once and read every field from the result.
    run_fields = component_run.to_dictionary()

    component_run_sql = store.initialize_empty_component_run(
        component_run.component_name)

    # Timestamps and notes are only copied when they hold a truthy value.
    start_timestamp = run_fields["start_timestamp"]
    if start_timestamp:
        component_run_sql.set_start_timestamp(start_timestamp)

    end_timestamp = run_fields["end_timestamp"]
    if end_timestamp:
        component_run_sql.set_end_timestamp(end_timestamp)

    notes = run_fields["notes"]
    if notes:
        component_run_sql.add_notes(notes)

    # Code-version fields are always copied, whatever their value.
    component_run_sql.set_git_hash(run_fields["git_hash"])
    component_run_sql.set_git_tags(run_fields["git_tags"])
    component_run_sql.set_code_snapshot(run_fields["code_snapshot"])

    def stored_pointers(pointers):
        # Look each client-side pointer up in the store.
        return [
            store.get_io_pointer(pointer.name,
                                 pointer.value,
                                 pointer_type=pointer.pointer_type)
            for pointer in pointers
        ]

    # Add I/O
    component_run_sql.add_inputs(stored_pointers(run_fields["inputs"]))
    component_run_sql.add_outputs(stored_pointers(run_fields["outputs"]))

    # Create component if it does not exist
    create_component(component_run.component_name, "", "")

    # Add dependencies if there is flag to automatically set
    if set_dependencies_from_inputs:
        store.set_dependencies_from_inputs(component_run_sql)

    # Add dependencies explicitly stored in the component run
    for dependency in run_fields["dependencies"]:
        history = store.get_history(dependency, 1)
        if not history:
            # Same exception type as the former bare ``[0]`` lookup, but
            # with a message that names the offending dependency.
            raise IndexError(
                f"Dependency {dependency} has no component runs to "
                "set as upstream.")
        component_run_sql.set_upstream(history[0])

    store.commit_component_run(component_run_sql,
                               staleness_threshold=staleness_threshold)
Beispiel #2
0
class TestDags(unittest.TestCase):
    """Checks Store.trace() on small graphs of component runs.

    The expected run IDs in these tests follow the order in which the runs
    are committed, so that order must be preserved when editing a test.
    """

    def setUp(self):
        self.store = Store("test")

    def _timed_run(self, component_name, *start_args):
        """Start a run of ``component_name`` with both timestamps set.

        ``start_args`` is forwarded to ``set_start_timestamp`` so a test can
        supply an explicit start time.
        """
        run = self.store.initialize_empty_component_run(component_name)
        run.set_start_timestamp(*start_args)
        run.set_end_timestamp()
        return run

    def _link_and_commit(self, run):
        """Set the run's dependencies from its inputs, then commit it."""
        self.store.set_dependencies_from_inputs(run)
        self.store.commit_component_run(run)

    def testLinkedList(self):
        # Create chain of component runs: run i reads iop_i, writes iop_{i+1}
        num_runs = 10
        for i in range(1, num_runs + 1):
            self.store.create_component(f"mock_component_{i}", "", "")
            inp = self.store.get_io_pointer(f"iop_{i}")
            out = self.store.get_io_pointer(f"iop_{i + 1}")
            cr = self._timed_run(f"mock_component_{i}")
            cr.add_input(inp)
            cr.add_output(out)
            self._link_and_commit(cr)

        # Last run first: run i is expected at level num_runs - i
        expected_result = [(num_runs - i, i) for i in range(num_runs, 0, -1)]

        # Trace the final output
        trace = self.store.trace("iop_11")
        level_id = [(level, cr.id) for level, cr in trace]
        self.assertEqual(expected_result, level_id)

    def testVersionedComputation(self):
        # Run the same computation many times
        self.store.create_component("mock_component", "", "")
        num_runs = 10
        for _ in range(num_runs):
            inp = self.store.get_io_pointer("inp")
            out = self.store.get_io_pointer("out")
            cr = self._timed_run("mock_component")
            cr.add_input(inp)
            cr.add_output(out)
            self._link_and_commit(cr)

        # Trace the out pointer. Only most recent run ID should show.
        trace = self.store.trace("out")
        self.assertEqual(len(trace), 1)
        self.assertEqual(trace[0][0], 0)
        self.assertEqual(trace[0][1].id, num_runs)

    def testTree(self):
        # Create a binary tree of component runs, num_levels levels deep
        num_levels = 2
        # The counters are locals shared with create_tree through nonlocal,
        # so the test no longer writes module-level globals.
        cr_counter = 1
        iop_counter = 1

        def create_tree(level, inp):
            nonlocal cr_counter, iop_counter

            if level == num_levels:
                return

            self.store.create_component(f"mock_component_{cr_counter}", "", "")
            cr = self._timed_run(f"mock_component_{cr_counter}")
            cr_counter += 1

            # Create output pointers
            out1 = self.store.get_io_pointer(f"iop_{iop_counter}")
            iop_counter += 1
            out2 = self.store.get_io_pointer(f"iop_{iop_counter}")
            iop_counter += 1

            # Add and commit component run
            cr.add_input(inp)
            cr.add_outputs([out1, out2])
            self._link_and_commit(cr)

            # Create left and right trees
            create_tree(level + 1, out1)
            create_tree(level + 1, out2)

        # Create first input pointer and tree of computation
        inp = self.store.get_io_pointer(f"iop_{iop_counter}")
        iop_counter += 1
        create_tree(0, inp)

        # Grab last iop id and trace it
        last_iop_id = f"iop_{iop_counter - 1}"
        trace = self.store.trace(last_iop_id)
        level_id = [(level, cr.id) for level, cr in trace]
        self.assertEqual(level_id, [(0, 3), (1, 1)])

    def testCycle(self):
        # Create cycle. Since dependencies are versioned, we shouldn't run
        # into problems.
        # Create io pointers and components
        iop1 = self.store.get_io_pointer("iop1")
        iop2 = self.store.get_io_pointer("iop2")
        self.store.create_component("component_1", "", "")
        self.store.create_component("component_2", "", "")

        # component_1 reads iop1 and writes iop2 ...
        cr = self._timed_run("component_1")
        cr.add_input(iop1)
        cr.add_output(iop2)
        self._link_and_commit(cr)

        # ... and component_2 closes the loop by writing iop1 from iop2
        cr = self._timed_run("component_2")
        cr.add_input(iop2)
        cr.add_output(iop1)
        self._link_and_commit(cr)

        # Trace both pointers
        trace_1 = [(level, cr.id) for level, cr in self.store.trace("iop1")]
        trace_2 = [(level, cr.id) for level, cr in self.store.trace("iop2")]
        self.assertEqual(trace_1, [(0, 2), (1, 1)])
        self.assertEqual(trace_2, [(0, 1)])

    def testStaleUpdate(self):
        # Create computation with stale update.
        iop1 = self.store.get_io_pointer("iop1")
        iop2 = self.store.get_io_pointer("iop2")
        iop3 = self.store.get_io_pointer("iop3")
        iop4 = self.store.get_io_pointer("iop4")
        self.store.create_component("component_1", "", "")
        self.store.create_component("component_2", "", "")

        # Create first component run
        cr = self._timed_run("component_1")
        cr.add_input(iop1)
        cr.add_output(iop2)
        self._link_and_commit(cr)

        # Create second component run
        cr = self._timed_run("component_1")
        cr.add_input(iop1)
        cr.add_output(iop3)
        self._link_and_commit(cr)

        # Create third component run that depends on the first (stale update)
        cr = self._timed_run("component_2")
        cr.add_input(iop2)
        cr.add_output(iop4)
        self._link_and_commit(cr)

        # Trace iop4
        trace = [(level, cr.id, cr.stale)
                 for level, cr in self.store.trace("iop4")]
        stale_message = ("component_1 (ID 1) has 1 fresher run that began "
                         "before this component run started.")
        res = [
            (0, 3, [stale_message]),
            (1, 1, []),
        ]
        self.assertEqual(trace, res)

    def testStaleTime(self):
        # Create computation whose dependency started long ago.
        # Imported locally because only ``datetime`` itself is known to be
        # in scope at module level.
        from datetime import timedelta

        iop1 = self.store.get_io_pointer("iop1")
        iop2 = self.store.get_io_pointer("iop2")
        iop3 = self.store.get_io_pointer("iop3")
        self.store.create_component("component_1", "", "")
        self.store.create_component("component_2", "", "")
        now = datetime.utcnow()

        # Start the first run 60 days back. Plain day arithmetic is used
        # because rewriting only the month with ``replace`` raises
        # ValueError whenever today's day number does not exist in the
        # target month (for example the 30th mapped into February).
        start_date = now - timedelta(days=60)
        cr = self._timed_run("component_1", start_date)
        cr.add_input(iop1)
        cr.add_output(iop2)
        self._link_and_commit(cr)

        # Create second component run
        cr = self._timed_run("component_2")
        cr.add_input(iop2)
        cr.add_output(iop3)
        self._link_and_commit(cr)

        # Trace
        trace = [(level, cr.id, cr.stale)
                 for level, cr in self.store.trace("iop3")]
        stale_message = (
            f"component_1 (ID 1) was run {(now - start_date).days} days ago.")
        res = [
            (0, 2, [stale_message]),
            (1, 1, []),
        ]
        self.assertEqual(trace, res)
Beispiel #3
0
class TestStore(unittest.TestCase):
    """Exercises the Store API against the Store("test") built in setUp."""

    def setUp(self):
        self.store = Store("test")

    def _timed_run(self, component_name):
        """Start a run of ``component_name`` with both timestamps set."""
        run = self.store.initialize_empty_component_run(component_name)
        run.set_start_timestamp()
        run.set_end_timestamp()
        return run

    def _link_and_commit(self, run):
        """Set the run's dependencies from its inputs, then commit it."""
        self.store.set_dependencies_from_inputs(run)
        self.store.commit_component_run(run)

    def testComponent(self):
        # A created component can be fetched by name ...
        name, owner = "test_component", "shreya"
        self.store.create_component(name, "test_description", owner)
        fetched = self.store.get_component(name)
        self.assertEqual(fetched.name, name)

        # ... and is the only one listed for its owner
        owned = self.store.get_components(owner=owner)
        self.assertEqual(1, len(owned))

    def testCompleteComponentRun(self):
        # Create component
        component_name = "test_component"
        self.store.create_component(component_name, "test_description",
                                    "shreya")

        # Create and commit a fully populated run
        cr = self._timed_run(component_name)
        cr.add_input(IOPointer("inp"))
        cr.add_output(IOPointer("out"))
        self.store.commit_component_run(cr)

        # The history should hold exactly that run
        history = self.store.get_history(component_name, limit=None)
        self.assertEqual(1, len(history))
        self.assertEqual(history[0], cr)

    def testLogComponentRunWithoutComponentCreated(self):
        # Commit a run for a component that was never created explicitly
        component_name = "test_component_new"
        cr = self._timed_run(component_name)
        cr.add_input(IOPointer("inp"))
        cr.add_output(IOPointer("out"))
        self.store.commit_component_run(cr)

        # The history should hold exactly that run
        history = self.store.get_history(component_name, limit=None)
        self.assertEqual(1, len(history))
        self.assertEqual(history[0], cr)

    def testIncompleteComponentRun(self):
        # Create component
        component_name = "test_component"
        self.store.create_component(component_name, "test_description",
                                    "shreya")

        # A run without timestamps must be rejected on commit
        incomplete = self.store.initialize_empty_component_run(component_name)
        with self.assertRaises(RuntimeError):
            self.store.commit_component_run(incomplete)

    def testTags(self):
        # Create component without tags
        component_name = "test_component"
        self.store.create_component(component_name, "test_description",
                                    "shreya")

        # Add tags
        self.store.add_tags_to_component(component_name, ["tag1", "tag2"])

        # Both tags should come back, in any order
        component = self.store.get_component(component_name)
        tag_names = {t.name for t in component.tags}
        self.assertEqual(component.name, component_name)
        self.assertEqual(tag_names, {"tag1", "tag2"})

    def testDuplicateTags(self):
        # Create component without tags
        component_name = "test_component"
        self.store.create_component(component_name, "test_description",
                                    "shreya")

        # Add the same tag twice
        self.store.add_tags_to_component(component_name, ["tag1", "tag1"])

        # Only one copy of the tag should be stored
        component = self.store.get_component(component_name)
        tag_names = [t.name for t in component.tags]
        self.assertEqual(component.name, component_name)
        self.assertEqual(tag_names, ["tag1"])

    def testIOPointer(self):
        # Looking up a missing pointer without creating it must fail
        with self.assertRaises(RuntimeError):
            self.store.get_io_pointer("iop", create=False)

        # Two lookups of the same name should give equal pointers
        first = self.store.get_io_pointer("iop")
        second = self.store.get_io_pointer("iop")

        self.assertEqual(first, second)

    def testIOPointers(self):
        # Fetch the same 100 names twice and compare the two results
        iop_names = [f"iop_{i}" for i in range(100)]
        first_batch = self.store.get_io_pointers(iop_names)
        second_batch = self.store.get_io_pointers(iop_names)

        self.assertEqual(set(first_batch), set(second_batch))

    def testKVIOPointer(self):
        # Two lookups of the same name/value pair should give equal pointers
        iop_name, iop_value = "name", "value"

        first = self.store.get_io_pointer(iop_name, iop_value)
        second = self.store.get_io_pointer(iop_name, iop_value)

        self.assertEqual(first, second)

    def testSetDependenciesFromInputs(self):
        # Create IO pointers
        inp = self.store.get_io_pointer("inp")
        out = self.store.get_io_pointer("out")
        another_out = self.store.get_io_pointer("another_out")

        # For each output pointer, create the component (again) and commit
        # two runs that both write that pointer
        for produced in (out, another_out):
            self.store.create_component("test_component", "test_description",
                                        "shreya")
            for _ in range(2):
                cr = self._timed_run("test_component")
                cr.add_input(inp)
                cr.add_output(produced)
                self.store.commit_component_run(cr)

        # Create new component run that reads both output pointers
        cr = self._timed_run("test_component")
        cr.add_inputs([out, another_out])
        self._link_and_commit(cr)

        # Retrieve latest component run and check dependencies
        history = self.store.get_history("test_component", limit=None)
        latest_dependencies = history[0].dependencies
        self.assertTrue(history[1] in latest_dependencies)
        self.assertTrue(history[3] in latest_dependencies)

    def _set_up_computation(self):
        # Create dag of computation
        # Create components and IOPointers
        for i in range(1, 5):
            self.store.create_component(f"test_component_{i}",
                                        "test_description", "shreya")

        iop_1, iop_2, iop_3, iop_4 = [
            self.store.get_io_pointer(f"iop_{i}") for i in range(1, 5)
        ]

        # Runs 1 and 2 both write iop_1 and read nothing
        cr1 = self._timed_run("test_component_1")
        cr1.add_output(iop_1)
        self._link_and_commit(cr1)

        cr2 = self._timed_run("test_component_2")
        cr2.add_output(iop_1)
        self._link_and_commit(cr2)

        # Run 3 reads iop_1 and writes iop_2 and iop_3
        cr3 = self._timed_run("test_component_3")
        cr3.add_input(iop_1)
        cr3.add_outputs([iop_2, iop_3])
        self._link_and_commit(cr3)

        # Run 4 reads iop_3 and writes iop_4
        cr4 = self._timed_run("test_component_4")
        cr4.add_input(iop_3)
        cr4.add_output(iop_4)
        self._link_and_commit(cr4)

    def testTrace(self):
        self._set_up_computation()

        # Call trace functionality
        levels_and_ids = [(level, cr.id)
                          for level, cr in self.store.trace("iop_4")]

        self.assertEqual(levels_and_ids, [(0, 4), (1, 3), (2, 2)])

    def testEmptyTrace(self):
        # Both trace flavours must reject an unknown pointer
        for tracer in (self.store.trace, self.store.web_trace):
            with self.assertRaises(RuntimeError):
                tracer("some_weird_pointer")

    def testWebTrace(self):
        self._set_up_computation()

        def pointer_node(name, parent):
            # Leaf entry describing an IO pointer under ``parent``
            return {
                "id": f"iopointer_{name}",
                "label": name,
                "hasCaret": False,
                "parent": parent,
            }

        def run_node(run_id, label, children):
            # Expanded entry describing a component run and its children
            return {
                "id": f"componentrun_{run_id}",
                "label": label,
                "hasCaret": True,
                "isExpanded": True,
                "stale": [],
                "childNodes": children,
            }

        # Call web trace functionality. The ordering is nondeterministic.
        run_2 = run_node(2, "test_component_2",
                         [pointer_node("iop_1", "componentrun_2")])
        run_3 = run_node(3, "test_component_3", [
            pointer_node("iop_2", "componentrun_3"),
            pointer_node("iop_3", "componentrun_3"),
            run_2,
        ])
        run_4 = run_node(4, "test_component_4", [
            pointer_node("iop_4", "componentrun_4"),
            run_3,
        ])
        expected_res = [run_4]
        web_trace = self.store.web_trace("iop_4")

        self.assertEqual(web_trace, expected_res)

    def testBasicFlaggedOutputs(self):
        # Create components and iopointers
        for suffix in ("A", "B"):
            self.store.create_component(f"test_component_{suffix}",
                                        "test_description", "shreya")

        iop = [self.store.get_io_pointer(f"iop_{i}") for i in range(1, 5)]

        # Create component runs
        # First pipeline
        cr_A1 = self._timed_run("test_component_A")
        cr_A1.add_outputs([iop[0], iop[1]])
        self._link_and_commit(cr_A1)
        cr_B1 = self._timed_run("test_component_B")
        cr_B1.add_input(iop[0])
        cr_B1.add_output(iop[2])
        self._link_and_commit(cr_B1)
        # Second pipeline, which builds off iop2
        cr_B2 = self._timed_run("test_component_B")
        cr_B2.add_input(iop[1])
        cr_B2.add_output(iop[3])
        self._link_and_commit(cr_B2)

        # Flag iop_3 and iop_4
        for flagged in ("iop_3", "iop_4"):
            self.store.set_io_pointer_flag(flagged, True)

        # Run diagnose. It should output
        # [component_A, component_B, component_B]'s corresponding run IDs
        _, review = self.store.review_flagged_outputs()
        ids_and_counts = [(cr.id, count) for cr, count in review]
        self.assertEqual(ids_and_counts, [(1, 2), (3, 1), (2, 1)])

    def testManyFlaggedOutputs(self):
        # Create components and iopointers
        for suffix in ("A", "B", "C"):
            self.store.create_component(f"test_component_{suffix}",
                                        "test_description", "shreya")

        iop = [self.store.get_io_pointer(f"iop_{i}") for i in range(1, 8)]

        # Create component runs
        # Shared upstream part: A writes iop_1 and iop_2, B turns iop_1
        # into iop_3
        cr_A1 = self._timed_run("test_component_A")
        cr_A1.add_outputs([iop[0], iop[1]])
        self._link_and_commit(cr_A1)
        cr_B1 = self._timed_run("test_component_B")
        cr_B1.add_input(iop[0])
        cr_B1.add_output(iop[2])
        self._link_and_commit(cr_B1)

        # Four pipelines end in component C: each run reads iop_2 and iop_3
        # and writes its own output pointer (iop_4 .. iop_7)
        for final_output in iop[3:7]:
            cr_C = self._timed_run("test_component_C")
            cr_C.add_inputs([iop[1], iop[2]])
            cr_C.add_output(final_output)
            self._link_and_commit(cr_C)

        # Flag every final output
        for flagged in ("iop_4", "iop_5", "iop_6", "iop_7"):
            self.store.set_io_pointer_flag(flagged, True)

        _, review = self.store.review_flagged_outputs()
        res = [(cr.component_name, cr.id, count) for cr, count in review]
        expected_res = [
            ("test_component_B", 2, 4),
            ("test_component_A", 1, 4),
            ("test_component_C", 6, 1),
            ("test_component_C", 5, 1),
            ("test_component_C", 4, 1),
            ("test_component_C", 3, 1),
        ]
        self.assertEqual(res, expected_res)
Beispiel #4
0
class TestDags(unittest.TestCase):
    """Checks Store.trace() on small graphs of component runs.

    The expected run IDs in these tests follow the order in which the runs
    are committed, so that order must be preserved when editing a test.
    """

    def setUp(self):
        self.store = Store("test")

    def _timed_run(self, component_name):
        """Start a run of ``component_name`` with both timestamps set."""
        run = self.store.initialize_empty_component_run(component_name)
        run.set_start_timestamp()
        run.set_end_timestamp()
        return run

    def _link_and_commit(self, run):
        """Set the run's dependencies from its inputs, then commit it."""
        self.store.set_dependencies_from_inputs(run)
        self.store.commit_component_run(run)

    def testLinkedList(self):
        # Create chain of component runs: run i reads iop_i, writes iop_{i+1}
        num_runs = 10
        for i in range(1, num_runs + 1):
            self.store.create_component(f"mock_component_{i}", "", "")
            inp = self.store.get_io_pointer(f"iop_{i}")
            out = self.store.get_io_pointer(f"iop_{i + 1}")
            cr = self._timed_run(f"mock_component_{i}")
            cr.add_input(inp)
            cr.add_output(out)
            self._link_and_commit(cr)

        # Last run first: run i is expected at level num_runs - i
        expected_result = [(num_runs - i, i) for i in range(num_runs, 0, -1)]

        # Trace the final output
        trace = self.store.trace("iop_11")
        level_id = [(level, cr.id) for level, cr in trace]
        self.assertEqual(expected_result, level_id)

    def testVersionedComputation(self):
        # Run the same computation many times
        self.store.create_component("mock_component", "", "")
        num_runs = 10
        for _ in range(num_runs):
            inp = self.store.get_io_pointer("inp")
            out = self.store.get_io_pointer("out")
            cr = self._timed_run("mock_component")
            cr.add_input(inp)
            cr.add_output(out)
            self._link_and_commit(cr)

        # Trace the out pointer. Only most recent run ID should show.
        trace = self.store.trace("out")
        self.assertEqual(len(trace), 1)
        self.assertEqual(trace[0][0], 0)
        self.assertEqual(trace[0][1].id, num_runs)

    def testTree(self):
        # Create a binary tree of component runs, num_levels levels deep
        num_levels = 2
        # The counters are locals shared with create_tree through nonlocal,
        # so the test no longer writes module-level globals.
        cr_counter = 1
        iop_counter = 1

        def create_tree(level, inp):
            nonlocal cr_counter, iop_counter

            if level == num_levels:
                return

            self.store.create_component(f"mock_component_{cr_counter}", "", "")
            cr = self._timed_run(f"mock_component_{cr_counter}")
            cr_counter += 1

            # Create output pointers
            out1 = self.store.get_io_pointer(f"iop_{iop_counter}")
            iop_counter += 1
            out2 = self.store.get_io_pointer(f"iop_{iop_counter}")
            iop_counter += 1

            # Add and commit component run
            cr.add_input(inp)
            cr.add_outputs([out1, out2])
            self._link_and_commit(cr)

            # Create left and right trees
            create_tree(level + 1, out1)
            create_tree(level + 1, out2)

        # Create first input pointer and tree of computation
        inp = self.store.get_io_pointer(f"iop_{iop_counter}")
        iop_counter += 1
        create_tree(0, inp)

        # Grab last iop id and trace it
        last_iop_id = f"iop_{iop_counter - 1}"
        trace = self.store.trace(last_iop_id)
        level_id = [(level, cr.id) for level, cr in trace]
        self.assertEqual(level_id, [(0, 3), (1, 1)])

    def testCycle(self):
        # Create cycle. Since dependencies are versioned, we shouldn't run
        # into problems.
        # Create io pointers and components
        iop1 = self.store.get_io_pointer("iop1")
        iop2 = self.store.get_io_pointer("iop2")
        self.store.create_component("component_1", "", "")
        self.store.create_component("component_2", "", "")

        # component_1 reads iop1 and writes iop2 ...
        cr = self._timed_run("component_1")
        cr.add_input(iop1)
        cr.add_output(iop2)
        self._link_and_commit(cr)

        # ... and component_2 closes the loop by writing iop1 from iop2
        cr = self._timed_run("component_2")
        cr.add_input(iop2)
        cr.add_output(iop1)
        self._link_and_commit(cr)

        # Trace both pointers
        trace_1 = [(level, cr.id) for level, cr in self.store.trace("iop1")]
        trace_2 = [(level, cr.id) for level, cr in self.store.trace("iop2")]
        self.assertEqual(trace_1, [(0, 2), (1, 1)])
        self.assertEqual(trace_2, [(0, 1)])
Beispiel #5
0
        def wrapper(*args, **kwargs):
            # Runs ``func`` under a temporary tracer, records the call as a
            # component run in the store, and returns ``func``'s result.
            # ``func``, ``component_name``, ``input_vars``, ``output_vars``,
            # ``inputs``, ``outputs`` and ``endpoint`` come from the
            # enclosing scope.

            # Get function information
            filename = inspect.getfile(func)
            function_name = func.__name__

            # Construct component run object
            store = Store(_db_uri)
            component_run = store.initialize_empty_component_run(
                component_name)
            component_run.set_start_timestamp()

            # Extra positional argument passed to the pointer lookups for
            # outputs when ``endpoint`` is set; empty otherwise.
            output_args = (PointerTypeEnum.ENDPOINT, ) if endpoint else ()

            def collect_pointers(var_names, local_vars, extra_args):
                # Look up IO pointers for the named locals of the traced
                # frame, skipping names that are missing or hold None.
                pointers = []
                for var in var_names:
                    if var not in local_vars:
                        logging.debug(
                            f"Variable {var} not in current stack frame.")
                        continue
                    val = local_vars[var]
                    # Identity check: ``==`` may be overloaded by the value's
                    # type and return something that is not a plain bool.
                    if val is None:
                        logging.debug(f"Variable {var} has value {val}.")
                        continue
                    if isinstance(val, list):
                        pointers += store.get_io_pointers(val, *extra_args)
                    else:
                        pointers.append(
                            store.get_io_pointer(str(val), *extra_args))
                return pointers

            # Define trace helper
            def trace_helper(frame, event, arg):
                if event != "return":
                    # Keep this function installed as the local tracer; the
                    # sys.settrace contract treats a None result as a request
                    # to stop tracing the scope.
                    return trace_helper

                logging.info(f"Inspecting {frame.f_code.co_filename}")
                local_vars = frame.f_locals
                # Add input_vars and output_vars as pointers
                input_pointers = collect_pointers(input_vars, local_vars, ())
                output_pointers = collect_pointers(output_vars, local_vars,
                                                   output_args)
                component_run.add_inputs(input_pointers)
                component_run.add_outputs(output_pointers)

            # Define tracer: only calls of ``func`` itself get a local tracer
            def tracer(frame, event, arg):
                if (event == "call"
                        and frame.f_code.co_name == function_name
                        and frame.f_code.co_filename == filename):
                    return trace_helper
                return None

            # Run function under the tracer, then put back whatever tracer
            # was active before instead of unconditionally clearing it.
            previous_tracer = sys.gettrace()
            sys.settrace(tracer)
            try:
                value = func(*args, **kwargs)
            finally:
                sys.settrace(previous_tracer)

            # Log relevant info
            component_run.set_end_timestamp()
            input_pointers = [store.get_io_pointer(inp) for inp in inputs]
            output_pointers = [
                store.get_io_pointer(out, *output_args) for out in outputs
            ]
            component_run.add_inputs(input_pointers)
            component_run.add_outputs(output_pointers)
            store.set_dependencies_from_inputs(component_run)

            # Add code versions. Best effort: any ordinary failure is logged
            # and skipped, but KeyboardInterrupt/SystemExit now propagate.
            try:
                repo = git.Repo(search_parent_directories=True)
                component_run.set_git_hash(str(repo.head.object.hexsha))
            except Exception:
                logging.info("No git repo found.")

            # Add source code if its encoded form is less than 2^16 bytes.
            # UTF-8 yields the same bytes as ASCII for pure-ASCII source and
            # no longer fails on source containing other characters.
            code_snapshot = inspect.getsource(func).encode("utf-8")
            if len(code_snapshot) < 2**16:
                component_run.set_code_snapshot(code_snapshot)

            # Commit component run object to the DB
            store.commit_component_run(component_run)

            return value
Beispiel #6
0
class TestStore(unittest.TestCase):
    """Tests for ``Store``: components, component runs, tags, IOPointers,
    dependency inference and tracing."""

    def setUp(self):
        self.store = Store("test")

    def _create_test_component(self):
        """Register the ``test_component`` component used by these tests."""
        self.store.create_component("test_component", "test_description", "shreya")

    def _new_timed_run(self):
        """Return an uncommitted ``test_component`` run with both timestamps set."""
        cr = self.store.initialize_empty_component_run("test_component")
        cr.set_start_timestamp()
        cr.set_end_timestamp()
        return cr

    def testComponent(self):
        self._create_test_component()
        component = self.store.get_component("test_component")
        self.assertEqual(component.name, "test_component")

        # Retrieve components with owner
        components = self.store.get_components_with_owner("shreya")
        self.assertEqual(1, len(components))

    def testCompleteComponentRun(self):
        # Create component
        self._create_test_component()

        # Create component run
        cr = self._new_timed_run()
        cr.add_input(IOPointer("inp"))
        cr.add_output(IOPointer("out"))
        self.store.commit_component_run(cr)

        # Test retrieval
        component_runs = self.store.get_history("test_component", limit=None)
        self.assertEqual(1, len(component_runs))
        self.assertEqual(component_runs[0], cr)

    def testIncompleteComponentRun(self):
        # Create component
        self._create_test_component()

        # A run without timestamps must be rejected on commit
        cr = self.store.initialize_empty_component_run("test_component")
        with self.assertRaises(RuntimeError):
            self.store.commit_component_run(cr)

    def testTags(self):
        # Create component without tags
        self._create_test_component()

        # Add tags
        self.store.add_tags_to_component("test_component", ["tag1", "tag2"])

        # Test retrieval
        component = self.store.get_component("test_component")
        tags = [t.name for t in component.tags]
        self.assertEqual(component.name, "test_component")
        self.assertEqual(set(tags), {"tag1", "tag2"})

    def testDuplicateTags(self):
        # Create component without tags
        self._create_test_component()

        # Add duplicate tags
        self.store.add_tags_to_component("test_component", ["tag1", "tag1"])

        # Test retrieval: the duplicate must have been collapsed
        component = self.store.get_component("test_component")
        tags = [t.name for t in component.tags]
        self.assertEqual(component.name, "test_component")
        self.assertEqual(tags, ["tag1"])

    def testIOPointer(self):
        # Test there is no IOPointer
        with self.assertRaises(RuntimeError):
            self.store.get_io_pointer("iop", create=False)

        # Fetching the same name twice must yield equal pointers
        iop = self.store.get_io_pointer("iop")
        iop2 = self.store.get_io_pointer("iop")

        self.assertEqual(iop, iop2)

    def testIOPointers(self):
        # Create new IOPointers from scratch
        iop_names = [f"iop_{i}" for i in range(100)]
        iops = self.store.get_io_pointers(iop_names)
        iops2 = self.store.get_io_pointers(iop_names)

        self.assertEqual(set(iops), set(iops2))

    def testSetDependenciesFromInputs(self):
        # Create IO pointers
        inp = self.store.get_io_pointer("inp")
        out = self.store.get_io_pointer("out")
        another_out = self.store.get_io_pointer("another_out")

        # For each output pointer, create two component runs that both
        # produce it ("out" first, then "another_out").
        for produced in (out, another_out):
            self._create_test_component()
            for _ in range(2):
                cr = self._new_timed_run()
                cr.add_input(inp)
                cr.add_output(produced)
                self.store.commit_component_run(cr)

        # Create new component run that depends on both output pointers
        cr = self._new_timed_run()
        cr.add_inputs([out, another_out])
        self.store.set_dependencies_from_inputs(cr)
        self.store.commit_component_run(cr)

        # Retrieve latest component run and check dependencies
        component_runs = self.store.get_history("test_component", limit=None)
        self.assertIn(component_runs[1], component_runs[0].dependencies)
        self.assertIn(component_runs[3], component_runs[0].dependencies)

    def _set_up_computation(self):
        """Commit four runs forming a small DAG over ``iop_1`` .. ``iop_4``.

        Runs 1 and 2 both output ``iop_1``; run 3 reads ``iop_1`` and writes
        ``iop_2`` and ``iop_3``; run 4 reads ``iop_3`` and writes ``iop_4``.
        """
        # Create component and IOPointers
        self._create_test_component()
        iop = [self.store.get_io_pointer(f"iop_{i}") for i in range(1, 5)]

        # Two identical producers of iop_1
        for _ in range(2):
            producer = self._new_timed_run()
            producer.add_output(iop[0])
            self.store.set_dependencies_from_inputs(producer)
            self.store.commit_component_run(producer)

        cr3 = self._new_timed_run()
        cr3.add_input(iop[0])
        cr3.add_outputs([iop[1], iop[2]])
        self.store.set_dependencies_from_inputs(cr3)
        self.store.commit_component_run(cr3)

        cr4 = self._new_timed_run()
        cr4.add_input(iop[2])
        cr4.add_output(iop[3])
        self.store.set_dependencies_from_inputs(cr4)
        self.store.commit_component_run(cr4)

    def testTrace(self):
        self._set_up_computation()

        # Call trace functionality
        trace = self.store.trace("iop_4")
        level_id = [(level, run.id) for level, run in trace]

        self.assertEqual(level_id, [(0, 4), (1, 3), (2, 2)])

    def testEmptyTrace(self):
        with self.assertRaises(RuntimeError):
            self.store.trace("some_weird_pointer")
        with self.assertRaises(RuntimeError):
            self.store.web_trace("some_weird_pointer")

    def testWebTrace(self):
        self._set_up_computation()

        # Call web trace functionality.
        # NOTE(review): this was originally annotated "The ordering is
        # nondeterministic", yet the assertion below compares child order
        # exactly -- confirm whether web_trace guarantees this order.
        expected_res = [
            {
                "id": "componentrun_4",
                "label": "test_component",
                "hasCaret": True,
                "isExpanded": True,
                "childNodes": [
                    {
                        "id": "iopointer_iop_4",
                        "label": "iop_4",
                        "hasCaret": False,
                        "parent": "componentrun_4",
                    },
                    {
                        "id": "componentrun_3",
                        "label": "test_component",
                        "hasCaret": True,
                        "isExpanded": True,
                        "childNodes": [
                            {
                                "id": "iopointer_iop_2",
                                "label": "iop_2",
                                "hasCaret": False,
                                "parent": "componentrun_3",
                            },
                            {
                                "id": "iopointer_iop_3",
                                "label": "iop_3",
                                "hasCaret": False,
                                "parent": "componentrun_3",
                            },
                            {
                                "id": "componentrun_2",
                                "label": "test_component",
                                "hasCaret": True,
                                "isExpanded": True,
                                "childNodes": [
                                    {
                                        "id": "iopointer_iop_1",
                                        "label": "iop_1",
                                        "hasCaret": False,
                                        "parent": "componentrun_2",
                                    }
                                ],
                            },
                        ],
                    },
                ],
            }
        ]
        web_trace = self.store.web_trace("iop_4")

        self.assertEqual(web_trace, expected_res)
Beispiel #7
0
        def wrapper(*args, **kwargs):
            """Run ``func``, then log the call as a component run.

            ``func``'s frame is captured with a temporary ``sys.settrace``
            hook so its locals can be read once it has returned. Input and
            output pointers are collected from the auto-logged arguments and
            locals (when ``auto_log`` is set), the variables named in
            ``input_vars``/``output_vars`` and ``input_kwargs``/
            ``output_kwargs``, the explicit ``inputs``/``outputs``, and the
            ``_mltrace_loaded_artifacts``/``_mltrace_saved_artifacts`` locals.

            Returns:
                Whatever ``func`` returns.

            Raises:
                ValueError: If a configured variable name is missing from
                    ``func``'s locals, or a list-valued key and its value
                    variable differ in length.
            """
            # Get function information
            # NOTE(review): neither name is read below; kept so that
            # inspect.getfile still runs before func does.
            filename = inspect.getfile(func)
            function_name = func.__name__

            # Construct component run object
            store = Store(_db_uri)
            component_run = store.initialize_empty_component_run(
                component_name)
            component_run.set_start_timestamp()

            # Define trace helper
            frame = None
            trace = sys.gettrace()

            def trace_helper(_frame, event, arg):
                """Grab the first "call" frame, then restore the old tracer."""
                nonlocal frame
                if frame is None and event == "call":
                    frame = _frame
                    sys.settrace(trace)
                    return trace

            # Run function under the tracer
            sys.settrace(trace_helper)
            try:
                # merge with existing run
                value = func(*args, **kwargs)
            finally:
                sys.settrace(trace)

            component_run.set_end_timestamp()

            # Do logging here
            logging.info(f"Inspecting {frame.f_code.co_filename}")
            input_pointers = []
            output_pointers = []
            local_vars = frame.f_locals

            # Outputs are requested with the ENDPOINT pointer type only when
            # ``endpoint`` is set; otherwise no pointer_type is passed at all.
            output_type_kw = ({
                "pointer_type": PointerTypeEnum.ENDPOINT
            } if endpoint else {})

            # Auto log inputs
            if auto_log:
                # Defaults first, then positional args, then keyword args, so
                # later sources override earlier ones.
                all_input_args = {
                    k: v.default
                    for k, v in inspect.signature(func).parameters.items()
                    if v.default is not inspect.Parameter.empty
                }
                all_input_args = {
                    **all_input_args,
                    **dict(zip(inspect.getfullargspec(func).args, args)),
                }
                all_input_args = {**all_input_args, **kwargs}
                input_pointers += store.get_io_pointers_from_args(
                    **all_input_args)

            # Add input_vars and output_vars as pointers
            for var in input_vars:
                if var not in local_vars:
                    raise ValueError(
                        f"Variable {var} not in current stack frame.")
                val = local_vars[var]
                if val is None:
                    logging.debug(f"Variable {var} has value {val}.")
                    continue
                if isinstance(val, list):
                    input_pointers += store.get_io_pointers(val)
                else:
                    input_pointers.append(store.get_io_pointer(str(val)))
            for var in output_vars:
                if var not in local_vars:
                    raise ValueError(
                        f"Variable {var} not in current stack frame.")
                val = local_vars[var]
                if val is None:
                    logging.debug(f"Variable {var} has value {val}.")
                    continue
                if isinstance(val, list):
                    output_pointers += store.get_io_pointers(
                        val, **output_type_kw)
                else:
                    output_pointers.append(
                        store.get_io_pointer(str(val), **output_type_kw))

            # Add input_kwargs and output_kwargs as pointers; each entry maps
            # the local holding the pointer name(s) to the local holding the
            # corresponding value(s).
            for key, val in input_kwargs.items():
                if key not in local_vars or val not in local_vars:
                    raise ValueError(
                        f"Variables ({key}, {val}) not in current stack frame."
                    )
                if local_vars[key] is None:
                    logging.debug(
                        f"Variable {key} has value {local_vars[key]}.")
                    continue
                if isinstance(local_vars[key], list):
                    if not isinstance(local_vars[val], list) or len(
                            local_vars[key]) != len(local_vars[val]):
                        raise ValueError(
                            f'Value "{val}" does not have the same length as' +
                            f' the key "{key}."')
                    input_pointers += store.get_io_pointers(
                        local_vars[key], values=local_vars[val])
                else:
                    input_pointers.append(
                        store.get_io_pointer(str(local_vars[key]),
                                             local_vars[val]))
            for key, val in output_kwargs.items():
                if key not in local_vars or val not in local_vars:
                    raise ValueError(
                        f"Variables ({key}, {val}) not in current stack frame."
                    )
                if local_vars[key] is None:
                    logging.debug(
                        f"Variable {key} has value {local_vars[key]}.")
                    continue
                if isinstance(local_vars[key], list):
                    if not isinstance(local_vars[val], list) or len(
                            local_vars[key]) != len(local_vars[val]):
                        raise ValueError(
                            f'Value "{val}" does not have the same length as' +
                            f' the key "{key}."')
                    output_pointers += store.get_io_pointers(
                        local_vars[key], local_vars[val], **output_type_kw)
                else:
                    output_pointers.append(
                        store.get_io_pointer(str(local_vars[key]),
                                             local_vars[val],
                                             **output_type_kw))

            # Directly specified I/O. The inputs are added exactly once, and
            # only when ``inputs`` is not a callable: previously they were
            # also appended unconditionally, which logged every input twice
            # and raised TypeError for a callable ``inputs``.
            if not callable(inputs):
                input_pointers += [store.get_io_pointer(inp) for inp in inputs]
            output_pointers += [
                store.get_io_pointer(out, **output_type_kw) for out in outputs
            ]

            # If there were calls to mltrace.load and mltrace.save, log them
            if "_mltrace_loaded_artifacts" in local_vars:
                input_pointers += [
                    store.get_io_pointer(name, val) for name, val in
                    local_vars["_mltrace_loaded_artifacts"].items()
                ]
            if "_mltrace_saved_artifacts" in local_vars:
                output_pointers += [
                    store.get_io_pointer(name, val) for name, val in
                    local_vars["_mltrace_saved_artifacts"].items()
                ]

            func_source_code = inspect.getsource(func)
            if auto_log:
                # Every local that was not an input argument counts as output
                all_output_args = {
                    k: v
                    for k, v in local_vars.items() if k not in all_input_args
                }
                output_pointers += store.get_io_pointers_from_args(
                    **all_output_args)

            component_run.add_inputs(input_pointers)
            component_run.add_outputs(output_pointers)

            # Add code versions (best effort: any failure is only logged)
            try:
                repo = git.Repo(search_parent_directories=True)
                component_run.set_git_hash(str(repo.head.object.hexsha))
            except Exception:
                logging.info("No git repo found.")

            # Add git tags
            git_tags = get_git_tags()
            if git_tags is not None:
                component_run.set_git_tags(git_tags)

            # Add source code if less than 2^16
            # NOTE(review): the "ascii" encoding raises UnicodeEncodeError for
            # source containing non-ASCII characters -- confirm what the
            # storage layer expects before widening it.
            if len(func_source_code) < 2**16:
                component_run.set_code_snapshot(
                    bytes(func_source_code, "ascii"))

            # Create component if it does not exist
            create_component(component_run.component_name, "", "")

            store.set_dependencies_from_inputs(component_run)

            # Commit component run object to the DB
            store.commit_component_run(component_run,
                                       staleness_threshold=staleness_threshold)

            return value
Beispiel #8
0
            def wrapper(*args, **kwargs):
                """Run ``func`` with before/after tests and log a component run.

                Steps, in order: reject argument names that collide with
                ``key_names``; run ``self.beforeRun`` unless ``skip_before``
                is set; execute ``func`` while capturing its locals (with
                ``mlflow.start_run`` temporarily patched to record the run
                id); collect input/output pointers; record git and source
                information; run ``self.afterRun`` unless ``skip_after`` is
                set; store the test status and commit the run.

                Returns:
                    Whatever ``func`` returns.

                Raises:
                    ValueError: If a name in ``key_names`` is also an
                        argument of ``func``, or a configured variable or
                        label variable is missing from ``func``'s locals.
                """
                # Construct component run object
                store = Store(clientUtils.get_db_uri())
                component_run = store.initialize_empty_component_run(self.name)

                func_arg_names = inspect.getfullargspec(func).args

                # Assert key names are not in args or kwargs
                if (set(key_names) & set(func_arg_names)
                    ) or (set(key_names) & set(kwargs.keys())):
                    raise ValueError(
                        "skip_before or skip_after cannot be in " +
                        f"the arguments of the function {func.__name__}")

                # Make Dictionary of test status
                status = {}

                # Run before tests
                if not user_kwargs.get("skip_before"):
                    all_args = dict(zip(func_arg_names, args))
                    # Rename arguments via inv_user_kwargs where it has an entry
                    all_args = {
                        k if k not in inv_user_kwargs else inv_user_kwargs[k]:
                        v
                        for k, v in all_args.items()
                    }
                    all_args = {**all_args, **kwargs}
                    status.update(self.beforeRun(**all_args))

                # Create input and output pointers
                input_pointers = []
                output_pointers = []

                # Auto log inputs
                if auto_log:
                    # Defaults first, then positional args, then keyword
                    # args, so later sources override earlier ones.
                    all_input_args = {
                        k: v.default
                        for k, v in inspect.signature(func).parameters.items()
                        if v.default is not inspect.Parameter.empty
                    }
                    all_input_args = {
                        **all_input_args,
                        **dict(zip(func_arg_names, args)),
                    }
                    all_input_args = {**all_input_args, **kwargs}
                    input_pointers += store.get_io_pointers_from_args(
                        should_filter=True, **all_input_args)

                def mlflow_start_run_id(*run_args, **run_kwargs):
                    """Forward to the real ``mlflow.start_run`` and remember
                    the id of the run that is active afterwards."""
                    nonlocal mlflow_run_id
                    res = mlflow_start_run_copy(*run_args, **run_kwargs)
                    if mlflow.active_run():
                        mlflow_run_id = mlflow.active_run().info.run_id
                    return res

                # monkey patching mlflow.start_run method
                mlflow_run_id = None
                mlflow_start_run_copy = mlflow.start_run
                mlflow.start_run = mlflow_start_run_id

                try:
                    component_run.set_start_timestamp()
                    # Run function
                    local_vars, value = utils.run_func_capture_locals(
                        func, *args, **kwargs)
                    component_run.set_end_timestamp()
                finally:
                    # Always undo the patch, even when func raises, so the
                    # module is never left with the wrapper installed.
                    mlflow.start_run = mlflow_start_run_copy

                if mlflow_run_id is not None:
                    # Best effort: a failed lookup only produces a warning
                    try:
                        mlflow_run = mlflow.get_run(mlflow_run_id)
                        component_run.set_mlflow_run_id(mlflow_run_id)
                        metrics = mlflow_run.data.metrics
                        params = mlflow_run.data.params
                        component_run.set_mlflow_run_metrics(metrics)
                        component_run.set_mlflow_run_params(params)
                    except Exception:
                        logging.warning(
                            f"Mlflow.get_run {mlflow_run_id} failed.")

                if not callable(input_vars):
                    # Log input and output vars. A non-dict ``input_vars`` is
                    # normalised to {name: None}, i.e. no label variables.
                    duplicate = input_vars
                    if not isinstance(duplicate, dict):
                        duplicate = {vname: None for vname in input_vars}

                    for var, label_vars in duplicate.items():
                        if var not in local_vars:
                            raise ValueError(
                                f"Variable {var} not in current stack frame.")
                        val = local_vars[var]
                        labels = None
                        if label_vars is not None:
                            try:
                                labels = ([
                                    local_vars[lv] for lv in label_vars
                                ] if isinstance(label_vars, list) else
                                          local_vars[label_vars])
                                if isinstance(labels, str):
                                    labels = [labels]
                            except KeyError as err:
                                raise ValueError(
                                    f"Variable {label_vars} not " +
                                    "in current stack frame.") from err
                        if val is None:
                            logging.debug(f"Variable {var} has value {val}.")
                            continue
                        input_pointers += store.get_io_pointers_from_args(
                            should_filter=False, labels=labels, **{var: val})

                    for var in output_vars:
                        if var not in local_vars:
                            raise ValueError(
                                f"Variable {var} not in current stack frame.")
                        val = local_vars[var]
                        if val is None:
                            logging.debug(f"Variable {var} has value {val}.")
                            continue
                        output_pointers += store.get_io_pointers_from_args(
                            should_filter=False, **{var: val})

                # If there were calls to mltrace.load and mltrace.save, log
                # them
                if "_mltrace_loaded_artifacts" in local_vars:
                    input_pointers += [
                        store.get_io_pointer(name, val) for name, val in
                        local_vars["_mltrace_loaded_artifacts"].items()
                    ]
                if "_mltrace_saved_artifacts" in local_vars:
                    output_pointers += [
                        store.get_io_pointer(name, val) for name, val in
                        local_vars["_mltrace_saved_artifacts"].items()
                    ]

                func_source_code = inspect.getsource(func)
                if auto_log:
                    # Every local that was not an input argument counts as
                    # output
                    all_output_args = {
                        k: v
                        for k, v in local_vars.items()
                        if k not in all_input_args
                    }
                    output_pointers += store.get_io_pointers_from_args(
                        should_filter=True, **all_output_args)

                # Check that none of the labels in the inputs are deleted
                store.assert_not_deleted_labels(
                    input_pointers, staleness_threshold=staleness_threshold)
                # Propagate labels
                store.propagate_labels(input_pointers, output_pointers)

                component_run.add_inputs(input_pointers)
                component_run.add_outputs(output_pointers)

                # Add code versions (best effort: any failure is only logged)
                try:
                    repo = git.Repo(search_parent_directories=True)
                    component_run.set_git_hash(str(repo.head.object.hexsha))
                except Exception:
                    logging.info("No git repo found.")

                # Add git tags
                git_tags = client.get_git_tags()
                if git_tags is not None:
                    component_run.set_git_tags(git_tags)

                # Add source code if less than 2^16
                if len(func_source_code) < 2**16:
                    component_run.set_code_snapshot(
                        bytes(func_source_code, "ascii"))

                # Create component if it does not exist
                client.create_component(self.name, self.description,
                                        self.owner, self.tags)

                # Set dependencies
                store.set_dependencies_from_inputs(component_run)

                # Perform after run tests
                if not user_kwargs.get("skip_after"):
                    after_run_args = {
                        k if k not in inv_user_kwargs else inv_user_kwargs[k]:
                        v
                        for k, v in local_vars.items()
                    }
                    status.update(self.afterRun(**after_run_args))

                # update the component's testStatus, convert status to a json
                component_run.set_test_result(status)

                # Commit component run object to the DB
                store.commit_component_run(
                    component_run, staleness_threshold=staleness_threshold)

                return value