예제 #1
0
def review_flagged_outputs():
    """Finds common ComponentRuns for a group of flagged outputs.
    Returns a list of ComponentRuns and occurrence counts in the
    group of flagged outputs, sorted by descending count and then
    alphabetically."""
    store = Store(_db_uri)
    return store.review_flagged_outputs()
예제 #2
0
class TestStore(unittest.TestCase):
    def setUp(self):
        self.store = Store("test")

    def testComponent(self):
        self.store.create_component("test_component", "test_description",
                                    "shreya")
        component = self.store.get_component("test_component")
        self.assertEqual(component.name, "test_component")

        # Retrieve components with owner
        components = self.store.get_components(owner="shreya")
        self.assertEqual(1, len(components))

    def testCompleteComponentRun(self):
        # Create component
        self.store.create_component("test_component", "test_description",
                                    "shreya")

        # Create component run
        cr = self.store.initialize_empty_component_run("test_component")
        cr.set_start_timestamp()
        cr.set_end_timestamp()
        cr.add_input(IOPointer("inp"))
        cr.add_output(IOPointer("out"))
        self.store.commit_component_run(cr)

        # Test retrieval
        component_runs = self.store.get_history("test_component", limit=None)
        self.assertEqual(1, len(component_runs))
        self.assertEqual(component_runs[0], cr)

    def testLogComponentRunWithoutComponentCreated(self):
        # Create a ComponentRun
        cr = self.store.initialize_empty_component_run("test_component_new")
        cr.set_start_timestamp()
        cr.set_end_timestamp()
        cr.add_input(IOPointer("inp"))
        cr.add_output(IOPointer("out"))
        self.store.commit_component_run(cr)

        # Test retrieval
        component_runs = self.store.get_history("test_component_new",
                                                limit=None)
        self.assertEqual(1, len(component_runs))
        self.assertEqual(component_runs[0], cr)

    def testIncompleteComponentRun(self):
        # Create component
        self.store.create_component("test_component", "test_description",
                                    "shreya")

        # Create incomplete component run
        cr = self.store.initialize_empty_component_run("test_component")
        with self.assertRaises(RuntimeError):
            self.store.commit_component_run(cr)

    def testTags(self):
        # Create component without tags
        self.store.create_component("test_component", "test_description",
                                    "shreya")

        # Add tags
        self.store.add_tags_to_component("test_component", ["tag1", "tag2"])

        # Test retrieval
        component = self.store.get_component("test_component")
        tags = [t.name for t in component.tags]
        self.assertEqual(component.name, "test_component")
        self.assertEqual(set(tags), set(["tag1", "tag2"]))

    def testDuplicateTags(self):
        # Create component without tags
        self.store.create_component("test_component", "test_description",
                                    "shreya")

        # Add duplicate tags
        self.store.add_tags_to_component("test_component", ["tag1", "tag1"])

        # Test retrieval
        component = self.store.get_component("test_component")
        tags = [t.name for t in component.tags]
        self.assertEqual(component.name, "test_component")
        self.assertEqual(tags, ["tag1"])

    def testIOPointer(self):
        # Test there is no IOPointer
        with self.assertRaises(RuntimeError):
            self.store.get_io_pointer("iop", create=False)

        # Create IOPointer
        iop = self.store.get_io_pointer("iop")
        iop2 = self.store.get_io_pointer("iop")

        self.assertEqual(iop, iop2)

    def testIOPointers(self):
        # Create new IOPointers from scratch
        iop_names = [f"iop_{i}" for i in range(100)]
        iops = self.store.get_io_pointers(iop_names)
        iops2 = self.store.get_io_pointers(iop_names)

        self.assertEqual(set(iops), set(iops2))

    def testKVIOPointer(self):
        iop_name = "name"
        iop_value = "value"

        iop = self.store.get_io_pointer(iop_name, iop_value)
        iop2 = self.store.get_io_pointer(iop_name, iop_value)

        self.assertEqual(iop, iop2)

    def testSetDependenciesFromInputs(self):
        # Create IO pointers
        inp = self.store.get_io_pointer("inp")
        out = self.store.get_io_pointer("out")
        another_out = self.store.get_io_pointer("another_out")

        # Create two component runs that have the same output
        self.store.create_component("test_component", "test_description",
                                    "shreya")
        for idx in range(2):
            cr = self.store.initialize_empty_component_run("test_component")
            cr.set_start_timestamp()
            cr.set_end_timestamp()
            cr.add_input(inp)
            cr.add_output(out)
            self.store.commit_component_run(cr)

        # Create another two component runs that have the same output
        self.store.create_component("test_component", "test_description",
                                    "shreya")
        for idx in range(2):
            cr = self.store.initialize_empty_component_run("test_component")
            cr.set_start_timestamp()
            cr.set_end_timestamp()
            cr.add_input(inp)
            cr.add_output(another_out)
            self.store.commit_component_run(cr)

        # Create new component run that depends on "out" pointer
        cr = self.store.initialize_empty_component_run("test_component")
        cr.set_start_timestamp()
        cr.set_end_timestamp()
        cr.add_inputs([out, another_out])
        self.store.set_dependencies_from_inputs(cr)
        self.store.commit_component_run(cr)

        # Retrieve latest component run and check dependencies
        component_runs = self.store.get_history("test_component", limit=None)
        self.assertTrue(component_runs[1] in component_runs[0].dependencies)
        self.assertTrue(component_runs[3] in component_runs[0].dependencies)

    def _set_up_computation(self):
        # Create dag of computation
        # Create component and IOPointers
        for i in range(1, 5):
            self.store.create_component(f"test_component_{i}",
                                        "test_description", "shreya")

        iop = [self.store.get_io_pointer(f"iop_{i}") for i in range(1, 5)]

        # Create component runs
        cr1 = self.store.initialize_empty_component_run("test_component_1")
        cr1.set_start_timestamp()
        cr1.set_end_timestamp()
        cr1.add_output(iop[0])
        self.store.set_dependencies_from_inputs(cr1)
        self.store.commit_component_run(cr1)

        cr2 = self.store.initialize_empty_component_run("test_component_2")
        cr2.set_start_timestamp()
        cr2.set_end_timestamp()
        cr2.add_output(iop[0])
        self.store.set_dependencies_from_inputs(cr2)
        self.store.commit_component_run(cr2)

        cr3 = self.store.initialize_empty_component_run("test_component_3")
        cr3.set_start_timestamp()
        cr3.set_end_timestamp()
        cr3.add_input(iop[0])
        cr3.add_outputs([iop[1], iop[2]])
        self.store.set_dependencies_from_inputs(cr3)
        self.store.commit_component_run(cr3)

        cr4 = self.store.initialize_empty_component_run("test_component_4")
        cr4.set_start_timestamp()
        cr4.set_end_timestamp()
        cr4.add_input(iop[2])
        cr4.add_output(iop[3])
        self.store.set_dependencies_from_inputs(cr4)
        self.store.commit_component_run(cr4)

    def testTrace(self):
        self._set_up_computation()

        # Call trace functionality
        trace = self.store.trace("iop_4")
        level_id = [(level, cr.id) for level, cr in trace]

        self.assertEqual(level_id, [(0, 4), (1, 3), (2, 2)])

    def testEmptyTrace(self):
        with self.assertRaises(RuntimeError):
            self.store.trace("some_weird_pointer")
        with self.assertRaises(RuntimeError):
            self.store.web_trace("some_weird_pointer")

    def testWebTrace(self):
        self._set_up_computation()

        # Call web trace functionality. The ordering is nondeterministic.
        expected_res = [{
            "id":
            "componentrun_4",
            "label":
            "test_component_4",
            "hasCaret":
            True,
            "isExpanded":
            True,
            "stale": [],
            "childNodes": [
                {
                    "id": "iopointer_iop_4",
                    "label": "iop_4",
                    "hasCaret": False,
                    "parent": "componentrun_4",
                },
                {
                    "id":
                    "componentrun_3",
                    "label":
                    "test_component_3",
                    "hasCaret":
                    True,
                    "isExpanded":
                    True,
                    "stale": [],
                    "childNodes": [
                        {
                            "id": "iopointer_iop_2",
                            "label": "iop_2",
                            "hasCaret": False,
                            "parent": "componentrun_3",
                        },
                        {
                            "id": "iopointer_iop_3",
                            "label": "iop_3",
                            "hasCaret": False,
                            "parent": "componentrun_3",
                        },
                        {
                            "id":
                            "componentrun_2",
                            "label":
                            "test_component_2",
                            "hasCaret":
                            True,
                            "isExpanded":
                            True,
                            "stale": [],
                            "childNodes": [{
                                "id": "iopointer_iop_1",
                                "label": "iop_1",
                                "hasCaret": False,
                                "parent": "componentrun_2",
                            }],
                        },
                    ],
                },
            ],
        }]
        web_trace = self.store.web_trace("iop_4")

        self.assertEqual(web_trace, expected_res)

    def testBasicFlaggedOutputs(self):
        # Create components and iopointers
        self.store.create_component("test_component_A", "test_description",
                                    "shreya")
        self.store.create_component("test_component_B", "test_description",
                                    "shreya")

        iop = [self.store.get_io_pointer(f"iop_{i}") for i in range(1, 5)]

        # Create component runs
        # First pipeline
        cr_A1 = self.store.initialize_empty_component_run("test_component_A")
        cr_A1.set_start_timestamp()
        cr_A1.set_end_timestamp()
        cr_A1.add_outputs([iop[0], iop[1]])
        self.store.set_dependencies_from_inputs(cr_A1)
        self.store.commit_component_run(cr_A1)
        cr_B1 = self.store.initialize_empty_component_run("test_component_B")
        cr_B1.set_start_timestamp()
        cr_B1.set_end_timestamp()
        cr_B1.add_input(iop[0])
        cr_B1.add_output(iop[2])
        self.store.set_dependencies_from_inputs(cr_B1)
        self.store.commit_component_run(cr_B1)
        # Second pipeline, which builds off iop2
        cr_B2 = self.store.initialize_empty_component_run("test_component_B")
        cr_B2.set_start_timestamp()
        cr_B2.set_end_timestamp()
        cr_B2.add_input(iop[1])
        cr_B2.add_output(iop[3])
        self.store.set_dependencies_from_inputs(cr_B2)
        self.store.commit_component_run(cr_B2)

        # Flag iop_3 and iop_4
        self.store.set_io_pointer_flag("iop_3", True)
        self.store.set_io_pointer_flag("iop_4", True)

        # Run diagnose. It should output
        # [component_A, component_B, component_B]'s corresponding run IDs
        _, res = self.store.review_flagged_outputs()
        res = [(cr.id, count) for cr, count in res]
        expected_res = [(1, 2), (3, 1), (2, 1)]
        self.assertEqual(res, expected_res)

    def testManyFlaggedOutputs(self):
        # Create components and iopointers
        self.store.create_component("test_component_A", "test_description",
                                    "shreya")
        self.store.create_component("test_component_B", "test_description",
                                    "shreya")
        self.store.create_component("test_component_C", "test_description",
                                    "shreya")

        iop = [self.store.get_io_pointer(f"iop_{i}") for i in range(1, 8)]
        # Create component runs
        # First pipeline
        cr_A1 = self.store.initialize_empty_component_run("test_component_A")
        cr_A1.set_start_timestamp()
        cr_A1.set_end_timestamp()
        cr_A1.add_outputs([iop[0], iop[1]])
        self.store.set_dependencies_from_inputs(cr_A1)
        self.store.commit_component_run(cr_A1)
        cr_B1 = self.store.initialize_empty_component_run("test_component_B")
        cr_B1.set_start_timestamp()
        cr_B1.set_end_timestamp()
        cr_B1.add_input(iop[0])
        cr_B1.add_output(iop[2])
        self.store.set_dependencies_from_inputs(cr_B1)
        self.store.commit_component_run(cr_B1)
        cr_C1 = self.store.initialize_empty_component_run("test_component_C")
        cr_C1.set_start_timestamp()
        cr_C1.set_end_timestamp()
        cr_C1.add_inputs([iop[1], iop[2]])
        cr_C1.add_output(iop[3])
        self.store.set_dependencies_from_inputs(cr_C1)
        self.store.commit_component_run(cr_C1)

        # Second pipeline
        cr_C2 = self.store.initialize_empty_component_run("test_component_C")
        cr_C2.set_start_timestamp()
        cr_C2.set_end_timestamp()
        cr_C2.add_inputs([iop[1], iop[2]])
        cr_C2.add_output(iop[4])
        self.store.set_dependencies_from_inputs(cr_C2)
        self.store.commit_component_run(cr_C2)

        # Third pipeline
        cr_C3 = self.store.initialize_empty_component_run("test_component_C")
        cr_C3.set_start_timestamp()
        cr_C3.set_end_timestamp()
        cr_C3.add_inputs([iop[1], iop[2]])
        cr_C3.add_output(iop[5])
        self.store.set_dependencies_from_inputs(cr_C3)
        self.store.commit_component_run(cr_C3)

        # Fourth pipeline
        cr_C4 = self.store.initialize_empty_component_run("test_component_C")
        cr_C4.set_start_timestamp()
        cr_C4.set_end_timestamp()
        cr_C4.add_inputs([iop[1], iop[2]])
        cr_C4.add_output(iop[6])
        self.store.set_dependencies_from_inputs(cr_C4)
        self.store.commit_component_run(cr_C4)

        # Flag
        self.store.set_io_pointer_flag("iop_4", True)
        self.store.set_io_pointer_flag("iop_5", True)
        self.store.set_io_pointer_flag("iop_6", True)
        self.store.set_io_pointer_flag("iop_7", True)

        _, res = self.store.review_flagged_outputs()
        res = [(cr.component_name, cr.id, count) for cr, count in res]
        expected_res = [
            ("test_component_B", 2, 4),
            ("test_component_A", 1, 4),
            ("test_component_C", 6, 1),
            ("test_component_C", 5, 1),
            ("test_component_C", 4, 1),
            ("test_component_C", 3, 1),
        ]
        self.assertEqual(res, expected_res)