class TestStore(unittest.TestCase):
    """Tests for Store: components, component runs, tags, IOPointers,
    dependency resolution and tracing."""

    def setUp(self):
        """Build the ``Store("test")`` instance each test works against."""
        self.store = Store("test")

    def _new_timed_run(self, component_name):
        """Return a new run for ``component_name`` with both timestamps set."""
        component_run = self.store.initialize_empty_component_run(
            component_name)
        component_run.set_start_timestamp()
        component_run.set_end_timestamp()
        return component_run

    def _commit_with_dependencies(self, component_run):
        """Set the run's dependencies from its inputs, then commit it."""
        self.store.set_dependencies_from_inputs(component_run)
        self.store.commit_component_run(component_run)

    def testComponent(self):
        """A created component is retrievable by name and by owner."""
        self.store.create_component(
            "test_component", "test_description", "shreya")
        component = self.store.get_component("test_component")
        self.assertEqual(component.name, "test_component")

        # Retrieve components with owner
        components = self.store.get_components_with_owner("shreya")
        self.assertEqual(1, len(components))

    def testCompleteComponentRun(self):
        """A run with timestamps, an input and an output can be committed
        and is the only entry in the component's history."""
        # Create component
        self.store.create_component(
            "test_component", "test_description", "shreya")

        # Create component run
        cr = self._new_timed_run("test_component")
        cr.add_input(IOPointer("inp"))
        cr.add_output(IOPointer("out"))
        self.store.commit_component_run(cr)

        # Test retrieval
        component_runs = self.store.get_history("test_component", limit=None)
        self.assertEqual(1, len(component_runs))
        self.assertEqual(component_runs[0], cr)

    def testIncompleteComponentRun(self):
        """Committing a freshly initialized run raises RuntimeError."""
        # Create component
        self.store.create_component(
            "test_component", "test_description", "shreya")

        # Create incomplete component run: nothing is set on it before commit
        cr = self.store.initialize_empty_component_run("test_component")
        with self.assertRaises(RuntimeError):
            self.store.commit_component_run(cr)

    def testTags(self):
        """Tags added after creation show up on the retrieved component."""
        # Create component without tags
        self.store.create_component(
            "test_component", "test_description", "shreya")

        # Add tags
        self.store.add_tags_to_component("test_component", ["tag1", "tag2"])

        # Test retrieval; tag order is not checked
        component = self.store.get_component("test_component")
        tags = [t.name for t in component.tags]
        self.assertEqual(component.name, "test_component")
        self.assertEqual(set(tags), {"tag1", "tag2"})

    def testDuplicateTags(self):
        """Adding the same tag name twice leaves a single tag."""
        # Create component without tags
        self.store.create_component(
            "test_component", "test_description", "shreya")

        # Add duplicate tags
        self.store.add_tags_to_component("test_component", ["tag1", "tag1"])

        # Test retrieval
        component = self.store.get_component("test_component")
        tags = [t.name for t in component.tags]
        self.assertEqual(component.name, "test_component")
        self.assertEqual(tags, ["tag1"])

    def testIOPointer(self):
        """get_io_pointer raises with create=False for an unknown name and
        returns equal pointers for repeated lookups of one name."""
        # Test there is no IOPointer
        with self.assertRaises(RuntimeError):
            self.store.get_io_pointer("iop", create=False)

        # Create IOPointer
        iop = self.store.get_io_pointer("iop")
        iop2 = self.store.get_io_pointer("iop")
        self.assertEqual(iop, iop2)

    def testIOPointers(self):
        """Two bulk lookups of the same 100 names yield the same pointers."""
        # Create new IOPointers from scratch
        iop_names = [f"iop_{i}" for i in range(100)]
        iops = self.store.get_io_pointers(iop_names)
        iops2 = self.store.get_io_pointers(iop_names)
        self.assertEqual(set(iops), set(iops2))

    def testSetDependenciesFromInputs(self):
        """A run that consumes two pointers gets runs producing them as
        dependencies."""
        # Create IO pointers
        inp = self.store.get_io_pointer("inp")
        out = self.store.get_io_pointer("out")
        another_out = self.store.get_io_pointer("another_out")

        # For each output pointer, commit two runs that produce it.
        # create_component is called once per batch, so the second call
        # repeats an already-used component name.
        for produced in (out, another_out):
            self.store.create_component(
                "test_component", "test_description", "shreya")
            for _ in range(2):
                cr = self._new_timed_run("test_component")
                cr.add_input(inp)
                cr.add_output(produced)
                self.store.commit_component_run(cr)

        # Create new component run that depends on both output pointers
        cr = self._new_timed_run("test_component")
        cr.add_inputs([out, another_out])
        self._commit_with_dependencies(cr)

        # Retrieve the history and check the dependencies of its first entry.
        # NOTE(review): presumably get_history is newest-first, which would
        # make indices 1 and 3 the latest producer of each pointer - confirm.
        component_runs = self.store.get_history("test_component", limit=None)
        dependencies = component_runs[0].dependencies
        self.assertIn(component_runs[1], dependencies)
        self.assertIn(component_runs[3], dependencies)

    def _set_up_computation(self):
        """Commit four runs of one component forming a small DAG.

        Runs 1 and 2 each output iop_1; run 3 reads iop_1 and outputs
        iop_2 and iop_3; run 4 reads iop_3 and outputs iop_4.
        """
        # Create component and IOPointers
        self.store.create_component(
            "test_component", "test_description", "shreya")
        iop = [self.store.get_io_pointer(f"iop_{i}") for i in range(1, 5)]

        # Create component runs: two identical producers of iop_1 first
        for _ in range(2):
            producer = self._new_timed_run("test_component")
            producer.add_output(iop[0])
            self._commit_with_dependencies(producer)

        cr3 = self._new_timed_run("test_component")
        cr3.add_input(iop[0])
        cr3.add_outputs([iop[1], iop[2]])
        self._commit_with_dependencies(cr3)

        cr4 = self._new_timed_run("test_component")
        cr4.add_input(iop[2])
        cr4.add_output(iop[3])
        self._commit_with_dependencies(cr4)

    def testTrace(self):
        """Tracing iop_4 yields (level, run id) pairs for runs 4, 3 and 2."""
        self._set_up_computation()

        # Call trace functionality
        trace = self.store.trace("iop_4")
        level_id = [(level, cr.id) for level, cr in trace]
        self.assertEqual(level_id, [(0, 4), (1, 3), (2, 2)])

    def testEmptyTrace(self):
        """trace and web_trace raise RuntimeError for an unknown pointer."""
        with self.assertRaises(RuntimeError):
            self.store.trace("some_weird_pointer")
        with self.assertRaises(RuntimeError):
            self.store.web_trace("some_weird_pointer")

    def testWebTrace(self):
        """web_trace for iop_4 returns the expected nested node structure."""
        self._set_up_computation()

        # Call web trace functionality. The ordering is nondeterministic.
        # NOTE(review): the comparison below is order-sensitive for the
        # childNodes lists - confirm what ordering web_trace guarantees.
        expected_res = [
            {
                "id": "componentrun_4",
                "label": "test_component",
                "hasCaret": True,
                "isExpanded": True,
                "childNodes": [
                    {
                        "id": "iopointer_iop_4",
                        "label": "iop_4",
                        "hasCaret": False,
                        "parent": "componentrun_4",
                    },
                    {
                        "id": "componentrun_3",
                        "label": "test_component",
                        "hasCaret": True,
                        "isExpanded": True,
                        "childNodes": [
                            {
                                "id": "iopointer_iop_2",
                                "label": "iop_2",
                                "hasCaret": False,
                                "parent": "componentrun_3",
                            },
                            {
                                "id": "iopointer_iop_3",
                                "label": "iop_3",
                                "hasCaret": False,
                                "parent": "componentrun_3",
                            },
                            {
                                "id": "componentrun_2",
                                "label": "test_component",
                                "hasCaret": True,
                                "isExpanded": True,
                                "childNodes": [
                                    {
                                        "id": "iopointer_iop_1",
                                        "label": "iop_1",
                                        "hasCaret": False,
                                        "parent": "componentrun_2",
                                    }
                                ],
                            },
                        ],
                    },
                ],
            }
        ]
        web_trace = self.store.web_trace("iop_4")
        self.assertEqual(web_trace, expected_res)
class TestStore(unittest.TestCase):
    """Tests for Store: components, component runs, tags, IOPointers,
    dependency resolution, tracing and review of flagged outputs."""

    def setUp(self):
        """Build the ``Store("test")`` instance each test works against."""
        self.store = Store("test")

    def _new_timed_run(self, component_name):
        """Return a new run for ``component_name`` with both timestamps set."""
        component_run = self.store.initialize_empty_component_run(
            component_name)
        component_run.set_start_timestamp()
        component_run.set_end_timestamp()
        return component_run

    def _commit_with_dependencies(self, component_run):
        """Set the run's dependencies from its inputs, then commit it."""
        self.store.set_dependencies_from_inputs(component_run)
        self.store.commit_component_run(component_run)

    def testComponent(self):
        """A created component is retrievable by name and by owner."""
        self.store.create_component(
            "test_component", "test_description", "shreya")
        component = self.store.get_component("test_component")
        self.assertEqual(component.name, "test_component")

        # Retrieve components with owner
        components = self.store.get_components(owner="shreya")
        self.assertEqual(1, len(components))

    def testCompleteComponentRun(self):
        """A run with timestamps, an input and an output can be committed
        and is the only entry in the component's history."""
        # Create component
        self.store.create_component(
            "test_component", "test_description", "shreya")

        # Create component run
        cr = self._new_timed_run("test_component")
        cr.add_input(IOPointer("inp"))
        cr.add_output(IOPointer("out"))
        self.store.commit_component_run(cr)

        # Test retrieval
        component_runs = self.store.get_history("test_component", limit=None)
        self.assertEqual(1, len(component_runs))
        self.assertEqual(component_runs[0], cr)

    def testLogComponentRunWithoutComponentCreated(self):
        """A run can be committed and read back even though
        create_component was never called for its component name."""
        # Create a ComponentRun
        cr = self._new_timed_run("test_component_new")
        cr.add_input(IOPointer("inp"))
        cr.add_output(IOPointer("out"))
        self.store.commit_component_run(cr)

        # Test retrieval
        component_runs = self.store.get_history(
            "test_component_new", limit=None)
        self.assertEqual(1, len(component_runs))
        self.assertEqual(component_runs[0], cr)

    def testIncompleteComponentRun(self):
        """Committing a freshly initialized run raises RuntimeError."""
        # Create component
        self.store.create_component(
            "test_component", "test_description", "shreya")

        # Create incomplete component run: nothing is set on it before commit
        cr = self.store.initialize_empty_component_run("test_component")
        with self.assertRaises(RuntimeError):
            self.store.commit_component_run(cr)

    def testTags(self):
        """Tags added after creation show up on the retrieved component."""
        # Create component without tags
        self.store.create_component(
            "test_component", "test_description", "shreya")

        # Add tags
        self.store.add_tags_to_component("test_component", ["tag1", "tag2"])

        # Test retrieval; tag order is not checked
        component = self.store.get_component("test_component")
        tags = [t.name for t in component.tags]
        self.assertEqual(component.name, "test_component")
        self.assertEqual(set(tags), {"tag1", "tag2"})

    def testDuplicateTags(self):
        """Adding the same tag name twice leaves a single tag."""
        # Create component without tags
        self.store.create_component(
            "test_component", "test_description", "shreya")

        # Add duplicate tags
        self.store.add_tags_to_component("test_component", ["tag1", "tag1"])

        # Test retrieval
        component = self.store.get_component("test_component")
        tags = [t.name for t in component.tags]
        self.assertEqual(component.name, "test_component")
        self.assertEqual(tags, ["tag1"])

    def testIOPointer(self):
        """get_io_pointer raises with create=False for an unknown name and
        returns equal pointers for repeated lookups of one name."""
        # Test there is no IOPointer
        with self.assertRaises(RuntimeError):
            self.store.get_io_pointer("iop", create=False)

        # Create IOPointer
        iop = self.store.get_io_pointer("iop")
        iop2 = self.store.get_io_pointer("iop")
        self.assertEqual(iop, iop2)

    def testIOPointers(self):
        """Two bulk lookups of the same 100 names yield the same pointers."""
        # Create new IOPointers from scratch
        iop_names = [f"iop_{i}" for i in range(100)]
        iops = self.store.get_io_pointers(iop_names)
        iops2 = self.store.get_io_pointers(iop_names)
        self.assertEqual(set(iops), set(iops2))

    def testKVIOPointer(self):
        """Repeated lookups with the same name and value give equal
        pointers."""
        iop_name = "name"
        iop_value = "value"

        iop = self.store.get_io_pointer(iop_name, iop_value)
        iop2 = self.store.get_io_pointer(iop_name, iop_value)

        self.assertEqual(iop, iop2)

    def testSetDependenciesFromInputs(self):
        """A run that consumes two pointers gets runs producing them as
        dependencies."""
        # Create IO pointers
        inp = self.store.get_io_pointer("inp")
        out = self.store.get_io_pointer("out")
        another_out = self.store.get_io_pointer("another_out")

        # For each output pointer, commit two runs that produce it.
        # create_component is called once per batch, so the second call
        # repeats an already-used component name.
        for produced in (out, another_out):
            self.store.create_component(
                "test_component", "test_description", "shreya")
            for _ in range(2):
                cr = self._new_timed_run("test_component")
                cr.add_input(inp)
                cr.add_output(produced)
                self.store.commit_component_run(cr)

        # Create new component run that depends on both output pointers
        cr = self._new_timed_run("test_component")
        cr.add_inputs([out, another_out])
        self._commit_with_dependencies(cr)

        # Retrieve the history and check the dependencies of its first entry.
        # NOTE(review): presumably get_history is newest-first, which would
        # make indices 1 and 3 the latest producer of each pointer - confirm.
        component_runs = self.store.get_history("test_component", limit=None)
        dependencies = component_runs[0].dependencies
        self.assertIn(component_runs[1], dependencies)
        self.assertIn(component_runs[3], dependencies)

    def _set_up_computation(self):
        """Commit one run for each of four components, forming a small DAG.

        Runs of test_component_1 and test_component_2 each output iop_1;
        test_component_3 reads iop_1 and outputs iop_2 and iop_3;
        test_component_4 reads iop_3 and outputs iop_4.
        """
        # Create components and IOPointers
        for i in range(1, 5):
            self.store.create_component(
                f"test_component_{i}", "test_description", "shreya")
        iop = [self.store.get_io_pointer(f"iop_{i}") for i in range(1, 5)]

        # Create component runs: two producers of iop_1 first
        for producer_name in ("test_component_1", "test_component_2"):
            producer = self._new_timed_run(producer_name)
            producer.add_output(iop[0])
            self._commit_with_dependencies(producer)

        cr3 = self._new_timed_run("test_component_3")
        cr3.add_input(iop[0])
        cr3.add_outputs([iop[1], iop[2]])
        self._commit_with_dependencies(cr3)

        cr4 = self._new_timed_run("test_component_4")
        cr4.add_input(iop[2])
        cr4.add_output(iop[3])
        self._commit_with_dependencies(cr4)

    def testTrace(self):
        """Tracing iop_4 yields (level, run id) pairs for runs 4, 3 and 2."""
        self._set_up_computation()

        # Call trace functionality
        trace = self.store.trace("iop_4")
        level_id = [(level, cr.id) for level, cr in trace]
        self.assertEqual(level_id, [(0, 4), (1, 3), (2, 2)])

    def testEmptyTrace(self):
        """trace and web_trace raise RuntimeError for an unknown pointer."""
        with self.assertRaises(RuntimeError):
            self.store.trace("some_weird_pointer")
        with self.assertRaises(RuntimeError):
            self.store.web_trace("some_weird_pointer")

    def testWebTrace(self):
        """web_trace for iop_4 returns the expected nested node structure."""
        self._set_up_computation()

        # Call web trace functionality. The ordering is nondeterministic.
        # NOTE(review): the comparison below is order-sensitive for the
        # childNodes lists - confirm what ordering web_trace guarantees.
        expected_res = [{
            "id": "componentrun_4",
            "label": "test_component_4",
            "hasCaret": True,
            "isExpanded": True,
            "stale": [],
            "childNodes": [
                {
                    "id": "iopointer_iop_4",
                    "label": "iop_4",
                    "hasCaret": False,
                    "parent": "componentrun_4",
                },
                {
                    "id": "componentrun_3",
                    "label": "test_component_3",
                    "hasCaret": True,
                    "isExpanded": True,
                    "stale": [],
                    "childNodes": [
                        {
                            "id": "iopointer_iop_2",
                            "label": "iop_2",
                            "hasCaret": False,
                            "parent": "componentrun_3",
                        },
                        {
                            "id": "iopointer_iop_3",
                            "label": "iop_3",
                            "hasCaret": False,
                            "parent": "componentrun_3",
                        },
                        {
                            "id": "componentrun_2",
                            "label": "test_component_2",
                            "hasCaret": True,
                            "isExpanded": True,
                            "stale": [],
                            "childNodes": [{
                                "id": "iopointer_iop_1",
                                "label": "iop_1",
                                "hasCaret": False,
                                "parent": "componentrun_2",
                            }],
                        },
                    ],
                },
            ],
        }]
        web_trace = self.store.web_trace("iop_4")
        self.assertEqual(web_trace, expected_res)

    def testBasicFlaggedOutputs(self):
        """With two outputs flagged, review_flagged_outputs reports the
        expected (run id, count) pairs in the expected order."""
        # Create components and iopointers
        self.store.create_component(
            "test_component_A", "test_description", "shreya")
        self.store.create_component(
            "test_component_B", "test_description", "shreya")
        iop = [self.store.get_io_pointer(f"iop_{i}") for i in range(1, 5)]

        # Create component runs
        # First pipeline
        cr_A1 = self._new_timed_run("test_component_A")
        cr_A1.add_outputs([iop[0], iop[1]])
        self._commit_with_dependencies(cr_A1)

        cr_B1 = self._new_timed_run("test_component_B")
        cr_B1.add_input(iop[0])
        cr_B1.add_output(iop[2])
        self._commit_with_dependencies(cr_B1)

        # Second pipeline, which builds off iop_2
        cr_B2 = self._new_timed_run("test_component_B")
        cr_B2.add_input(iop[1])
        cr_B2.add_output(iop[3])
        self._commit_with_dependencies(cr_B2)

        # Flag iop_3 and iop_4
        self.store.set_io_pointer_flag("iop_3", True)
        self.store.set_io_pointer_flag("iop_4", True)

        # Run diagnose. It should output
        # [component_A, component_B, component_B]'s corresponding run IDs
        _, res = self.store.review_flagged_outputs()
        res = [(cr.id, count) for cr, count in res]
        expected_res = [(1, 2), (3, 1), (2, 1)]
        self.assertEqual(res, expected_res)

    def testManyFlaggedOutputs(self):
        """With four outputs of component C flagged, review_flagged_outputs
        reports the expected (component, run id, count) triples."""
        # Create components and iopointers
        for suffix in ("A", "B", "C"):
            self.store.create_component(
                f"test_component_{suffix}", "test_description", "shreya")
        iop = [self.store.get_io_pointer(f"iop_{i}") for i in range(1, 8)]

        # Create component runs
        # First pipeline: A and B feed the first run of C
        cr_A1 = self._new_timed_run("test_component_A")
        cr_A1.add_outputs([iop[0], iop[1]])
        self._commit_with_dependencies(cr_A1)

        cr_B1 = self._new_timed_run("test_component_B")
        cr_B1.add_input(iop[0])
        cr_B1.add_output(iop[2])
        self._commit_with_dependencies(cr_B1)

        # Four runs of C (one per pipeline) read the same two inputs and
        # write iop_4 .. iop_7 respectively.
        for produced in iop[3:7]:
            cr_C = self._new_timed_run("test_component_C")
            cr_C.add_inputs([iop[1], iop[2]])
            cr_C.add_output(produced)
            self._commit_with_dependencies(cr_C)

        # Flag
        for flagged_name in ("iop_4", "iop_5", "iop_6", "iop_7"):
            self.store.set_io_pointer_flag(flagged_name, True)

        _, res = self.store.review_flagged_outputs()
        res = [(cr.component_name, cr.id, count) for cr, count in res]
        expected_res = [
            ("test_component_B", 2, 4),
            ("test_component_A", 1, 4),
            ("test_component_C", 6, 1),
            ("test_component_C", 5, 1),
            ("test_component_C", 4, 1),
            ("test_component_C", 3, 1),
        ]
        self.assertEqual(res, expected_res)
def wrapper(*args, **kwargs):
    """Call ``func(*args, **kwargs)`` and log the call as a component run.

    The call runs under a temporary ``sys.settrace`` hook that captures
    the frame of the first traced call, so the locals of that frame can
    be inspected once the call has finished. Input and output IOPointers
    are then collected from the call arguments and remaining locals (when
    ``auto_log`` is set), from the locals named by ``input_vars`` /
    ``output_vars`` / ``input_kwargs`` / ``output_kwargs``, from the
    directly specified ``inputs`` / ``outputs``, and from the
    ``_mltrace_loaded_artifacts`` / ``_mltrace_saved_artifacts`` locals
    if present. Finally the run is annotated with git information and a
    source snapshot and committed to the store.

    Every configuration name used here (``func``, ``component_name``,
    ``_db_uri``, ``auto_log``, ``endpoint``, ``staleness_threshold``, ...)
    is bound outside this function.

    Returns:
        Whatever ``func`` returned.

    Raises:
        ValueError: if a name listed in ``input_vars``, ``output_vars``,
            ``input_kwargs`` or ``output_kwargs`` is not a local of the
            captured frame, or a key list and its value list differ in
            type or length.
    """
    # Construct component run object
    store = Store(_db_uri)
    component_run = store.initialize_empty_component_run(component_name)
    component_run.set_start_timestamp()

    # Define trace helper
    frame = None
    trace = sys.gettrace()

    def trace_helper(_frame, event, arg):
        # Grab the first "call" frame, then hand tracing straight back to
        # whatever tracer was installed before.
        nonlocal frame
        if frame is None and event == "call":
            frame = _frame
            sys.settrace(trace)
            return trace

    # Run function under the tracer
    sys.settrace(trace_helper)
    try:
        # merge with existing run
        value = func(*args, **kwargs)
    finally:
        # Restore the previous tracer even when func raises.
        sys.settrace(trace)

    component_run.set_end_timestamp()

    # Do logging here
    logging.info(f"Inspecting {frame.f_code.co_filename}")
    input_pointers = []
    output_pointers = []
    local_vars = frame.f_locals

    # Outputs are requested with the ENDPOINT pointer type only when
    # ``endpoint`` is set; otherwise no pointer_type argument is passed.
    endpoint_kwargs = (
        {"pointer_type": PointerTypeEnum.ENDPOINT} if endpoint else {})

    # Auto log inputs
    if auto_log:
        # Get IOPointers corresponding to args and f_locals. Later sources
        # override earlier ones: defaults < positional args < kwargs.
        all_input_args = {
            k: v.default
            for k, v in inspect.signature(func).parameters.items()
            if v.default is not inspect.Parameter.empty
        }
        all_input_args = {
            **all_input_args,
            **dict(zip(inspect.getfullargspec(func).args, args)),
        }
        all_input_args = {**all_input_args, **kwargs}
        input_pointers += store.get_io_pointers_from_args(**all_input_args)

    # Add input_vars and output_vars as pointers
    for var in input_vars:
        if var not in local_vars:
            raise ValueError(f"Variable {var} not in current stack frame.")
        val = local_vars[var]
        if val is None:
            logging.debug(f"Variable {var} has value {val}.")
            continue
        if isinstance(val, list):
            input_pointers += store.get_io_pointers(val)
        else:
            input_pointers.append(store.get_io_pointer(str(val)))

    for var in output_vars:
        if var not in local_vars:
            raise ValueError(f"Variable {var} not in current stack frame.")
        val = local_vars[var]
        if val is None:
            logging.debug(f"Variable {var} has value {val}.")
            continue
        if isinstance(val, list):
            output_pointers += store.get_io_pointers(val, **endpoint_kwargs)
        else:
            output_pointers.append(
                store.get_io_pointer(str(val), **endpoint_kwargs))

    # Add input_kwargs and output_kwargs as pointers. Each entry maps the
    # name of a local holding pointer name(s) to the name of a local
    # holding the matching value(s).
    for key, val in input_kwargs.items():
        if key not in local_vars or val not in local_vars:
            raise ValueError(
                f"Variables ({key}, {val}) not in current stack frame."
            )
        names = local_vars[key]
        values = local_vars[val]
        if names is None:
            logging.debug(f"Variable {key} has value {names}.")
            continue
        if isinstance(names, list):
            if not isinstance(values, list) or len(names) != len(values):
                raise ValueError(
                    f'Value "{val}" does not have the same length as'
                    + f' the key "{key}."')
            input_pointers += store.get_io_pointers(names, values=values)
        else:
            input_pointers.append(store.get_io_pointer(str(names), values))

    for key, val in output_kwargs.items():
        if key not in local_vars or val not in local_vars:
            raise ValueError(
                f"Variables ({key}, {val}) not in current stack frame."
            )
        names = local_vars[key]
        values = local_vars[val]
        if names is None:
            logging.debug(f"Variable {key} has value {names}.")
            continue
        if isinstance(names, list):
            if not isinstance(values, list) or len(names) != len(values):
                raise ValueError(
                    f'Value "{val}" does not have the same length as'
                    + f' the key "{key}."')
            output_pointers += store.get_io_pointers(
                names, values, **endpoint_kwargs)
        else:
            output_pointers.append(
                store.get_io_pointer(str(names), values, **endpoint_kwargs))

    # Directly specified I/O. Inputs are added once, and only when
    # ``inputs`` is not a callable (a callable cannot be iterated).
    if not callable(inputs):
        input_pointers += [store.get_io_pointer(inp) for inp in inputs]
    # NOTE(review): ``outputs`` has no matching callable guard - confirm
    # whether it can ever be a callable as well.
    output_pointers += [
        store.get_io_pointer(out, **endpoint_kwargs) for out in outputs
    ]

    # If there were calls to mltrace.load and mltrace.save, log them
    if "_mltrace_loaded_artifacts" in local_vars:
        input_pointers += [
            store.get_io_pointer(name, val)
            for name, val in local_vars["_mltrace_loaded_artifacts"].items()
        ]
    if "_mltrace_saved_artifacts" in local_vars:
        output_pointers += [
            store.get_io_pointer(name, val)
            for name, val in local_vars["_mltrace_saved_artifacts"].items()
        ]

    func_source_code = inspect.getsource(func)
    if auto_log:
        # Get IOPointers corresponding to args and f_locals: every local
        # that was not one of the call's input arguments is an output.
        all_output_args = {
            k: v
            for k, v in local_vars.items() if k not in all_input_args
        }
        output_pointers += store.get_io_pointers_from_args(**all_output_args)

    component_run.add_inputs(input_pointers)
    component_run.add_outputs(output_pointers)

    # Add code versions. Deliberately best-effort: any failure to read the
    # repository is logged and the run is recorded without a git hash.
    try:
        repo = git.Repo(search_parent_directories=True)
        component_run.set_git_hash(str(repo.head.object.hexsha))
    except Exception:
        logging.info("No git repo found.")

    # Add git tags
    git_tags = get_git_tags()
    if git_tags is not None:
        component_run.set_git_tags(git_tags)

    # Add source code if less than 2^16 bytes. UTF-8 yields the same bytes
    # as ASCII for pure-ASCII source, but does not raise on non-ASCII
    # characters in the function's source.
    # NOTE(review): confirm that readers of the snapshot decode it as UTF-8.
    code_snapshot = func_source_code.encode("utf-8")
    if len(code_snapshot) < 2**16:
        component_run.set_code_snapshot(code_snapshot)

    # Create component if it does not exist
    create_component(component_run.component_name, "", "")

    store.set_dependencies_from_inputs(component_run)

    # Commit component run object to the DB
    store.commit_component_run(component_run,
                               staleness_threshold=staleness_threshold)

    return value