def test_queue_manager_single_tags_logs(compute_adapter_fixture):
    """Per-tag managers only run matching tasks; manager logs record counts/username.

    NOTE(review): renamed from ``test_queue_manager_single_tags`` — a later
    definition in this file reuses that name, which shadowed this test so
    pytest never collected it.
    """
    client, server, adapter = compute_adapter_fixture
    reset_server_database(server)

    manager_stuff = queue.QueueManager(client, adapter, queue_tag="stuff")
    manager_other = queue.QueueManager(client, adapter, queue_tag="other")

    # Add compute
    hooh = ptl.data.get_molecule("hooh.json")
    ret = client.add_compute("rdkit", "UFF", "", "energy", None, [hooh], tag="other")

    # Computer with the incorrect tag
    manager_stuff.await_results()
    ret = client.query_results()
    assert len(ret) == 0

    # Computer with the correct tag
    manager_other.await_results()
    ret = client.query_results()
    assert len(ret) == 1

    # Check the logs to make sure
    managers = server.storage.get_managers()["data"]
    assert len(managers) == 2

    # Each manager's submitted/completed counts should match its tag's workload
    test_results = {"stuff": 0, "other": 1}
    for manager in managers:
        value = test_results[manager["tag"]]
        assert manager["submitted"] == value
        assert manager["completed"] == value
        assert manager["username"] == CLIENT_USERNAME
def test_queue_ordering_priority(fractal_compute_server):
    """High-priority tasks are handed out before earlier default-priority ones."""
    reset_server_database(fractal_compute_server)
    client = ptl.FractalClient(fractal_compute_server)

    mol1 = ptl.Molecule.from_data("He 0 0 0\nHe 0 0 1.1")
    mol2 = ptl.Molecule.from_data("He 0 0 0\nHe 0 0 2.2")
    mol3 = ptl.Molecule.from_data("He 0 0 0\nHe 0 0 3.3")

    # Mixed-case program names and priority spellings should all be accepted
    id_default = client.add_compute("rdkit", "uff", "", "energy", None, mol1).ids[0]
    id_high_first = client.add_compute("RDKIT", "UFF", "", "energy", None, mol2, priority="high").ids[0]
    id_high_second = client.add_compute("RDKIT", "UFF", "", "energy", None, mol3, priority="HIGH").ids[0]

    storage = fractal_compute_server.storage
    dispatched = [
        storage.queue_get_next("manager", [program], [], limit=1)[0].base_result.id
        for program in ("rdkit", "RDkit", "RDKIT")
    ]

    # Both high-priority jobs come out first (in submission order), then the default one
    assert dispatched[0] == id_high_first
    assert dispatched[1] == id_high_second
    assert dispatched[2] == id_default
def test_adapter_error_message(managed_compute_server):
    """A failing RDKit task surfaces its error message to the querying client."""
    client, server, manager = managed_compute_server
    reset_server_database(server)
    manager.heartbeat()  # Re-register with server after clear

    # HOOH without connectivity, RDKit should fail
    hooh = ptl.data.get_molecule("hooh.json").dict()
    del hooh["connectivity"]
    mol_ret = client.add_molecules([hooh])

    ret = client.add_compute("rdkit", "UFF", "", "energy", None, mol_ret)
    queue_id = ret.submitted[0]

    # Nothing should have happened yet
    assert len(manager.list_current_tasks()) == 0

    # Pull out a special iteration on the queue manager
    manager.update()
    assert len(manager.list_current_tasks()) == 1

    manager.await_results()
    assert len(manager.list_current_tasks()) == 0

    records = client.query_results(id=ret.ids)
    assert len(records) == 1

    error = records[0].get_error()
    assert "connectivity graph" in error.error_message
    server.objects["storage_socket"].queue_mark_complete([queue_id])
def test_queue_manager_heartbeat(compute_adapter_fixture):
    """A manager that misses its heartbeat window is flagged INACTIVE by the server."""
    client, _, adapter = compute_adapter_fixture

    with testing.loop_in_thread() as loop:
        # Build server, manually handle IOLoop (no start/stop needed)
        server = FractalServer(
            port=testing.find_open_port(),
            storage_project_name="qcf_heartbeat_checker_test",
            loop=loop,
            ssl_options=False,
            heartbeat_frequency=0.1)

        # Clean and re-init the database
        testing.reset_server_database(server)

        client = ptl.FractalClient(server)
        manager = queue.QueueManager(client, adapter)

        # Freshly-registered manager shows up as ACTIVE
        registered = server.list_managers(name=manager.name())
        assert len(registered) == 1
        assert registered[0]["status"] == "ACTIVE"

        # Make sure interval exceeds heartbeat time
        time.sleep(1)
        server.check_manager_heartbeats()

        registered = server.list_managers(name=manager.name())
        assert len(registered) == 1
        assert registered[0]["status"] == "INACTIVE"
def test_queue_manager_shutdown_lpad(compute_manager_fixture):
    """Tests to ensure tasks are returned to queue when the manager shuts down.

    NOTE(review): renamed from ``test_queue_manager_shutdown`` — a later
    definition in this file reuses that name, shadowing this test so it never
    ran. Also replaced ``portal`` / ``.to_json()`` / ``get_results()`` with
    ``ptl`` / ``.json_dict()`` / ``query_results()``, the API every other test
    in this file uses (``portal`` does not appear to be bound here).
    """
    client, server, lpad = compute_manager_fixture
    reset_server_database(server)

    manager = queue.QueueManager(client, lpad)

    hooh = ptl.data.get_molecule("hooh.json")
    ret = client.add_compute("rdkit", "UFF", "", "energy", None, [hooh.json_dict()], tag="other")

    # Pull job to manager and shutdown
    manager.update()
    assert len(manager.list_current_tasks()) == 1
    manager.shutdown()

    # Boot new manager and await results
    manager = queue.QueueManager(client, lpad)
    manager.await_results()

    ret = client.query_results()
    assert len(ret) == 1
def test_queue_manager_shutdown(compute_adapter_fixture):
    """Tests to ensure tasks are returned to queue when the manager shuts down
    """
    client, server, adapter = compute_adapter_fixture
    reset_server_database(server)

    worker = queue.QueueManager(client, adapter)

    hooh = ptl.data.get_molecule("hooh.json")
    client.add_compute("rdkit", "UFF", "", "energy", None, [hooh.json_dict()], tag="other")

    # Pull job to manager and shutdown
    worker.update()
    assert len(worker.list_current_tasks()) == 1
    assert worker.shutdown()["nshutdown"] == 1

    # Server should now report this manager as inactive
    registered = server.list_managers(name=worker.name())
    assert len(registered) == 1
    assert registered[0]["status"] == "INACTIVE"

    # Boot new manager and await results
    worker = queue.QueueManager(client, adapter)
    worker.await_results()

    completed = client.query_results()
    assert len(completed) == 1
def test_queue_manager_statistics(compute_adapter_fixture, caplog):
    """Test statistics are correctly generated"""
    # Setup manager and add some compute
    client, server, adapter = compute_adapter_fixture
    reset_server_database(server)
    manager = queue.QueueManager(client, adapter, verbose=True)
    hooh = ptl.data.get_molecule("hooh.json")
    client.add_compute("rdkit", "UFF", "", "energy", None, [hooh.json_dict()], tag="other")

    # Set capture level
    with caplog_handler_at_level(caplog, logging.INFO):
        # Pull jobs to manager
        manager.update()

        # Tasks should not have been started yet
        assert "Task statistics unavailable" in caplog.text
        assert "Task Stats: Processed" not in caplog.text

        manager.await_results()

        # Ensure text is at least generated
        assert "Task Stats: Processed" in caplog.text
        assert "Core Usage vs. Max Resources" in caplog.text

        # Ensure some kind of stats are being calculated seemingly correctly
        usage_match = re.search(r'Core Usage Efficiency: (\d+\.\d+)%', caplog.text)
        assert usage_match is not None
        assert float(usage_match.group(1)) != 0.0

        # Clean up capture so it does not flood the output
        caplog.records.clear()
        caplog.handler.records.clear()
def test_queue_error(fractal_compute_server):
    """Errored tasks keep their message in the queue and can be force-completed."""
    reset_server_database(fractal_compute_server)

    client = ptl.FractalClient(fractal_compute_server)
    hooh = ptl.data.get_molecule("hooh.json").json_dict()
    del hooh["connectivity"]

    compute_ret = client.add_compute("rdkit", "UFF", "", "energy", None, hooh)

    # Pull out a special iteration on the queue manager
    fractal_compute_server.update_tasks()
    assert len(fractal_compute_server.list_current_tasks()) == 1
    fractal_compute_server.await_results()
    assert len(fractal_compute_server.list_current_tasks()) == 0

    # Pull from database, raw JSON
    db = fractal_compute_server.objects["storage_socket"]
    errored = db.get_queue(status="ERROR")["data"]
    record = db.get_results(id=compute_ret.ids)['data'][0]

    assert len(errored) == 1
    assert "connectivity graph" in errored[0].error.error_message
    assert record['status'] == 'ERROR'

    # Force a complete mark and test
    db.queue_mark_complete([errored[0].id])
    record = db.get_results(id=compute_ret.ids)['data'][0]
    assert record['status'] == 'COMPLETE'
def test_task_molecule_no_orientation(data, fractal_compute_server):
    """
    Molecule orientation should not change on compute
    """
    # Reset database each run
    reset_server_database(fractal_compute_server)

    client = ptl.FractalClient(fractal_compute_server)

    mol = ptl.Molecule(symbols=["H", "H"], geometry=[0, 0, 0, 0, 5, 0], connectivity=[(0, 1, 1)])
    mol_id = client.add_molecules([mol])[0]

    program, method, basis = data
    submission = client.add_compute(program, method, basis, "energy", None, [mol_id])

    # Manually handle the compute
    fractal_compute_server.await_results()

    # Check for the single result
    records = client.query_results(id=submission.submitted)
    assert len(records) == 1
    assert records[0].status == "COMPLETE"
    assert records[0].molecule == mol_id

    # Make sure no other molecule was added
    found = client.query_molecules(molecular_formula=["H2"])
    assert len(found) == 1
    assert found[0].id == mol_id
def test_queue_query_manager(fractal_compute_server):
    """Tasks become queryable by the manager name that claimed them."""
    reset_server_database(fractal_compute_server)
    client = ptl.FractalClient(fractal_compute_server)

    mol1 = ptl.Molecule.from_data("He 0 0 0\nHe 0 0 1.1")
    mol2 = ptl.Molecule.from_data("He 0 0 0\nHe 0 0 2.2")
    mol3 = ptl.Molecule.from_data("He 0 0 0\nHe 0 0 3.3")

    first_id = client.add_compute("rdkit", "uff", "", "energy", None, mol1).ids[0]
    client.add_compute("RDKIT", "UFF", "", "energy", None, mol2).ids[0]
    client.add_compute("RDKIT", "UFF", "", "energy", None, mol3).ids[0]

    manager = get_manager_name(fractal_compute_server)
    storage = fractal_compute_server.storage

    # Claim one task under this manager name; it is the oldest submission
    storage.queue_get_next(manager, ["rdkit"], [], limit=1)[0]
    claimed = client.query_tasks(manager=manager)
    assert len(claimed) == 1
    assert claimed[0].base_result == first_id

    # Claim the remaining two (case-insensitive program match)
    storage.queue_get_next(manager, ["RDkit"], [], limit=1)[0]
    storage.queue_get_next(manager, ["RDKIT"], [], limit=1)[0]
    claimed = client.query_tasks(manager=manager)
    assert len(claimed) == 3
def test_queue_manager_server_delay(compute_adapter_fixture):
    """Test to ensure interrupts to the server shutdown correctly"""
    client, server, adapter = compute_adapter_fixture
    reset_server_database(server)

    qm = queue.QueueManager(client, adapter, server_error_retries=1)

    hooh = ptl.data.get_molecule("hooh.json")
    client.add_compute("rdkit", "UFF", "", "energy", None, [hooh.json_dict()], tag="other")

    # Pull job to manager and shutdown
    qm.update()
    assert len(qm.list_current_tasks()) == 1

    # Mock a network error
    client._mock_network_error = True

    # Let the calculation finish
    qm.queue_adapter.await_results()

    # Try to push the changes through the network error
    qm.update()
    assert len(qm.list_current_tasks()) == 0
    assert len(qm._stale_payload_tracking) == 1
    assert qm.n_stale_jobs == 0

    # Try again to push the tracked attempts into stale
    qm.update()
    assert len(qm.list_current_tasks()) == 0
    assert len(qm._stale_payload_tracking) == 0
    assert qm.n_stale_jobs == 1

    # Update again to push jobs to stale
    qm.update()

    # Return the jobs to the server
    client._mock_network_error = False
    assert qm.shutdown()["nshutdown"] == 1

    # Once more, but this time restart the server in between
    qm = queue.QueueManager(client, adapter, server_error_retries=1)
    qm.update()
    assert len(qm.list_current_tasks()) == 1
    qm.queue_adapter.await_results()

    # Trigger our failure
    client._mock_network_error = True
    qm.update()
    assert len(qm.list_current_tasks()) == 0
    assert len(qm._stale_payload_tracking) == 1
    assert qm.n_stale_jobs == 0

    # Stop mocking a network error; a retry should now succeed
    client._mock_network_error = False
    qm.update()
    assert len(qm.list_current_tasks()) == 0
    assert len(qm._stale_payload_tracking) == 0
    assert qm.n_stale_jobs == 0
def test_queue_order_procedure_priority(fractal_compute_server):
    """Procedure tasks honor priority and match program/procedure case-insensitively."""
    reset_server_database(fractal_compute_server)
    client = ptl.FractalClient(fractal_compute_server)

    geometric_options = {
        "keywords": None,
        "qc_spec": {
            "driver": "gradient",
            "method": "UFF",
            "basis": "",
            "keywords": None,
            "program": "rdkit"
        },
    }

    mol1 = ptl.Molecule.from_data("He 0 0 0\nHe 0 0 1.1")
    mol2 = ptl.Molecule.from_data("He 0 0 0\nHe 0 0 2.2")
    mol3 = ptl.Molecule.from_data("He 0 0 0\nHe 0 0 3.3")

    # Procedure name is matched case-insensitively on submission
    id_default = client.add_procedure("optimization", "geometric", geometric_options, [mol1]).ids[0]
    id_high_first = client.add_procedure("OPTIMIZATION", "geometric", geometric_options, [mol2], priority="high").ids[0]
    id_high_second = client.add_procedure("OPTimization", "GEOmetric", geometric_options, [mol3], priority="HIGH").ids[0]

    manager = get_manager_name(fractal_compute_server)
    storage = fractal_compute_server.storage

    # Mismatched program/procedure combinations hand out nothing
    assert len(storage.queue_get_next(manager, ["rdkit"], [], limit=1)) == 0
    assert len(storage.queue_get_next(manager, ["rdkit"], ["geom"], limit=1)) == 0
    assert len(storage.queue_get_next(manager, ["prog1"], ["geometric"], limit=1)) == 0

    # High-priority submissions come out first, in submission order
    first = storage.queue_get_next(manager, ["rdkit"], ["geometric"], limit=1)[0].base_result.id
    second = storage.queue_get_next(manager, ["RDKIT"], ["geometric"], limit=1)[0].base_result.id
    third = storage.queue_get_next(manager, ["rdkit"], ["GEOMETRIC"], limit=1)[0].base_result.id

    assert first == id_high_first
    assert second == id_high_second
    assert third == id_default
def test_adapter_single(managed_compute_server):
    """A single tagged compute task is executed by the managed adapter."""
    client, server, manager = managed_compute_server
    reset_server_database(server)

    # Add compute
    hooh = ptl.data.get_molecule("hooh.json")
    client.add_compute("rdkit", "UFF", "", "energy", None, [hooh.json_dict()], tag="other")

    # Force manager compute and get results
    manager.await_results()

    completed = client.query_results()
    assert len(completed) == 1
def test_queue_manager_log_statistics(compute_adapter_fixture, caplog):
    """Test statistics are correctly generated"""
    # Setup manager and add some compute
    client, server, adapter = compute_adapter_fixture
    reset_server_database(server)
    manager = queue.QueueManager(client, adapter, cores_per_task=1, memory_per_task=1, verbose=True)
    hooh = ptl.data.get_molecule("hooh.json")
    client.add_compute("rdkit", "UFF", "", "energy", None, [hooh], tag="other")

    # Set capture level
    with caplog_handler_at_level(caplog, logging.INFO):
        # Pull jobs to manager
        manager.update()

        # Tasks should not have been started yet
        assert "Task statistics unavailable" in caplog.text
        assert "Task Stats: Processed" not in caplog.text

        manager.await_results()

        # Ensure text is at least generated
        assert "Task Stats: Processed" in caplog.text
        assert "Core Usage vs. Max Resources" in caplog.text

        # Ensure some kind of stats are being calculated seemingly correctly
        usage_match = re.search(r"Core Usage Efficiency: (\d+\.\d+)%", caplog.text)
        assert usage_match is not None
        assert float(usage_match.group(1)) != 0.0

        # Clean up capture so it does not flood the output
        caplog.records.clear()
        caplog.handler.records.clear()

    # Record a heartbeat
    timestamp = datetime.datetime.utcnow()
    manager.heartbeat()

    manager_record = server.storage.get_managers()["data"][0]
    all_logs = server.storage.get_manager_logs(manager_record["id"])["data"]

    # Grab just the last log
    recent_logs = server.storage.get_manager_logs(manager_record["id"], timestamp_after=timestamp)["data"]
    assert len(recent_logs) >= 1
    assert len(recent_logs) < len(all_logs)

    state = recent_logs[0]
    assert state["completed"] == 1
    assert state["total_task_walltime"] > 0
    assert state["total_worker_walltime"] > 0
    assert state["total_worker_walltime"] >= state["total_task_walltime"]
    assert state["active_memory"] > 0
def test_adapter_raised_error(managed_compute_server):
    """A compute request the program cannot fulfill produces an error record."""
    client, server, manager = managed_compute_server
    reset_server_database(server)

    # RDKit cannot service a hessian request for this input, so the task should error
    hooh = ptl.data.get_molecule("hooh.json").json_dict()

    ret = client.add_compute("rdkit", "UFF", "", "hessian", None, hooh)
    queue_id = ret.submitted[0]

    manager.await_results()

    records = client.query_results(id=ret.ids)
    assert len(records) == 1

    error = records[0].get_error()
    assert "Error" in error.error_message
    server.objects["storage_socket"].queue_mark_complete([queue_id])
def test_queue_manager_single(compute_manager_fixture):
    """A single tagged compute task completes through a fresh QueueManager.

    NOTE(review): replaced ``portal`` / ``.to_json()`` / ``get_results()``
    with ``ptl`` / ``.json_dict()`` / ``query_results()``, the API every
    other test in this file uses (``portal`` does not appear to be bound
    in this module).
    """
    client, server, lpad = compute_manager_fixture
    reset_server_database(server)

    manager = queue.QueueManager(client, lpad)

    # Add compute
    hooh = ptl.data.get_molecule("hooh.json")
    ret = client.add_compute("rdkit", "UFF", "", "energy", None, [hooh.json_dict()], tag="other")

    # Force manager compute and get results
    manager.await_results()

    ret = client.query_results()
    assert len(ret) == 1
def test_queue_ordering_time(fractal_compute_server):
    """With equal priority, tasks are dispatched in submission (FIFO) order."""
    reset_server_database(fractal_compute_server)
    client = ptl.FractalClient(fractal_compute_server)

    first_mol = ptl.Molecule.from_data("He 0 0 0\nHe 0 0 1.1")
    second_mol = ptl.Molecule.from_data("He 0 0 0\nHe 0 0 2.2")

    first_id = client.add_compute("RDKIT", "UFF", "", "energy", None, first_mol).ids[0]
    second_id = client.add_compute("RDKIT", "UFF", "", "energy", None, second_mol).ids[0]

    manager = get_manager_name(fractal_compute_server)
    storage = fractal_compute_server.storage

    # No available programs -> nothing can be handed out
    assert len(storage.queue_get_next(manager, [], [], limit=1)) == 0

    # Oldest submission comes out first
    assert storage.queue_get_next(manager, ["rdkit"], [], limit=1)[0].base_result.id == first_id
    assert storage.queue_get_next(manager, ["rdkit"], [], limit=1)[0].base_result.id == second_id
def test_queue_duplicate_compute(fractal_compute_server):
    """Re-submitting the same compute (any case or basis spelling) is deduplicated."""
    reset_server_database(fractal_compute_server)
    client = ptl.FractalClient(fractal_compute_server)

    hooh = ptl.data.get_molecule("hooh.json")
    mol_ret = client.add_molecules([hooh])

    submission = client.add_compute("rdkit", "UFF", "", "energy", None, mol_ret)
    assert len(submission.ids) == 1
    assert len(submission.existing) == 0

    # Wait for the compute to execute
    fractal_compute_server.await_results()

    db = fractal_compute_server.objects["storage_socket"]

    # Should catch duplicates both ways
    resubmission = client.add_compute("RDKIT", "uff", None, "energy", None, mol_ret)
    assert len(resubmission.ids) == 1
    assert len(resubmission.existing) == 1

    resubmission = client.add_compute("rdkit", "uFf", "", "energy", None, mol_ret)
    assert len(resubmission.ids) == 1
    assert len(resubmission.existing) == 1

    # Multiple queries
    assert len(client.query_results(program="RDKIT")) == 1
    assert len(client.query_results(program="RDKit")) == 1
    assert len(client.query_results(method="UFF")) == 1
    assert len(client.query_results(method="uff")) == 1
    assert len(client.query_results(basis=None)) == 1
    assert len(client.query_results(basis="")) == 1
    assert len(client.query_results(keywords=None)) == 1
def test_queue_manager_single_tags(compute_adapter_fixture):
    """Managers only pick up tasks whose tag matches their queue_tag."""
    client, server, adapter = compute_adapter_fixture
    reset_server_database(server)

    manager_stuff = queue.QueueManager(client, adapter, queue_tag="stuff")
    manager_other = queue.QueueManager(client, adapter, queue_tag="other")

    # Add compute
    hooh = ptl.data.get_molecule("hooh.json")
    client.add_compute("rdkit", "UFF", "", "energy", None, [hooh.json_dict()], tag="other")

    # Computer with the incorrect tag
    manager_stuff.await_results()
    assert len(client.query_results()) == 0

    # Computer with the correct tag
    manager_other.await_results()
    assert len(client.query_results()) == 1

    # Check the logs to make sure
    manager_logs = server.storage.get_managers()["data"]
    assert len(manager_logs) == 2

    stuff_log = next(x for x in manager_logs if x["tag"] == "stuff")
    assert stuff_log["submitted"] == 0

    other_log = next(x for x in manager_logs if x["tag"] == "other")
    assert other_log["submitted"] == 1
    assert other_log["completed"] == 1
def test_queue_query_tag(fractal_compute_server):
    """Tasks can be queried by one tag, a list of tags, or with no tag filter."""
    reset_server_database(fractal_compute_server)
    client = ptl.FractalClient(fractal_compute_server)

    mol1 = ptl.Molecule.from_data("He 0 0 0\nHe 0 0 1.1")
    mol2 = ptl.Molecule.from_data("He 0 0 0\nHe 0 0 2.2")
    mol3 = ptl.Molecule.from_data("He 0 0 0\nHe 0 0 3.3")

    untagged_id = client.add_compute("rdkit", "uff", "", "energy", None, mol1).ids[0]
    test_id = client.add_compute("RDKIT", "UFF", "", "energy", None, mol2, tag="test").ids[0]
    test2_id = client.add_compute("RDKIT", "UFF", "", "energy", None, mol3, tag="test2").ids[0]

    # Single-tag filter
    single = client.query_tasks(tag="test")
    assert len(single) == 1
    assert single[0].base_result == test_id

    # No filter returns every task
    everything = client.query_tasks()
    assert len(everything) == 3
    assert {task.base_result for task in everything} == {untagged_id, test_id, test2_id}

    # List-of-tags filter
    tagged = client.query_tasks(tag=["test", "test2"])
    assert len(tagged) == 2
    assert {task.base_result for task in tagged} == {test_id, test2_id}
def test_queue_manager_multiple_tags(compute_adapter_fixture):
    """A manager with a tag list drains matching tasks in tag-priority order."""
    client, server, adapter = compute_adapter_fixture
    reset_server_database(server)
    config = {"Hello": "World"}

    base_molecule = ptl.data.get_molecule("butane.json")

    # Add compute
    molecules = [base_molecule.copy(update={"geometry": base_molecule.geometry + 0.1 * i}) for i in range(6)]
    tags = ["tag2", "tag1", "tag1", "tag2", "tag1", "tag3"]
    tasks = [
        client.add_compute("rdkit", "UFF", "", "energy", None, [mol], tag=tag).ids[0]
        for mol, tag in zip(molecules, tags)
    ]

    manager = queue.QueueManager(client, adapter, queue_tag=["tag1", "tag2"], configuration=config, max_tasks=2)

    # Expected per-task status after each of three await_results() passes:
    # "tag1" tasks drain first (two per pass), then "tag2"; "tag3" is never picked up.
    rounds = [
        ["INCOMPLETE", "COMPLETE", "COMPLETE", "INCOMPLETE", "INCOMPLETE", "INCOMPLETE"],
        ["COMPLETE", "COMPLETE", "COMPLETE", "INCOMPLETE", "COMPLETE", "INCOMPLETE"],
        ["COMPLETE", "COMPLETE", "COMPLETE", "COMPLETE", "COMPLETE", "INCOMPLETE"],
    ]

    for round_index, statuses in enumerate(rounds):
        ref_status = dict(zip(tasks, statuses))
        manager.await_results()
        ret = client.query_results(tasks)
        if round_index == 1:
            for result in ret:
                print(f"here you go: {(result.id, result.status)}")
        for result in ret:
            assert result.status == ref_status[result.id]

    # Check that tag list is correctly validated to not include None
    # This could be implemented, but would require greater sophistication
    # in SQLAlchemySocket.queue_get_next()
    with pytest.raises(TypeError):
        queue.QueueManager(client, adapter, queue_tag=["tag1", None])