Example No. 1
    def restart_torsiondrives(self, torsiondrive_ids: List[int],
                              client: FractalClient) -> None:
        """
        Restart all torsiondrive records.
        """
        for td in torsiondrive_ids:
            client.modify_services("restart", procedure_id=td)
Example No. 2
    def restart_optimizations(self, optimization_ids: List[int],
                              client: FractalClient) -> None:
        """
        Restart all optimizations.
        """
        for opt in optimization_ids:
            client.modify_tasks(operation="restart", base_result=opt)
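For context, these two helpers wrap qcportal's FractalClient restart calls: modify_services restarts the service loop behind a torsiondrive, while modify_tasks restarts a queued task. A minimal standalone sketch, assuming a local server address and hypothetical record ids:

from qcportal import FractalClient

# connect to a local QCFractal server (hypothetical address)
client = FractalClient("localhost:7777", verify=False)
# restart the service driving torsiondrive record "123" (hypothetical id)
client.modify_services("restart", procedure_id="123")
# restart the task backing optimization record "456" (hypothetical id)
client.modify_tasks(operation="restart", base_result="456")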
Example No. 3
    def __init__(self,
                 coll_name: str,
                 qc_spec: str,
                 base_class: Type[Collection] = Dataset,
                 address: str = "localhost:7874",
                 qca_passwd: Optional[str] = None,
                 create: bool = False):
        """Open the geometry computation dataset

        Args:
            address: Address for the QCFractal server
            base_class: Type of the collection
            qc_spec: Name of the QC specification
            coll_name: Name of the collection holding the data
            qca_passwd: Password for the QCFractal server
            create: Whether creating a new collection is acceptable
        """
        if qca_passwd is None:
            qca_passwd = os.environ.get("QCAUSR", None)
        self.qc_spec = qc_spec
        self.client = FractalClient(address,
                                    username='******',
                                    password=qca_passwd,
                                    verify=False)
        try:
            self.coll = base_class.from_server(name=coll_name,
                                               client=self.client)
        except KeyError as ex:
            if create:
                self.coll = base_class(name=coll_name, client=self.client)
                self.coll.save()
            else:
                raise ex
Example No. 4
def live_fractal_or_skip():
    """
    Ensure a live Fractal connection can be made.
    First looks for a local staging server, then tries QCArchive.
    """
    try:
        return FractalClient("localhost:7777", verify=False)
    except (requests.exceptions.ConnectionError, ConnectionRefusedError):
        print("Failed to connect to localhost, trying MolSSI QCArchive.")
        try:
            requests.get("https://api.qcarchive.molssi.org:443", json={}, timeout=5)
            return FractalClient()
        except (requests.exceptions.ConnectionError, ConnectionRefusedError):
            return pytest.skip("Could not make a connection to central Fractal server")
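A helper like this is typically wrapped in a fixture so that dependent tests are skipped automatically; a minimal sketch, with the fixture name being an assumption:

import pytest

@pytest.fixture(scope="session")
def public_client():
    # skips every dependent test when neither server is reachable
    return live_fractal_or_skip()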
Example No. 5
def test_collecting_results():
    """
    Make sure that tasks are collected correctly from a QCArchive instance.
    """

    # connect to the public database
    client = FractalClient()
    biphenyl = Molecule.from_file(file_path=get_data("biphenyl.sdf"),
                                  file_format="sdf")
    # now make the schema
    schema = get_fitting_schema(molecules=biphenyl)

    # now submit to the executor
    executor = Executor()
    # change to make sure we search the correct dataset
    executor._dataset_name = "OpenFF-benchmark-ligand-fragments-v1.0"
    # fake a collection dict
    to_collect = {
        "torsion1d": {
            "default": [
                "[h]c1c([c:1]([c:2](c(c1[h])[h])[c:3]2[c:4](c(c(c(c2[h])cl)[h])[h])[h])[h])[h]",
            ]
        },
        "optimization": {},
        "hessian": {}
    }
    # now let the executor update the task
    executor.collect_task_results(task=schema.tasks[0],
                                  collection_dict=to_collect,
                                  client=client)
    # make sure it worked
    assert schema.tasks[0].ready_for_fitting is True
Example No. 6
def test_make_fitting_schema_from_results():
    """
    Test that new fitting schemas can be made from results and that every task has complete results
    """
    # build the workflow
    workflow = WorkflowFactory()
    fb = ForceBalanceOptimizer()
    fb.set_optimization_target(target=AbInitio_SMIRNOFF())
    workflow.set_optimizer(optimizer=fb)

    # set up the client and pull some results
    client = FractalClient()
    # grab a dataset with small fragments in it
    result = TorsionDriveCollectionResult.from_server(
        client=client,
        spec_name="default",
        dataset_name="OpenFF-benchmark-ligand-fragments-v1.0",
        final_molecule_only=True,
        subset=bace_entries)
    schema = workflow.fitting_schema_from_results(results=result, combine=True)
    # there should be 2 total molecules as we have combined two results
    assert schema.n_molecules == 2
    # there are a total of 3 torsiondrives
    assert schema.n_tasks == 3
    # make sure each task has results and is ready to fit
    for task in schema.tasks:
        assert task.ready_for_fitting is True
Example No. 7
def test_task_from_results():
    """
    Test making an individual task from a set of results
    """
    # load a client and pull some results
    client = FractalClient()
    # grab a dataset with small fragments in it
    result = TorsionDriveCollectionResult.from_server(
        client=client,
        spec_name="default",
        dataset_name="OpenFF-benchmark-ligand-fragments-v1.0",
        final_molecule_only=True,
        subset=bace_entries[:1])
    # grab the only result
    result = list(result.collection.values())[0]
    # set up the workflow
    workflow = WorkflowFactory()
    fb = ForceBalanceOptimizer()
    fb.set_optimization_target(target=AbInitio_SMIRNOFF())
    workflow.set_optimizer(optimizer=fb)
    # this should be a simple biphenyl molecule
    opt_schema = workflow._task_from_results(results=[result], index=1)

    assert opt_schema.initial_forcefield == workflow.initial_forcefield
    assert opt_schema.optimizer_name == fb.optimizer_name
    assert opt_schema.job_id == "bespoke_task_1"
    assert bool(opt_schema.target_smirks) is True
    assert opt_schema.target_parameters == workflow.target_parameters
    assert result.molecule == opt_schema.target_molecule.molecule
    assert opt_schema.n_tasks == 1
    assert opt_schema.n_targets == 1
    assert opt_schema.ready_for_fitting is True
Example No. 8
def managed_compute_server(request, postgres_server):
    """
    A FractalServer with an associated compute manager, parametrized over all adapter types.
    """

    storage_name = "test_qcfractal_compute_server"
    postgres_server.create_database(storage_name)

    adapter_client = build_adapter_clients(request.param, storage_name=storage_name)

    # Build a server with the thread in an outer context loop
    # Not all adapters play well with internal loops
    with loop_in_thread() as loop:
        server = FractalServer(port=find_open_port(),
                               storage_project_name=storage_name,
                               storage_uri=postgres_server.database_uri(),
                               loop=loop,
                               queue_socket=adapter_client,
                               ssl_options=False)

        # Clean and re-init the database
        reset_server_database(server)

        # Build Client and Manager
        from qcfractal.interface import FractalClient
        client = FractalClient(server)

        from qcfractal.queue import QueueManager
        manager = QueueManager(client, adapter_client)

        yield client, server, manager

        # Close down and clean the adapter
        manager.close_adapter()
        manager.stop()
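A sketch of how a test might consume this fixture, assuming it has been registered with pytest.fixture(params=...) so that request.param selects the adapter:

def test_compute_roundtrip(managed_compute_server):
    client, server, manager = managed_compute_server
    # submit computations through the client here, then drive the manager
    manager.await_results()  # pulls queued work until it completes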
Example No. 9
def build_managed_compute_server(mtype):

    storage_name = "qcf_compute_server_test"
    adapter_client = build_adapter_clients(mtype, storage_name=storage_name)

    # Build a server with the thread in an outer context loop
    # Not all adapters play well with internal loops
    with loop_in_thread() as loop:
        server = FractalServer(port=find_open_port(),
                               storage_project_name=storage_name,
                               loop=loop,
                               queue_socket=adapter_client,
                               ssl_options=False)

        # Clean and re-init the database
        reset_server_database(server)

        # Build Client and Manager
        from qcfractal.interface import FractalClient
        client = FractalClient(server)

        from qcfractal.queue import QueueManager
        manager = QueueManager(client, adapter_client)

        yield client, server, manager

        # Close down and clean the adapter
        manager.close_adapter()
        manager.stop()
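Because this builder is a plain generator rather than a pytest fixture, it can be wrapped as a context manager; a minimal sketch, with "pool" as a hypothetical adapter type:

from contextlib import contextmanager

managed = contextmanager(build_managed_compute_server)
with managed("pool") as (client, server, manager):
    pass  # interact with the live client, server, and manager here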
Example No. 10
def live_fractal_or_skip():
    """Ensure Fractal live connection can be made"""
    try:
        import qcfractal.interface
        requests.get('https://api.qcarchive.molssi.org:443', json={}, timeout=5)
        return FractalClient()
    except (requests.exceptions.ConnectionError, ConnectionRefusedError):
        return pytest.skip("Could not make a connection to central Fractal server")
Example No. 11
class QCFractalWrapper:
    """Wrapper over a QCFractal Dataset class

    Handles creating the collection and authenticating with the server.

    It is a base class for building subclasses that provide utility operations
    for adding molecules to the database (e.g., generating XYZ coordinates so
    that one can 'just add this SMILES'), using a consistent identifier format,
    post-processing routines for accessing data in a database,
    and passing data between different steps of the process."""
    def __init__(self,
                 coll_name: str,
                 qc_spec: str,
                 base_class: Type[Collection] = Dataset,
                 address: str = "localhost:7874",
                 qca_passwd: Optional[str] = None,
                 create: bool = False):
        """Open the geometry computation dataset

        Args:
            address: Address for the QCFractal server
            base_class: Type of the collection
            qc_spec: Name of the QC specification
            coll_name: Name of the collection holding the data
            qca_passwd: Password for the QCFractal server
            create: Whether creating a new collection is acceptable
        """
        if qca_passwd is None:
            qca_passwd = os.environ.get("QCAUSR", None)
        self.qc_spec = qc_spec
        self.client = FractalClient(address,
                                    username='******',
                                    password=qca_passwd,
                                    verify=False)
        try:
            self.coll = base_class.from_server(name=coll_name,
                                               client=self.client)
        except KeyError as ex:
            if create:
                self.coll = base_class(name=coll_name, client=self.client)
                self.coll.save()
            else:
                raise ex

    def get_molecules(self, mol_ids: List[int]) -> List[Molecule]:
        """Lookup the molecules from the

        Args:
            mol_ids: List of molecule IDs
        Returns:
            Requested molecules
        """
        mols: List[Molecule] = []
        for i in range(0, len(mol_ids), 1000):  # Query by 1000s
            mols.extend(self.client.query_molecules(mol_ids[i:i + 1000]))
        mol_lookup = dict((m.id, m) for m in mols)
        mols = [mol_lookup[i] for i in mol_ids]
        return mols
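A usage sketch of the wrapper, assuming a reachable server on the default address and a hypothetical collection name; create=True builds a fresh Dataset when none exists:

wrapper = QCFractalWrapper(coll_name="example-geometries",  # hypothetical name
                           qc_spec="default",
                           create=True)
mols = wrapper.get_molecules([1, 2, 3])  # hypothetical molecule ids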
Example No. 12
def test_submit_new_tasks(fractal_compute_server):
    """
    Make sure that any new tasks which are generated/found are added to the archive instance.
    """

    client = FractalClient(fractal_compute_server)
    # this will not actually run as we do not install psi4
    biphenyl = Molecule.from_file(file_path=get_data("biphenyl.sdf"),
                                  file_format="sdf")
    # now make the schema
    schema = get_fitting_schema(molecules=biphenyl)

    executor = Executor()
    # make sure new tasks are submitted
    task = schema.tasks[0]
    response = executor.submit_new_tasks(task=task, client=client)
    assert response == {'OpenFF Bespoke-fit': {'default': 1}}
Example No. 13
    def restart_archive_record(
        self,
        task: Union[ResultRecord, OptimizationRecord, TorsionDriveRecord],
        client: FractalClient,
    ) -> None:
        """
        Take a record and dispatch the type of restart to be done.
        """
        if task.__class__ == ResultRecord:
            print("restarting basic ...")
            self.restart_basic([task.id], client=client)
        elif task.__class__ == OptimizationRecord:
            print("restarting optimizations ...")
            self.restart_optimizations([task.id], client=client)
        else:
            print("restarting torsiondrives and optimizations ...")
            # we need the optimization ids first
            td_opts = []
            for optimizations in task.optimization_history.values():
                td_opts.extend(optimizations)
            # now query the optimizations
            opt_records = client.query_procedures(td_opts)
            restart_opts = [opt.id for opt in opt_records if opt.status == "ERROR"]
            # restart the optimizations first, then the torsiondrive
            self.restart_optimizations(restart_opts, client=client)
            self.restart_torsiondrives([task.id], client=client)
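A sketch of dispatching a restart for a single errored record, assuming these restart helpers live on the Executor class used elsewhere in these examples, and using a hypothetical record id:

from qcportal import FractalClient

client = FractalClient("localhost:7777", verify=False)  # hypothetical address
executor = Executor()
# fetch the record, then dispatch the appropriate restart for its type
record = client.query_procedures(id=["123"])[0]
executor.restart_archive_record(task=record, client=client)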
Example No. 14
def test_sort_results(combine):
    """
    Test sorting the results before making a fitting schema with and without combination.
    """
    # load up the fractal client
    client = FractalClient()
    # grab a dataset with bace fragments in it
    result = TorsionDriveCollectionResult.from_server(
        client=client,
        spec_name="default",
        dataset_name="OpenFF-benchmark-ligand-fragments-v1.0",
        final_molecule_only=True,
        subset=bace_entries)
    workflow = WorkflowFactory()
    # now sort the results
    all_results = workflow._sort_results(results=result, combine=combine)
    if combine:
        assert len(all_results) == 2
    else:
        assert len(all_results) == 3
Example No. 15
def test_error_cycle_complete():
    """
    Try to error cycle a task which is complete in QCArchive; this should cause the task result to be
    collected and put into the optimization queue.
    """

    client = FractalClient()
    biphenyl = Molecule.from_file(get_data("biphenyl.sdf"))
    schema = get_fitting_schema(biphenyl)
    execute = Executor()
    # fake the dataset name
    execute._dataset_name = "OpenFF-benchmark-ligand-fragments-v1.0"
    task = schema.tasks[0]
    tasks = list(task.get_task_map().keys())
    # fake the task map
    execute.task_map = {
        tasks[0]:
        "[h]c1c([c:1]([c:2](c(c1[h])[h])[c:3]2[c:4](c(c(c(c2[h])cl)[h])[h])[h])[h])[h]"
    }
    execute._error_cycle_task(task=task, client=client)
    # the result should be collected and the task is now in the opt queue
    opt_task = execute.opt_queue.get(timeout=5)
    assert opt_task.ready_for_fitting is True
Example No. 16
def get_optimization_specification(client: FractalClient, name: str) -> dict:
    """Get a specification from a hard-coded list of specifications.

    Will register the specification's keywords with the QCFractal database
    so that the specification is ready to use in an OptimizationDataset.

    Args:
        client: QCFractal client
        name: Name of the specification
    Returns:
        Dictionary ready to pass to QCFractal
    """

    # Lookup the desired specification
    spec = deepcopy(_opt_specs[name])

    # Add the keyword arguments for the qc_spec
    kwds = spec['qc_spec'].get('keywords', None)
    if kwds is not None:
        kwd_id = client.add_keywords([kwds])[0]
        spec['qc_spec']['keywords'] = kwd_id

    return spec
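A hedged sketch of registering the returned specification with an OptimizationDataset; the dataset name, the "optimization_spec" key, and the exact add_specification signature are assumptions:

spec = get_optimization_specification(client, "default")  # hypothetical spec name
ds = client.get_collection("OptimizationDataset", "my-optimizations")  # hypothetical dataset
ds.add_specification(name="default",
                     optimization_spec=spec["optimization_spec"],  # assumed key
                     qc_spec=spec["qc_spec"])
ds.save()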
Example No. 17
    def _error_cycle_task(self, task: OptimizationSchema,
                          client: FractalClient) -> None:
        """
        Specific error cycling for a given task.
        """

        print("task molecule name ", task.job_id)
        # keep track of any records that should be collected
        to_collect = {"torsion1d": {}, "optimization": {}, "hessian": {}}
        # loop through each target and look for tasks to update
        for target in task.targets:
            # get the dataset
            dataset = client.get_collection(
                collection_type=self._dataset_type_mapping[
                    target.collection_workflow],
                name=self._dataset_name,
            )
            # now update each entry
            for entry in target.tasks:
                # for each entry, query the archive for its status
                task_hash = entry.get_task_hash()
                entry_id = self.task_map[task_hash]
                print("pulling record for ", entry_id)
                record = self.get_record(dataset=dataset,
                                         spec=target.qc_spec,
                                         record_name=entry_id)
                if record.status.value == "COMPLETE":
                    collection_set = to_collect[target.collection_workflow]
                    collection_set.setdefault(target.qc_spec.spec_name,
                                              []).append(entry_id)
                elif record.status.value == "ERROR":
                    # save the error into the task
                    task.error_message = record.get_error()
                    print(
                        f"The task {task.job_id} has errored; attempting restart. Error message:",
                        task.error_message,
                    )
                    # update the restart count
                    if task_hash not in self.retries:
                        self.retries[task_hash] = 1
                    else:
                        self.retries[task_hash] += 1
                    if self.retries[task_hash] == self.max_retires:
                        # mark as errored
                        task.status = Status.CollectionError
                    else:
                        # restart the job
                        self.restart_archive_record(task=record, client=client)
                        task.status = Status.ErrorCycle
                else:
                    # the task is incomplete; let it keep running
                    continue

        # if we have values to collect update the task here
        if any(to_collect.values()):
            print("collecting results for ", to_collect)
            self.collect_task_results(task, to_collect, client)

        # now we should look for new tasks to submit
        print("looking for new reference tasks ...")
        if task.get_task_map():
            response = self.submit_new_tasks(task, client=client)
            print("response of new tasks ... ", response)

        print("checking for optimizations to run ...")
        if task.ready_for_fitting:
            # the molecule is done; pass to the opt queue to be removed
            self.opt_queue.put(task)

        elif task.status == Status.CollectionError:
            # one of the collection entries has failed, so pass to opt, which will fail
            self.opt_queue.put(task)
        else:
            print("task not finished putting back into the queue.")
            # the molecule is not finished and not ready for opt error cycle again
            self.collection_queue.put(task)
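For reference, a minimal sketch of the Status enum this method assumes; the member names come from the code above, while the values are guesses:

from enum import Enum

class Status(Enum):
    # members referenced by _error_cycle_task; values are assumptions
    ErrorCycle = "error_cycle"
    CollectionError = "collection_error"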