def get_client():
    """Return the QCPortal client stored on ``g``, creating it on first use.

    The server address is read from ``current_app.config['QCPORTAL_URI']``.
    A falsy value means ``FractalClient`` is constructed without arguments.
    """
    if 'connection' in g:
        return g.connection

    address = current_app.config['QCPORTAL_URI']
    g.connection = (ptl.FractalClient(address)
                    if address else ptl.FractalClient())
    return g.connection
    def _get_qca_client(self):
        """Build a ``FractalClient`` authenticated from the environment.

        The username is taken from ``QCA_USER`` and the password from
        ``QCA_KEY``; a missing variable raises ``KeyError``.
        """
        import qcportal as ptl

        credentials = {
            "username": os.environ["QCA_USER"],
            "password": os.environ["QCA_KEY"],
        }
        return ptl.FractalClient(**credentials)
    def _activate_client(client) -> ptl.FractalClient:
        """
        Resolve ``client`` into a usable fractal client.

        Parameters:
            client: An existing client instance, the string "public", or the name of the file containing the client information.

        Returns:
            ``client`` itself when it is already a qcportal (or, if installed, qcfractal) client; otherwise a new qcportal.FractalClient.
        """

        accepted_types = [ptl.FractalClient]
        try:
            from qcfractal.interface import FractalClient as QCFractalClient
        except ModuleNotFoundError:
            # qcfractal is optional: without it only qcportal clients are recognised
            pass
        else:
            accepted_types.append(QCFractalClient)

        # already a client instance: hand it back untouched
        if isinstance(client, tuple(accepted_types)):
            return client

        if client == "public":
            return ptl.FractalClient()

        # anything else is treated as a file describing the connection
        return ptl.FractalClient.from_file(client)
def check_compute_request(dataset_data):
    """
    Check a compute request against the archive: which specs can still be added and the basis coverage.

    The "qc_specifications" entry is popped from ``dataset_data`` (the caller's
    mapping is modified); the remainder is used to build the dataset.

    Returns a tuple ``(updated_dataset.dict(), spec_report)`` where ``spec_report``
    maps each spec name to the report produced by ``create_spec_report`` with a
    "**Full Basis Coverage**" entry added.
    """
    requested_specs = dataset_data.pop("qc_specifications")
    dataset = create_dataset(dataset_data)
    # refresh the dataset with the elements and specs known to the client
    updated_dataset = update_specification_and_metadata(
        dataset=dataset, client=ptl.FractalClient())

    # adding a spec raises QCSpecificationError when it cannot be accepted
    # (e.g. it has already been stored); record the outcome per spec
    spec_report = {}
    for requested in requested_specs.values():
        try:
            updated_dataset.add_qc_spec(**requested)
        except QCSpecificationError:
            outcome = cross
        else:
            outcome = check_mark
        spec_report[requested["spec_name"]] = create_spec_report(
            requested, outcome)

    # basis coverage is queried on ``dataset`` (not ``updated_dataset``)
    all_coverage = dataset._get_missing_basis_coverage(raise_errors=False)
    for spec_name, report in spec_report.items():
        coverage = all_coverage.get(spec_name, missing)
        if coverage == missing:
            # no coverage information for this spec
            flag = coverage
        elif coverage:
            # a non-empty result means something is not covered
            flag = cross
        else:
            flag = check_mark
        report["**Full Basis Coverage**"] = flag

    return updated_dataset.dict(), spec_report
Example #5
0
def load_DS_QCA(DSName):
    """
    Description-
    Fetches a QCA optimization data set and collects the smiles of its entries

    Input-
    DSName: Name of QCA optimization data set

    Return -
    smilesDS: List with one smiles per entry, in the order of the data set index

    """
    portal = ptl.FractalClient()
    ds = portal.get_collection("OptimizationDataset", DSName)
    first_spec = ds.list_specifications().index[0]
    print(f"Loading TorsionDrive Scans from [ {DSName} ] spec [{first_spec}]")
    print(f"Found {len(ds.df)} data entries")

    # attribute holding the smiles of each entry
    smiles_key = 'canonical_isomeric_explicit_hydrogen_smiles'
    return [
        ds.get_entry(entry_index).attributes[smiles_key]
        for entry_index in ds.df.index
    ]
Example #6
0
def get_dict():
    """Group record positions of "OpenFF Full Optimization Benchmark 1" by base name.

    Each record name is reduced to a base name by dropping its last
    "-"-separated token (the remaining tokens are concatenated). Consecutive
    records sharing a base name are collected under one running integer key.

    Returns:
        dict mapping group number (0, 1, 2, ...) to the list of positions in
        ``collection.data.records`` belonging to that group.
    """
    client = ptl.FractalClient()

    collection = client.get_collection(
        "OptimizationDataset",
        "OpenFF Full Optimization Benchmark 1",
    )

    record_names = list(collection.data.records)

    results = {}

    # None cannot collide with a real base name. An empty string can: a first
    # record name without "-" has the base name "" and must still open a group.
    previous = None

    for record_idx, record_name in enumerate(record_names):
        base_name = "".join(record_name.split("-")[:-1])
        if base_name != previous:
            # a new run of records starts: open the next group
            results[len(results)] = [record_idx]
            previous = base_name
        else:
            results[len(results) - 1].append(record_idx)

    return results
def script():
    """Generate a JuliaChem basis set for molecule 1234 fetched from QCArchive.

    The JuliaChem runtime is initialized first and finalized last; nothing
    is returned.
    """
    #== initialize JuliaChem runtime ==#
    JuliaChem.initialize()

    #== get molecule information from QCArchive and build the input system ==#
    qca_client = ptl.FractalClient()
    mol = qca_client.query_molecules(1234)[0]
    molecule = JSON.parse(mol.json())

    #== calculation settings (only `model` is consumed below) ==#
    driver = "energy"
    model = dict(method="RHF", basis="6-31G(d,p)")
    scf_options = dict(
        niter=50,
        ndiis=3,
        dele=1E-8,
        rmsd=1E-6,
        prec="Float64",
        direct=False,
        debug=False,
    )
    keywords = {"scf": scf_options}

    #== generate basis set ==#
    basis = JuliaChem.JCBasis.run(molecule, model)

    #== finalize JuliaChem runtime ==#
    JuliaChem.finalize()
Example #8
0
def get_collection() -> ptl.collections.OptimizationDataset:
    """Fetch the "OpenFF Full Optimization Benchmark 1" optimization dataset."""

    dataset_name = "OpenFF Full Optimization Benchmark 1"
    return ptl.FractalClient().get_collection("OptimizationDataset",
                                              dataset_name)
Example #9
0
def _get_qcarchive_collections():
    """Get Machine Learning datasets from QCArchive server

    Collections of type 'dataset' and 'reactiondataset' are requested without
    their records and contributed values. Only collections tagged
    "machine learning" are kept; for those the tag is removed, the metadata
    dictionaries are merged into the top level and the counts/sizes are
    turned into thousands-separated strings.

    Returns:
        list of dict, one per machine learning collection.
    """

    # connection client to MolSSI server
    # NOTE(review): confirm that `plt` is the qcportal alias imported by this
    # module (it is usually spelled `ptl`)
    client = plt.FractalClient()

    collection_types = ['dataset', 'reactiondataset']

    payload = {
        "meta": {
            "exclude": ["records", "contributed_values"],
        },
        "data": {
            "collection": None
        }
    }

    results = []
    for collection_type in collection_types:
        # must have the type to use exclude functionality
        payload['data']['collection'] = collection_type
        # HTTP request to load the data
        res = client._automodel_request("collection",
                                        "get",
                                        payload,
                                        full_return=False)
        results.extend(res)

    # logging interpolates lazily: the message needs a placeholder for the count
    logger.debug('Total collections fetched: %d', len(results))

    bytes_per_mb = 1024**2

    data = []
    for r in results:
        if "machine learning" not in r["tags"]:
            continue  # skip non ML datasets
        r["tags"].remove("machine learning")

        if r['metadata']:  # add metadata attributes
            r.update(r.pop("metadata"))

        r['data_points'] = f'{r["data_points"]:,}'

        if r['view_metadata']:  # add metadata attributes
            r.update(r.pop("view_metadata"))
            # sizes from bytes to whole MB, formatted with thousands separators
            for size_key in ('plaintext_size', 'hdf5_size'):
                size_mb = int(r[size_key]) // bytes_per_mb
                r[size_key] = f'{size_mb:,}'

        data.append(r)

    return data
Example #10
0
    def _activate_client(client) -> ptl.FractalClient:
        """
        Resolve ``client`` into a usable fractal client.

        Parameters:
            client: An existing client instance, the string "public", or the name of the file containing the client information.

        Returns:
            ``client`` itself when it is already a client instance, otherwise a new qcportal.FractalClient.
        """

        # already a client (qcportal flavour checked first): return it untouched
        if isinstance(client, ptl.FractalClient) or isinstance(
                client, FractalClient):
            return client

        if client == "public":
            return ptl.FractalClient()

        # anything else is treated as a file describing the connection
        return ptl.FractalClient.from_file(client)
Example #11
0
def load_DS_QCA(DSName):
    """
    Description-
    Loads in a QCA DS, writes a summary to filterLog.txt and returns a list of smiles

    Input-
    DSName: Name of QCA optimization data set

    Return -
    smilesDS: List of smiles from the DS (one per entry, duplicates kept)

    Side effects -
    Overwrites "filterLog.txt" in the current working directory with the data
    set name, the number of smiles (with and without duplicates) and the
    smiles list itself.

    """
    client = ptl.FractalClient()
    ds = client.get_collection("OptimizationDataset", DSName)
    spec_name = ds.list_specifications().index[0]
    print(f"Loading TorsionDrive Scans from [ {DSName} ] spec [{spec_name}]")
    print(f"Found {len(ds.df)} data entries")

    smiles_key = 'canonical_isomeric_explicit_hydrogen_smiles'
    smilesDS = [
        ds.get_entry(entry_index).attributes[smiles_key]
        for entry_index in ds.df.index
    ]

    # the context manager closes the log even if one of the writes fails
    with open("filterLog.txt", "w") as log_file:  #write mode
        log_file.write(f"This is the optimization data set:{DSName}\n")
        log_file.write(
            f"This is the length of the original DS:{len(smilesDS)}\n")
        log_file.write(
            "This is the length of the original DS without duplicates:"
            f"{len(set(smilesDS))}\n")
        log_file.write(f"This is the list of the original smiles:{smilesDS}\n")

    return smilesDS
            # use count to generate unique index
            index_count = index_counter[index]
            this_index = f'{index}-{index_count}'
            index_counter[index] += 1
            assert this_index not in molecules_dict, f"Multiple molecules have the same index, please check {mdata}"
            molecules_dict[this_index] = qcel_molecule
            molecule_attributes[this_index] = cmiles_ids
    return molecules_dict, molecule_attributes


# Read the input molecules, then create the optimization dataset and register
# its HF3C specification.
print("Extracting molecules...")
molecules_dict, molecule_attributes = read_molecules("optimization_inputs.json.gz")

print("Initializing dataset...")
#client = ptl.FractalClient("localhost:7777", verify=False) # TODO: Should this be changed to remote address?
# from_file is an alternate constructor: call it on the class so that no
# throwaway default client is built first
client = ptl.FractalClient.from_file()

# create a new dataset with specified name
ds = ptl.collections.OptimizationDataset("Kinase inhibitors WBO distributions", client=client)

# register the keyword set on the server; the returned id is referenced by the QC spec
kw = ptl.models.KeywordSet(values={
    'maxiter': 200,
    'scf_properties': [
        'dipole',
        'quadrupole',
        'wiberg_lowdin_indices',
        'mayer_indices',
    ],
})
kw_id = client.add_keywords([kw])[0]

# create specification for this dataset
opt_spec = {"program": "geometric"}
qc_spec = {"driver": "gradient", "method": "hf3c", "basis": "", "program": "psi4", "keywords": kw_id}
ds.add_specification("hf3c", opt_spec, qc_spec, description="HF3C geometry optimization")
Example #13
0
"""
Download torsiondrive data for figure
Ran on 2020-1-21.
QCPortal version 0.13.0
openeye version 2019.Oct.2
"""
import qcportal as ptl
import json
from openeye import oechem

# Client constructed with FractalClient's default arguments
client = ptl.FractalClient()
# Names of the TorsionDriveDataset collections to process
collections = ['OpenFF Group1 Torsions', 'SMIRNOFF Coverage Torsion Set 1']
for j, c in enumerate(collections):

    # Fetch the collection and print the status of its 'default' specification
    td_dataset = client.get_collection('TorsionDriveDataset', c)
    print(td_dataset.status(['default']))
    # Per-collection accumulators, re-created empty on every iteration.
    # NOTE(review): nothing in the visible part of the loop fills them — the
    # remainder of the loop body is presumably outside this excerpt.
    opts_per_td = {'heavy_atoms': [], 'opts_per_td': []}
    gradients_per_opts = {'heavy_atoms': [], 'gradients_per_opt': []}
    # Column-oriented table: one list per field
    dictionary = {
        'driver': [],
        'method': [],
        'basis': [],
        'nbasis': [],
        'nalpha': [],
        'nbeta': [],
        'natoms': [],
        'heavy_atoms': [],
        'cpu': [],
        'nthreads': [],
        'wall_time': []
    }
Example #14
0
# Attempt to download and save all `OptimizationDataset`s appearing in Parsley training set

import numpy as np
import qcportal
from dataset_selection import optimization_datasets, dataset_type
from openforcefield.topology import Molecule
from tqdm import tqdm
from pickle import dump

# Initialize FractalClient
# As documented here: http://docs.qcarchive.molssi.org/projects/QCPortal/en/stable/client.html
from espaloma.data.qcarchive_utils import get_energy_and_gradient, MolWithTargets

# Module-level client, constructed with FractalClient's default arguments
client = qcportal.FractalClient()


def get_mol_with_targets(record, entry) -> MolWithTargets:
    """Collect the geometries, energies and gradients along a record's trajectory.

    Parameters
    ----------
    record
        Object providing ``get_trajectory()``; each snapshot of the trajectory
        must provide ``get_molecule()``.
    entry
        QCSchema-style entry passed to ``Molecule.from_qcschema``.

    NOTE(review): the body visible here builds ``offmol``, ``xyz``, ``energies``
    and ``gradients`` but ends without a ``return`` although the signature
    announces ``MolWithTargets`` — confirm against the complete function.
    """
    # offmol
    offmol = Molecule.from_qcschema(entry)

    # trajectory containing xyz, energies, and gradients
    trajectory = record.get_trajectory()

    # xyz: one geometry per trajectory snapshot, stacked into a single array
    molecules = [snapshot.get_molecule() for snapshot in trajectory]
    xyz = np.array([mol.geometry for mol in molecules])

    # energies and gradients: get_energy_and_gradient yields an (energy, gradient) pair per snapshot
    energies_and_gradients = list(map(get_energy_and_gradient, trajectory))
    energies = np.array([e for (e, _) in energies_and_gradients])
    gradients = np.array([g for (_, g) in energies_and_gradients])

# Initialize Omega
omega = oeomega.OEOmega()

# Configure the Omega instance; the calls are order-sensitive where an option
# is set more than once (see SetIncludeInput).
omega.SetMaxConfs(1)
# NOTE(review): overridden by SetIncludeInput(False) further down — the later call wins
omega.SetIncludeInput(True)
omega.SetCanonOrder(True)
omega.SetSampleHydrogens(
    True
)  # Word to the wise: skipping this step can lead to significantly different charges!
omega.SetStrictStereo(True)
omega.SetStrictAtomTypes(True)
omega.SetIncludeInput(False)  # don't include input

# Client for the server on localhost port 7777.
# NOTE(review): verify=False presumably disables certificate verification — confirm this is intended
client = ptl.FractalClient("https://localhost:7777/", verify=False)


def make_ptl_mol(oemol):
    """Builds a QCPortal Molecule from an OpenEye molecule

    The atomic symbols are read from ``oemol`` itself and the coordinates
    returned by ``oemol.GetCoords()`` are scaled by 1.88973 to convert them
    to bohr.

    NOTE(review): the part of the body visible here ends after the unit
    conversion, without building or returning the molecule — confirm against
    the complete function.
    """
    coords = oemol.GetCoords()
    # iterate over the atoms of the argument, not over a global ``mol``
    symbols_list = [
        oechem.OEGetAtomicSymbol(atom.GetAtomicNum())
        for atom in oemol.GetAtoms()
    ]

    #convert to bohr
    print(coords)
    for key, item in coords.items():
        coords[key] = (item[0] * 1.88973, item[1] * 1.88973, item[2] * 1.88973)
            index_count = index_counter[index]
            this_index = f'{index}-{index_count}'
            index_counter[index] += 1
            assert this_index not in molecules_dict, f"Multiple molecules have the same index, please check {mdata}"
            molecules_dict[this_index] = qcel_molecule
            molecule_attributes[this_index] = cmiles_ids
    return molecules_dict, molecule_attributes


# Read the input molecules from the gzipped JSON file; read_molecules returns
# the molecules and their attributes as two mappings.
print("Extracting molecules...")
molecules_dict, molecule_attributes = read_molecules(
    "optimization_inputs.json.gz")

print("Initializing dataset...")
# NOTE(review): verify=False presumably disables certificate verification — confirm this is intended
client = ptl.FractalClient(
    "localhost:7777",
    verify=False)  # TODO: Should this be changed to remote address?

# create a new dataset with specified name
ds = ptl.collections.OptimizationDataset("OpenFF NCI250K Boron 1",
                                         client=client)

# create specification for this dataset
# opt_spec selects the optimizer program, qc_spec the underlying gradient calculation
opt_spec = {"program": "geometric"}
qc_spec = {
    "driver": "gradient",
    "method": "B3LYP-d3bj",
    "basis": "dzvp",
    "program": "psi4"
}
ds.add_specification(
def butane_molecule():
    """Yield the result of querying molecule ids '61139' and '70659'."""
    molecule_ids = ['61139', '70659']
    yield ptl.FractalClient().query_molecules(id=molecule_ids)
Example #18
0
        selected_torsions = json.load(infile)
    return selected_torsions


# Load the selected torsions (unpacking them from the archive when needed) and
# connect to the server the dataset will be submitted to.
print("Reading selected_torsions...")

if not os.path.exists(torsion_data):
    # extract() writes the member to disk; extractfile() only returns a
    # reader object, which left torsion_data missing for the read below
    with tarfile.open(torsion_data_gz) as f:
        f.extract(torsion_data)
selected_torsions = read_selected_torsions(torsion_data)

print(f"Found {len(selected_torsions)} torsions")

print("Initializing dataset...")
if local_run:
    # NOTE(review): verify=False presumably disables certificate verification — confirm this is intended
    client = ptl.FractalClient("localhost:7777", verify=False)
else:
    # connection details are read from the client's configuration file
    client = ptl.FractalClient.from_file()

if UPDATE:
    ds = client.get_collection("TorsionDriveDataset", collection_name)
    print(ds)
else:
    # create a new dataset with specified name
    ds = ptl.collections.TorsionDriveDataset(collection_name, client=client)

    # create specification for this dataset
    opt_spec = {
        "program": "geometric",
        "keywords": {
            "coordsys": "tric",
Example #19
0
import pickle
import numpy as np
import copy
import json

import cmiles
from openeye import oechem

from forcebalance.molecule import Molecule
from openforcefield.typing.engines.smirnoff import ForceField

from simtk.openmm import app, unit

import qcportal as ptl

# Module-level handles: a client for the server at api.qcarchive.molssi.org
# and an OpenEye molecule output stream.
client = ptl.FractalClient('https://api.qcarchive.molssi.org:443/')
ofs = oechem.oemolostream()


def download_hessian_data(dataset_name):
    """
    Download data from public server

    Parameters
    ----------
    dataset_name: str
        example: "OpenFF Optimization Set 1"

    Returns
    -------
    hessian_data: dict
Example #20
0
def get_client():
    """Create a ``FractalClient`` with the library's default arguments."""
    default_client = ptl.FractalClient()
    return default_client
Example #21
0
def loadDataset_low(datasetName, specification, benchmark_smiles, qca_overlapped_entries):
    """
    Low level call to load each torsion drive dataset and return the torsion barriers of
    the COMPLETE entries that do not overlap with the benchmark set

        Parameters
        ----------
        datasetName : str
            torsion drive dataset name.
        specification : str
            specification in the dataset. Example: "B3LYP-D3", "default", "UFF"
        benchmark_smiles : str
            path of a text file with one benchmark SMILES per line.
        qca_overlapped_entries : str
            path of the file to which the record ids of the entries overlapping
            with the benchmark set are appended, one per line.

        Returns
        -------
        tb : list
            one torsion_barrier_for_molecule(...) result per COMPLETE entry that is
            not isomorphic to any benchmark molecule.

        Raises
        ------
        AssertionError
            if datasetName or specification is empty.
    """
    # explicit checks instead of `assert`, which is stripped under `python -O`;
    # AssertionError is kept so existing callers see the same exception type
    if not datasetName:
        print("datasetName is empty. Check input list of dataset tuples")
        raise AssertionError("datasetName is empty")
    if not specification:
        print("specification is empty. Check input list of dataset tuples")
        raise AssertionError("specification is empty")

    # initiate qc portal instance
    client = ptl.FractalClient()
    # from the TorsionDriveDataset collection picking up given datasetName
    ds = client.get_collection("TorsionDriveDataset", datasetName)
    ds.status([specification], status="COMPLETE")

    # Serial implementation

    # Hardcoding benchmark molecules from the lim_mobley_parsely_benchmark
    # https://openforcefield.org/force-fields/force-fields/
    # https://github.com/MobleyLab/benchmarkff/blob/91476147f35579bc52bf984839fd20c72a61d76d/molecules/set_v03_non_redundant/trim3_full_qcarchive.smi

    with open(benchmark_smiles) as f:
        # drop the trailing newline of every line and ignore blank lines
        bm_smiles = [line.strip() for line in f if line.strip()]
    bm_mols = [Molecule.from_smiles(smiles) for smiles in bm_smiles]

    tb = []
    overlaps = 0
    qca_entries = []

    # iterate over the rows of the dataframe (df.size would count cells, not rows);
    # the record is read from the first column
    for i, smiles in enumerate(ds.df.index):
        record = ds.df.iloc[i, 0]
        if record.status != "COMPLETE":
            continue

        entry = ds.get_entry(smiles)
        mapped_smiles = entry.attributes[
            "canonical_isomeric_explicit_hydrogen_mapped_smiles"
        ]
        mol1 = Molecule.from_mapped_smiles(mapped_smiles)

        for mol in bm_mols:
            # connectivity-only comparison: every optional matching criterion is switched off
            isomorphic, _ = Molecule.are_isomorphic(
                mol1,
                mol,
                return_atom_map=False,
                aromatic_matching=False,
                formal_charge_matching=False,
                bond_order_matching=False,
                atom_stereochemistry_matching=False,
                bond_stereochemistry_matching=False,
            )
            if isomorphic:
                overlaps += 1
                # NOTE(review): the key 'default' is hard-coded although a
                # `specification` argument exists — confirm this is intended
                qca_entries.append(entry.object_map['default'])
                break
        else:
            # no benchmark molecule matched: keep this entry
            tb.append(torsion_barrier_for_molecule(record, mapped_smiles))

    # overlaps_qca_ids.txt is also a hardcoded file
    with open(qca_overlapped_entries, "a") as f:
        f.writelines(f"{item}\n" for item in qca_entries)

    print("No. of overlaps with benchmark set, qca entries added to overlaps_qca_ids.txt: ", overlaps)
    print("No. of COMPLETE and not overlapping with benchmark in this dataset:", len(tb), "out of ", len(ds.df))
    return tb