def get_client():
    """Return the FractalClient cached on the Flask ``g`` object.

    On first use the client is built from the ``QCPORTAL_URI`` entry of the
    current application's config; a falsy URI falls back to the client's
    default constructor. The instance is stored as ``g.connection`` and
    reused by later calls that see the same ``g``.
    """
    if 'connection' in g:
        return g.connection

    uri = current_app.config['QCPORTAL_URI']
    g.connection = ptl.FractalClient(uri) if uri else ptl.FractalClient()
    return g.connection
def _get_qca_client(self):
    """Build a FractalClient authenticated from the environment.

    The username is read from ``QCA_USER`` and the password from ``QCA_KEY``;
    a missing variable raises ``KeyError``.
    """
    # Imported lazily so qcportal is only required when a client is requested.
    import qcportal as ptl

    credentials = {
        "username": os.environ["QCA_USER"],
        "password": os.environ["QCA_KEY"],
    }
    return ptl.FractalClient(**credentials)
def _activate_client(client) -> ptl.FractalClient:
    """
    Resolve ``client`` into a usable fractal client instance.

    Parameters:
        client: An existing client instance, the string "public", or the name
            of a file containing the client information.

    Returns:
        The instance that was passed in when it is already a client,
        a default-constructed qcportal.FractalClient for "public", otherwise
        the result of qcportal.FractalClient.from_file(client).
    """
    # qcfractal is optional; without it only the qcportal client type is recognised.
    try:
        from qcfractal.interface import FractalClient as QCFractalClient
    except ModuleNotFoundError:
        QCFractalClient = None

    accepted_types = [ptl.FractalClient]
    if QCFractalClient is not None:
        accepted_types.append(QCFractalClient)

    if isinstance(client, tuple(accepted_types)):
        return client
    if client == "public":
        return ptl.FractalClient()
    return ptl.FractalClient.from_file(client)
def check_compute_request(dataset_data):
    """
    Validate a compute request against the archive.

    The "qc_specifications" entry is popped from ``dataset_data`` (the input
    mapping is modified), a dataset is created from what remains and updated
    through ``update_specification_and_metadata`` with a default
    FractalClient. Each requested specification is then added to the updated
    dataset and a per-spec report is assembled, including a
    "**Full Basis Coverage**" field.

    Returns a tuple ``(updated_dataset.dict(), spec_report)`` where
    ``spec_report`` is keyed by each spec's "spec_name".
    """
    requested_specs = dataset_data.pop("qc_specifications")
    dataset = create_dataset(dataset_data)
    client = ptl.FractalClient()

    # refresh the dataset with the elements and specs known to the client
    updated_dataset = update_specification_and_metadata(dataset=dataset, client=client)

    # try to add every spec; a QCSpecificationError marks that spec with a cross
    spec_report = {}
    for spec in requested_specs.values():
        try:
            updated_dataset.add_qc_spec(**spec)
        except QCSpecificationError:
            validated = cross
        else:
            validated = check_mark
        spec_report[spec["spec_name"]] = create_spec_report(spec, validated)

    # basis coverage is queried on the dataset built from the request data
    all_coverage = dataset._get_missing_basis_coverage(raise_errors=False)

    # fold the coverage result into every report
    for spec_name, report in spec_report.items():
        coverage = all_coverage.get(spec_name, missing)
        if coverage == missing:
            # no coverage information for this spec: report the sentinel itself
            coverage_flag = coverage
        elif coverage:
            # a truthy coverage value is reported with a cross
            coverage_flag = cross
        else:
            coverage_flag = check_mark
        report["**Full Basis Coverage**"] = coverage_flag

    return updated_dataset.dict(), spec_report
def load_DS_QCA(DSName):
    """
    Description -
    Loads in a QCA optimization dataset and returns a list of smiles

    Input -
    DSName: Name of QCA optimization data set

    Return -
    smilesDS: List with the 'canonical_isomeric_explicit_hydrogen_smiles'
    attribute of every entry in the dataset, in ``ds.df.index`` order
    (duplicates are kept)
    """
    client = ptl.FractalClient()
    ds = client.get_collection("OptimizationDataset", DSName)
    # only the first listed specification is reported in the log line below
    spec_name = ds.list_specifications().index[0]
    print(f"Loading TorsionDrive Scans from [ {DSName} ] spec [{spec_name}]")
    print(f"Found {len(ds.df)} data entries")

    # collect the smiles attribute of each dataset entry
    smilesDS = [
        ds.get_entry(entry_index).attributes[
            'canonical_isomeric_explicit_hydrogen_smiles']
        for entry_index in ds.df.index
    ]
    return smilesDS
def get_dict():
    """Group record positions of the benchmark dataset by base record name.

    The base name of a record is its name with the last "-"-separated field
    removed (remaining fields are joined without a separator). Consecutive
    records sharing a base name are placed in the same group.

    Returns a dict mapping a running group number (starting at 0) to the list
    of record positions in that group.
    """
    client = ptl.FractalClient()
    dataset = client.get_collection(
        "OptimizationDataset",
        "OpenFF Full Optimization Benchmark 1",
    )

    groups = {}
    group_id = -1
    last_base = ""
    for position, name in enumerate(list(dataset.data.records)):
        base = "".join(name.split("-")[:-1])
        if base == last_base:
            # same molecule as the previous record: extend the current group
            groups[group_id].append(position)
            continue
        # a new base name opens the next group
        group_id += 1
        groups[group_id] = [position]
        last_base = base
    return groups
def script():
    """Generate a JuliaChem basis set for QCArchive molecule 1234.

    Initializes the JuliaChem runtime, queries the molecule through a default
    FractalClient, runs ``JuliaChem.JCBasis.run`` on its parsed JSON together
    with the model definition, and finalizes the runtime.
    """
    # start the JuliaChem runtime
    JuliaChem.initialize()

    # fetch the molecule from QCArchive
    client = ptl.FractalClient()
    qc_molecule = client.query_molecules(1234)[0]

    # assemble the input system
    molecule = JSON.parse(qc_molecule.json())
    driver = "energy"
    model = {"method": "RHF", "basis": "6-31G(d,p)"}
    scf_options = {
        "niter": 50,
        "ndiis": 3,
        "dele": 1E-8,
        "rmsd": 1E-6,
        "prec": "Float64",
        "direct": False,
        "debug": False,
    }
    keywords = {"scf": scf_options}

    # build the basis set
    basis = JuliaChem.JCBasis.run(molecule, model)

    # shut the JuliaChem runtime down
    JuliaChem.finalize()
def get_collection() -> ptl.collections.OptimizationDataset:
    """Fetch the "OpenFF Full Optimization Benchmark 1" optimization dataset.

    A default-constructed FractalClient is used for the lookup.
    """
    collection_type = "OptimizationDataset"
    collection_name = "OpenFF Full Optimization Benchmark 1"
    return ptl.FractalClient().get_collection(collection_type, collection_name)
def _get_qcarchive_collections():
    """Get Machine Learning datasets from QCArchive server.

    Collections of type 'dataset' and 'reactiondataset' are requested with
    their "records" and "contributed_values" fields excluded. Only
    collections tagged "machine learning" are kept (that tag is removed from
    the returned entry). Non-empty "metadata" and "view_metadata" mappings
    are merged into the top level of the entry, and the "data_points",
    "plaintext_size" and "hdf5_size" values are replaced by
    thousands-separated strings (sizes are floor-divided by 1024**2 first).

    Returns:
        list of dict: the processed collection entries.
    """
    # connection client to MolSSI server
    # NOTE(review): this block refers to the client module as `plt`; confirm
    # that alias is what the file imports (qcportal is often aliased `ptl`).
    client = plt.FractalClient()

    collection_types = ['dataset', 'reactiondataset']

    payload = {
        "meta": {
            "exclude": ["records", "contributed_values"],
        },
        "data": {
            "collection": None
        }
    }

    results = []
    for collection_type in collection_types:
        # must have the type to use exclude functionality
        payload['data']['collection'] = collection_type
        # HTTP request to load the data
        res = client._automodel_request("collection", "get", payload,
                                        full_return=False)
        results.extend(res)

    # %-style placeholder so logging can actually format the count
    logger.debug('Total collections fetched: %s', len(results))

    data = []
    for r in results:
        if "machine learning" not in r["tags"]:
            # skip non ML datasets
            continue
        r["tags"].remove("machine learning")

        if r['metadata']:
            # add metadata attributes
            r.update(r.pop("metadata"))
            r['data_points'] = f'{r["data_points"]:,}'

        if r['view_metadata']:
            # add metadata attributes
            r.update(r.pop("view_metadata"))
            # sizes from bytes to MB
            r['plaintext_size'] = int(r['plaintext_size']) // 1024**2
            r['plaintext_size'] = f'{r["plaintext_size"]:,}'
            r['hdf5_size'] = int(r['hdf5_size']) // 1024**2
            r['hdf5_size'] = f'{r["hdf5_size"]:,}'

        data.append(r)

    return data
def _activate_client(client) -> ptl.FractalClient:
    """
    Resolve ``client`` into a usable fractal client instance.

    Parameters:
        client: An existing client instance, the string "public", or the name
            of a file containing the client information.

    Returns:
        The instance that was passed in when it is already a client,
        a default-constructed qcportal.FractalClient for "public", otherwise
        the result of qcportal.FractalClient.from_file(client).
    """
    # Already a client (either of the two client classes): hand it straight back.
    if isinstance(client, ptl.FractalClient) or isinstance(client, FractalClient):
        return client

    if client == "public":
        return ptl.FractalClient()

    # Anything else is treated as the name of a client configuration file.
    return ptl.FractalClient.from_file(client)
def load_DS_QCA(DSName):
    """
    Description -
    Loads in a QCA optimization dataset, writes a short summary to
    "filterLog.txt" (overwriting any existing file) and returns a list of
    smiles

    Input -
    DSName: Name of QCA optimization data set

    Return -
    smilesDS: List with the 'canonical_isomeric_explicit_hydrogen_smiles'
    attribute of every entry in the dataset, in ``ds.df.index`` order
    (duplicates are kept)
    """
    client = ptl.FractalClient()
    ds = client.get_collection("OptimizationDataset", DSName)
    # only the first listed specification is reported in the log line below
    spec_name = ds.list_specifications().index[0]
    print(f"Loading TorsionDrive Scans from [ {DSName} ] spec [{spec_name}]")
    print(f"Found {len(ds.df)} data entries")

    # collect the smiles attribute of each dataset entry
    smilesDS = [
        ds.get_entry(entry_index).attributes[
            'canonical_isomeric_explicit_hydrogen_smiles']
        for entry_index in ds.df.index
    ]

    # The context manager closes the log even if one of the writes fails.
    with open("filterLog.txt", "w") as log_file:
        log_file.write(
            "This is the optimization data set:" + str(DSName) + "\n")
        log_file.write(
            "This is the length of the original DS:"
            + str(len(smilesDS)) + "\n")
        log_file.write(
            "This is the length of the original DS without duplicates:"
            + str(len(set(smilesDS))) + "\n")
        log_file.write(
            "This is the list of the original smiles:"
            + str(smilesDS) + "\n")

    return smilesDS
# use count to generate unique index index_count = index_counter[index] this_index = f'{index}-{index_count}' index_counter[index] += 1 assert this_index not in molecules_dict, f"Multiple molecules have the same index, please check {mdata}" molecules_dict[this_index] = qcel_molecule molecule_attributes[this_index] = cmiles_ids return molecules_dict, molecule_attributes print("Extracting molecules...") molecules_dict, molecule_attributes = read_molecules("optimization_inputs.json.gz") print("Initializing dataset...") #client = ptl.FractalClient("localhost:7777", verify=False) # TODO: Should this be changed to remote address? client = ptl.FractalClient().from_file() # create a new dataset with specified name ds = ptl.collections.OptimizationDataset("Kinase inhibitors WBO distributions", client=client) kw = ptl.models.KeywordSet(values={'maxiter': 200, 'scf_properties': ['dipole', 'quadrupole', 'wiberg_lowdin_indices', 'mayer_indices']}) kw_id = client.add_keywords([kw])[0] # create specification for this dataset opt_spec = {"program": "geometric"} qc_spec = {"driver": "gradient", "method": "hf3c", "basis": "", "program": "psi4", "keywords": kw_id} ds.add_specification("hf3c", opt_spec, qc_spec, description="HF3C geometry optimization")
""" Download torsiondrive data for figure Ran on 2020-1-21. QCPortal version 0.13.0 openeye version 2019.Oct.2 """ import qcportal as ptl import json from openeye import oechem client = ptl.FractalClient() collections = ['OpenFF Group1 Torsions', 'SMIRNOFF Coverage Torsion Set 1'] for j, c in enumerate(collections): td_dataset = client.get_collection('TorsionDriveDataset', c) print(td_dataset.status(['default'])) opts_per_td = {'heavy_atoms': [], 'opts_per_td': []} gradients_per_opts = {'heavy_atoms': [], 'gradients_per_opt': []} dictionary = { 'driver': [], 'method': [], 'basis': [], 'nbasis': [], 'nalpha': [], 'nbeta': [], 'natoms': [], 'heavy_atoms': [], 'cpu': [], 'nthreads': [], 'wall_time': [] }
# Attempt to download and save all `OptimizationDataset`s appearing in Parsley training set import numpy as np import qcportal from dataset_selection import optimization_datasets, dataset_type from openforcefield.topology import Molecule from tqdm import tqdm from pickle import dump # Initialize FractalClient # As documented here: http://docs.qcarchive.molssi.org/projects/QCPortal/en/stable/client.html from espaloma.data.qcarchive_utils import get_energy_and_gradient, MolWithTargets client = qcportal.FractalClient() def get_mol_with_targets(record, entry) -> MolWithTargets: # offmol offmol = Molecule.from_qcschema(entry) # trajectory containing xyz, energies, and gradients trajectory = record.get_trajectory() # xyz molecules = [snapshot.get_molecule() for snapshot in trajectory] xyz = np.array([mol.geometry for mol in molecules]) # energies and gradients energies_and_gradients = list(map(get_energy_and_gradient, trajectory)) energies = np.array([e for (e, _) in energies_and_gradients]) gradients = np.array([g for (_, g) in energies_and_gradients])
# Initialize Omega omega = oeomega.OEOmega() omega.SetMaxConfs(1) omega.SetIncludeInput(True) omega.SetCanonOrder(True) omega.SetSampleHydrogens( True ) # Word to the wise: skipping this step can lead to significantly different charges! omega.SetStrictStereo(True) omega.SetStrictAtomTypes(True) omega.SetIncludeInput(False) # don't include input client = ptl.FractalClient("https://localhost:7777/", verify=False) def make_ptl_mol(oemol): """Builds a QCPortal Molecule from an OpenEye molecule""" coords = oemol.GetCoords() symbols_list = [ oechem.OEGetAtomicSymbol(atom.GetAtomicNum()) for atom in mol.GetAtoms() ] #convert to bohr print(coords) for key, item in coords.items(): coords[key] = (item[0] * 1.88973, item[1] * 1.88973, item[2] * 1.88973)
index_count = index_counter[index] this_index = f'{index}-{index_count}' index_counter[index] += 1 assert this_index not in molecules_dict, f"Multiple molecules have the same index, please check {mdata}" molecules_dict[this_index] = qcel_molecule molecule_attributes[this_index] = cmiles_ids return molecules_dict, molecule_attributes print("Extracting molecules...") molecules_dict, molecule_attributes = read_molecules( "optimization_inputs.json.gz") print("Initializing dataset...") client = ptl.FractalClient( "localhost:7777", verify=False) # TODO: Should this be changed to remote address? # create a new dataset with specified name ds = ptl.collections.OptimizationDataset("OpenFF NCI250K Boron 1", client=client) # create specification for this dataset opt_spec = {"program": "geometric"} qc_spec = { "driver": "gradient", "method": "B3LYP-d3bj", "basis": "dzvp", "program": "psi4" } ds.add_specification(
def butane_molecule():
    """Yield the molecules with ids '61139' and '70659' from a default FractalClient.

    The query result is yielded once, as a single object.
    """
    molecule_ids = ['61139', '70659']
    yield ptl.FractalClient().query_molecules(id=molecule_ids)
selected_torsions = json.load(infile) return selected_torsions print("Reading selected_torsions...") if not os.path.exists(torsion_data): with tarfile.open(torsion_data_gz) as f: f.extractfile(torsion_data) selected_torsions = read_selected_torsions(torsion_data) print(f"Found {len(selected_torsions)} torsions") print("Initializing dataset...") if local_run: client = ptl.FractalClient("localhost:7777", verify=False) else: client = ptl.FractalClient.from_file() if UPDATE: ds = client.get_collection("TorsionDriveDataset", collection_name) print(ds) else: # create a new dataset with specified name ds = ptl.collections.TorsionDriveDataset(collection_name, client=client) # create specification for this dataset opt_spec = { "program": "geometric", "keywords": { "coordsys": "tric",
import pickle import numpy as np import copy import json import cmiles from openeye import oechem from forcebalance.molecule import Molecule from openforcefield.typing.engines.smirnoff import ForceField from simtk.openmm import app, unit import qcportal as ptl client = ptl.FractalClient('https://api.qcarchive.molssi.org:443/') ofs = oechem.oemolostream() def download_hessian_data(dataset_name): """ Download data from public server Parameters ---------- dataset_name: str example: "OpenFF Optimization Set 1" Returns ------- hessian_data: dict
def get_client():
    """Return a new, default-constructed FractalClient."""
    client = ptl.FractalClient()
    return client
def loadDataset_low(datasetName, specification, benchmark_smiles, qca_overlapped_entries):
    """
    Low level call to load one torsion drive dataset and compute the torsion
    barrier of every COMPLETE entry that does not overlap with a benchmark set

    Parameters
    ----------
    datasetName : str
        torsion drive dataset name.
    specification : str
        specification in the dataset. Example: "B3LYP-D3", "default", "UFF"
    benchmark_smiles : str
        path of a text file; every line is passed to ``Molecule.from_smiles``
        to build the benchmark molecules.
    qca_overlapped_entries : str
        path of a file opened in append mode; the id of each dataset entry
        that matches a benchmark molecule is written to it, one per line.

    Returns
    -------
    tb : list
        one item per COMPLETE, non-overlapping entry, as returned by
        ``torsion_barrier_for_molecule(record, mapped_smiles)``.

    Raises
    ------
    AssertionError
        if ``datasetName`` or ``specification`` is empty.
    """
    # Raise explicitly (instead of using `assert`) so the checks also run
    # under `python -O`; the exception type is unchanged for callers.
    if not datasetName:
        print("datasetName is empty. Check input list of dataset tuples")
        raise AssertionError("datasetName is empty")
    if not specification:
        print("specification is empty. Check input list of dataset tuples")
        raise AssertionError("specification is empty")

    # initiate qc portal instance
    client = ptl.FractalClient()
    # from the TorsionDriveDataset collection picking up given datasetName
    ds = client.get_collection("TorsionDriveDataset", datasetName)
    ds.status([specification], status="COMPLETE")

    # Serial implementation

    # Hardcoding benchmark molecules from the lim_mobley_parsely_benchmark
    # https://openforcefield.org/force-fields/force-fields/
    # https://github.com/MobleyLab/benchmarkff/blob/91476147f35579bc52bf984839fd20c72a61d76d/molecules/set_v03_non_redundant/trim3_full_qcarchive.smi
    with open(benchmark_smiles) as f:
        bm_smiles = f.readlines()
    bm_mols = [Molecule.from_smiles(smiles) for smiles in bm_smiles]

    tb = []
    overlaps = 0
    qca_entries = []
    # Iterate over rows: `ds.df.size` is rows * columns and would run past
    # the last row as soon as the frame holds more than one column.
    for i, smiles in enumerate(ds.df.index):
        record = ds.df.iloc[i, 0]
        if record.status != "COMPLETE":
            continue

        entry = ds.get_entry(smiles)
        mapped_smiles = entry.attributes[
            "canonical_isomeric_explicit_hydrogen_mapped_smiles"
        ]
        mol1 = Molecule.from_mapped_smiles(mapped_smiles)

        for mol in bm_mols:
            # connectivity-only comparison: every optional matching criterion is off
            isomorphic, _ = Molecule.are_isomorphic(
                mol1,
                mol,
                return_atom_map=False,
                aromatic_matching=False,
                formal_charge_matching=False,
                bond_order_matching=False,
                atom_stereochemistry_matching=False,
                bond_stereochemistry_matching=False,
            )
            if isomorphic:
                overlaps += 1
                # NOTE(review): the id is looked up under 'default' rather
                # than under `specification` - confirm this is intended.
                qca_entries.append(entry.object_map['default'])
                break
        else:
            # no benchmark molecule matched: keep this entry's torsion barrier
            tb.append(torsion_barrier_for_molecule(record, mapped_smiles))

    # overlaps_qca_ids.txt is also a hardcoded file
    with open(qca_overlapped_entries, "a") as f:
        for item in qca_entries:
            f.write("%s\n" % item)

    print("No. of overlaps with benchmark set, qca entries added to overlaps_qca_ids.txt: ", overlaps)
    print("No. of COMPLETE and not overlapping with benchmark in this dataset:", len(tb), "out of ", len(ds.df))
    return tb