def branch_torsiondrive_record(self, nids, skel=False):
    """ Generate the optimizations under a TorsionDrive record.
        The optimizations are grouped by their Constraint. """
    if "Optimization" in self.drop:
        return
    if not hasattr(nids, "__iter__"):
        nids = [nids]
    nodes = [self.node_index.get(nid) for nid in nids]

    opt_ids = [
        list(self.db.get(node.payload).get("data").get(
            "optimization_history").values())
        for node in nodes
    ]
    client = self.db.get("ROOT").get("data")
    print("Downloading optimization information for",
          len(flatten_list(opt_ids, times=-1)))

    projection = None
    flat_ids = ['QCP-' + str(x) for x in flatten_list(opt_ids, times=-1)]
    opt_map = self.batch_download(flat_ids, client.query_procedures,
                                  projection=projection)

    # add the constraint nodes
    if "Constraint" in self.drop:
        return

    # opt_map holds the optimizations keyed by id; nodes are the torsiondrives
    opt_nodes = []
    for node in nodes:
        obj = self.db.get(node.payload)
        for constraint, opts in obj.get("data").get(
                "optimization_history").items():
            constraint_node = Node.Node(payload=constraint, name="Constraint")
            self.add(node.index, constraint_node)
            for index in opts:
                index = 'QCP-' + index
                opt_node = Node.Node(name="Optimization", payload=index)
                opt_nodes.append(opt_node)
                self.add(constraint_node.index, opt_node)
                self.db[index] = {"data": opt_map.get(index)}

    self.branch_optimization_record([x.index for x in opt_nodes], skel=skel)

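# For reference, the subtree built by branch_torsiondrive_record (and the
# downstream branch_* calls it triggers) has this shape:
#
#   TorsionDrive
#   └── Constraint            one per key of "optimization_history"
#       └── Optimization      QCP-... ids, records stored in self.db
#           └── Gradient      added by branch_optimization_record
#               └── Molecule  added by branch_result_record
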
def branch_optimization_record(self, nids, skel=False):
    """ Gets the gradients from the optimizations """
    if nids is None:
        return
    if "Gradient" in self.drop:
        return
    suf = "QCR-"
    if not hasattr(nids, "__iter__"):
        nids = [nids]
    nodes = [self.node_index.get(nid) for nid in nids]
    try:
        result_ids = [
            self.db.get(node.payload).get("data").get("trajectory")
            for node in nodes
        ]
    except AttributeError:
        print(nodes)
        assert False
    client = self.db.get("ROOT").get("data")
    flat_result_ids = list(
        set([suf + str(x) for x in flatten_list(result_ids, times=-1)]))

    result_nodes = []
    if skel:
        # the easy case where we already have the gradient indices
        print("Collecting gradient stubs for", len(flat_result_ids))
        result_map = None
    else:
        print("Downloading gradient information for", len(flat_result_ids))
        result_map = self.batch_download(flat_result_ids,
                                         client.query_results)

    for node in nodes:
        obj = self.db.get(node.payload)
        traj = obj.get("data").get("trajectory")
        status = obj.get("data").get("status")[:]
        if status == "COMPLETE":
            node.state = Node.CLEAN
        else:
            print("QCA: This optimization failed (" + node.payload + ")")
            continue
        if traj is not None and len(traj) > 0:
            for index in traj:
                index = suf + index
                name = "GradientStub" if skel else "Gradient"
                result_node = Node.Node(name=name, payload=index)
                result_node.state = Node.CLEAN
                result_nodes.append(result_node)
                self.add(node.index, result_node)
                pl = {} if skel else result_map.get(index)
                self.db[index] = {"data": pl}
        else:
            print("No gradient information for", node, ": Not complete?")

    self.branch_result_record([x.index for x in result_nodes], skel=skel)

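# A minimal sketch of hydrating the stubs created by skel=True above; the
# traversal assumes a populated tree named QCA and reuses only calls already
# defined in this module (batch_download, client.query_results):
#
#   client = QCA.db.get("ROOT").get("data")
#   stub_ids = [n.payload for n in QCA.node_index.values()
#               if n.name == "GradientStub"]
#   full = QCA.batch_download(stub_ids, client.query_results)
#   for index, obj in full.items():
#       QCA.db[index] = {"data": obj}
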
def branch_ds(self, nid, name, fn, skel=False):
    """ Generate the individual entries from a dataset """
    if name in self.drop:
        return
    suf = "QCP-"
    node = self.node_index.get(nid)
    ds = self.db.get(node.payload).get("data")
    records = ds.data.records
    ids = [suf + str(entry.object_map.get("default"))
           for entry in records.values()]
    client = self.db.get("ROOT").get("data")
    print("Downloading", name, "information for",
          len(flatten_list(ids, times=-1)))
    obj_map = self.batch_download(ids, client.query_procedures)

    nodes = []
    for index, obj in obj_map.items():
        entry_match = None
        for entry in records.values():
            if suf + str(entry.object_map.get("default")) == index:
                entry_match = entry
                break
        if entry_match is None:
            raise IndexError("Could not match Entry to Record")
        self.db[index] = {"entry": entry_match, "data": obj}
        nodes.append(Node.Node(name=name, payload=index))
    for v in nodes:
        self.add(node.index, v)

    init_mol_ids = [obj.get("initial_molecule") for obj in obj_map.values()]
    init_mols_are_lists = isinstance(init_mol_ids[0], list)
    if init_mols_are_lists:
        init_mol_ids = [str(x[0]) for x in init_mol_ids]
    init_mol_ids = ["QCM-" + str(x) for x in init_mol_ids]
    print("Downloading", name, "initial molecules for", len(init_mol_ids))
    init_mol_map = self.batch_download(init_mol_ids, client.query_molecules)

    for node in nodes:
        qcid = self.db.get(node.payload)
        if init_mols_are_lists:
            molid = 'QCM-' + str(qcid.get("data").get("initial_molecule")[0])
        else:
            molid = 'QCM-' + str(qcid.get("data").get("initial_molecule"))
        mol_obj = init_mol_map.get(molid)
        self.db[molid] = {"data": mol_obj}

    fn([node.index for node in nodes], skel=skel)

def branch_torsiondrive_ds(self, nid, skel=False):
    """ Generate the individual torsion drives """
    if "TorsionDrive" in self.drop:
        return
    suf = "QCP-"
    node = self.node_index.get(nid)
    ds = self.db.get(node.payload).get("data")
    records = ds.data.records
    td_ids = [suf + str(entry.object_map.get("default"))
              for entry in records.values()]
    client = self.db.get("ROOT").get("data")
    print("Downloading TorsionDrive information for",
          len(flatten_list(td_ids, times=-1)))
    td_map = self.batch_download(td_ids, client.query_procedures)

    td_nodes = []
    for index, obj in td_map.items():
        entry_match = None
        for entry in records.values():
            if suf + str(entry.object_map.get("default")) == index:
                entry_match = entry
                break
        if entry_match is None:
            raise IndexError("Could not match TDEntry to a TDRecord")
        self.db[index] = {"entry": entry_match, "data": obj}
        td_nodes.append(Node.Node(name="TorsionDrive", payload=index))
    for v in td_nodes:
        self.add(node.index, v)

    init_mol_ids = ['QCM-' + str(td.get("initial_molecule")[0])
                    for td in td_map.values()]
    print("Downloading TorsionDrive initial molecules for",
          len(init_mol_ids))
    init_mol_map = self.batch_download(init_mol_ids, client.query_molecules)

    for td_node in td_nodes:
        qcid = self.db.get(td_node.payload)
        molid = 'QCM-' + str(qcid.get("data").get("initial_molecule")[0])
        mol_obj = init_mol_map.get(molid)
        self.db[molid] = {"data": mol_obj}

    self.branch_torsiondrive_record([n.index for n in td_nodes], skel=skel)

def build_index(self, ds, drop=None):
    """ Take a QCArchive dataset, create a node for it, then expand it out """
    assert self._obj_is_qca_collection(ds)
    # the object going into the data db
    self.db[str(ds.data.id)] = {'data': ds}
    # create the index node for the tree and integrate it
    ds_node = Node.Node(name=ds.data.name, payload=str(ds.data.id))
    self.add(self.root_index, ds_node)
    self.expand_qca_dataset_as_tree(ds_node.index, skel=True, drop=drop)

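# A minimal usage sketch for build_index, assuming a live QCArchive server;
# the dataset type and name mirror the load() examples below:
#
#   import qcfractal.interface as ptl
#   client = ptl.FractalClient()
#   QCA = qca.QCATree("QCA", root_payload=client, node_index=None, db=None)
#   ds = client.get_collection("torsiondrivedataset",
#                              "openff group1 torsions")
#   QCA.build_index(ds, drop=["Hessian"])
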
def combine_by_entry(self, fn, targets=None):
    """ Compare entries using fn, and collect matches under a parent node.
        fn compares two entry objects and returns True if they match; each
        returned Folder node holds the entries that matched its first
        member. """
    new_nodes = []
    if targets is None:
        entries = list(self.iter_entry())
    elif hasattr(targets, "__iter__"):
        entries = list(targets)
    else:
        entries = [targets]
    if len(entries) == 0:
        return new_nodes

    used = set()
    for i in range(len(entries)):
        if i in used:
            continue
        ref = entries[i]
        ref_obj = self.db[ref.payload]['entry']
        used.add(i)
        print("Adding", ref, "to nodes")
        node = Node.Node(name="Folder", payload=repr(fn))
        node.add(ref)
        for j in range(i + 1, len(entries)):
            entry = entries[j]
            entry_obj = self.db[entry.payload]['entry']
            if fn(ref_obj, entry_obj):
                print("MATCH!", ref, entry)
                node.add(entry)
                used.add(j)
        new_nodes.append(node)
    return new_nodes

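# An example comparator for combine_by_entry. The attribute key below is an
# assumption about the entry schema (QCArchive entries usually carry a
# CMILES-style `attributes` dict); adjust it to the actual schema in use:
#
#   def same_molecule(a, b):
#       key = "canonical_isomeric_smiles"
#       return a.attributes.get(key) == b.attributes.get(key)
#
#   folders = QCA.combine_by_entry(same_molecule)
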
def load():
    import qcfractal.interface as ptl
    NAME = "QCA"
    # this builds the index, starting with the client node
    NAMEP = NAME + ".p"
    if os.path.exists(NAMEP):
        with open(NAMEP, 'rb') as fid:
            QCA = pickle.load(fid)
        if QCA.db is None:
            with open(NAME + ".db.p", 'rb') as fid:
                QCA.db = pickle.load(fid).db
        # recover the client from the root payload so the sections below can
        # still query the server when the tree came from a pickle
        client = QCA.db.get("ROOT").get("data")
    else:
        client = ptl.FractalClient()
        QCA = qca.QCATree(NAME, root_payload=client, node_index=None, db=None)
        ds = client.get_collection("torsiondrivedataset",
                                   "openff group1 torsions")
        QCA.build_index(ds, drop=["Optimization"])
        QCA.to_pickle(db=True)

    if 1:
        dsopt = client.get_collection("optimizationdataset",
                                      "openff optimization set 1")
        QCA.build_index(dsopt, drop=["Gradient"])
    if 0:
        QCA.cache_optimization_minimum_molecules()
    if 1:
        QCA.to_pickle(db=False)
        QCA.to_pickle(name=QCA.name + ".db.p", index=False, db=True)
    return QCA

def load():
    import qcfractal.interface as ptl
    NAME = "QCA"
    # this builds the index, starting with the client node
    NAMEP = NAME + ".p"
    if os.path.exists(NAMEP):
        with open(NAMEP, 'rb') as fid:
            QCA = pickle.load(fid)
        if QCA.db is None:
            with open(NAME + ".db.p", 'rb') as fid:
                QCA.db = pickle.load(fid).db
    else:
        client = ptl.FractalClient()
        QCA = qca.QCATree(NAME, root_payload=client, node_index=None, db=None)

        DS_TYPE = "torsiondrivedataset"
        #DS_NAME = "OpenFF Trivalent Nitrogen Set 1"
        DS_NAME = "SMIRNOFF Coverage Torsion Set 1"
        ds = client.get_collection(DS_TYPE, DS_NAME)

        # since we know there are no Hessians, skip looking for them
        drop = ["Hessian"]
        QCA.build_index(ds, drop=drop)

        # this will download the final structure of *all* minimizations
        # found; for a gridopt, this is the final structure of each point
        QCA.cache_optimization_minimum_molecules()

        # save the index and data to disk for future analysis
        QCA.to_pickle(db=False)
        QCA.to_pickle(name=QCA.name + ".db.p", index=False, db=True)
    return QCA

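# Typical use of the pickled tree, restricted to calls defined in this
# module:
#
#   QCA = load()                    # reads QCA.p / QCA.db.p when present
#   for entry in QCA.iter_entry():  # entry nodes created by build_index
#       print(entry.name, entry.payload)
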
def branch_result_record(self, nids, skel=False):
    """ Gets the molecule from the gradient """
    if not hasattr(nids, "__iter__"):
        nids = [nids]
    if len(nids) == 0:
        return
    if "Molecule" in self.drop:
        return
    nodes = [self.node_index.get(nid) for nid in nids]
    client = self.db.get("ROOT").get("data")

    mol_nodes = []
    gradstubs = [
        node for node in nodes
        if "molecule" not in self.db.get(node.payload).get("data")
    ]
    fullgrads = [
        node for node in nodes
        if "molecule" in self.db.get(node.payload).get("data")
    ]

    mol_map = {}
    suf = "QCM-"
    if len(gradstubs) > 0:
        print("Downloading molecule information from grad stubs",
              len(gradstubs))
        projection = ['id', 'molecule']
        mol_map = self.batch_download([node.payload for node in gradstubs],
                                      client.query_results,
                                      projection=projection)
        for node in gradstubs:
            obj = self.db.get(node.payload).get("data")
            obj.update(mol_map.get(node.payload))
    if skel:
        # gather the remaining molecule ids from the full gradients
        if len(fullgrads) > 0:
            print("Gathering molecule information from gradients",
                  len(fullgrads))
            mol_map.update({
                node.payload:
                self.db.get(node.payload).get("data").get('molecule')
                for node in fullgrads
            })
    else:
        print("Downloading molecule information for", len(nodes))
        mol_map = self.batch_download([
            self.db.get(node.payload).get("data").get('molecule')
            for node in nodes
        ], client.query_molecules)

    for node in nodes:
        if node.payload is None:
            print(node)
            assert False
        name = "MoleculeStub" if skel else "Molecule"
        state = "NEW" if skel else "CLEAN"
        index = suf + self.db.get(node.payload).get("data").get('molecule')
        mol_node = Node.Node(name=name, payload=index)
        mol_node.state = state
        self.add(node.index, mol_node)
        mol_nodes.append(mol_node)
        if skel and index not in self.db:
            self.db[index] = {"data": mol_map.get(index)}
        assert len(node.children) > 0

    # hessian stuff
    if "Hessian" in self.drop:
        return
    ids = [x.payload for x in mol_nodes]
    name = "Hessian"
    projection = None
    if skel:
        projection = ['id', 'molecule']
        name = "HessianStub"
    print("Downloading", name, "for", len(mol_nodes))
    hess_objs = self.batch_download_hessian(ids, client.query_results,
                                            projection=projection)
    if len(hess_objs) > 0:
        for mol in mol_nodes:
            payload = mol.payload
            for hess in hess_objs:
                if payload == ("QCM-" + hess_objs.get(hess).get("molecule")):
                    hess_node = Node.Node(name=name, payload=hess)
                    hess_node.state = Node.CLEAN
                    self.add(mol.index, hess_node)
                    self.db[hess] = {"data": hess_objs.get(hess)}

def branch_gridopt_record(self, nids, skel=False):
    """ Generate the optimizations under a GridOptimization record.
        The optimizations are separated by one or more Constraints. """
    if "GridOpt" in self.drop:
        return
    if not hasattr(nids, "__iter__"):
        nids = [nids]
    nodes = [self.node_index.get(nid) for nid in nids]

    opt_ids = [
        list(self.db.get(node.payload).get("data").get(
            "grid_optimizations").values())
        for node in nodes
    ]
    client = self.db.get("ROOT").get("data")
    print("Downloading optimization information for",
          len(flatten_list(opt_ids, times=-1)))

    projection = None
    flat_ids = ['QCP-' + str(x) for x in flatten_list(opt_ids, times=-1)]
    opt_map = self.batch_download(flat_ids, client.query_procedures,
                                  projection=projection)

    # add the constraint nodes
    if "Constraint" in self.drop:
        return

    opt_nodes = []
    for node in nodes:
        obj = self.db.get(node.payload)
        scans = obj.get("data").get("keywords").__dict__.get("scans")
        assert len(scans) == 1
        scan = scans[0].__dict__
        for constraint, opts in obj.get("data").get(
                "grid_optimizations").items():
            # TODO: need to cross-reference the index to the actual
            # constraint value
            val = eval(constraint)
            # handle when the index is "preoptimization" rather than e.g. [0]
            if isinstance(val, str):
                continue
            step = scan.get("steps")[val[0]]
            pl = (scan.get("type")[:], tuple(scan.get("indices")), step)
            constraint_node = Node.Node(payload=pl, name="Constraint")
            self.add(node.index, constraint_node)

            index = 'QCP-' + opts
            opt_node = Node.Node(name="Optimization", payload=index)
            opt_nodes.append(opt_node)
            self.add(constraint_node.index, opt_node)
            self.db[index] = {"data": opt_map.get(index)}

    self.branch_optimization_record([x.index for x in opt_nodes], skel=skel)

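# The Constraint payload assembled above is a plain tuple of
# (scan type, atom indices, step value); an illustrative example:
#
#   ("distance", (0, 1), 1.5)
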