Beispiel #1
0
    def branch_torsiondrive_record(self, nids, skel=False):
        """Attach Constraint and Optimization nodes below the given nodes.

        For every node id in ``nids`` the ``optimization_history`` mapping of
        the stored record is read. Each key becomes a "Constraint" child of
        the node and each id listed under that key becomes an "Optimization"
        child of the constraint, stored in ``self.db`` under "QCP-<id>".
        The new optimization nodes are then passed to
        ``branch_optimization_record``.

        Args:
            nids: A node id or an iterable of node ids.
            skel: Forwarded unchanged to ``branch_optimization_record``.

        Returns:
            None. Nothing is done when "Optimization" is in ``self.drop``;
            when "Constraint" is in ``self.drop`` the records are still
            requested but no nodes are added.
        """
        if "Optimization" in self.drop:
            return

        if not hasattr(nids, "__iter__"):
            nids = [nids]
        nodes = [self.node_index.get(nid) for nid in nids]

        opt_ids = [
            list(
                self.db.get(node.payload).get("data").get(
                    "optimization_history").values()) for node in nodes
        ]
        client = self.db.get("ROOT").get("data")

        # Flatten once and reuse the result for both the report and the ids.
        flat_opt_ids = flatten_list(opt_ids, times=-1)
        print("Downloading optimization information for", len(flat_opt_ids))

        flat_ids = ['QCP-' + str(x) for x in flat_opt_ids]
        opt_map = self.batch_download(flat_ids,
                                      client.query_procedures,
                                      projection=None)

        # add the constraint nodes
        if "Constraint" in self.drop:
            return
        opt_nodes = []

        # opt_map holds the downloaded optimizations keyed by prefixed id;
        # nodes are the records that own them
        for node in nodes:
            history = self.db.get(node.payload).get("data").get(
                "optimization_history")
            for constraint, opts in history.items():
                constraint_node = Node.Node(payload=constraint,
                                            name="Constraint")
                self.add(node.index, constraint_node)

                for opt_id in opts:
                    # str() keeps the key identical to the one built for the
                    # download above, whatever type the stored id has
                    index = 'QCP-' + str(opt_id)
                    opt_node = Node.Node(name="Optimization", payload=index)
                    opt_nodes.append(opt_node)
                    self.add(constraint_node.index, opt_node)
                    self.db[index] = {"data": opt_map.get(index)}

        self.branch_optimization_record([x.index for x in opt_nodes],
                                        skel=skel)
Beispiel #2
0
    def branch_optimization_record(self, nids, skel=False):
        """Attach gradient nodes below the given Optimization nodes.

        For every node id in ``nids`` the stored record's ``trajectory`` is
        read and one child node is added per trajectory id, stored in
        ``self.db`` under "QCR-<id>". Records whose ``status`` is not
        "COMPLETE" are reported and skipped. The new nodes are then passed
        to ``branch_result_record``.

        Args:
            nids: A node id or an iterable of node ids; ``None`` is a no-op.
            skel: When true, nodes are named "GradientStub" and get an empty
                payload dict; otherwise they are named "Gradient" and their
                data is fetched through ``batch_download``.

        Returns:
            None. Nothing is done when "Gradient" is in ``self.drop``.

        Raises:
            AssertionError: If a node or its record cannot be read while
                collecting the trajectories.
        """
        if nids is None:
            return
        if "Gradient" in self.drop:
            return
        suf = "QCR-"
        if not hasattr(nids, "__iter__"):
            nids = [nids]
        nodes = [self.node_index.get(nid) for nid in nids]
        try:
            result_ids = [
                self.db.get(node.payload).get("data").get("trajectory")
                for node in nodes
            ]
        except AttributeError as err:
            print(nodes)
            # Raised explicitly so the failure is not lost under ``python -O``
            raise AssertionError(
                "could not read the trajectory of every optimization node"
            ) from err
        client = self.db.get("ROOT").get("data")
        flat_result_ids = list(
            {suf + str(x) for x in flatten_list(result_ids, times=-1)})

        result_nodes = []
        if skel:
            # the easy case where we only need the gradient indices
            print("Collecting gradient stubs for", len(flat_result_ids))
            result_map = None
        else:
            print("Downloading gradient information for", len(flat_result_ids))
            result_map = self.batch_download(flat_result_ids,
                                             client.query_results)

        name = "GradientStub" if skel else "Gradient"
        for node in nodes:
            obj = self.db.get(node.payload)
            traj = obj.get("data").get("trajectory")
            status = obj.get("data").get("status")[:]
            if status == "COMPLETE":
                node.state = Node.CLEAN
            else:
                print("QCA: This optimization failed (" + node.payload + ")")
                continue
            if traj is not None and len(traj) > 0:
                for step_id in traj:
                    index = suf + str(step_id)
                    # use the name chosen from ``skel`` rather than always
                    # labelling the node as a stub
                    result_node = Node.Node(name=name, payload=index)
                    # the original assigned to a misspelled local here, so
                    # the state was never actually set on the node
                    result_node.state = Node.CLEAN
                    result_nodes.append(result_node)
                    self.add(node.index, result_node)
                    pl = {} if skel else result_map.get(index)
                    self.db[index] = {"data": pl}
            else:
                print("No gradient information for", node, ": Not complete?")

        self.branch_result_record([x.index for x in result_nodes], skel=skel)
Beispiel #3
0
    def branch_ds(self, nid, name, fn, skel=False):
        """Create one child node per record of a dataset node.

        The dataset stored for node ``nid`` is read, the record referenced by
        each entry's ``object_map["default"]`` is downloaded, and every
        downloaded record is stored in ``self.db`` under "QCP-<id>" together
        with its entry. The records' initial molecules are downloaded as
        well and stored under "QCM-<id>". Finally ``fn`` is called with the
        indices of the new nodes.

        Args:
            nid: Node id of the dataset node.
            name: Name given to every created node; nothing is done when it
                is in ``self.drop``.
            fn: Callable invoked as ``fn(node_indices, skel=skel)``.
            skel: Forwarded unchanged to ``fn``.

        Returns:
            None.

        Raises:
            IndexError: If a downloaded record matches no dataset entry.
        """
        if name in self.drop:
            return
        suf = "QCP-"
        node = self.node_index.get(nid)
        ds = self.db.get(node.payload).get("data")
        records = ds.data.records

        ids = [
            suf + str(entry.object_map.get("default"))
            for entry in records.values()
        ]
        # One lookup table instead of rescanning every entry per record.
        # With duplicate ids the last entry wins, as in a full scan.
        entry_by_id = dict(zip(ids, records.values()))

        client = self.db.get("ROOT").get("data")
        print("Downloading", name, "information for",
              len(flatten_list(ids, times=-1)))
        obj_map = self.batch_download(ids, client.query_procedures)

        nodes = []
        for index, obj in obj_map.items():
            entry_match = entry_by_id.get(index)
            if entry_match is None:
                raise IndexError("Could not match Entry to Record")
            self.db[index] = {"entry": entry_match, "data": obj}
            nodes.append(Node.Node(name=name, payload=index))
        for child in nodes:
            self.add(node.index, child)

        init_mols = [obj.get("initial_molecule") for obj in obj_map.values()]
        # The first record decides whether initial molecules are stored as
        # lists; an empty download simply has nothing to unwrap.
        init_mols_are_lists = bool(init_mols) and isinstance(
            init_mols[0], list)
        if init_mols_are_lists:
            # take the first molecule id of each list (not the first
            # character of the list's string form)
            init_mols = [mol[0] for mol in init_mols]
        init_mol_ids = ["QCM-" + str(mol) for mol in init_mols]

        print("Downloading", name, "initial molecules for  for",
              len(init_mol_ids))
        init_mol_map = self.batch_download(init_mol_ids,
                                           client.query_molecules)

        for child in nodes:
            mol = self.db.get(child.payload).get("data").get(
                "initial_molecule")
            if init_mols_are_lists:
                mol = mol[0]
            molid = 'QCM-' + str(mol)
            self.db[molid] = {"data": init_mol_map.get(molid)}

        fn([child.index for child in nodes], skel=skel)
Beispiel #4
0
    def branch_torsiondrive_ds(self, nid, skel=False):
        """Create one "TorsionDrive" child node per record of a dataset node.

        The dataset stored for node ``nid`` is read, the record referenced by
        each entry's ``object_map["default"]`` is downloaded, and every
        downloaded record is stored in ``self.db`` under "QCP-<id>" together
        with its entry. The first initial molecule of each record is
        downloaded and stored under "QCM-<id>". The new nodes are then
        passed to ``branch_torsiondrive_record``.

        Args:
            nid: Node id of the dataset node.
            skel: Forwarded unchanged to ``branch_torsiondrive_record``.

        Returns:
            None. Nothing is done when "TorsionDrive" is in ``self.drop``.

        Raises:
            IndexError: If a downloaded record matches no dataset entry.
        """
        if "TorsionDrive" in self.drop:
            return
        suf = "QCP-"
        node = self.node_index.get(nid)
        ds = self.db.get(node.payload).get("data")
        records = ds.data.records

        td_ids = [
            suf + str(entry.object_map.get("default"))
            for entry in records.values()
        ]
        # One lookup table instead of rescanning every entry per record.
        # With duplicate ids the last entry wins, as in a full scan.
        entry_by_id = dict(zip(td_ids, records.values()))

        client = self.db.get("ROOT").get("data")
        print("Downloading TorsionDrive information for",
              len(flatten_list(td_ids, times=-1)))
        td_map = self.batch_download(td_ids, client.query_procedures)

        td_nodes = []
        for index, obj in td_map.items():
            entry_match = entry_by_id.get(index)
            if entry_match is None:
                raise IndexError("Could not match TDEntry to a TDRecord")
            self.db[index] = {"entry": entry_match, "data": obj}
            td_nodes.append(Node.Node(name="TorsionDrive", payload=index))
        for td_node in td_nodes:
            self.add(node.index, td_node)

        init_mol_ids = [
            'QCM-' + str(td.get("initial_molecule")[0])
            for td in td_map.values()
        ]
        print("Downloading TorsionDrive initial molecules for  for",
              len(init_mol_ids))
        init_mol_map = self.batch_download(init_mol_ids,
                                           client.query_molecules)

        for td_node in td_nodes:
            record = self.db.get(td_node.payload).get("data")
            molid = 'QCM-' + str(record.get("initial_molecule")[0])
            self.db[molid] = {"data": init_mol_map.get(molid)}

        self.branch_torsiondrive_record([n.index for n in td_nodes],
                                        skel=skel)
Beispiel #5
0
    def build_index(self, ds, drop=None):
        """
        Register a QCA dataset below the root node and expand it.

        The dataset is stored in ``self.db`` under ``str(ds.data.id)``, a
        node named after ``ds.data.name`` is added under ``self.root_index``,
        and the node is expanded with ``expand_qca_dataset_as_tree`` in
        skeleton mode, forwarding ``drop``.
        """
        assert self._obj_is_qca_collection(ds)

        # key shared by the data db entry and the node payload
        ds_key = str(ds.data.id)
        self.db[ds_key] = {'data': ds}

        # hook the dataset node into the tree, then grow the tree below it
        dataset_node = Node.Node(name=ds.data.name, payload=ds_key)
        self.add(self.root_index, dataset_node)
        self.expand_qca_dataset_as_tree(dataset_node.index,
                                        skel=True,
                                        drop=drop)
Beispiel #6
0
    def combine_by_entry(self, fn, targets=None):
        """
        Group entry nodes into "Folder" nodes using a pairwise predicate.

        ``fn`` receives the stored ``'entry'`` objects of two nodes and
        returns a truthy value when they belong together. Each entry not yet
        grouped starts a new folder and is compared against every entry that
        comes after it. ``targets`` may be a single node, an iterable of
        nodes, or ``None`` to use ``self.iter_entry()``.

        Returns the list of folder nodes (empty when there are no entries).
        """
        if targets is None:
            entries = list(self.iter_entry())
        elif hasattr(targets, "__iter__"):
            entries = list(targets)
        else:
            entries = [targets]

        folders = []
        if not entries:
            return folders

        grouped = set()
        for pos, ref in enumerate(entries):
            if pos in grouped:
                continue
            ref_obj = self.db[ref.payload]['entry']
            grouped.add(pos)

            print("Adding", ref, "to nodes")
            folder = Node.Node(name="Folder", payload=repr(fn))
            folder.add(ref)

            # every later entry is compared, including ones already grouped
            for other_pos, other in enumerate(entries[pos + 1:], pos + 1):
                other_obj = self.db[other.payload]['entry']
                if not fn(ref_obj, other_obj):
                    continue
                print("MATCH!", ref, other)
                folder.add(other)
                grouped.add(other_pos)
            folders.append(folder)

        return folders
Beispiel #7
0
def load():
    """Load the cached "QCA" tree, or build it, then index an optimization set.

    If "QCA.p" exists the tree is unpickled from it, and its ``db`` is read
    from "QCA.db.p" when the tree was stored without one. Otherwise a new
    tree is built from the "openff group1 torsions" torsion drive dataset
    and pickled. In both cases the "openff optimization set 1" dataset is
    then indexed and the tree and its db are written back to disk.

    Returns:
        The loaded or newly built tree object.
    """
    import qcfractal.interface as ptl

    NAME = "QCA"
    NAMEP = NAME + ".p"

    # Created before branching: the optimization dataset below is fetched on
    # the cached path too, where ``client`` used to be undefined (NameError).
    client = ptl.FractalClient()

    if os.path.exists(NAMEP):
        # NOTE(review): pickle.load can execute arbitrary code; only load
        # cache files that this program wrote itself.
        with open(NAMEP, 'rb') as fid:
            QCA = pickle.load(fid)
        if QCA.db is None:
            with open(NAME + ".db.p", 'rb') as fid:
                QCA.db = pickle.load(fid).db
    else:
        # this builds the index, starting with the client node
        # NOTE(review): client_node is never used afterwards; kept because
        # constructing a Node may have side effects not visible here.
        client_node = Node.Node(payload=client, name="client")
        QCA = qca.QCATree(NAME, root_payload=client, node_index=None, db=None)
        ds = client.get_collection("torsiondrivedataset",
                                   "openff group1 torsions")
        QCA.build_index(ds, drop=["Optimization"])
        QCA.to_pickle(db=True)

    dsopt = client.get_collection("optimizationdataset",
                                  "openff optimization set 1")
    QCA.build_index(dsopt, drop=["Gradient"])

    # Caching the minimum molecules is intentionally disabled here:
    # QCA.cache_optimization_minimum_molecules()

    # store the index and the data db in separate files
    QCA.to_pickle(db=False)
    QCA.to_pickle(name=QCA.name + ".db.p", index=False, db=True)

    return QCA
def load():
    """Load the cached "QCA" tree, or build and cache it.

    If "QCA.p" exists the tree is unpickled from it, and its ``db`` is read
    from "QCA.db.p" when the tree was stored without one. Otherwise a new
    tree is built from the "SMIRNOFF Coverage Torsion Set 1" torsion drive
    dataset (skipping Hessians), the minimum molecules of its optimizations
    are cached, and the tree and its db are pickled to disk.

    Returns:
        The loaded or newly built tree object.
    """
    import qcfractal.interface as ptl

    NAME = "QCA"
    NAMEP = NAME + ".p"
    if os.path.exists(NAMEP):
        # NOTE(review): pickle.load can execute arbitrary code; only load
        # cache files that this program wrote itself.
        with open(NAMEP, 'rb') as fid:
            QCA = pickle.load(fid)
        if QCA.db is None:
            with open(NAME + ".db.p", 'rb') as fid:
                QCA.db = pickle.load(fid).db

    else:
        # this builds the index, starting with the client node
        client = ptl.FractalClient()
        # NOTE(review): client_node is never used afterwards; kept because
        # constructing a Node may have side effects not visible here.
        client_node = Node.Node(payload=client, name="client")
        QCA = qca.QCATree(NAME, root_payload=client, node_index=None, db=None)

        DS_TYPE = "torsiondrivedataset"
        # alternative dataset: "OpenFF Trivalent Nitrogen Set 1"
        DS_NAME = "SMIRNOFF Coverage Torsion Set 1"
        # fetch the collection once; the result of an earlier duplicate
        # request was simply discarded
        ds = client.get_collection(DS_TYPE, DS_NAME)

        # since we know there are no Hessians, skip looking for them
        drop = ["Hessian"]
        QCA.build_index(ds, drop=drop)

        # this will download the final structure of *all* minimizations found
        # for a gridopt, this will just be the final structure of each point
        QCA.cache_optimization_minimum_molecules()

        # save the index and data to disk for future analysis
        QCA.to_pickle(db=False)
        QCA.to_pickle(name=QCA.name + ".db.p", index=False, db=True)

    return QCA
Beispiel #9
0
    def branch_result_record(self, nids, skel=False):
        """Attach molecule nodes, and Hessian nodes, below gradient nodes.

        For every node id in ``nids`` a child node is added whose payload is
        "QCM-" plus the ``molecule`` value of the stored record. Records that
        do not yet contain a ``molecule`` key are first completed through
        ``batch_download`` with an ``['id', 'molecule']`` projection. Unless
        "Hessian" is in ``self.drop``, ``batch_download_hessian`` is then
        queried and each returned object is attached under the molecule node
        whose payload matches its ``molecule``.

        Args:
            nids: A node id or an iterable of node ids.
            skel: When true, molecule nodes are named "MoleculeStub" with
                state "NEW"; otherwise "Molecule" with state "CLEAN" and the
                molecules are requested through ``batch_download``.

        Returns:
            None. Nothing is done for empty ``nids`` or when "Molecule" is
            in ``self.drop``.

        Raises:
            AssertionError: If a node has no payload, or has no children
                after its molecule node was added.
        """
        nids = list(nids) if hasattr(nids, "__iter__") else [nids]
        # Nothing to expand: return quietly, like the guards of the other
        # ``branch_*`` methods, instead of failing on a debugging assert.
        if not nids or "Molecule" in self.drop:
            return

        nodes = [self.node_index.get(nid) for nid in nids]
        client = self.db.get("ROOT").get("data")

        # split the nodes by whether their record already names a molecule
        gradstubs = []
        fullgrads = []
        for node in nodes:
            if "molecule" in self.db.get(node.payload).get("data"):
                fullgrads.append(node)
            else:
                gradstubs.append(node)

        mol_map = {}
        suf = "QCM-"

        if len(gradstubs) > 0:
            print("Downloading molecule information from grad stubs",
                  len(gradstubs))
            projection = ['id', 'molecule']
            mol_map = self.batch_download([node.payload for node in gradstubs],
                                          client.query_results,
                                          projection=projection)
            # merge the downloaded fields into the stored stub records
            for node in gradstubs:
                obj = self.db.get(node.payload).get("data")
                obj.update(mol_map.get(node.payload))
        if skel:
            # need to gather gradient stubs to get the molecules
            if len(fullgrads) > 0:
                print("Gathering molecule information from gradients",
                      len(fullgrads))
                mol_map.update({
                    node.payload:
                    self.db.get(node.payload).get("data").get('molecule')
                    for node in fullgrads
                })
        else:
            print("Downloading molecule information for", len(nodes))
            mol_map = self.batch_download([
                self.db.get(node.payload).get("data").get('molecule')
                for node in nodes
            ], client.query_molecules)

        mol_name = "MoleculeStub" if skel else "Molecule"
        state = "NEW" if skel else "CLEAN"
        mol_nodes = []
        for node in nodes:
            if node.payload is None:
                print(node)
                raise AssertionError("gradient node has no payload")

            index = suf + self.db.get(node.payload).get("data").get('molecule')
            mol_node = Node.Node(name=mol_name, payload=index)
            mol_node.state = state
            self.add(node.index, mol_node)
            mol_nodes.append(mol_node)

            if skel and index not in self.db:
                self.db[index] = {"data": mol_map.get(index)}
            assert len(node.children) > 0

        # hessian stuff
        if "Hessian" in self.drop:
            return

        ids = [x.payload for x in mol_nodes]
        label = "Hessian"
        projection = None
        if skel:
            projection = ['id', 'molecule']
            label = "HessianStub"

        print("Downloading", label, "for", len(mol_nodes))
        hess_objs = self.batch_download_hessian(ids,
                                                client.query_results,
                                                projection=projection)

        if len(hess_objs) > 0:
            for mol in mol_nodes:
                payload = mol.payload
                for hess in hess_objs:
                    pl = hess_objs.get(hess)
                    if payload == ("QCM-" + pl.get("molecule")):
                        # NOTE(review): the node is named "Hessian" even in
                        # skeleton mode, where the report above says
                        # "HessianStub" - confirm which is intended.
                        hess_node = Node.Node(name="Hessian", payload=hess)
                        hess_node.state = Node.CLEAN
                        self.add(mol.index, hess_node)
                        self.db[hess] = {"data": pl}
Beispiel #10
0
    def branch_gridopt_record(self, nids, skel=False):
        """Attach Constraint and Optimization nodes below grid optimizations.

        For every node id in ``nids`` the ``grid_optimizations`` mapping of
        the stored record is read. Each key is parsed as a Python literal;
        string keys (e.g. "preoptimization") are skipped. For the others the
        first element selects a step of the record's single scan, and a
        "Constraint" node with payload ``(type, indices, step)`` is added.
        The mapped id becomes an "Optimization" child of that constraint,
        stored in ``self.db`` under "QCP-<id>". The new optimization nodes
        are then passed to ``branch_optimization_record``.

        Args:
            nids: A node id or an iterable of node ids.
            skel: Forwarded unchanged to ``branch_optimization_record``.

        Returns:
            None. Nothing is done when "GridOpt" is in ``self.drop``; when
            "Constraint" is in ``self.drop`` the records are still requested
            but no nodes are added.

        Raises:
            AssertionError: If a record does not have exactly one scan.
            ValueError, SyntaxError: If a ``grid_optimizations`` key is not
                a Python literal.
        """
        import ast

        if "GridOpt" in self.drop:
            return

        if not hasattr(nids, "__iter__"):
            nids = [nids]
        nodes = [self.node_index.get(nid) for nid in nids]

        opt_ids = [
            list(
                self.db.get(node.payload).get("data").get(
                    "grid_optimizations").values()) for node in nodes
        ]
        client = self.db.get("ROOT").get("data")

        # Flatten once and reuse the result for both the report and the ids.
        flat_opt_ids = flatten_list(opt_ids, times=-1)
        print("Downloading optimization information for", len(flat_opt_ids))

        flat_ids = ['QCP-' + str(x) for x in flat_opt_ids]
        opt_map = self.batch_download(flat_ids,
                                      client.query_procedures,
                                      projection=None)

        # add the constraint nodes
        if "Constraint" in self.drop:
            return
        opt_nodes = []

        for node in nodes:
            obj = self.db.get(node.payload)
            scans = obj.get("data").get("keywords").__dict__.get("scans")
            # only single-scan grid optimizations are handled
            assert len(scans) == 1
            scan = scans[0].__dict__
            grid = obj.get("data").get("grid_optimizations")
            for constraint, opts in grid.items():
                # TODO need to cross ref the index to the actual constraint val

                # SECURITY: the keys come from downloaded records, so they
                # are parsed as literals only; this replaces a bare eval(),
                # which would execute arbitrary expressions.
                val = ast.literal_eval(constraint)

                # handle when index is "preoptimization" rather than e.g. [0]
                if isinstance(val, str):
                    continue
                step = scan.get("steps")[val[0]]
                pl = (scan.get("type")[:], tuple(scan.get("indices")), step)
                constraint_node = Node.Node(payload=pl, name="Constraint")
                self.add(node.index, constraint_node)

                # str() keeps the key identical to the one built for the
                # download above, whatever type the stored id has
                index = 'QCP-' + str(opts)
                opt_node = Node.Node(name="Optimization", payload=index)
                opt_nodes.append(opt_node)
                self.add(constraint_node.index, opt_node)
                self.db[index] = {"data": opt_map.get(index)}

        self.branch_optimization_record([x.index for x in opt_nodes],
                                        skel=skel)