def AppendFromDirectory(self, apath_):
    """
    Append every .xyz file found directly inside apath_ to this set.

    Each matching file is read in full, parsed with Mol.FromXYZString and
    appended to self.mols. The bare file name (without the directory) is
    stored under the "from_file" key of the new molecule's properties.

    Args:
        apath_: the directory to scan; sub-directories are not descended into.
    """
    for fname in os.listdir(apath_):
        if not fname.endswith(".xyz"):
            continue
        m = Mol()
        m.properties = {"from_file": fname}
        # os.listdir yields bare names, so the directory must be joined back
        # on; opening the bare name only works when apath_ is the cwd.
        with open(os.path.join(apath_, fname), 'r') as f:
            m.FromXYZString(f.read())
        self.mols.append(m)
    return
def DistortAlongNormals(self, npts=8, random=True, disp=.2):
    """
    Build a new set holding normal-mode-distorted copies of every molecule.

    Args:
        npts: the number of points to sample along each normal mode
            coordinate (forwarded to Mol.ScanNormalModes).
        random: when true, the global numpy RNG is reseeded from the clock
            and the molecules are visited in a random permutation.
        disp: the maximum displacement of atoms along a mode (forwarded to
            Mol.ScanNormalModes).

    Returns:
        An MSet named self.name+"_NEQ" with one Mol per (mode, point) of
        every source molecule. Each new Mol shares (does not copy) the
        DistMatrix of the molecule it was derived from.
    """
    # Single-argument print: same output under the Python 2 statement and
    # the Python 3 function.
    print("%s %s" % ("Making distorted clone of:", self.name))
    s = MSet(self.name+"_NEQ")
    if random:
        np.random.seed(int(time.time()))
        visit_order = np.random.permutation(len(self.mols))
    else:
        visit_order = range(len(self.mols))
    for j in visit_order:
        parent = self.mols[j]
        newcoords = parent.ScanNormalModes(npts, disp)
        n_modes, n_points = newcoords.shape[0], newcoords.shape[1]
        for mode in range(n_modes):
            for point in range(n_points):
                distorted = Mol(parent.atoms, newcoords[mode, point, :, :])
                distorted.DistMatrix = parent.DistMatrix
                s.mols.append(distorted)
    return s
def ReadXYZ(self, filename=None, xyz_type='mol', eqforce=False):
    """
    Read a concatenated multi-molecule XYZ file into this set.

    The file self.path+filename+".xyz" is scanned for lines containing
    'Comment:'. The line just before each such line is parsed as the atom
    count, and that count line, the comment line and the following atom
    lines are handed to FromXYZString of a freshly appended molecule.

    Args:
        filename: base name of the file (no extension); defaults to self.name.
        xyz_type: 'mol' to create Mol objects, 'frag_of_mol' to create
            Frag_of_Mol objects.
        eqforce: when true, self.EQ_forces() is called after reading.

    Raises:
        Exception: if xyz_type is neither 'mol' nor 'frag_of_mol' (only
            detected once the first 'Comment:' line is reached).
    """
    if filename is None:
        filename = self.name
    # Read everything up front so the handle is released before parsing.
    with open(self.path+filename+".xyz", "r") as f:
        txts = f.readlines()
    for line, text in enumerate(txts):
        if 'Comment:' not in text:
            continue
        line0 = line-1  # the atom-count line precedes the comment line
        nlines = int(txts[line0])
        if xyz_type == 'mol':
            mol = Mol()
        elif xyz_type == 'frag_of_mol':
            mol = Frag_of_Mol()
        else:
            raise Exception("Unknown Type!")
        self.mols.append(mol)
        mol.FromXYZString(''.join(txts[line0:line0+nlines+2]))
        mol.name = str(txts[line0+1])
        mol.properties["set_name"] = self.name
    if self.center:
        self.CenterSet()
    if eqforce:
        self.EQ_forces()
    LOGGER.debug("Read "+str(len(self.mols))+" molecules from XYZ")
    return
def ReadXYZUnpacked(self, path="/Users/johnparkhill/gdb9/", has_energy=False, has_force=False, has_mmff94=False):
    """
    Reads XYZs in distinct files in one directory as a molset

    Every regular file in path whose name ends in '.xyz' becomes one Mol,
    loaded with Mol.ReadGDB9 and tagged with this set's name under
    properties["set_name"]. After all files are read the set is centered
    if self.center is truthy.

    Args:
        path: the directory which contains the .xyz files to be read
        has_energy: switch to turn on reading the energy from the comment
            line as formatted from the md_dataset on quantum-machine.org
        has_force: switch to turn on reading the force from the comment
            line as formatted from the md_dataset on quantum-machine.org
        has_mmff94: switch to turn on Mol.MMFF94_Force_from_xyz for each
            file (presumably an MMFF94 force stored in the file -- confirm
            against Mol).
    """
    from os import listdir
    from os.path import isfile, join
    xyz_files = [f for f in listdir(path)
                 if isfile(join(path, f)) and f.endswith('.xyz')]
    for fname in xyz_files:
        # join() rather than path+fname so that a path given without a
        # trailing separator resolves to the same file the listing saw.
        full_path = join(path, fname)
        self.mols.append(Mol())
        mol = self.mols[-1]
        mol.ReadGDB9(full_path, fname)
        mol.properties["set_name"] = self.name
        if has_force:
            mol.Force_from_xyz(full_path)
        if has_energy:
            mol.Energy_from_xyz(full_path)
        if has_mmff94:
            mol.MMFF94_Force_from_xyz(full_path)
    if self.center:
        self.CenterSet()
    return
def ReadXYZ(self, filename):
    """
    Read a single file of XYZ records separated by "@@@" into this set.

    The file self.path + filename + ".xyz" is read whole and split on
    "@@@". Whatever precedes the first separator is discarded; each
    remaining chunk is parsed by FromXYZString of a newly appended Mol.

    Args:
        filename: base name of the file, without the ".xyz" extension.
    """
    # Context manager so the handle is closed even if parsing fails later.
    with open(self.path + filename + ".xyz", "r") as f:
        txts = f.read()
    for chunk in txts.split("@@@")[1:]:
        self.mols.append(Mol())
        self.mols[-1].FromXYZString(chunk)
    return
def ReadGDB9Unpacked(self, path="/Users/johnparkhill/gdb9/", mbe_order=3):
    """
    Read every '.xyz' file in a directory into this set via Mol.ReadGDB9.

    Args:
        path: the directory which contains the .xyz files to be read.
        mbe_order: forwarded unchanged as the second argument of
            Mol.ReadGDB9 for every file.
    """
    from os import listdir
    from os.path import isfile, join
    xyz_files = [f for f in listdir(path)
                 if isfile(join(path, f)) and f.endswith('.xyz')]
    for fname in xyz_files:
        self.mols.append(Mol())
        # join() rather than path + fname so that a path given without a
        # trailing separator resolves to the same file the listing saw.
        self.mols[-1].ReadGDB9(join(path, fname), mbe_order)
    return
def BruteForceAtoms(self, mol_, emb_):
    """
    Exhaustively search element assignments that best reproduce emb_.

    Every assignment of the atomic numbers 1, 6, 7 and 8 to the atoms of
    mol_ is tried with the coordinates held fixed, and each candidate is
    scored with self.EmbAtomwiseErr. The search starts from an error of
    100.0, so if no candidate scores below that the atoms of (a copy of)
    mol_ are returned unchanged.

    Args:
        mol_: molecule supplying the coordinates and the number of atoms.
        emb_: the target embedding passed to self.EmbAtomwiseErr.

    Returns:
        The atoms array of the best-scoring candidate.
    """
    print("Searching for best atom fit")
    elements = [1, 6, 7, 8]
    n_atoms = len(mol_.atoms)
    best_err = 100.0
    best_mol = copy.deepcopy(mol_)
    # The product is consumed lazily; there are 4**n_atoms candidates.
    for assignment in itertools.product(elements, repeat=n_atoms):
        candidate = Mol(np.array(assignment), mol_.coords)
        candidate_err = self.EmbAtomwiseErr(candidate, emb_)
        if not (candidate_err < best_err):
            continue
        best_mol = copy.deepcopy(candidate)
        best_err = candidate_err
        # NOTE(review): progress print assumed to sit inside the
        # improvement branch -- confirm against the original layout.
        print(best_err)
    print(best_mol.atoms)
    return best_mol.atoms
def Generate_MBE_term_General(self, order):
    """
    Populate self.mbe_frags[order] with the many-body fragments of this order.

    Does nothing if self.mbe_frags already has an entry for order.

    order == 1:
        For each fragment definition in self.frag_list (its "atom" entry is
        converted with String_To_Atoms and atoi), self.atoms is scanned left
        to right for contiguous runs equal to that element sequence. A run
        is only considered if its first atom has not been claimed by an
        earlier match. Every match is recorded in self.atoms_of_frags,
        self.type_of_frags and self.type_of_frags_dict, and a Mol holding
        copies of the matched atoms/coords is appended with properties
        "mbe_atom_index" and "center" (plus "center_atom" when self.center
        is "Heaviest"). The center follows self.center: "Heaviest", "COM"
        or "COP"; anything else prints a notice and falls back to the mass
        center.

    order != 1:
        For every combination of `order` first-order fragments (requires
        self.mbe_frags[1] to exist) a Mol concatenating their atoms and
        coords is appended with properties "mbe_atom_index", "mono_index",
        "natom_each_mono" and "center" (the list of monomer centers).

    Args:
        order: the many-body order to generate.
    """
    if order in self.mbe_frags:
        return
    if order == 1:
        self.mbe_frags[order] = []
        masked = set()  # atom indices already claimed by a fragment
        frag_index = 0
        for i, dic in enumerate(self.frag_list):
            self.type_of_frags_dict[i] = []
            frag_atoms = [atoi[atom] for atom in String_To_Atoms(dic["atom"])]
            num_frag_atoms = len(frag_atoms)
            if num_frag_atoms == 0:
                # An empty pattern matches everywhere without advancing the
                # scan, which would never terminate.
                continue
            j = 0
            while j < self.NAtoms():
                if j in masked or list(self.atoms[j:j+num_frag_atoms]) != frag_atoms:
                    j += 1
                    continue
                masked.update(range(j, j+num_frag_atoms))
                self.atoms_of_frags.append(list(range(j, j+num_frag_atoms)))
                self.type_of_frags.append(i)
                self.type_of_frags_dict[i].append(frag_index)
                tmp_coord = self.coords[j:j+num_frag_atoms, :].copy()
                tmp_atom = self.atoms[j:j+num_frag_atoms].copy()
                tmp_mol = Mol(tmp_atom, tmp_coord)
                tmp_mol.properties["mbe_atom_index"] = list(range(j, j+num_frag_atoms))
                if self.center == "Heaviest":
                    # take the first heaviest one
                    heaviest = np.where(tmp_mol.atoms == max(tmp_mol.atoms))[0][0]
                    tmp_mol.properties["center_atom"] = heaviest
                    tmp_mol.properties["center"] = tmp_mol.coords[heaviest]
                elif self.center == "COM":
                    tmp_mol.properties["center"] = tmp_mol.Center("Mass")
                elif self.center == "COP":
                    tmp_mol.properties["center"] = tmp_mol.Center("Atom")
                else:
                    print("This type of center is not implemented yet, set to COM as center")
                    tmp_mol.properties["center"] = tmp_mol.Center("Mass")
                self.mbe_frags[order].append(tmp_mol)
                j += num_frag_atoms
                frag_index += 1
    else:
        self.mbe_frags[order] = []
        monomers = self.mbe_frags[1]
        # combinations() never repeats an index within a term, so no
        # duplicate filtering is needed and the terms can be streamed.
        for combo in itertools.combinations(range(len(monomers)), order):
            term = list(combo)
            atom_group = [monomers[index].atoms.shape[0] for index in term]
            total_atoms = sum(atom_group)
            tmp_coord = np.zeros((total_atoms, 3))
            tmp_atom = np.zeros(total_atoms, dtype=np.uint8)
            pointer = 0  # write offset into the concatenated arrays
            mbe_atom_index = []
            frag_mono_center = []
            natom_each_mono = []
            for index, natom in zip(term, atom_group):
                mono = monomers[index]
                tmp_coord[pointer:pointer+natom, :] = mono.coords
                tmp_atom[pointer:pointer+natom] = mono.atoms
                mbe_atom_index += mono.properties["mbe_atom_index"]
                natom_each_mono.append(len(mono.properties["mbe_atom_index"]))
                frag_mono_center.append(mono.properties["center"])
                pointer += natom
            tmp_mol = Mol(tmp_atom, tmp_coord)
            tmp_mol.properties["mbe_atom_index"] = mbe_atom_index
            tmp_mol.properties["mono_index"] = term
            tmp_mol.properties["natom_each_mono"] = natom_each_mono
            tmp_mol.properties["center"] = frag_mono_center
            self.mbe_frags[order].append(tmp_mol)
    return
def callbk(x_):
    """
    Optimizer progress callback: report the distance-matrix error of x_.

    Builds a Mol from the flat coordinate vector x_ (reshaped to
    natoms x 3, using the surrounding scope's atoms and natoms), rebuilds
    its distance matrix and prints the root of the summed squared
    differences against GdDistMatrix.
    """
    trial = Mol(atoms, x_.reshape(natoms, 3))
    trial.BuildDistanceMatrix()
    dist_err = np.sqrt(
        np.sum((GdDistMatrix - trial.DistMatrix) * (GdDistMatrix - trial.DistMatrix)))
    # Single-argument print: same output under the Python 2 statement and
    # the Python 3 function.
    print("%s %s" % ("Distance error : ", dist_err))
def ReverseAtomwiseEmbedding(self, emb_, atoms_, guess_, GdDistMatrix):
    """
    Fit coordinates whose atomwise embedding is as close to emb_ as possible.

    Starting from guess_, the flattened coordinates are optimized with
    L-BFGS-B to minimize self.EmbAtomwiseErr between a trial Mol and emb_.
    The optimizer's callback prints the root of the summed squared
    differences between GdDistMatrix and the trial molecule's distance
    matrix.

    Args:
        emb_: the embedding which we will try to construct a mol to match.
            Because this is atomwise this will actually be a
            (natoms X embedding shape) tensor; its first dimension is taken
            as the number of atoms.
        atoms_: element types for which this routine provides coords. If
            None, every atom is set to 6.
        guess_: initial coordinates; must support reshape to natoms*3.
            There is no fallback when it is None.
        GdDistMatrix: reference distance matrix, used only for the progress
            print and the final self.DistanceErr call.

    Returns:
        A Mol built from the atoms and the optimized coordinates.
    """
    import scipy.optimize

    natoms = emb_.shape[0]
    # Identity test: `atoms_ == None` is elementwise for ndarrays and has no
    # unambiguous truth value.
    if atoms_ is None:
        atoms = np.full((natoms), 6)
    else:
        atoms = atoms_
    coords = guess_

    def objective(crds):
        # Embedding error of the trial geometry against the target.
        return self.EmbAtomwiseErr(Mol(atoms, crds.reshape(natoms, 3)), emb_)

    def callbk(x_):
        # Progress report: distance-matrix error of the current iterate.
        mn = Mol(atoms, x_.reshape(natoms, 3))
        mn.BuildDistanceMatrix()
        dist_err = np.sqrt(
            np.sum((GdDistMatrix - mn.DistMatrix) * (GdDistMatrix - mn.DistMatrix)))
        print("%s %s" % ("Distance error : ", dist_err))

    # Call through the module imported above so this does not depend on a
    # separate file-level `optimize` name being in scope.
    res = scipy.optimize.minimize(objective,
                                  coords.reshape(natoms * 3),
                                  method='L-BFGS-B',
                                  tol=1.e-12,
                                  options={
                                      "maxiter": 5000000,
                                      "maxfun": 10000000
                                  },
                                  callback=callbk)
    mfit = Mol(atoms, res.x.reshape(natoms, 3))
    self.DistanceErr(GdDistMatrix, mfit)
    return mfit