def generate_Si_cluster():
    """Write a Si cluster and four transformed copies of it as XYZ files.

    A two-atom Si cell is expanded to a 2x2x2 supercell, converted into a
    molecule and written to ``test_dir``.  Rotated, perturbed, permuted and
    all-in-one copies are written next to it; every transformation is
    seeded with 42 and the perturbation is called with 0.3.
    """
    from pymatgen.io.xyz import XYZ

    def _save(molecule, basename):
        # All generated files are stored in the shared test directory.
        XYZ(molecule).write_file(os.path.join(test_dir, basename))

    site_coords = [[0, 0, 0], [0.75, 0.5, 0.75]]
    cell = Lattice.from_parameters(a=3.84, b=3.84, c=3.84,
                                   alpha=120, beta=90, gamma=60)
    struct = Structure(cell, ['Si', 'Si'], site_coords)
    struct.make_supercell([2, 2, 2])

    # Reference molecule used by the tests
    mol = Molecule.from_sites(struct)
    _save(mol, "Si_cluster.xyz")

    # Rotate the whole molecule
    rotated = mol.copy()
    rotate(rotated, seed=42)
    _save(rotated, "Si_cluster_rotated.xyz")

    # Perturb the atom positions
    perturbed = mol.copy()
    perturb(perturbed, 0.3, seed=42)
    _save(perturbed, "Si_cluster_perturbed.xyz")

    # Permute the order of the atoms
    permuted = mol.copy()
    permute(permuted, seed=42)
    _save(permuted, "Si_cluster_permuted.xyz")

    # All three transformations applied to one copy
    combined = mol.copy()
    rotate(combined, seed=42)
    perturb(combined, 0.3, seed=42)
    permute(combined, seed=42)
    _save(combined, "Si_cluster_2.xyz")
def test_from_string(self):
    """Check that XYZ.from_string recovers species and coordinates."""
    methane_text = "\n".join([
        "5",
        "H4 C1",
        "C 0.000000 0.000000 0.000000",
        "H 0.000000 0.000000 1.089000",
        "H 1.026719 0.000000 -0.363000",
        "H -0.513360 -0.889165 -0.363000",
        "H -0.513360 0.889165 -0.363000",
    ])
    methane = XYZ.from_string(methane_text).molecule
    expected_species = ["C", "H", "H", "H", "H"]
    for idx, site in enumerate(methane):
        self.assertEqual(site.species_string, expected_species[idx])
        self.assertEqual(len(site.coords), 3)
        if idx == 0:
            # The carbon atom sits exactly at the origin.
            self.assertTrue(all([c == 0 for c in site.coords]))

    # Coordinates written in lower-case scientific notation
    sci_text = "\n".join([
        "2",
        "Random",
        "C 2.39132145462 -0.700993488928 -7.22293142224e-06",
        "C 1.16730636786 -1.38166622735 -2.77112970359e-06",
        "",
    ])
    carbons = XYZ.from_string(sci_text).molecule
    self.assertTrue(abs(carbons[0].z) < 1e-5)
    self.assertTrue(abs(carbons[1].z) < 1e-5)
def outputMolecule(singleMol, dataDir):
    """Write the sites of ``singleMol`` to ``<dataDir>/singleMolecule/singleMol.xyz``.

    Args:
        singleMol: iterable of sites; each site must expose ``specie`` and
            ``coords``.
        dataDir (str): directory that receives the ``singleMolecule`` folder.

    Side effects:
        Changes the current working directory to ``dataDir`` (kept from the
        original implementation) and creates ``singleMolecule`` inside it if
        it does not exist yet.
    """
    molecule = Molecule([], [])
    for site in singleMol:
        molecule.append(str(site.specie), site.coords)
    xyzObj = XYZ(molecule)

    # Resolve the directory before chdir so a relative ``dataDir`` is not
    # applied twice when the output path is built afterwards.
    data_dir = os.path.abspath(dataDir)
    os.chdir(data_dir)

    # os.makedirs avoids spawning a shell and does not complain when the
    # folder is already there.
    out_dir = os.path.join(data_dir, 'singleMolecule')
    os.makedirs(out_dir, exist_ok=True)
    xyzObj.write_file(os.path.join(out_dir, 'singleMol.xyz'))
def test_from_string(self):
    """Check XYZ.from_string on plain, scientific, Fortran-style and
    extra-column inputs."""
    methane_text = "\n".join([
        "5",
        "H4 C1",
        "C 0.000000 0.000000 0.000000",
        "H 0.000000 0.000000 1.089000",
        "H 1.026719 0.000000 -0.363000",
        "H -0.513360 -0.889165 -0.363000",
        "H -0.513360 0.889165 -0.363000",
    ])
    methane = XYZ.from_string(methane_text).molecule
    expected_species = ["C", "H", "H", "H", "H"]
    for idx, site in enumerate(methane):
        self.assertEqual(site.species_string, expected_species[idx])
        self.assertEqual(len(site.coords), 3)
        if idx == 0:
            # The carbon atom sits exactly at the origin.
            self.assertTrue(all([c == 0 for c in site.coords]))

    # Lower-case scientific notation
    sci_text = "\n".join([
        "2",
        "Random",
        "C 2.39132145462 -0.700993488928 -7.22293142224e-06",
        "C 1.16730636786 -1.38166622735 -2.77112970359e-06",
        "",
    ])
    carbons = XYZ.from_string(sci_text).molecule
    self.assertTrue(abs(carbons[0].z) < 1e-5)
    self.assertTrue(abs(carbons[1].z) < 1e-5)

    # Upper-case E exponents mixed with D/d exponents
    fortran_text = "\n".join([
        "3",
        "Random",
        "C 0.000000000000E+00 2.232615992397E+01 0.000000000000E+00",
        "C -2.383225420567E-31 1.116307996198E+01 1.933502166311E+01",
        "C -4.440892098501D-01 -1.116307996198d+01 1.933502166311E+01",
        "",
    ])
    trio = XYZ.from_string(fortran_text).molecule
    self.assertAlmostEqual(trio[0].x, 0)
    self.assertAlmostEqual(trio[1].y, 11.16307996198)
    self.assertAlmostEqual(trio[2].x, -0.4440892098501)
    self.assertAlmostEqual(trio[2].y, -11.16307996198)

    # Atom lines that carry extra integer columns after the coordinates
    extra_columns_text = "\n".join([
        " 5",
        "C32-C2-1",
        "C 2.70450 1.16090 -0.14630 1 3 23 2",
        "C 1.61930 1.72490 -0.79330 2 1 5 26",
        "C 2.34210 1.02670 1.14620 3 1 8 6",
        "C -0.68690 2.16170 -0.13790 4 5 18 7",
        "C 0.67160 2.15830 0.14350 5 4 2 6",
        "",
    ])
    fragment = XYZ.from_string(extra_columns_text).molecule
    self.assertAlmostEqual(fragment[0].x, 2.70450)
    self.assertAlmostEqual(fragment[1].y, 1.72490)
    self.assertAlmostEqual(fragment[2].x, 2.34210)
    self.assertAlmostEqual(fragment[3].z, -0.13790)
def setUp(self):
    """Create a methane molecule, a two-frame list (the second frame shifted
    by +10.0 on every coordinate) and XYZ wrappers for both."""
    symbols = ["C", "H", "H", "H", "H"]
    base_frame = [[0.000000, 0.000000, 0.000000],
                  [0.000000, 0.000000, 1.089000],
                  [1.026719, 0.000000, -0.363000],
                  [-0.513360, -0.889165, -0.363000],
                  [-0.513360, 0.889165, -0.363000]]
    shifted_frame = [[value + 10.0 for value in atom] for atom in base_frame]

    self.mol = Molecule(list(symbols), base_frame)
    self.multi_mols = [Molecule(list(symbols), frame)
                       for frame in (base_frame, shifted_frame)]
    self.xyz = XYZ(self.mol)
    self.multi_xyz = XYZ(self.multi_mols)
def largercube(cube_length):
    """Re-box the POSCAR that sits next to this script into a cubic cell.

    The ``POSCAR`` file in the directory of this source file is read,
    dumped to ``POSCAR.xyz``, read back as a molecule and placed in a box
    with all three edges equal to ``cube_length``.  The boxed structure is
    written to ``POSCAR_larger.vasp`` in the same directory.

    Args:
        cube_length: edge length passed for a, b and c of the new box.
    """
    here = os.path.dirname(os.path.realpath(__file__))
    poscar_path = os.path.join(here, 'POSCAR')
    xyz_path = os.path.join(here, 'POSCAR.xyz')
    boxed_path = os.path.join(here, 'POSCAR_larger.vasp')

    # Round-trip through an XYZ file to obtain a molecule.
    XYZ(Poscar.from_file(poscar_path).structure).write_file(xyz_path)
    molecule = XYZ.from_file(xyz_path).molecule

    boxed = molecule.get_boxed_structure(cube_length, cube_length, cube_length)
    Poscar(boxed).write_file(boxed_path)
def test_init_from_structure(self):
    """XYZ built from the POSCAR test structure must print the expected text."""
    struct = Poscar.from_file(os.path.join(test_dir, 'POSCAR')).structure
    expected = "\n".join([
        "24",
        "Fe4 P4 O16",
        "Fe 2.277347 4.550379 2.260125",
        "Fe 2.928536 1.516793 4.639870",
        "Fe 7.483231 4.550379 0.119620",
        "Fe 8.134420 1.516793 2.499364",
        "P 0.985089 1.516793 1.990624",
        "P 4.220794 4.550379 4.370369",
        "P 6.190973 1.516793 0.389120",
        "P 9.426677 4.550379 2.768865",
        "O 0.451582 4.550379 3.365614",
        "O 1.006219 1.516793 3.528306",
        "O 1.725331 0.279529 1.358282",
        "O 1.725331 2.754057 1.358282",
        "O 3.480552 3.313115 3.738027",
        "O 3.480552 5.787643 3.738027",
        "O 4.199665 4.550379 1.148562",
        "O 4.754301 1.516793 0.985870",
        "O 5.657466 4.550379 3.773620",
        "O 6.212102 1.516793 3.610928",
        "O 6.931215 0.279529 1.021463",
        "O 6.931215 2.754057 1.021463",
        "O 8.686436 3.313115 3.401208",
        "O 8.686436 5.787643 3.401208",
        "O 9.405548 4.550379 1.231183",
        "O 9.960184 1.516793 1.393875",
    ])
    self.assertEqual(str(XYZ(struct)), expected)
def test_str(self):
    """str() of single- and multi-frame XYZ objects must match the
    reference text, including a custom coordinate precision."""
    single_expected = "\n".join([
        "5",
        "H4 C1",
        "C 0.000000 0.000000 0.000000",
        "H 0.000000 0.000000 1.089000",
        "H 1.026719 0.000000 -0.363000",
        "H -0.513360 -0.889165 -0.363000",
        "H -0.513360 0.889165 -0.363000",
    ])
    self.assertEqual(str(self.xyz), single_expected)

    # Two frames, printed with three decimals
    multi_expected = "\n".join([
        "5",
        "H4 C1",
        "C 0.000 0.000 0.000",
        "H 0.000 0.000 1.089",
        "H 1.027 0.000 -0.363",
        "H -0.513 -0.889 -0.363",
        "H -0.513 0.889 -0.363",
        "5",
        "H4 C1",
        "C 10.000 10.000 10.000",
        "H 10.000 10.000 11.089",
        "H 11.027 10.000 9.637",
        "H 9.487 9.111 9.637",
        "H 9.487 10.889 9.637",
    ])
    rendered = str(XYZ(self.multi_mols, coord_precision=3))
    self.assertEqual(rendered, multi_expected)
def xyz_to_sbu(path: str) -> Dict[str, Fragment]:
    """
    Build one Fragment per molecule stored in an XYZ file.

    For every molecule, the sites with symbol "X" are copied into a helper
    molecule made of hydrogens, and a PointGroupAnalyzer (tolerance 0.1) is
    created from that helper molecule and stored as the fragment symmetry.

    Parameters
    ----------
    path : str
        Path of the XYZ file; it is read both by ``XYZ.from_file`` and by
        ``get_xyz_names``.

    Returns
    -------
    Dict[str, Fragment]
        Fragments keyed by the names returned from ``get_xyz_names``,
        paired with the molecules in file order.
    """
    molecules = XYZ.from_file(path).all_molecules
    labels = get_xyz_names(path)

    fragments = {}
    for mol, label in zip(molecules, labels):
        dummy_coords = [mol[i].coords for i in mol.indices_from_symbol("X")]
        n_dummies = len(dummy_coords)
        # Hydrogens stand in for the dummy sites; the charge is set to the
        # number of dummies, as in the original construction.
        stand_in = Molecule(["H"] * n_dummies, dummy_coords, charge=n_dummies)
        point_group = PointGroupAnalyzer(stand_in, tolerance=0.1)
        fragments[label] = Fragment(atoms=mol, symmetry=point_group, name=label)
    return fragments
def _write_input(self, input_dir="."):
    """
    Write the packmol input file, plus one structure file per constituent
    molecule, into the input directory.

    Args:
        input_dir (string): path to the input directory
    """
    input_path = os.path.join(input_dir, self.input_file)
    with open(input_path, 'wt', encoding="utf-8") as inp:
        # Global control parameters come first, one "key value" per line.
        inp.writelines(
            '{} {}\n'.format(key, self._format_param_val(val))
            for key, val in self.control_params.items())

        # Each molecule is dumped to "<idx>.<filetype>" and referenced from
        # its own "structure ... end structure" section.
        for idx, mol in enumerate(self.mols):
            filetype = self.control_params["filetype"]
            basename = '{}.{}'.format(idx, filetype)
            # NOTE(review): the file name is handed on as ASCII bytes;
            # confirm the writers below accept bytes paths.
            filename = os.path.join(input_dir, basename).encode("ascii")
            if filetype == "pdb":
                self.write_pdb(mol, filename, num=idx + 1)
            else:
                # every other file type is written through XYZ
                XYZ(mol).write_file(filename=filename)

            section_head = "structure {}.{}\n".format(
                os.path.join(input_dir, str(idx)), filetype)
            inp.write("\n")
            inp.write(section_head)
            for key, val in self.param_list[idx].items():
                inp.write(' {} {}\n'.format(key, self._format_param_val(val)))
            inp.write('end structure\n')
def insert_solvents(coll):
    """Download solvent geometries by name and upsert them into ``coll``.

    Each name is resolved to an XYZ file through the NIH cactus web
    service.  The parsed molecule is converted with OpenBabel to obtain
    SMILES, canonical SMILES, InChI and SVG representations, and the
    resulting document is upserted, keyed on (inchi, charge,
    spin_multiplicity).  Names for which the service does not answer with
    HTTP 200 are reported on stdout.

    Args:
        coll: collection object exposing ``update(filter, update, upsert=...)``.
    """
    # An explicit tuple keeps the list independent of how a multi-line
    # string literal happens to be indented.
    names = (
        "THF",
        "monoglyme",
        "Dimethylacetamide",
        "propylene carbonate",
        "ethylene carbonate",
        "dimethylcarbonate",
        "DMSO",
        "ACN",
        "Diethylcarbonate",
        "propyl glyme",
        "ethyl glyme",
        "ethyl diglyme",
        "diglyme",
        "Butyldiglyme",
        "tetraglyme",
        "Pentaethylene glycol diethyl ether",
        "Tetraethylene glycol diethyl ether",
        "Tetraethylene glycol dibutyl ether",
        "Butyldiglyme",
    )
    url_template = \
        "http://cactus.nci.nih.gov/chemical/structure/{}/file?format=xyz"

    # dict.fromkeys drops the repeated name while keeping the order, so the
    # same molecule is not downloaded and upserted twice.
    for n in dict.fromkeys(names):
        # Without a timeout a stalled connection would block forever.
        response = requests.get(url_template.format(n), timeout=60)
        if response.status_code != 200:
            print("{} not found.\n".format(n))
            continue

        xyz = XYZ.from_string(response.text)
        clean_mol = xyz.molecule
        bb = BabelMolAdaptor(clean_mol)
        pbmol = pb.Molecule(bb.openbabel_mol)
        smiles = pbmol.write("smi").split()[0]
        can = pbmol.write("can").split()[0]
        inchi = pbmol.write("inchi")
        svg = pbmol.write("svg")

        comp = clean_mol.composition
        d = {"molecule": clean_mol.as_dict()}
        d["pretty_formula"] = comp.reduced_formula
        d["formula"] = comp.formula
        d["composition"] = comp.as_dict()
        d["elements"] = list(comp.as_dict().keys())
        d["nelements"] = len(comp)
        d["charge"] = clean_mol.charge
        d["spin_multiplicity"] = clean_mol.spin_multiplicity
        d["smiles"] = smiles
        d["can"] = can
        d["inchi"] = inchi
        d["svg"] = svg
        d["xyz"] = str(xyz)
        d["tags"] = ["Solvents"]
        coll.update(
            {
                "inchi": inchi,
                "charge": clean_mol.charge,
                "spin_multiplicity": clean_mol.spin_multiplicity
            },
            {"$set": d},
            upsert=True)
def read_mol(filename):
    """
    Reads a molecule based on file extension. For example, anything ending in
    a "xyz" is assumed to be a XYZ file. Supported formats include xyz,
    Gaussian input (gjf|g03|g09|com|inp), Gaussian output (out|lis|log) and
    pymatgen's JSON serialized molecules (json|mson). Using openbabel, many
    more extensions are supported but requires openbabel to be installed.

    Args:
        filename (str): A filename to read from.

    Returns:
        A Molecule object.

    Raises:
        IOError: if a JSON file does not decode to a Molecule.
        ValueError: if the file extension is not recognized.
    """
    fname = os.path.basename(filename)
    lowered = fname.lower()
    if fnmatch(lowered, "*.xyz*"):
        return XYZ.from_file(filename).molecule
    if any(fnmatch(lowered, "*.{}*".format(ext))
           for ext in ("gjf", "g03", "g09", "com", "inp")):
        return GaussianInput.from_file(filename).molecule
    if any(fnmatch(lowered, "*.{}*".format(ext))
           for ext in ("out", "lis", "log")):
        return GaussianOutput(filename).final_structure
    if fnmatch(fname, "*.json*") or fnmatch(fname, "*.mson*"):
        with zopen(filename) as f:
            s = json.load(f, cls=MontyDecoder)
            if not isinstance(s, Molecule):
                raise IOError("File does not contain a valid serialized "
                              "molecule")
            return s

    # Raw string avoids the invalid "\." escape; "mol2" is listed before
    # "mol" because alternation takes the first alternative that matches,
    # which would otherwise report ".mol2" files as "mol".
    m = re.search(r"\.(pdb|mol2|mol|mdl|sdf|sd|ml2|sy2|cml|mrv)",
                  filename.lower())
    if m:
        return BabelMolAdaptor.from_file(filename, m.group(1)).pymatgen_mol

    raise ValueError("Unrecognized file extension!")
def write_mol(mol, filename):
    """
    Write a molecule to a file based on file extension. For example, anything
    ending in a "xyz" is assumed to be a XYZ file. Supported formats include
    xyz, Gaussian input (gjf|g03|g09|com|inp), and pymatgen's JSON serialized
    molecules (json|mson). Other recognized extensions are delegated to
    openbabel through BabelMolAdaptor.

    Args:
        mol (Molecule/IMolecule): Molecule to write
        filename (str): A filename to write to.

    Returns:
        Whatever the selected writer returns.

    Raises:
        ValueError: if the file extension is not recognized.
    """
    fname = os.path.basename(filename)
    lowered = fname.lower()
    if fnmatch(lowered, "*.xyz*"):
        return XYZ(mol).write_file(filename)
    if any(fnmatch(lowered, "*.{}*".format(ext))
           for ext in ("gjf", "g03", "g09", "com", "inp")):
        return GaussianInput(mol).write_file(filename)
    if fnmatch(fname, "*.json*") or fnmatch(fname, "*.mson*"):
        with zopen(filename, "wt") as f:
            return f.write(str2unicode(json.dumps(mol, cls=MontyEncoder)))

    # Raw string avoids the invalid "\." escape; "mol2" is listed before
    # "mol" because alternation takes the first alternative that matches,
    # which would otherwise write ".mol2" files in "mol" format.
    m = re.search(r"\.(pdb|mol2|mol|mdl|sdf|sd|ml2|sy2|cml|mrv)",
                  filename.lower())
    if m:
        return BabelMolAdaptor(mol).write_file(filename, m.group(1))

    raise ValueError("Unrecognized file extension!")
def setUp(self):
    """Create the methane molecule and its XYZ wrapper used by the tests."""
    symbols = ["C", "H", "H", "H", "H"]
    positions = [[0.000000, 0.000000, 0.000000],
                 [0.000000, 0.000000, 1.089000],
                 [1.026719, 0.000000, -0.363000],
                 [-0.513360, -0.889165, -0.363000],
                 [-0.513360, 0.889165, -0.363000]]
    self.mol = Molecule(symbols, positions)
    self.xyz = XYZ(self.mol)
def write_traj(structures):
    """Write every structure as one frame of ``traj.xyz``.

    Each structure is turned into a Molecule built from its species and the
    ``coords`` of its sites; the resulting list is written as a single
    multi-frame XYZ file in the current working directory.
    """
    frames = [
        Molecule(struc.species, coords=[site.coords for site in struc.sites])
        for struc in structures
    ]
    XYZ(mol=frames).write_file('traj.xyz')
def test_from_file(self):
    """A multi-frame XYZ file must yield all 302 frames, and ``molecule``
    must carry the same last coordinates as the last frame."""
    mxyz = XYZ.from_file(os.path.join(test_dir, 'multiple_frame_xyz.xyz'))
    self.assertEqual(len(mxyz.all_molecules), 302)

    first_atom_first_frame = [0.20303525080000001, 2.8569761204000002,
                              0.44737723190000001]
    last_atom_last_frame = [5.5355550720000002, 0.0282305931,
                            -0.30993102189999999]
    self.assertEqual(list(mxyz.all_molecules[0].cart_coords[0]),
                     first_atom_first_frame)
    self.assertEqual(list(mxyz.all_molecules[-1].cart_coords[-1]),
                     last_atom_last_frame)
    self.assertEqual(list(mxyz.molecule.cart_coords[-1]),
                     last_atom_last_frame)
def central_bond_length_dict(self):
    """Return the central bond length for every entry of ``structures_data``.

    Each value of ``self.structures_data`` is parsed as a single XYZ frame.
    The distance between the two middle atoms of the first entry of
    ``self.cent_diheds`` (positions 1 and 2) is stored under the key
    converted to float.

    Returns:
        dict: ``{float(key): distance}`` sorted by key.
    """
    lengths = {}
    for degree, frame_text in self.structures_data.items():
        mol = XYZ._from_frame_string(frame_text)
        first_center = self.cent_diheds[0][1]
        second_center = self.cent_diheds[0][2]
        lengths[float(degree)] = mol.get_distance(first_center, second_center)
    return dict(sorted(lengths.items()))
def form_xyz(self, filename):
    """Load a multi-frame XYZ file and set up the time axis.

    The sites of every frame are appended to ``self.sites``.
    ``self.total_time`` becomes the number of frames, ``self.N`` the number
    of sites in the first stored frame, and ``self.time`` (also stored as
    ``self.msd['time']``) runs from 0 in steps of ``self.t_convert``.
    """
    frames = XYZ.from_file(filename).all_molecules
    self.total_time = len(frames)
    for frame in frames:
        self.sites.append(frame.sites)
    self.N = len(self.sites[0])
    step = self.t_convert
    self.time = np.arange(0, self.total_time * step, step)
    self.msd['time'] = self.time
def test_from_string(self):
    """Check XYZ.from_string on plain, scientific and Fortran-style input."""
    methane_text = "\n".join([
        "5",
        "H4 C1",
        "C 0.000000 0.000000 0.000000",
        "H 0.000000 0.000000 1.089000",
        "H 1.026719 0.000000 -0.363000",
        "H -0.513360 -0.889165 -0.363000",
        "H -0.513360 0.889165 -0.363000",
    ])
    methane = XYZ.from_string(methane_text).molecule
    expected_species = ["C", "H", "H", "H", "H"]
    for idx, site in enumerate(methane):
        self.assertEqual(site.species_string, expected_species[idx])
        self.assertEqual(len(site.coords), 3)
        if idx == 0:
            # The carbon atom sits exactly at the origin.
            self.assertTrue(all([c == 0 for c in site.coords]))

    # Lower-case scientific notation
    sci_text = "\n".join([
        "2",
        "Random",
        "C 2.39132145462 -0.700993488928 -7.22293142224e-06",
        "C 1.16730636786 -1.38166622735 -2.77112970359e-06",
        "",
    ])
    carbons = XYZ.from_string(sci_text).molecule
    self.assertTrue(abs(carbons[0].z) < 1e-5)
    self.assertTrue(abs(carbons[1].z) < 1e-5)

    # Upper-case E exponents mixed with D/d exponents
    fortran_text = "\n".join([
        "3",
        "Random",
        "C 0.000000000000E+00 2.232615992397E+01 0.000000000000E+00",
        "C -2.383225420567E-31 1.116307996198E+01 1.933502166311E+01",
        "C -4.440892098501D-01 -1.116307996198d+01 1.933502166311E+01",
        "",
    ])
    trio = XYZ.from_string(fortran_text).molecule
    self.assertAlmostEqual(trio[0].x, 0)
    self.assertAlmostEqual(trio[1].y, 11.16307996198)
    self.assertAlmostEqual(trio[2].x, -0.4440892098501)
    self.assertAlmostEqual(trio[2].y, -11.16307996198)
def main():
    """Command line entry point: swap the geometry of the first QChem job.

    The first job of the QChem input file given with ``-i`` receives the
    coordinates read from the file given with ``-c``.  When an AIMD
    velocity file is supplied with ``-v``, the last frame of the XYZ
    trajectory and the last velocity set are used instead and the
    ``aimd_init_veloc`` rem keyword is removed.  The charge and spin
    multiplicity of the original job are kept when the charge is set.  The
    result is written to the file given with ``-o``.

    Raises:
        ValueError: if the trajectory and the velocity file contain a
            different number of frames.
    """
    parser = argparse.ArgumentParser(
        description="Replace the atom coordinates of the first job in QChem "
                    "input file with the coordinates from an XYZ file")
    parser.add_argument("-i", "--input", dest="input", type=str,
                        required=True,
                        help="the QChem input filename")
    parser.add_argument("-c", "--coords", dest="coords", type=str,
                        required=True,
                        help="The XYZ file contains the new coords")
    parser.add_argument("-v", "--velocity", dest="velocity", type=str,
                        default=None,
                        help="The AIMD velocity file")
    parser.add_argument(
        "-o", "--output", dest="output", type=str, required=True,
        help="the QChem input filename with the coordinates from the XYZ file")
    options = parser.parse_args()

    qcinp = QcInput.from_file(options.input)
    first_job = qcinp.jobs[0]
    charge, spin = first_job.charge, first_job.spin_multiplicity

    if options.velocity is None:
        new_mol = Molecule.from_file(options.coords)
    else:
        mxyz = XYZ.from_file(options.coords)
        frames = mxyz.all_molecules
        new_mol = frames[-1]
        first_job.params["rem"].pop("aimd_init_veloc", None)
        qcnv = QcNucVeloc(options.velocity)
        # The frame list is exposed as ``all_molecules`` (used just above);
        # an explicit check also survives ``python -O``.
        if len(frames) != len(qcnv.velocities):
            raise ValueError(
                "{} contains {} frames but {} contains {} velocity sets"
                .format(options.coords, len(frames),
                        options.velocity, len(qcnv.velocities)))
        first_job.set_velocities(qcnv.velocities[-1])

    if charge is not None:
        new_mol.set_charge_and_spin(charge, spin)
    first_job.mol = new_mol
    qcinp.write_file(options.output)
    print(
        "created new QChem input file {new_file} using {old_inp} as an template and filled with coordinates "
        "from {coord_file}".format(old_inp=options.input,
                                   coord_file=options.coords,
                                   new_file=options.output))
def insert_g3testset(coll):
    """Parse every ``g*.txt`` file in the working directory and upsert the
    molecules it contains into ``coll``.

    Sites whose symbol is not a valid element are dropped before the
    molecule is rebuilt.  OpenBabel supplies the SMILES, canonical SMILES,
    InChI and SVG representations.  A failure on one molecule is reported
    with a short traceback on stdout and does not stop the run.
    """
    for f in glob.glob("g*.txt"):
        print("Parsing " + f)
        for (m, charge, spin) in parse_file(f):
            try:
                clean_sites = [
                    site for site in m
                    if Element.is_valid_symbol(site.specie.symbol)
                ]
                clean_mol = Molecule.from_sites(
                    clean_sites, charge=charge, spin_multiplicity=spin)
                xyz = XYZ(clean_mol)
                adaptor = BabelMolAdaptor.from_string(str(xyz), "xyz")
                pbmol = pb.Molecule(adaptor.openbabel_mol)
                smiles = pbmol.write("smi").split()[0]
                can = pbmol.write("can").split()[0]
                inchi = pbmol.write("inchi")
                svg = pbmol.write("svg")

                mol_dict = clean_mol.as_dict()
                comp = clean_mol.composition
                d = {
                    "molecule": mol_dict,
                    "pretty_formula": comp.reduced_formula,
                    "formula": comp.formula,
                    "composition": comp.as_dict(),
                    "elements": list(comp.as_dict().keys()),
                    "nelements": len(comp),
                    "charge": charge,
                    "spin_multiplicity": spin,
                    "smiles": smiles,
                    "can": can,
                    "inchi": inchi,
                    "svg": svg,
                    "xyz": str(xyz),
                    "tags": ["G305 test set"],
                }
                selector = {
                    "inchi": inchi,
                    "charge": charge,
                    "spin_multiplicity": spin
                }
                coll.update(selector, {"$set": d}, upsert=True)
            except Exception:
                # Best effort: report the failing file and keep going.
                print("Error in {}".format(f))
                traceback.print_exception(*sys.exc_info(), limit=2,
                                          file=sys.stdout)
        print("{} parsed!".format(f))
def read_file(filename):
    '''
    Read a structure-like file, choosing the parser from the text after the
    last dot of the file name (a bare name such as POSCAR counts as its own
    suffix).

    :param filename: path of the file to read
    :return: a structure object for vasp/POSCAR/CONTCAR and cif files, a
             molecule object for xyz files, and the Chgcar object for
             CHGCAR/CHG files
    :raises ValueError: if the suffix is not one of the supported kinds
    '''
    suffix = os.path.basename(filename).split('.')[-1]
    if suffix in ('vasp', 'POSCAR', 'CONTCAR'):
        return Poscar.from_file(filename).structure
    if suffix == 'cif':
        parser = CifParser(filename, occupancy_tolerance=0.7)
        return parser.get_structures(primitive=False)[0]
    if suffix == 'xyz':
        return XYZ.from_file(filename).molecule
    if suffix in ('CHGCAR', 'CHG'):
        return Chgcar.from_file(filename)
    # Previously an unknown suffix fell through to an UnboundLocalError.
    raise ValueError(
        "Unsupported file type '{}' for {}".format(suffix, filename))
def run(self, copy_to_current_on_exit=False, site_property=None):
    """
    Write the input file to the scratch directory, run packmol and return
    the packed molecule.

    Args:
        copy_to_current_on_exit (bool): Whether or not to copy the packmol
            input/output files from the scratch directory to the current
            directory.
        site_property (str): if set then the specified site property
            for the the final packed molecule will be restored.

    Returns:
        The packed result, or None when packmol produced no output file.
        NOTE(review): without ``site_property`` the value returned is the
        object from ``XYZ.from_file`` rather than a Molecule - confirm
        what callers expect.
    """
    scratch = tempfile.gettempdir()
    with ScratchDir(scratch,
                    copy_to_current_on_exit=copy_to_current_on_exit
                    ) as scratch_dir:
        self._write_input(input_dir=scratch_dir)
        input_path = os.path.join(scratch_dir, self.input_file)
        # The context manager closes the handle fed to packmol's stdin
        # once the process has finished.
        with open(input_path, 'r') as packmol_input:
            p = Popen(self.packmol_bin, stdin=packmol_input,
                      stdout=PIPE, stderr=PIPE)
            (stdout, stderr) = p.communicate()

        output_file = os.path.join(scratch_dir,
                                   self.control_params["output"])
        if os.path.isfile(output_file):
            packed_mol = XYZ.from_file(output_file)
            print("packed molecule written to {}".format(
                self.control_params["output"]))
            if site_property:
                packed_mol = self.restore_site_properties(
                    site_property=site_property, filename=output_file)
            return packed_mol
        else:
            print("Packmol execution failed")
            print(stdout, stderr)
            return None
def read_mol(filename):
    """
    Reads a molecule based on file extension. For example, anything ending in
    a "xyz" is assumed to be a XYZ file. Supported formats include xyz,
    Gaussian input (gjf|g03|g09|com|inp), Gaussian output (out|lis|log) and
    pymatgen's JSON serialized molecules (json|mson). Using openbabel, many
    more extensions are supported but requires openbabel to be installed.

    Args:
        filename (str): A filename to read from.

    Returns:
        A Molecule object.

    Raises:
        IOError: if a JSON file does not decode to a Molecule.
        ValueError: if the file extension is not recognized.
    """
    fname = os.path.basename(filename)
    lowered = fname.lower()
    gaussian_inputs = ("gjf", "g03", "g09", "com", "inp")
    gaussian_outputs = ("out", "lis", "log")

    if fnmatch(lowered, "*.xyz*"):
        return XYZ.from_file(filename).molecule
    if any(fnmatch(lowered, "*.{}*".format(ext)) for ext in gaussian_inputs):
        return GaussianInput.from_file(filename).molecule
    if any(fnmatch(lowered, "*.{}*".format(ext)) for ext in gaussian_outputs):
        return GaussianOutput(filename).final_structure
    if fnmatch(fname, "*.json*") or fnmatch(fname, "*.mson*"):
        with zopen(filename) as f:
            s = json.load(f, cls=MontyDecoder)
            if not isinstance(s, Molecule):
                raise IOError("File does not contain a valid serialized "
                              "molecule")
            return s

    # Raw string avoids the invalid "\." escape; "mol2" is listed before
    # "mol" because alternation takes the first alternative that matches,
    # which would otherwise report ".mol2" files as "mol".
    m = re.search(r"\.(pdb|mol2|mol|mdl|sdf|sd|ml2|sy2|cml|mrv)",
                  filename.lower())
    if m:
        return BabelMolAdaptor.from_file(filename, m.group(1)).pymatgen_mol

    raise ValueError("Unrecognized file extension!")
def insert_elements(coll):
    """Make sure ``coll`` holds exactly one single-atom entry for each of
    the elements with Z = 1..18.

    A missing element is inserted as a one-atom molecule at the origin,
    with OpenBabel-derived SMILES, canonical SMILES, InChI and SVG.  When
    more than one entry exists for an element, all but the first are
    removed.  Failures while building an entry are reported on stdout
    only.
    """
    print("adding missing elements.")
    for z in range(1, 19):
        el = Element.from_Z(z)
        cursor = coll.find(filter={"formula": "{}1".format(el.symbol)})
        if cursor.count() == 0:
            try:
                clean_mol = Molecule([el], [[0, 0, 0]])
                xyz = XYZ(clean_mol)
                adaptor = BabelMolAdaptor.from_string(str(xyz), "xyz")
                pbmol = pb.Molecule(adaptor.openbabel_mol)
                smiles = pbmol.write("smi").split()[0]
                can = pbmol.write("can").split()[0]
                inchi = pbmol.write("inchi")
                svg = pbmol.write("svg")

                mol_dict = clean_mol.as_dict()
                comp = clean_mol.composition
                d = {
                    "molecule": mol_dict,
                    "pretty_formula": comp.reduced_formula,
                    "formula": comp.formula,
                    "composition": comp.as_dict(),
                    "elements": list(comp.as_dict().keys()),
                    "nelements": len(comp),
                    "charge": 0,
                    "spin_multiplicity": clean_mol.spin_multiplicity,
                    "smiles": smiles,
                    "can": can,
                    "inchi": inchi,
                    "svg": svg,
                    "xyz": str(xyz),
                    "tags": ["G305 test set"],
                }
                coll.insert(d)
            except Exception:
                # Best effort: report the element and continue.
                print("Error in {}".format(el))
        elif cursor.count() > 1:
            print("More than 1 {} found. Removing...".format(el))
            # Keep the first document, drop every later duplicate.
            for duplicate in list(cursor)[1:]:
                print(duplicate["_id"])
                coll.remove({"_id": duplicate["_id"]})
def outputMolecule(singleMol, dataDir):
    """Save the sites of ``singleMol`` as ``singleMol.xyz`` inside
    ``<dataDir>/singlemolecule``.

    When the file already exists the user is asked on stdin whether it may
    be overwritten: 'Y' overwrites, 'N' exits the program through
    ``sys.exit()``, and any other answer repeats the question.

    Args:
        singleMol: mapping from site index to site; every site must expose
            ``specie`` and ``coords``.
        dataDir (str): directory that contains the ``singlemolecule``
            folder (the folder must already exist).
    """
    molecule = Molecule([], [])
    singlemolpath = os.path.join(dataDir, 'singlemolecule')
    # One path is used both for writing and for the message, so the report
    # can no longer disagree with the real location.
    target = os.path.join(singlemolpath, 'singleMol.xyz')
    for siteIndex in singleMol.keys():
        site = singleMol[siteIndex]
        molecule.append(str(site.specie), site.coords)
    xyzObj = XYZ(molecule)

    if "singleMol.xyz" not in os.listdir(singlemolpath):
        xyzObj.write_file(target)
        print('The single molecule structure is saved under:', target)
        return

    print('You have one \'singleMol.xyz\' file inside the single molecule path.')
    print('This previous file will be overwritten.')
    decision = None
    while decision != 'Y' and decision != 'N':
        decision = input('Do you want to proceed? Y for yes, N for no.')
        if decision == 'Y':
            xyzObj.write_file(target)
            print('The single molecule structure is saved under:', target)
        elif decision == 'N':
            print('The previous file is not changed. ')
            sys.exit()
        else:
            print('Not eligible response!!!\n')
def test_from_string(self):
    """Round-trip the fixture molecule through its XYZ text and
    BabelMolAdaptor.from_string, then check the formula."""
    xyz_text = str(XYZ(self.mol))
    round_tripped = BabelMolAdaptor.from_string(xyz_text, "xyz").pymatgen_mol
    self.assertEqual(round_tripped.formula, "H4 C1")
class XYZTest(unittest.TestCase):
    """Tests for reading, writing and converting XYZ objects."""

    def setUp(self):
        """Create a methane molecule, a two-frame list (the second frame
        shifted by +10.0 on every coordinate) and XYZ wrappers for both."""
        coords = [[0.000000, 0.000000, 0.000000],
                  [0.000000, 0.000000, 1.089000],
                  [1.026719, 0.000000, -0.363000],
                  [-0.513360, -0.889165, -0.363000],
                  [-0.513360, 0.889165, -0.363000]]
        coords2 = [[x + 10.0 for x in atom] for atom in coords]
        self.mol = Molecule(["C", "H", "H", "H", "H"], coords)
        self.multi_mols = [
            Molecule(["C", "H", "H", "H", "H"], frame)
            for frame in [coords, coords2]
        ]
        self.xyz = XYZ(self.mol)
        self.multi_xyz = XYZ(self.multi_mols)

    def test_str(self):
        """str() must match the reference text for one and for two frames."""
        ans = "\n".join([
            "5",
            "H4 C1",
            "C 0.000000 0.000000 0.000000",
            "H 0.000000 0.000000 1.089000",
            "H 1.026719 0.000000 -0.363000",
            "H -0.513360 -0.889165 -0.363000",
            "H -0.513360 0.889165 -0.363000",
        ])
        self.assertEqual(str(self.xyz), ans)

        mxyz = XYZ(self.multi_mols, coord_precision=3)
        mxyz_text = str(mxyz)
        ans_multi = "\n".join([
            "5",
            "H4 C1",
            "C 0.000 0.000 0.000",
            "H 0.000 0.000 1.089",
            "H 1.027 0.000 -0.363",
            "H -0.513 -0.889 -0.363",
            "H -0.513 0.889 -0.363",
            "5",
            "H4 C1",
            "C 10.000 10.000 10.000",
            "H 10.000 10.000 11.089",
            "H 11.027 10.000 9.637",
            "H 9.487 9.111 9.637",
            "H 9.487 10.889 9.637",
        ])
        self.assertEqual(mxyz_text, ans_multi)

    def test_from_string(self):
        """from_string must cope with plain, scientific, ``*^``, Fortran
        D-exponent and extra-column inputs."""
        ans = "\n".join([
            "5",
            "H4 C1",
            "C 0.000000 0.000000 0.000000",
            "H 0.000000 0.000000 1.089000",
            "H 1.026719 0.000000 -0.363000",
            "H -0.513360 -0.889165 -0.363000",
            "H -0.513360 0.889165 -0.363000",
        ])
        xyz = XYZ.from_string(ans)
        mol = xyz.molecule
        sp = ["C", "H", "H", "H", "H"]
        for i, site in enumerate(mol):
            self.assertEqual(site.species_string, sp[i])
            self.assertEqual(len(site.coords), 3)
            if i == 0:
                self.assertTrue(all([c == 0 for c in site.coords]))

        mol_str = "\n".join([
            "2",
            "Random",
            "C 2.39132145462 -0.700993488928 -7.22293142224e-06",
            "C 1.16730636786 -1.38166622735 -2.77112970359e-06",
            "",
        ])
        xyz = XYZ.from_string(mol_str)
        mol = xyz.molecule
        self.assertTrue(abs(mol[0].z) < 1e-5)
        self.assertTrue(abs(mol[1].z) < 1e-5)

        mol_str = "\n".join([
            "2",
            "Random, Alternate Scientific Notation",
            "C 2.39132145462 -0.700993488928 -7.222*^-06",
            "C 1.16730636786 -1.38166622735 -2.771*^-06",
            "",
        ])
        xyz = XYZ.from_string(mol_str)
        mol = xyz.molecule
        self.assertEqual(mol[0].z, -7.222e-06)
        self.assertEqual(mol[1].z, -2.771e-06)

        mol_str = "\n".join([
            "3",
            "Random",
            "C 0.000000000000E+00 2.232615992397E+01 0.000000000000E+00",
            "C -2.383225420567E-31 1.116307996198E+01 1.933502166311E+01",
            "C -4.440892098501D-01 -1.116307996198d+01 1.933502166311E+01",
            "",
        ])
        xyz = XYZ.from_string(mol_str)
        mol = xyz.molecule
        self.assertAlmostEqual(mol[0].x, 0)
        self.assertAlmostEqual(mol[1].y, 11.16307996198)
        self.assertAlmostEqual(mol[2].x, -0.4440892098501)
        self.assertAlmostEqual(mol[2].y, -11.16307996198)

        # Atom lines with extra integer columns after the coordinates
        mol_str = "\n".join([
            " 5",
            "C32-C2-1",
            "C 2.70450 1.16090 -0.14630 1 3 23 2",
            "C 1.61930 1.72490 -0.79330 2 1 5 26",
            "C 2.34210 1.02670 1.14620 3 1 8 6",
            "C -0.68690 2.16170 -0.13790 4 5 18 7",
            "C 0.67160 2.15830 0.14350 5 4 2 6",
            "",
        ])
        xyz = XYZ.from_string(mol_str)
        mol = xyz.molecule
        self.assertAlmostEqual(mol[0].x, 2.70450)
        self.assertAlmostEqual(mol[1].y, 1.72490)
        self.assertAlmostEqual(mol[2].x, 2.34210)
        self.assertAlmostEqual(mol[3].z, -0.13790)

    def test_from_file(self):
        """A multi-frame file must yield all 302 frames."""
        filepath = os.path.join(test_dir, 'multiple_frame_xyz.xyz')
        mxyz = XYZ.from_file(filepath)
        self.assertEqual(len(mxyz.all_molecules), 302)
        self.assertEqual(
            list(mxyz.all_molecules[0].cart_coords[0]),
            [0.20303525080000001, 2.8569761204000002, 0.44737723190000001])
        self.assertEqual(
            list(mxyz.all_molecules[-1].cart_coords[-1]),
            [5.5355550720000002, 0.0282305931, -0.30993102189999999])
        self.assertEqual(
            list(mxyz.molecule.cart_coords[-1]),
            [5.5355550720000002, 0.0282305931, -0.30993102189999999])

    def test_init_from_structure(self):
        """XYZ built from the POSCAR structure must print the expected text."""
        filepath = os.path.join(test_dir, 'POSCAR')
        poscar = Poscar.from_file(filepath)
        struct = poscar.structure
        xyz = XYZ(struct)
        ans = "\n".join([
            "24",
            "Fe4 P4 O16",
            "Fe 2.277347 4.550379 2.260125",
            "Fe 2.928536 1.516793 4.639870",
            "Fe 7.483231 4.550379 0.119620",
            "Fe 8.134420 1.516793 2.499364",
            "P 0.985089 1.516793 1.990624",
            "P 4.220794 4.550379 4.370369",
            "P 6.190973 1.516793 0.389120",
            "P 9.426677 4.550379 2.768865",
            "O 0.451582 4.550379 3.365614",
            "O 1.006219 1.516793 3.528306",
            "O 1.725331 0.279529 1.358282",
            "O 1.725331 2.754057 1.358282",
            "O 3.480552 3.313115 3.738027",
            "O 3.480552 5.787643 3.738027",
            "O 4.199665 4.550379 1.148562",
            "O 4.754301 1.516793 0.985870",
            "O 5.657466 4.550379 3.773620",
            "O 6.212102 1.516793 3.610928",
            "O 6.931215 0.279529 1.021463",
            "O 6.931215 2.754057 1.021463",
            "O 8.686436 3.313115 3.401208",
            "O 8.686436 5.787643 3.401208",
            "O 9.405548 4.550379 1.231183",
            "O 9.960184 1.516793 1.393875",
        ])
        self.assertEqual(str(xyz), ans)

    def test_as_dataframe(self):
        """as_dataframe must return atom/x/y/z columns with a 1-based index."""
        coords = [[0.000000, 0.000000, 0.000000],
                  [0.000000, 0.000000, 1.089000],
                  [1.026719, 0.000000, -0.363000],
                  [-0.513360, -0.889165, -0.363000],
                  [-0.513360, 0.889165, -0.363000]]
        test_df = pd.DataFrame(coords, columns=['x', 'y', 'z'])
        test_df.insert(0, "atom", ["C", "H", "H", "H", "H"])
        test_df.index += 1
        mol_df = self.xyz.as_dataframe()

        # body tests
        pd.testing.assert_frame_equal(mol_df, test_df)

        # index tests
        np.testing.assert_array_equal(mol_df.columns, test_df.columns)
        np.testing.assert_array_equal(mol_df.index, test_df.index)
#%%
# Collect the z coordinate of every Zn site for each frame of a trajectory.
from pymatgen.core.sites import Site
from pymatgen.io.xyz import XYZ
import os

os.chdir('/home/jinho93/oxides/wurtzite/zno/cp2k/1.aimd/3.16A/30/3.fix/output')
xyz = XYZ.from_file('zno-pos-1.xyz')

# One row per frame, one entry per Zn site in that frame.
coords = [
    [site.z for site in frame.sites if site.species_string == 'Zn']
    for frame in xyz.all_molecules
]

# %%
# Turn the nested list into an array and report its shape.
import numpy as np

coords = np.array(coords)
print(coords.shape)

#%%
# Plot one black trace per column of the array.
import matplotlib.pyplot as plt

for column in coords.T:
    plt.plot(column, color='black')
plt.ylim((18, 30))
plt.xlim((0, 200))
def _parse_crest_output(self):
    """
    Parse output file and directory to extract all command line inputs
    and output files.

    Sets the attributes:
        coord_file / input_structure: input geometry named first on the
            command line (only when that file can be read)
        cmd_options: Dict of type {flag: value}
        sorted_structures_energies: n x m x 2 list, for n conformers,
            m rotamers per conformer, and tuple of [Molecule, energy]
        properly_terminated: set to True when the last line of the output
            reports normal termination
        lowest_energy_structure: molecule read from ``crest_best.xyz``
            when that file exists

    Raises:
        ValueError: if the output file contains no "> crest" command line.
    """

    def _is_number(token):
        # Signed numbers such as "-1" are values, not flags.
        try:
            float(token)
        except ValueError:
            return False
        return True

    output_filepath = os.path.join(self.path, self.filename)

    # Get CREST command
    crest_cmd = None
    with open(output_filepath, "r") as xtbout_file:
        for line in xtbout_file:
            if "> crest" in line:
                crest_cmd = line.strip()[8:]
                break
    if crest_cmd is None:
        # Previously this surfaced as an AttributeError on None.
        raise ValueError("No CREST command line ('> crest ...') found in "
                         "{}".format(output_filepath))

    split_cmd = crest_cmd.split(" ")

    # Get input file if present
    try:
        self.coord_file = os.path.join(self.path, split_cmd[0])
        self.input_structure = Molecule.from_file(filename=self.coord_file)
    except FileNotFoundError:
        print("Input file {} not found".format(split_cmd[0]))

    # Get CREST input flags.  A token following a flag is its value unless
    # it looks like another flag; signed numbers count as values so that
    # "-chrg -1" keeps its charge.
    for i, entry in enumerate(split_cmd):
        if not entry or "-" not in entry or _is_number(entry):
            continue
        value = None
        if i + 1 < len(split_cmd):
            candidate = split_cmd[i + 1]
            if "-" not in candidate or _is_number(candidate):
                value = candidate
        self.cmd_options[entry[1:]] = value

    # Get input charge for decorating parsed molecules
    chg = 0
    for charge_flag in ("chrg", "c"):
        if charge_flag not in self.cmd_options:
            continue
        str_chg = self.cmd_options[charge_flag]
        if str_chg is not None:
            try:
                # Handles "-1", "+1" and multi-digit charges alike.
                chg = int(str_chg)
            except ValueError:
                # Fall back to the former rule: last character only.
                chg = int(str_chg[-1])
        break

    # Check for proper termination by reading the last line only
    with open(output_filepath, "rb") as xtbout_file:
        xtbout_file.seek(-2, 2)
        while xtbout_file.read(1) != b"\n":
            xtbout_file.seek(-2, 1)
        end_bstring = xtbout_file.read()
        if b"CREST terminated normally." in end_bstring:
            self.properly_terminated = True

    if self.properly_terminated:
        # Parse for number of conformers and rotamers
        conformer_pattern = re.compile(
            r"\s+\d+\s+(?P<Erel>\d*\.\d*)\s+(?P<Etot>-*\d+\.\d+)\s+"
            r"(?P<weight>-*\d+\.\d+)\s+"
            r"(?P<conformer>-*\d+\.\d+)\s+(?P<set>\d+)\s+(?P<degen>\d+)\s+"
            r"(?P<origin>\w+)\n")
        rotamer_pattern = re.compile(
            r"\s+\d+\s+(?P<Erel>\d*\.\d*)\s+(?P<Etot>-*\d+\.\d+)\s+"
            r"(?P<weight>-*\d+\.\d+)\s+"
            r"(?P<origin>\w+)\n")
        conformer_degeneracies = []
        energies = []
        with open(output_filepath, "r") as xtbout_file:
            for line in xtbout_file:
                conformer_match = conformer_pattern.match(line)
                rotamer_match = rotamer_pattern.match(line)
                if conformer_match:
                    conformer_degeneracies.append(
                        int(conformer_match["degen"]))
                    energies.append(conformer_match["Etot"])
                elif rotamer_match:
                    energies.append(rotamer_match["Etot"])

        # Get final rotamers file and read in all molecules,
        # sorted by conformer type and energy
        final_rotamer_filename = None
        dir_entries = os.listdir(self.path)
        if "crest_rotamers.xyz" in dir_entries:
            final_rotamer_filename = "crest_rotamers.xyz"
        else:
            n_rot_files = [
                int(os.path.splitext(f)[0].split("_")[2])
                for f in dir_entries if "crest_rotamers" in f
            ]
            if len(n_rot_files) > 0:
                final_rotamer_filename = "crest_rotamers_{}.xyz".format(
                    max(n_rot_files))

        if final_rotamer_filename is None:
            # Previously this case ended in an unbound-variable error.
            print("crest_rotamers file not found, "
                  "no rotamer list processed")
        else:
            try:
                rotamers_path = os.path.join(self.path,
                                             final_rotamer_filename)
                rotamer_structures = XYZ.from_file(
                    rotamers_path).all_molecules
                for r in rotamer_structures:
                    r.set_charge_and_spin(charge=chg)
                # Consecutive rotamers are grouped by conformer using the
                # degeneracies read from the output file.
                start = 0
                for n, d in enumerate(conformer_degeneracies):
                    self.sorted_structures_energies.append([])
                    i = 0
                    for i in range(start, start + d):
                        self.sorted_structures_energies[n].append(
                            [rotamer_structures[i], energies[i]])
                    start = i + 1
            except FileNotFoundError:
                print("{} not found, no rotamer list processed".format(
                    final_rotamer_filename))

    # Get lowest energy conformer from 'crest_best.xyz'; this is done
    # whether or not the run terminated properly.
    crestbest_path = os.path.join(self.path, "crest_best.xyz")
    try:
        lowest_e_struct = Molecule.from_file(crestbest_path)
        lowest_e_struct.set_charge_and_spin(charge=chg)
        self.lowest_energy_structure = lowest_e_struct
    except FileNotFoundError:
        print("{} not found".format(crestbest_path))
def fix_scf(self):
    """
    Pick and apply the next remedy for an SCF failure on ``self.fix_step``.

    The state of a multi-step strategy is stored as JSON between
    ``<SCF Fix Strategy>`` ... ``</SCF Fix Strategy>`` tags inside the
    ``"comment"`` entry of ``self.fix_step.params``. Each call either
    starts a new strategy, advances the stored one by one method, or
    performs a "reset" that restarts from the latest geometry.

    Returns:
        A string naming the action taken (one of the integral-threshold
        messages, ``"reset"``, or the name of the strategy method that was
        applied), whatever ``self.fix_error_code_134()`` returns, or None
        when no fix is applicable (too few SCF iterations without exit
        code 134, or every method of the strategy has been tried).

    Raises:
        ValueError: if the stored strategy names a method this function
            does not implement.
        AssertionError: in the reset path, if the job type is not "aimd"
            when expected, or the velocity and trajectory files disagree
            in length.
    """
    comments = self.fix_step.params.get("comment", "")
    scf_pattern = re.compile(r"<SCF Fix Strategy>(.*)</SCF Fix "
                             r"Strategy>", flags=re.DOTALL)
    # findall returns a list; when a strategy is present the name is rebound
    # to the captured JSON text (a str), so the later len() checks work on
    # either the empty list or that string.
    old_strategy_text = re.findall(scf_pattern, comments)
    if len(old_strategy_text) > 0:
        old_strategy_text = old_strategy_text[0]
    od = self.outdata[self.error_step_id]

    # First remedy for "Negative Eigen" errors: tighten the integral
    # threshold to 12, then to 14. Once it is already >= 14 this falls
    # through to the generic handling below.
    if "Negative Eigen" in self.errors:
        if "thresh" not in self.fix_step.params["rem"]:
            self.fix_step.set_integral_threshold(thresh=12)
            return "use tight integral threshold"
        elif int(self.fix_step.params["rem"]["thresh"]) < 14:
            self.fix_step.set_integral_threshold(thresh=14)
            return "use even tighter integral threshold"

    # With no SCF record, or at most 10 entries in the last one, the only
    # thing attempted is the exit-code-134 fix.
    if len(od["scf_iteration_energies"]) == 0 \
            or len(od["scf_iteration_energies"][-1]) <= 10:
        if 'Exit Code 134' in self.errors:
            # immature termination of SCF
            return self.fix_error_code_134()
        else:
            return None

    # Decide what to do: reset, continue a stored strategy, or start one.
    if od["jobtype"] in ["opt", "ts", "aimd"] \
            and len(od["molecules"]) >= 2:
        # At least two geometries were recorded, so restart from the
        # latest one instead of changing the SCF method.
        strategy = "reset"
    elif len(old_strategy_text) > 0:
        # Resume the persisted strategy and move on to its next method.
        strategy = json.loads(old_strategy_text)
        strategy["current_method_id"] += 1
    else:
        strategy = dict()
        scf_iters = od["scf_iteration_energies"][-1]
        # NOTE(review): index [1] of the last iteration entry is compared
        # with rca_gdm_thresh; presumably it is that iteration's
        # convergence error - confirm against the output parser.
        if scf_iters[-1][1] >= self.rca_gdm_thresh:
            strategy["methods"] = ["increase_iter", "rca_diis", "gwh",
                                   "gdm", "rca", "core+rca", "fon"]
            strategy["current_method_id"] = 0
        else:
            strategy["methods"] = ["increase_iter", "diis_gdm", "gwh",
                                   "rca", "gdm", "core+gdm", "fon"]
            strategy["current_method_id"] = 0
        strategy["version"] = 2.0

    # noinspection PyTypeChecker
    if strategy == "reset":
        self.fix_step.set_scf_algorithm_and_iterations(
            algorithm="diis", iterations=self.scf_max_cycles)
        if self.error_step_id > 0:
            self.set_scf_initial_guess("read")
        else:
            self.set_scf_initial_guess("sad")
        if od["jobtype"] in ["opt", "ts"]:
            self.set_last_input_geom(od["molecules"][-1])
        else:
            assert od["jobtype"] == "aimd"
            from pymatgen.io.qchem import QcNucVeloc
            from pymatgen.io.xyz import XYZ
            # Restart the AIMD run from the trajectory and velocity files
            # found under <scratch_dir>/AIMD.
            scr_dir = od["scratch_dir"]
            qcnv_filepath = os.path.join(scr_dir, "AIMD", "NucVeloc")
            qc_md_view_filepath = os.path.join(scr_dir, "AIMD", "View.xyz")
            qcnv = QcNucVeloc(qcnv_filepath)
            qc_md_view = XYZ.from_file(qc_md_view_filepath)
            assert len(qcnv.velocities) == len(qc_md_view.all_molecules)
            # Only the steps not yet run are requested for the restart.
            aimd_steps = self.fix_step.params["rem"]["aimd_steps"]
            elapsed_steps = len(qc_md_view.all_molecules)
            remaining_steps = aimd_steps - elapsed_steps + 1
            self.fix_step.params["rem"]["aimd_steps"] = remaining_steps
            # NOTE(review): presumably XYZ.molecule is the last frame of
            # the trajectory, matching velocities[-1] - confirm.
            self.set_last_input_geom(qc_md_view.molecule)
            self.fix_step.set_velocities(qcnv.velocities[-1])
            self.fix_step.params["rem"].pop("aimd_init_veloc", None)
            # Back up both files into the current directory under the next
            # unused trajectory number (1 when no backup exists yet).
            traj_num = max([0] + [int(f.split(".")[1])
                                   for f in glob.glob("traj_View.*.xyz")])
            dest_view_filename = "traj_View.{}.xyz".format(traj_num + 1)
            dest_nv_filename = "traj_NucVeloc.{}.txt".format(traj_num + 1)
            logging.info("Backing up trajectory files to {} and {}."
                         .format(dest_view_filename, dest_nv_filename))
            shutil.copy(qc_md_view_filepath, dest_view_filename)
            shutil.copy(qcnv_filepath, dest_nv_filename)
        # A reset discards any persisted strategy; drop the comment entry
        # entirely when nothing else is left in it.
        if len(old_strategy_text) > 0:
            comments = scf_pattern.sub("", comments)
            self.fix_step.params["comment"] = comments
            if len(comments.strip()) == 0:
                self.fix_step.params.pop("comment")
        return "reset"
    elif strategy["current_method_id"] > len(strategy["methods"])-1:
        # Every method in the strategy has already been tried.
        return None
    else:
        # noinspection PyTypeChecker
        method = strategy["methods"][strategy["current_method_id"]]
        # Each method is a combination of SCF algorithm and initial guess.
        if method == "increase_iter":
            self.fix_step.set_scf_algorithm_and_iterations(
                algorithm="diis", iterations=self.scf_max_cycles)
            self.set_scf_initial_guess("sad")
        elif method == "rca_diis":
            self.fix_step.set_scf_algorithm_and_iterations(
                algorithm="rca_diis", iterations=self.scf_max_cycles)
            self.set_scf_initial_guess("sad")
        elif method == "gwh":
            self.fix_step.set_scf_algorithm_and_iterations(
                algorithm="diis", iterations=self.scf_max_cycles)
            self.set_scf_initial_guess("gwh")
        elif method == "gdm":
            self.fix_step.set_scf_algorithm_and_iterations(
                algorithm="gdm", iterations=self.scf_max_cycles)
            self.set_scf_initial_guess("sad")
        elif method == "rca":
            self.fix_step.set_scf_algorithm_and_iterations(
                algorithm="rca", iterations=self.scf_max_cycles)
            self.set_scf_initial_guess("sad")
        elif method == "core+rca":
            self.fix_step.set_scf_algorithm_and_iterations(
                algorithm="rca", iterations=self.scf_max_cycles)
            self.set_scf_initial_guess("core")
        elif method == "diis_gdm":
            self.fix_step.set_scf_algorithm_and_iterations(
                algorithm="diis_gdm", iterations=self.scf_max_cycles)
            # NOTE(review): this is the only branch that sets the guess on
            # self.fix_step directly instead of through
            # self.set_scf_initial_guess - confirm this is intentional.
            self.fix_step.set_scf_initial_guess("sad")
        elif method == "core+gdm":
            self.fix_step.set_scf_algorithm_and_iterations(
                algorithm="gdm", iterations=self.scf_max_cycles)
            self.set_scf_initial_guess("core")
        elif method == "fon":
            self.fix_step.set_scf_algorithm_and_iterations(
                algorithm="diis", iterations=self.scf_max_cycles)
            self.set_scf_initial_guess("sad")
            natoms = len(od["molecules"][-1])
            self.fix_step.params["rem"]["occupations"] = 2
            # NOTE(review): the 0.618 factor is not explained anywhere in
            # this function - confirm where it comes from.
            self.fix_step.params["rem"]["fon_norb"] = int(natoms * 0.618)
            self.fix_step.params["rem"]["fon_t_start"] = 300
            self.fix_step.params["rem"]["fon_t_end"] = 300
            self.fix_step.params["rem"]["fon_e_thresh"] = 6
            self.fix_step.set_integral_threshold(14)
            self.fix_step.set_scf_convergence_threshold(7)
        else:
            raise ValueError("fix method " + method + " is not supported")
        # Persist the updated strategy in the comment so the next call can
        # resume from it.
        strategy_text = "<SCF Fix Strategy>"
        strategy_text += json.dumps(strategy, indent=4, sort_keys=True)
        strategy_text += "</SCF Fix Strategy>"
        if len(old_strategy_text) > 0:
            comments = scf_pattern.sub(strategy_text, comments)
        else:
            comments += "\n" + strategy_text
        self.fix_step.params["comment"] = comments
        return method
#%%
# For every frame of a multi-frame XYZ trajectory, accumulate a weighted sum
# of the z coordinates of the O, Al and La sites and plot it against the
# frame index.
#
# Element is imported from pymatgen.core.periodic_table because the
# root-level `from pymatgen import Element` re-export is not available in
# newer pymatgen releases; this path works on old and new versions alike.
from pymatgen.core.periodic_table import Element
from pymatgen.io.xyz import XYZ
import matplotlib.pyplot as plt
import os

os.chdir('/home/jinho93/new/oxides/perobskite/lanthanum-aluminate/periodic_step/cp2k/015_thick4/2000k')
xyz = XYZ.from_file('lao-pos-1.xyz')

# Weight applied to each site's z coordinate, per element. Sites of any other
# element do not contribute.
z_weights = {Element.O: -2, Element.Al: 3, Element.La: 3}

output = []
for structure in xyz.all_molecules:
    p = 0
    for site in structure.sites:
        weight = z_weights.get(site.specie)
        if weight is not None:
            p += weight * site.z
    # One accumulated value per frame.
    output.append(p)

plt.plot(output)
def set_struct_fromxyz(self, xyzstr):
    """
    Load the structure from XYZ-formatted text.

    The raw text is kept on ``self.xyz``, the parsed molecule is stored on
    ``self.struct`` in its dictionary form, and ``self.boundary`` is set to
    ``"0d"``.

    Args:
        xyzstr: Contents of an XYZ file, as a string.
    """
    self.xyz = xyzstr
    parsed_molecule = XYZ.from_string(xyzstr).molecule
    self.struct = parsed_molecule.as_dict()
    self.boundary = "0d"