def generate_Si_cluster():
    """
    Generate the Si cluster XYZ fixtures in ``test_dir``.

    A two-atom Si cell is expanded to a 2x2x2 supercell and converted to a
    molecule.  The pristine molecule and four derived copies (rotated,
    perturbed, permuted, and all three combined) are each written to their
    own ``Si_cluster*.xyz`` file.  Every transformation uses ``seed=42``.
    """
    from pymatgen.io.xyz import XYZ

    def _dump(molecule, basename):
        # Serialize one molecule into the shared fixture directory.
        XYZ(molecule).write_file(os.path.join(test_dir, basename))

    site_coords = [[0, 0, 0], [0.75, 0.5, 0.75]]
    lattice = Lattice.from_parameters(a=3.84, b=3.84, c=3.84,
                                      alpha=120, beta=90, gamma=60)
    struct = Structure(lattice, ['Si', 'Si'], site_coords)
    struct.make_supercell([2, 2, 2])

    # Reference molecule used as the starting point for every variant.
    mol = Molecule.from_sites(struct)
    _dump(mol, "Si_cluster.xyz")

    # The helpers below are called for their effect on the copy; their
    # return values are not used.

    # Rotated copy of the whole molecule.
    mol_rotated = mol.copy()
    rotate(mol_rotated, seed=42)
    _dump(mol_rotated, "Si_cluster_rotated.xyz")

    # Copy with perturbed atom positions.
    mol_perturbed = mol.copy()
    perturb(mol_perturbed, 0.3, seed=42)
    _dump(mol_perturbed, "Si_cluster_perturbed.xyz")

    # Copy with the atom order permuted.
    mol_permuted = mol.copy()
    permute(mol_permuted, seed=42)
    _dump(mol_permuted, "Si_cluster_permuted.xyz")

    # All three transformations applied in sequence to one copy.
    mol2 = mol.copy()
    for transform, args in ((rotate, ()), (perturb, (0.3,)), (permute, ())):
        transform(mol2, *args, seed=42)
    _dump(mol2, "Si_cluster_2.xyz")
def setUp(self):
    """
    Create the fixtures shared by the tests.

    Sets ``self.mol`` (one C with four H), ``self.multi_mols`` (that
    geometry plus a copy with every coordinate shifted by +10.0), and the
    corresponding ``XYZ`` wrappers ``self.xyz`` and ``self.multi_xyz``.
    """
    base_geometry = [
        [0.000000, 0.000000, 0.000000],
        [0.000000, 0.000000, 1.089000],
        [1.026719, 0.000000, -0.363000],
        [-0.513360, -0.889165, -0.363000],
        [-0.513360, 0.889165, -0.363000],
    ]
    shifted_geometry = [[component + 10.0 for component in row]
                        for row in base_geometry]

    self.mol = Molecule(["C", "H", "H", "H", "H"], base_geometry)

    frames = []
    for geometry in (base_geometry, shifted_geometry):
        frames.append(Molecule(["C", "H", "H", "H", "H"], geometry))
    self.multi_mols = frames

    self.xyz = XYZ(self.mol)
    self.multi_xyz = XYZ(self.multi_mols)
def write_mol(mol, filename):
    """
    Write a molecule to a file based on file extension. For example, anything
    ending in a "xyz" is assumed to be a XYZ file. Supported formats include
    xyz, Gaussian input (gjf|g03|g09|com|inp), pymatgen's JSON serialized
    molecules (json|mson), and the formats delegated to BabelMolAdaptor
    (pdb|mol|mdl|sdf|sd|ml2|sy2|mol2|cml|mrv).

    The format is detected case-insensitively from the base name of
    ``filename`` only, so directory names never influence the choice.

    Args:
        mol (Molecule/IMolecule): Molecule to write
        filename (str): A filename to write to.

    Returns:
        Whatever the selected writer returns.

    Raises:
        ValueError: If the file name matches none of the supported formats.
    """
    fname = os.path.basename(filename).lower()

    if fnmatch(fname, "*.xyz*"):
        return XYZ(mol).write_file(filename)

    gaussian_exts = ("gjf", "g03", "g09", "com", "inp")
    if any(fnmatch(fname, "*.{}*".format(ext)) for ext in gaussian_exts):
        return GaussianInput(mol).write_file(filename)

    if fnmatch(fname, "*.json*") or fnmatch(fname, "*.mson*"):
        with zopen(filename, "wt") as f:
            return f.write(str2unicode(json.dumps(mol, cls=MontyEncoder)))

    # Regex alternation picks the first alternative that matches, so "mol2"
    # has to be listed before "mol"; otherwise "x.mol2" is written with
    # format "mol".
    m = re.search(r"\.(pdb|mol2|mol|mdl|sdf|sd|ml2|sy2|cml|mrv)", fname)
    if m:
        return BabelMolAdaptor(mol).write_file(filename, m.group(1))

    raise ValueError("Unrecognized file extension!")
def test_init_from_structure(self):
    """
    The XYZ text built from the ``POSCAR`` fixture structure must equal
    the reference text (24 sites: Fe4 P4 O16).
    """
    # NOTE(review): the reference text below is kept exactly as it appears
    # in this file, i.e. with single spaces between records.  It looks like
    # line breaks may have been lost when the file was reflowed -- confirm
    # against what str(XYZ(...)) really emits.
    poscar_path = os.path.join(test_dir, 'POSCAR')
    structure = Poscar.from_file(poscar_path).structure
    writer = XYZ(structure)
    expected = """24 Fe4 P4 O16 Fe 2.277347 4.550379 2.260125 Fe 2.928536 1.516793 4.639870 Fe 7.483231 4.550379 0.119620 Fe 8.134420 1.516793 2.499364 P 0.985089 1.516793 1.990624 P 4.220794 4.550379 4.370369 P 6.190973 1.516793 0.389120 P 9.426677 4.550379 2.768865 O 0.451582 4.550379 3.365614 O 1.006219 1.516793 3.528306 O 1.725331 0.279529 1.358282 O 1.725331 2.754057 1.358282 O 3.480552 3.313115 3.738027 O 3.480552 5.787643 3.738027 O 4.199665 4.550379 1.148562 O 4.754301 1.516793 0.985870 O 5.657466 4.550379 3.773620 O 6.212102 1.516793 3.610928 O 6.931215 0.279529 1.021463 O 6.931215 2.754057 1.021463 O 8.686436 3.313115 3.401208 O 8.686436 5.787643 3.401208 O 9.405548 4.550379 1.231183 O 9.960184 1.516793 1.393875"""
    self.assertEqual(str(writer), expected)
def test_str(self):
    """
    Check ``str(XYZ)`` for the single-molecule fixture at default
    precision, and for the two-frame fixture with ``coord_precision=3``.
    """
    # NOTE(review): both reference texts are kept exactly as they appear in
    # this file (records separated by single spaces).  Line breaks may have
    # been lost when the file was reflowed -- confirm before relying on them.
    expected_single = """5 H4 C1 C 0.000000 0.000000 0.000000 H 0.000000 0.000000 1.089000 H 1.026719 0.000000 -0.363000 H -0.513360 -0.889165 -0.363000 H -0.513360 0.889165 -0.363000"""
    self.assertEqual(str(self.xyz), expected_single)

    expected_multi = """5 H4 C1 C 0.000 0.000 0.000 H 0.000 0.000 1.089 H 1.027 0.000 -0.363 H -0.513 -0.889 -0.363 H -0.513 0.889 -0.363 5 H4 C1 C 10.000 10.000 10.000 H 10.000 10.000 11.089 H 11.027 10.000 9.637 H 9.487 9.111 9.637 H 9.487 10.889 9.637"""
    rounded_frames = XYZ(self.multi_mols, coord_precision=3)
    self.assertEqual(str(rounded_frames), expected_multi)
def _write_input(self, input_dir="."):
    """
    Write the packmol input file, and one structure file per molecule,
    into the input directory.

    The control parameters are written first, one ``key value`` line each.
    Then, for every molecule in ``self.mols``, its structure is written to
    ``<input_dir>/<index>.<filetype>`` and a ``structure ... end structure``
    section carrying ``self.param_list[index]`` is appended.

    Args:
        input_dir (string): path to the input directory
    """
    input_path = os.path.join(input_dir, self.input_file)
    with open(input_path, 'wt', encoding="utf-8") as inp:
        for key, value in self.control_params.items():
            inp.write('{} {}\n'.format(key, self._format_param_val(value)))

        for idx, mol in enumerate(self.mols):
            filetype = self.control_params["filetype"]
            # NOTE(review): the path is handed on as ASCII-encoded bytes,
            # exactly as before -- confirm the writers still need bytes.
            structure_file = os.path.join(
                input_dir, '{}.{}'.format(idx, filetype)).encode("ascii")

            if filetype != "pdb":
                # Every non-pdb filetype is written through the XYZ writer.
                XYZ(mol).write_file(filename=structure_file)
            else:
                self.write_pdb(mol, structure_file, num=idx + 1)

            # Section tying the molecule index to its structure file.
            inp.write("\n")
            inp.write("structure {}.{}\n".format(
                os.path.join(input_dir, str(idx)), filetype))
            for key, value in self.param_list[idx].items():
                inp.write(' {} {}\n'.format(key, self._format_param_val(value)))
            inp.write('end structure\n')
def write_traj(structures):
    """
    Write ``structures`` as a multi-frame XYZ file named ``traj.xyz`` in
    the current working directory.

    Each structure contributes one ``Molecule`` frame built from its
    species and the ``coords`` of its sites.
    """
    frames = [
        Molecule(struc.species, coords=[site.coords for site in struc.sites])
        for struc in structures
    ]
    XYZ(mol=frames).write_file('traj.xyz')
def outputMolecule(singleMol, dataDir):
    """
    Save the sites in ``singleMol`` as
    ``<dataDir>/singleMolecule/singleMol.xyz``.

    Args:
        singleMol: iterable of sites; each must expose ``specie`` and
            ``coords``.
        dataDir (str): directory that receives the ``singleMolecule``
            folder.  It may be relative or absolute.

    Side effects:
        Changes the process working directory to ``dataDir`` (kept from
        the original implementation so existing callers see the same
        state) and creates ``singleMolecule`` there if it is missing.
    """
    molecule = Molecule([], [])
    for site in singleMol:
        molecule.append(str(site.specie), site.coords)
    xyzObj = XYZ(molecule)

    # Resolve the target BEFORE changing directory: with a relative dataDir
    # the old ``dataDir + '/singleMolecule/...'`` was evaluated after the
    # chdir and therefore pointed at ``dataDir/dataDir/...``.
    out_dir = os.path.join(os.path.abspath(dataDir), 'singleMolecule')

    os.chdir(dataDir)
    # Portable replacement for ``os.system('mkdir singleMolecule')`` that
    # does not complain when the folder already exists.
    os.makedirs(out_dir, exist_ok=True)
    xyzObj.write_file(os.path.join(out_dir, 'singleMol.xyz'))
def setUp(self):
    """
    Create the fixtures shared by the tests: ``self.mol`` (one C with
    four H) and ``self.xyz``, its ``XYZ`` wrapper.
    """
    species = ["C", "H", "H", "H", "H"]
    positions = [
        [0.000000, 0.000000, 0.000000],
        [0.000000, 0.000000, 1.089000],
        [1.026719, 0.000000, -0.363000],
        [-0.513360, -0.889165, -0.363000],
        [-0.513360, 0.889165, -0.363000],
    ]
    molecule = Molecule(species, positions)
    self.mol = molecule
    self.xyz = XYZ(molecule)
def largercube(cube_length):
    """
    Re-box the POSCAR that sits next to this script into a cube.

    Reads ``POSCAR`` from the directory containing this file (not the
    current working directory), round-trips the structure through
    ``POSCAR.xyz`` to obtain a molecule, puts that molecule in a box with
    all three edge lengths equal to ``cube_length``, and writes the result
    to ``POSCAR_larger.vasp`` in the same directory.
    """
    script_dir = os.path.dirname(os.path.realpath(__file__))

    def _beside_script(name):
        # Path of ``name`` inside the directory of this script.
        return os.path.join(script_dir, name)

    source_structure = Poscar.from_file(_beside_script('POSCAR')).structure

    # The intermediate XYZ file is written first and then read back.
    XYZ(source_structure).write_file(_beside_script('POSCAR.xyz'))
    molecule = XYZ.from_file(_beside_script('POSCAR.xyz')).molecule

    boxed = molecule.get_boxed_structure(cube_length, cube_length, cube_length)
    Poscar(boxed).write_file(_beside_script('POSCAR_larger.vasp'))
def insert_g3testset(coll):
    """
    Parse every ``g*.txt`` file in the current directory and upsert one
    document per molecule into ``coll``.

    Sites whose species symbol is not a valid element are dropped before
    the molecule is rebuilt.  Documents are keyed on
    ``(inchi, charge, spin_multiplicity)``.  A failure on one molecule is
    printed (with a traceback limited to two frames) and does not stop
    the run.
    """
    for path in glob.glob("g*.txt"):
        print("Parsing " + path)
        for m, charge, spin in parse_file(path):
            try:
                clean_sites = [site for site in m
                               if Element.is_valid_symbol(site.specie.symbol)]
                clean_mol = Molecule.from_sites(clean_sites, charge=charge,
                                                spin_multiplicity=spin)

                # Go through an XYZ string to get an OpenBabel/pybel molecule.
                xyz = XYZ(clean_mol)
                bb = BabelMolAdaptor.from_string(str(xyz), "xyz")
                pbmol = pb.Molecule(bb.openbabel_mol)
                smiles = pbmol.write("smi").split()[0]
                can = pbmol.write("can").split()[0]
                inchi = pbmol.write("inchi")
                svg = pbmol.write("svg")

                comp = clean_mol.composition
                # The "names" field (get_nih_names lookup) is not populated here.
                d = {
                    "molecule": clean_mol.as_dict(),
                    "pretty_formula": comp.reduced_formula,
                    "formula": comp.formula,
                    "composition": comp.as_dict(),
                    "elements": list(comp.as_dict().keys()),
                    "nelements": len(comp),
                    "charge": charge,
                    "spin_multiplicity": spin,
                    "smiles": smiles,
                    "can": can,
                    "inchi": inchi,
                    "svg": svg,
                    "xyz": str(xyz),
                    "tags": ["G305 test set"],
                }

                selector = {"inchi": inchi, "charge": charge,
                            "spin_multiplicity": spin}
                coll.update(selector, {"$set": d}, upsert=True)
            except Exception:
                print("Error in {}".format(path))
                traceback.print_exception(*sys.exc_info(), limit=2,
                                          file=sys.stdout)
        print("{} parsed!".format(path))
def insert_elements(coll):
    """
    Make sure ``coll`` holds exactly one single-atom document for each
    element with Z = 1..18.

    For every element the collection is searched for the formula
    ``"<symbol>1"``:

    * no match  -> a one-atom molecule at the origin is built and inserted;
    * one match -> nothing is done;
    * several   -> every match after the first is removed.

    A failure while building or inserting a document is reported on stdout
    together with the exception, and processing continues with the next
    element.

    NOTE(review): ``Cursor.count``, ``Collection.insert`` and
    ``Collection.remove`` are legacy PyMongo calls; they are kept so the
    function keeps working with the driver version this file targets.
    """
    print("adding missing elements.")
    for z in range(1, 19):
        el = Element.from_Z(z)
        matches = coll.find(filter={"formula": "{}1".format(el.symbol)})
        # Ask the server once instead of once per branch.
        n_matches = matches.count()

        if n_matches == 0:
            try:
                clean_mol = Molecule([el], [[0, 0, 0]])
                xyz = XYZ(clean_mol)
                bb = BabelMolAdaptor.from_string(str(xyz), "xyz")
                pbmol = pb.Molecule(bb.openbabel_mol)
                smiles = pbmol.write("smi").split()[0]
                can = pbmol.write("can").split()[0]
                inchi = pbmol.write("inchi")
                svg = pbmol.write("svg")
                d = {"molecule": clean_mol.as_dict()}
                comp = clean_mol.composition
                d["pretty_formula"] = comp.reduced_formula
                d["formula"] = comp.formula
                d["composition"] = comp.as_dict()
                d["elements"] = list(comp.as_dict().keys())
                d["nelements"] = len(comp)
                d["charge"] = 0
                d["spin_multiplicity"] = clean_mol.spin_multiplicity
                d["smiles"] = smiles
                d["can"] = can
                d["inchi"] = inchi
                # The "names" field (get_nih_names lookup) is not populated here.
                d["svg"] = svg
                d["xyz"] = str(xyz)
                d["tags"] = ["G305 test set"]
                coll.insert(d)
            except Exception as ex:
                # Still best-effort, but say what went wrong instead of
                # discarding the exception.
                print("Error in {}".format(el))
                print("{}: {}".format(type(ex).__name__, ex))
        elif n_matches > 1:
            print("More than 1 {} found. Removing...".format(el))
            # Keep the first match; a separate loop name avoids shadowing
            # the cursor.
            for duplicate in list(matches)[1:]:
                print(duplicate["_id"])
                coll.remove({"_id": duplicate["_id"]})
def outputMolecule(singleMol, dataDir):
    """
    Save the sites in ``singleMol`` as
    ``<dataDir>/singlemolecule/singleMol.xyz``.

    If that file already exists the user is asked on stdin whether it may
    be overwritten.  ``Y`` overwrites it, ``N`` leaves it untouched and
    terminates the program via ``sys.exit()``, and any other answer
    repeats the question.

    Args:
        singleMol: mapping of site index -> site; each site must expose
            ``specie`` and ``coords``.
        dataDir (str): directory containing the ``singlemolecule`` folder.
            The folder must already exist, because it is listed to detect
            a previous file.
    """
    molecule = Molecule([], [])
    singlemolpath = os.path.join(dataDir, 'singlemolecule')
    for siteIndex in singleMol.keys():
        site = singleMol[siteIndex]
        molecule.append(str(site.specie), site.coords)
    xyzObj = XYZ(molecule)

    # One place for the destination, so the path that is reported is
    # always the path that was written (the overwrite branch used to
    # print ".../singlemolecule.singleMol.xyz").
    target = os.path.join(singlemolpath, 'singleMol.xyz')

    if "singleMol.xyz" in os.listdir(singlemolpath):
        print('You have one \'singleMol.xyz\' file inside the single molecule path.')
        print('This previous file will be overwritten.')
        decision = None
        while decision not in ('Y', 'N'):
            decision = input('Do you want to proceed? Y for yes, N for no.')
            if decision == 'N':
                print('The previous file is not changed. ')
                sys.exit()
            if decision != 'Y':
                print('Not eligible response!!!\n')

    # Reached when there was no previous file or the user answered 'Y'.
    xyzObj.write_file(target)
    print('The single molecule structure is saved under:', target)
def test_from_string(self):
    """
    A molecule serialized to XYZ text and parsed back through
    ``BabelMolAdaptor.from_string`` must report the formula ``H4 C1``.
    """
    xyz_text = str(XYZ(self.mol))
    round_tripped = BabelMolAdaptor.from_string(xyz_text, "xyz").pymatgen_mol
    self.assertEqual(round_tripped.formula, "H4 C1")
def get_task_doc(cls, path, fw_spec=None):
    """
    Get the entire task doc for a path, including any post-processing.

    Args:
        path: Path of the output file to parse; it is passed through
            ``zpath`` before being handed to ``QcOutput``.
        fw_spec: Optional spec. When truthy, its ``'inchi'`` entry is
            compared with the InChI of the final molecule and the result is
            stored under ``'inchi_changed'``.

    Returns:
        The task document passed through ``jsanitize``. Its ``'state'`` is
        ``'rejected'`` when a structural change was detected, ``'error'``
        when any collected calculation has ``has_error`` set, and
        ``'successful'`` otherwise.
    """
    logger.info("Getting task doc for file:{}".format(path))
    qcout = QcOutput(zpath(path))
    data = qcout.data
    # First and last molecule of the first job in the output.
    initial_mol = data[0]["molecules"][0]
    mol = data[0]["molecules"][-1]
    if data[0]["jobtype"] == "freq":
        # When the first job is a frequency job, the "final" molecule is a
        # copy of the initial one.
        mol = Molecule.from_dict(initial_mol.as_dict())
    bb = BabelMolAdaptor(mol)
    pbmol = bb.pybel_mol
    xyz = XYZ(mol)
    smiles = pbmol.write(str("smi")).split()[0]
    can = pbmol.write(str("can")).split()[0]
    inchi_final = pbmol.write(str("inchi")).strip()
    svg = cls.modify_svg(cls.xyz2svg(xyz))
    comp = mol.composition
    charge = mol.charge
    spin_mult = mol.spin_multiplicity
    data_dict = {}
    pga = PointGroupAnalyzer(mol)
    sch_symbol = pga.sch_symbol
    stationary_type = None
    has_structure_changing_job = False
    # File each job under a key derived from its job type. If several jobs
    # map to the same key, the last one wins.
    for d in data:
        if d["jobtype"] == "opt":
            data_dict["geom_opt"] = d
            has_structure_changing_job = True
        elif d["jobtype"] == "freq":
            data_dict["freq"] = d
            has_structure_changing_job = True
            if not d["has_error"]:
                if d['frequencies'][0]["frequency"] < -0.00:
                    # Only the first listed frequency is inspected.
                    # NOTE(review): in Python -0.00 == 0.00, so this is the
                    # same test as "< 0" -- confirm that is the intent.
                    stationary_type = "non-minimum"
                else:
                    stationary_type = "minimum"
            else:
                stationary_type = "unknown"
        elif d["jobtype"] == "sp":
            # Single-point results are keyed "scf", with the solvent method
            # appended unless it is "NA".
            suffix = "" if d["solvent_method"] == "NA" \
                else "_" + d["solvent_method"]
            data_dict["scf" + suffix] = d
        elif d["jobtype"] == "aimd":
            # NOTE(review): the key is spelled "amid" although the job type
            # is "aimd" -- possibly a typo; left as is because stored
            # documents may depend on it.
            data_dict["amid"] = d
            has_structure_changing_job = True
    data = data_dict
    # From here on ``d`` is the task document, no longer the loop variable.
    d = {
        "path": os.path.abspath(path),
        "folder": os.path.basename(os.path.dirname(os.path.abspath(path))),
        "calculations": data,
        "molecule_initial": initial_mol.as_dict(),
        "molecule_final": mol.as_dict(),
        "pointgroup": sch_symbol,
        "pretty_formula": comp.reduced_formula,
        "reduced_cell_formula_abc": comp.alphabetical_formula,
        "formula": comp.formula,
        "charge": charge,
        "spin_multiplicity": spin_mult,
        "composition": comp.as_dict(),
        "elements": list(comp.as_dict().keys()),
        "nelements": len(comp),
        "smiles": smiles,
        "can": can,
        "inchi_final": inchi_final,
        "svg": svg,
        "xyz": str(xyz),
        "names": get_nih_names(smiles)
    }
    if stationary_type:
        d['stationary_type'] = stationary_type
    if fw_spec:
        # Raises KeyError if the spec carries no 'inchi' entry.
        inchi_initial = fw_spec['inchi']
        if inchi_initial != d['inchi_final']:
            d['inchi_changed'] = True
        else:
            d['inchi_changed'] = False
    if has_structure_changing_job:
        d['structure_changed'] = cls._check_structure_change(
            initial_mol, mol, path)
    else:
        d['structure_changed'] = False
    if d['structure_changed']:
        d['state'] = 'rejected'
        d['reject_reason'] = 'structural change'
    if "state" not in d:
        # Not rejected: collect the errors of every stored calculation that
        # reports one.
        for v in data_dict.values():
            if v['has_error']:
                d['state'] = "error"
                errors = d.get("errors", [])
                errors += v["errors"]
                d["errors"] = errors
    if "state" not in d:
        d["state"] = "successful"
    return jsanitize(d)
def saveMol(mol, write_path):
    """
    Save ``mol`` as an XYZ file at ``write_path``.
    """
    writer = XYZ(mol)
    writer.write_file(write_path)
def test_expand_structure(self):
    """
    ``ColorVonoroi.expand_structure`` must yield something ``XYZ`` can
    serialize to disk.

    The file is written into a throw-away temporary directory.  The test
    used to write to a hard-coded path inside one user's home directory,
    so it could only run on that machine, and it asserted nothing.
    """
    import os
    import tempfile

    molecule = ColorVonoroi.expand_structure(self.struc)
    xyz = XYZ(molecule)
    with tempfile.TemporaryDirectory() as tmp_dir:
        out_path = os.path.join(tmp_dir, "expaned_structure.xyz")
        xyz.write_file(out_path)
        self.assertTrue(os.path.isfile(out_path))
[-0.513360, 0.889165, -0.363000]]
mol = Molecule(["C", "H", "H", "H", "H"], coords)
print("mol:", mol)
print("\n")
print("mol[0]:", mol[0])
print("mol[1]:", mol[1])
print("\n")
# Break the bond between the atoms at index 0 and 1. The output above shows
# that these are C and H. Original author's open question: it is odd that
# the result below comes out the way it does -- possibly related to the
# spatial structure?
for frag in mol.break_bond(0, 1):
    print("frag:", frag)
print("\n")
print("neighbors of mol[0]:", mol.get_neighbors(mol[0], 3))
# Get the covalent bonds in the molecule.
print("covalent_bonds of:", mol.get_covalent_bonds())
print("\n")
# Creates a Structure from a Molecule by putting the Molecule in
# the center of a orthorhombic box. Useful for creating Structure
# for calculating molecules using periodic codes.
structure = mol.get_boxed_structure(10, 10, 10)
print("structure:", structure)
# The XYZ format is a file format describing molecular geometry; it needs
# the coordinates of the atoms in the molecule.
from pymatgen.io.xyz import XYZ
xyz = XYZ(mol)
xyz.write_file("methane.xyz")
def run_task(self, fw_spec):
    """
    Analyse an MD run and insert the results into the ``md_data``
    collection.

    Task parameters read from ``self``:
        get_rdf, get_diffusion, get_viscosity, get_vdos, get_run_data:
            Switches for the individual analyses.  Each defaults to True
            when absent or None; an explicit falsy value now disables the
            analysis.  (The previous ``self.get(x) or True`` was always
            True, so the switches could never be turned off.)
        time_step: falls back to 2 when absent or falsy.
        ionic_step_skip: falls back to 1 when absent or falsy.
        ionic_step_offset: falls back to 0 when absent or falsy.
        analysis_spec: extra key/values merged into the document.

    Read from ``fw_spec``:
        checkpoint_dirs: optional directories, each holding a
            ``vasprun.xml.gz``.  Without them the structures come from
            ``XDATCAR.gz`` in the calculation directory.
        calc_locs: used to locate the calculation directory.

    Side effects:
        Writes ``traj.xyz`` (all frames) to the current working directory
        and inserts one document into the database.

    Returns:
        FWAction: an empty action.
    """

    def _flag(name):
        # Absent/None -> enabled (the documented default); anything else
        # is honoured as given.
        value = self.get(name)
        return True if value is None else bool(value)

    get_rdf = _flag('get_rdf')
    get_diffusion = _flag('get_diffusion')
    get_viscosity = _flag('get_viscosity')
    get_vdos = _flag('get_vdos')
    get_run_data = _flag('get_run_data')

    time_step = self.get('time_step') or 2
    checkpoint_dirs = fw_spec.get('checkpoint_dirs', False)
    calc_dir = get_calc_loc(True, fw_spec["calc_locs"])["path"]
    calc_loc = os.path.join(calc_dir, 'XDATCAR.gz')
    ionic_step_skip = self.get('ionic_step_skip') or 1
    ionic_step_offset = self.get('ionic_step_offset') or 0
    analysis_spec = self.get('analysis_spec') or {}

    if checkpoint_dirs:
        logger.info("LOGGER: Assimilating checkpoint structures")
        structures = []
        for ckpt_dir in checkpoint_dirs:
            # Each vasprun is parsed once.  The earlier second parse only
            # filled an ``ionic_steps`` list that was never read.
            structures.extend(
                Vasprun(os.path.join(ckpt_dir, 'vasprun.xml.gz'),
                        ionic_step_skip=ionic_step_skip,
                        ionic_step_offset=ionic_step_offset).structures)
    else:
        structures = Xdatcar(calc_loc).structures

    # write a trajectory file for Dospt
    molecules = [
        Molecule(species=struc.species,
                 coords=[s.coords for s in struc.sites])
        for struc in structures
    ]
    XYZ(mol=molecules).write_file('traj.xyz')

    db_dict = {'density': float(structures[0].density)}
    db_dict.update(structures[0].composition.to_data_dict)
    db_dict['checkpoint_dirs'] = checkpoint_dirs

    if get_rdf:
        logger.info("LOGGER: Calculating radial distribution functions...")
        rdf = RadialDistributionFunction(structures=structures)
        # The return value was never used; the call is kept because
        # get_rdf_db_dict() presumably relies on it having run.
        rdf.get_radial_distribution_functions(nproc=4)
        db_dict['rdf'] = rdf.get_rdf_db_dict()
        del rdf

    # Must exist even when the VDOS step is skipped, because the diffusion
    # step below reads it.
    vdos_diff = None
    if get_vdos:
        logger.info("LOGGER: Calculating vibrational density of states...")
        effective_step = time_step * ionic_step_skip
        vdos = VDOS(structures)
        vdos_dat = vdos.calc_vdos_spectrum(time_step=effective_step)
        vdos_diff = vdos.calc_diffusion_coefficient(time_step=effective_step)
        db_dict['vdos'] = vdos_dat
        del vdos
        del vdos_dat

    if get_diffusion:
        logger.info("LOGGER: Calculating the diffusion coefficients...")
        diffusion = Diffusion(structures, t_step=time_step, l_lim=50,
                              skip_first=250, block_l=1000, ci=0.95)
        D = {'msd': {}, 'vdos': {}}
        for s in structures[0].types_of_specie:
            D['msd'][s.symbol] = diffusion.getD(s.symbol)
        if vdos_diff:
            D['vdos'] = vdos_diff
        db_dict['diffusion'] = D
        del D

    if get_viscosity:
        logger.info("LOGGER: Calculating the viscosity...")
        viscosities = []
        if checkpoint_dirs:
            for ckpt_dir in checkpoint_dirs:
                visc = Viscosity(ckpt_dir).calc_viscosity()
                viscosities.append(visc['viscosity'])
        # Mean/std of an empty list is NaN (plus a NumPy warning), so the
        # statistics are only stored when a value was collected.
        if viscosities:
            db_dict['viscosity'] = {
                'viscosity': np.mean(viscosities),
                'StdDev': np.std(viscosities)
            }

    if get_run_data:
        if checkpoint_dirs:
            logger.info("LOGGER: Assimilating run stats...")
            run_dirs = checkpoint_dirs
        else:
            logger.info("LOGGER: Getting run stats...")
            run_dirs = [calc_dir]
        data = MD_Data()
        for directory in run_dirs:
            data.parse_md_data(directory)
        db_dict['md_data'] = data.get_md_stats()

    if analysis_spec:
        logger.info("LOGGER: Adding user-specified data...")
        db_dict.update(analysis_spec)

    logger.info("LOGGER: Pushing data to database collection...")
    db_file = env_chk(">>db_file<<", fw_spec)
    db = VaspCalcDb.from_db_file(db_file, admin=True)
    db.collection = db.db["md_data"]
    db.collection.insert_one(db_dict)
    return FWAction()