def pwout2dict(fname, **kwargs):
    """ Extract available information from pw.x .out file.

    The file is scanned from the last line upwards and each quantity is
    recorded only the first time it is met, so the values stored are the
    ones printed last in the file.

    Parameters:
        fname (str/list): filename or list of filenames to scrape as a
            QuantumEspresso pw.x output.

    Keyword arguments:
        Accepted for interface compatibility; not used by this function.

    Returns:
        (dict, bool): the scraped data and True.

    Raises:
        KeyError: if no `cell_parameters` (angstrom) block or no
            `atomic_positions` block was found in the file.
        ZeroDivisionError: if an `atomic_positions` block was found but
            no atoms could be read from it.

    """
    flines, fname = get_flines_extension_agnostic(fname, ["out", "in"])

    pwout = {}
    pwout['source'] = [fname]

    try:
        # grab file owner username; `pwd` is unavailable on some platforms,
        # in which case the placeholder below is used instead
        from pwd import getpwuid
        pwout['user'] = getpwuid(stat(fname).st_uid).pw_name
    except Exception:
        pwout['user'] = '******'

    if 'CollCode' in fname:
        pwout['icsd'] = fname.split('CollCode')[-1]

    # walk the file backwards so that the final printed values win
    for ind in range(len(flines) - 1, -1, -1):
        line = flines[ind]
        lowered = line.lower()
        if ('cell_parameters' in lowered and 'angstrom' in lowered
                and 'lattice_cart' not in pwout):
            # the three rows after the header are the lattice vectors
            pwout['lattice_cart'] = [
                list(map(float, flines[ind + j + 1].strip().split()))
                for j in range(3)
            ]
            pwout['cell_volume'] = cart2volume(pwout['lattice_cart'])
        elif 'atomic_positions' in lowered and 'positions_frac' not in pwout:
            positions = []
            atom_types = []
            # iterating over the remaining lines (rather than indexing) means
            # a block that runs up to the end of the file simply terminates
            for entry in flines[ind + 1:]:
                if 'End final coordinates' in entry:
                    break
                fields = entry.strip().split()
                # parse the whole row before storing anything, so a
                # non-atom line (blank, or the next input card) cannot
                # leave a species without a matching position
                try:
                    species = fields[0]
                    # only the three coordinates: any trailing columns
                    # (e.g. constraint flags) are not part of the position
                    coords = [float(val) for val in fields[1:4]]
                except (IndexError, ValueError):
                    break
                if len(coords) < 3:
                    break
                atom_types.append(species)
                positions.append(coords)
            pwout['positions_frac'] = positions
            pwout['atom_types'] = atom_types
            pwout['num_atoms'] = len(atom_types)
        elif 'final enthalpy' in lowered and 'enthalpy' not in pwout:
            # value is the second-to-last token on the line
            pwout['enthalpy'] = RY_TO_EV * float(lowered.split()[-2])
        elif 'total stress' in lowered and 'pressure' not in pwout:
            # value is the last token on the line
            pwout['pressure'] = KBAR_TO_GPA * float(lowered.split()[-1])
        elif all(key in pwout for key in
                 ['enthalpy', 'pressure', 'lattice_cart', 'positions_frac']):
            # everything of interest has been found; stop scanning
            break

    # get abc lattice
    pwout['lattice_abc'] = cart2abc(pwout['lattice_cart'])

    # calculate stoichiometry: count each species in order of appearance
    counts = {}
    for atom in pwout['atom_types']:
        counts[atom] = counts.get(atom, 0) + 1

    # greatest common divisor of all the species counts
    gcd_val = 0
    for count in counts.values():
        if gcd_val == 0:
            gcd_val = count
        else:
            gcd_val = gcd(count, gcd_val)

    # convert stoichiometry to a list of [species, amount per formula unit]
    pwout['stoichiometry'] = [
        [species, count / gcd_val] for species, count in counts.items()
    ]
    atoms_per_fu = sum(amount for _, amount in pwout['stoichiometry'])
    pwout['num_fu'] = len(pwout['atom_types']) / atoms_per_fu

    return pwout, True
def magres2dict(fname, **kwargs):
    """ Extract available information from .magres file. Assumes units of
    Angstrom and ppm for relevant quantities.

    The `atoms`, `magres` and `calculation` blocks (delimited by either
    `<tag>`/`</tag>` or `[tag]`/`[/tag]`) are read in turn.

    Parameters:
        fname (str): name of the file to scrape, passed on to
            `get_flines_extension_agnostic` with the "magres" extension.

    Keyword arguments:
        Accepted for interface compatibility; not used by this function.

    Returns:
        (dict, bool): the scraped data and True.

    Raises:
        RuntimeError: if a block is opened but its closing tag is never
            found before the end of the file.

    """
    # NOTE(review): another `magres2dict` is defined further down this file;
    # being later, that definition is the one bound at import time.
    magres = defaultdict(list)
    flines, fname = get_flines_extension_agnostic(fname, "magres")
    magres['source'] = [fname]

    # grab file owner username
    try:
        from pwd import getpwuid
        magres['user'] = getpwuid(stat(fname).st_uid).pw_name
    except Exception:
        magres['user'] = '******'

    magres['magres_units'] = dict()

    def _block_body(start, closing_tags):
        """ Yield the lines after `flines[start]` up to the closing tag. """
        for idx in range(start + 1, len(flines)):
            if flines[idx].strip().lower() in closing_tags:
                return
            yield flines[idx]
        # ran off the end of the file without meeting the closing tag
        raise RuntimeError("Something went wrong in reader loop")

    for line_no, line in enumerate(flines):
        if line.lower().strip() not in ['<atoms>', '[atoms]']:
            continue
        for raw in _block_body(line_no, ['</atoms>', '[/atoms]']):
            split_line = raw.split()
            if not split_line:
                continue
            if split_line[0] == 'units':
                magres['magres_units'][split_line[1]] = split_line[2]
            elif 'lattice' in split_line:
                # nine numbers follow the keyword: three rows of three
                lattice = split_line[1:]
                for j in range(3):
                    magres['lattice_cart'].append(
                        [float(elem) for elem in lattice[j * 3:(j + 1) * 3]])
                magres['lattice_abc'] = cart2abc(magres['lattice_cart'])
            elif 'atom' in split_line:
                # second token is stored as the type, last three as the position
                magres['atom_types'].append(split_line[1])
                magres['positions_abs'].append(
                    [float(elem) for elem in split_line[-3:]])
        # only the first atoms block is read
        break

    if "atom_types" in magres:
        magres['num_atoms'] = len(magres['atom_types'])
        magres['positions_frac'] = cart2frac(magres['lattice_cart'],
                                             magres['positions_abs'])
        magres['stoichiometry'] = get_stoich(magres['atom_types'])

    for line_no, line in enumerate(flines):
        if line.lower().strip() not in ['<magres>', '[magres]']:
            continue
        for raw in _block_body(line_no, ['</magres>', '[/magres]']):
            split_line = raw.split()
            if not split_line:
                continue
            if split_line[0] == 'units':
                magres['magres_units'][split_line[1]] = split_line[2]
            elif 'sus' in split_line:
                magres["susceptibility_tensor"] = np.array(
                    [float(val) for val in split_line[1:]]).reshape(3, 3)
            elif 'ms' in split_line:
                # the tensor components start at the fourth token
                ms = np.array(
                    [float(val) for val in split_line[3:]]).reshape(3, 3)
                s_iso = np.trace(ms) / 3

                # find eigenvalues of symmetric part of shielding and order
                # them to calc anisotropy eta
                # NOTE(review): presumably the helper returns the eigenvalues
                # ordered (yy, xx, zz) - confirm against _get_haeberlen_eigs
                symmetric_shielding = _symmetrise_tensor(ms)
                s_yy, s_xx, s_zz = _get_haeberlen_eigs(symmetric_shielding)
                # anisotropy is stored in the s_zz - (s_xx + s_yy) / 2 form
                s_aniso = s_zz - (s_xx + s_yy) / 2.0
                asymm = (s_yy - s_xx) / (s_zz - s_iso)

                magres["magnetic_shielding_tensors"].append(ms)
                magres["chemical_shielding_isos"].append(s_iso)
                magres["chemical_shift_anisos"].append(s_aniso)
                magres["chemical_shift_asymmetries"].append(asymm)
            elif "efg" in split_line:
                # exact-token match, so other efg_* records are not picked up
                efg = np.array(
                    [float(val) for val in split_line[3:]]).reshape(3, 3)
                species = split_line[1]

                eigs = _get_haeberlen_eigs(efg)
                v_zz, eta = eigs[2], (eigs[0] - eigs[1]) / eigs[2]

                # calculate C_Q in MHz; species without a tabulated
                # quadrupole moment fall back to a factor of 1.0
                quadrupole_moment = ELECTRIC_QUADRUPOLE_MOMENTS.get(
                    species, 1.0)
                C_Q = ((ELECTRON_CHARGE * v_zz * quadrupole_moment
                        * EFG_AU_TO_SI * BARN_TO_M2)
                       / (PLANCK_CONSTANT * 1e6))

                magres["electric_field_gradient"].append(efg)
                magres["quadrupolar_couplings"].append(C_Q)
                magres["quadrupolar_asymmetries"].append(eta)

    for line_no, line in enumerate(flines):
        if line.lower().strip() not in ['<calculation>', '[calculation]']:
            continue
        for raw in _block_body(line_no, ['</calculation>', '[/calculation]']):
            # space important as it excludes other calc_code_x variables
            if 'calc_code ' in raw:
                magres['calculator'] = raw.split()[1]
            if 'calc_code_version' in raw:
                magres['calculator_version'] = raw.split()[1]

    # hand back a plain dict so missing keys are not silently created
    return dict(magres), True
def cif2dict(fname, **kwargs):
    """ Scrape a .cif file into a dictionary.

    The raw parsed cif data is kept under the `'_cif'` key. When the file
    lists symmetry operations, `_cif_set_unreduced_sites` is called on the
    document to handle them.

    Parameters:
        fname (str/list): filename or list of filenames of .cif file(s)
            (with or without extension).

    Keyword arguments:
        Accepted for interface compatibility; not used by this function.

    Returns:
        (dict, bool): a dictionary containing the scraped data and True.
            NOTE(review): the error-string/False return described in
            earlier documentation is not produced by this body; presumably
            a wrapper supplies it - confirm.

    Raises:
        RuntimeError: if neither `_atom_site_type_symbol` nor
            `_atom_site_label` yields any atom labels.

    """
    flines, fname = get_flines_extension_agnostic(fname, "cif")

    raw_cif = _cif_parse_raw(flines)
    doc = dict()
    doc['_cif'] = raw_cif
    doc['source'] = [str(Path(fname).resolve())]
    doc['atom_types'] = []

    # prefer the explicit type symbols, falling back to the site labels
    labels = (raw_cif.get("_atom_site_type_symbol")
              or raw_cif.get("_atom_site_label"))
    if not labels:
        raise RuntimeError(f"Unable to find atom types in cif file {fname}.")

    def _leading_float(text):
        # discard anything from the first "(" onwards before converting
        return float(text.split('(')[0])

    # the element symbol is the leading run of alphabetic characters
    for label in labels:
        end = 0
        while end < len(label) and label[end].isalpha():
            end += 1
        doc['atom_types'].append(label[:end])

    site_columns = zip(raw_cif['_atom_site_fract_x'],
                       raw_cif['_atom_site_fract_y'],
                       raw_cif['_atom_site_fract_z'])
    doc['positions_frac'] = [
        [_leading_float(component) for component in site]
        for site in site_columns
    ]
    num_sites = len(doc['positions_frac'])

    # occupancy and multiplicity default to 1.0 per site when not given
    if '_atom_site_occupancy' in raw_cif:
        doc['site_occupancy'] = [
            _leading_float(entry) for entry in raw_cif['_atom_site_occupancy']
        ]
    else:
        doc['site_occupancy'] = [1.0] * num_sites

    if '_atom_site_symmetry_multiplicity' in raw_cif:
        doc['site_multiplicity'] = [
            _leading_float(entry)
            for entry in raw_cif['_atom_site_symmetry_multiplicity']
        ]
    else:
        doc['site_multiplicity'] = [1.0] * num_sites

    # cell lengths first, then cell angles
    raw_lengths = [raw_cif[key] for key in
                   ('_cell_length_a', '_cell_length_b', '_cell_length_c')]
    cell_lengths = [_cif_parse_float_with_errors(val) for val in raw_lengths]
    raw_angles = [raw_cif[key] for key in
                  ('_cell_angle_alpha', '_cell_angle_beta', '_cell_angle_gamma')]
    cell_angles = [_cif_parse_float_with_errors(val) for val in raw_angles]
    doc['lattice_abc'] = [cell_lengths, cell_angles]

    doc['lattice_cart'] = abc2cart(doc['lattice_abc'])
    doc['cell_volume'] = cart2volume(doc['lattice_cart'])
    doc['stoichiometry'] = _cif_disordered_stoichiometry(doc)
    doc['num_atoms'] = len(doc['positions_frac'])

    # copy the symmetry operations to the second key when only the first is given
    symop_key = '_space_group_symop_operation_xyz'
    equiv_key = '_symmetry_equiv_pos_as_xyz'
    cif_data = doc['_cif']
    if equiv_key not in cif_data and symop_key in cif_data:
        cif_data[equiv_key] = cif_data[symop_key]

    if equiv_key in cif_data:
        _cif_set_unreduced_sites(doc)

    # the space group is optional: leave it unset if it cannot be determined
    try:
        doc['space_group'] = get_spacegroup_spg(doc, check_occ=False)
    except RuntimeError:
        pass

    return doc, True
def magres2dict(fname, **kwargs):
    """ Extract available information from .magres file. Assumes units of
    Angstrom and ppm for relevant quantities.

    The `atoms`, `magres` and `calculation` blocks (delimited by either
    `<tag>`/`</tag>` or `[tag]`/`[/tag]`) are read in turn.

    Parameters:
        fname (str): name of the file to scrape, passed on to
            `get_flines_extension_agnostic` with the "magres" extension.

    Keyword arguments:
        Accepted for interface compatibility; not used by this function.

    Returns:
        (defaultdict, bool): the scraped data and True. The container is
            a `defaultdict(list)`, so looking up a missing key on it
            creates an empty list.

    Raises:
        RuntimeError: if a block is opened but its closing tag is never
            found before the end of the file.

    """
    # NOTE(review): `magres2dict` is also defined earlier in this file; this
    # later definition is the one bound at import time.
    magres = defaultdict(list)
    flines, fname = get_flines_extension_agnostic(fname, "magres")
    magres['source'] = [fname]

    # grab file owner username; imported here, as the sibling scrapers do,
    # so that a missing module-level import is not silently swallowed below
    try:
        from pwd import getpwuid
        magres['user'] = getpwuid(stat(fname).st_uid).pw_name
    except Exception:
        magres['user'] = '******'

    magres['magres_units'] = dict()

    def _block_body(start, closing_tags):
        """ Yield the lines after `flines[start]` up to the closing tag. """
        for idx in range(start + 1, len(flines)):
            if flines[idx].strip().lower() in closing_tags:
                return
            yield flines[idx]
        # ran off the end of the file without meeting the closing tag
        raise RuntimeError("Something went wrong in reader loop")

    for line_no, line in enumerate(flines):
        if line.lower().strip() not in ['<atoms>', '[atoms]']:
            continue
        for raw in _block_body(line_no, ['</atoms>', '[/atoms]']):
            split_line = raw.split()
            if not split_line:
                continue
            if split_line[0] == 'units':
                magres['magres_units'][split_line[1]] = split_line[2]
            elif 'lattice' in raw:
                # nine numbers follow the keyword: three rows of three
                lattice = split_line[1:]
                for j in range(3):
                    magres['lattice_cart'].append(
                        [float(elem) for elem in lattice[j * 3:(j + 1) * 3]])
                magres['lattice_abc'] = cart2abc(magres['lattice_cart'])
            elif 'atom' in raw:
                # second token is stored as the type, last three as the position
                magres['atom_types'].append(split_line[1])
                magres['positions_abs'].append(
                    [float(elem) for elem in split_line[-3:]])
        # only the first atoms block is read
        break

    magres['num_atoms'] = len(magres['atom_types'])
    magres['positions_frac'] = cart2frac(magres['lattice_cart'],
                                         magres['positions_abs'])
    magres['stoichiometry'] = get_stoich(magres['atom_types'])

    for line_no, line in enumerate(flines):
        if line.lower().strip() not in ['<magres>', '[magres]']:
            continue
        for raw in _block_body(line_no, ['</magres>', '[/magres]']):
            split_line = raw.split()
            if not split_line:
                continue
            if split_line[0] == 'units':
                magres['magres_units'][split_line[1]] = split_line[2]
            elif 'sus' in raw:
                # stored as three rows of three
                sus = split_line[1:]
                for j in range(3):
                    magres['susceptibility_tensor'].append(
                        [float(val) for val in sus[3 * j:3 * (j + 1)]])
            elif 'ms' in raw:
                # the tensor components start at the fourth token
                ms = split_line[3:]
                tensor = [[float(val) for val in ms[3 * j:3 * (j + 1)]]
                          for j in range(3)]
                # isotropic value: a third of the trace
                s_iso = sum(tensor[j][j] / 3 for j in range(3))

                # find eigenvalues of symmetric part of shielding and order
                # them to calc anisotropy eta
                as_array = np.asarray(tensor)
                symmetric_shielding = 0.5 * (as_array + as_array.T)
                # only the eigenvalues are needed; the eigenvectors are dropped
                eig_vals = np.linalg.eig(symmetric_shielding)[0]
                # Haeberlen convention:
                # |s_zz - s_iso| >= |s_xx - s_iso| >= |s_yy - s_iso|
                s_yy, s_xx, s_zz = sorted(
                    eig_vals, key=lambda eig: abs(eig - s_iso))

                magres['magnetic_shielding_tensors'].append(tensor)
                magres['chemical_shielding_isos'].append(s_iso)
                # anisotropy is stored in the s_zz - (s_xx + s_yy) / 2 form
                magres['chemical_shift_anisos'].append(
                    s_zz - (s_xx + s_yy) / 2.0)
                magres['chemical_shift_asymmetries'].append(
                    (s_yy - s_xx) / (s_zz - s_iso))

    for line_no, line in enumerate(flines):
        if line.lower().strip() not in ['<calculation>', '[calculation]']:
            continue
        for raw in _block_body(line_no, ['</calculation>', '[/calculation]']):
            # space important as it excludes other calc_code_x variables
            if 'calc_code ' in raw:
                magres['calculator'] = raw.split()[1]
            if 'calc_code_version' in raw:
                magres['calculator_version'] = raw.split()[1]

    return magres, True