def parse_extxyz(dbpath, xyzpath, env, cache=False):
    r"""Parse a file in XYZ format and write its content to an SQLite (ASE) db.

    Args:
        dbpath (str): path to sqlite database.
        xyzpath (str): path to file with xyz file format.
        env: environment provider forwarded to ``neighbor_gen`` when caching.
        cache (bool): if True, precompute neighbor data for each structure
            and store it alongside the energies/forces.
    """
    with connect(dbpath, use_lock_file=False) as conn:
        with open(xyzpath) as f:
            atoms = []
            energies = []
            forces = []
            energiesperatom = []
            eform = []
            eformperatom = []
            ehull = []
            ebin = []
            for at in read_xyz(f, index=slice(None)):
                # len(at) replaces the deprecated Atoms.get_number_of_atoms().
                nat = len(at)
                energies.append(at.get_total_energy())
                forces.append(at.get_forces())
                atoms.append(at)
                energiesperatom.append(energies[-1] / nat)
                eform.append(energies[-1])  # - nat*-19.0329202806)
                eformperatom.append(eform[-1] / nat)
                # Placeholders: hull/bin energies are not computed here.
                ehull.append(0)
                ebin.append(0)
            energies = np.array(energies)
            m = np.mean(energies)
            # energies -= m
            for i in range(len(atoms)):
                # atoms[i].energy = energies[i]
                atoms[i]._calc.results['energy'] = energies[i]
                r_ij, f_ij = None, None
                if cache:
                    # BUGFIX: the original passed the loop-leftover ``at``
                    # (always the *last* parsed structure) instead of the
                    # structure being written, atoms[i].
                    r_ij, f_ij = neighbor_gen(atoms[i],
                                              distance_expansion=None,
                                              cutoff=5.0,
                                              n_gaussians=25,
                                              trainable_gaussians=False,
                                              environment_provider=env,
                                              collect_triples=False,
                                              pair_provider=None,
                                              center_positions=True)
                conn.write(atoms[i],
                           data={
                               ExtXYZ.E: energies[i],
                               ExtXYZ.F: forces[i],
                               ExtXYZ.E + 'peratom': energiesperatom[i],
                               'Eform': eform[i],
                               'Eformperatom': eformperatom[i],
                               'Ehull': ehull[i],
                               'Ebin': ebin[i],
                               'mean': m,
                               'r_ij': r_ij,
                               'f_ij': f_ij
                           })
def load_data(dbpath):
    """Download the QM9 dataset, parse the xyz files and write each molecule
    with unit-converted properties into an ASE database.

    Args:
        dbpath (str): path of the ASE db file to create/extend.

    Returns:
        bool: True on success, False if the download failed.
    """
    logging.info('Downloading QM9 data...')
    tmpdir = tempfile.mkdtemp('qm9')
    tar_path = os.path.join(tmpdir, 'qm9.tar.gz')
    raw_path = os.path.join(tmpdir, 'qm9_xyz')
    url = 'https://ndownloader.figshare.com/files/3195398'
    try:
        urllib.request.urlretrieve(url, tar_path)
        logging.info("Done.")
    except HTTPError as e:
        # BUGFIX: logging uses lazy %-formatting; passing extra positional
        # args to a message with no placeholders is a formatting error.
        logging.error("HTTP Error: %s %s", e.code, url)
        return False
    except URLError as e:
        logging.error("URL Error: %s %s", e.reason, url)
        return False
    tar = tarfile.open(tar_path)
    tar.extractall(raw_path)
    tar.close()
    prop_names = [
        'rcA', 'rcB', 'rcC', 'mu', 'alpha', 'h**o', 'lumo', 'gap', 'r2',
        'zpve', 'energy_U0', 'energy_U', 'enthalpy_H', 'free_G', 'Cv'
    ]
    # Conversion factors into ASE default units (Angstrom / eV).
    conversions = [
        1., 1., 1., 1., Bohr**3 / Ang**3, Hartree / eV, Hartree / eV,
        Hartree / eV, Bohr**2 / Ang**2, Hartree / eV, Hartree / eV,
        Hartree / eV, Hartree / eV, Hartree / eV, 1.
    ]
    logging.info('Parse xyz files...')
    # Hoisted: the original re-ran os.listdir() for every progress message.
    xyz_files = os.listdir(raw_path)
    total = len(xyz_files)
    with connect(dbpath) as con:
        for i, xyzfile in enumerate(xyz_files):
            xyzfile = os.path.join(raw_path, xyzfile)
            if i % 10000 == 0:
                logging.info('Parsed: ' + str(i) + ' / ' + str(total))
            properties = {}
            tmp = os.path.join(tmpdir, 'tmp.xyz')
            with open(xyzfile, 'r') as f:
                lines = f.readlines()
                # Line 2 of a QM9 file carries the scalar properties.
                l = lines[1].split()[2:]
                for pn, p, c in zip(prop_names, l, conversions):
                    properties[pn] = float(p) * c
            with open(tmp, "wt") as fout:
                for line in lines:
                    # QM9 writes floats with Fortran-style '*^' exponents.
                    fout.write(line.replace('*^', 'e'))
            with open(tmp, 'r') as f:
                ats = list(read_xyz(f, 0))[0]
            con.write(ats, key_value_pairs=properties)
    logging.info('Done.')
    return True
def _load_data(self):
    """Download GDB-9, parse all xyz files and add them to this dataset.

    Returns:
        bool: True on success, False if the download failed.
    """
    logging.info('Downloading GDB-9 data...')
    tmpdir = tempfile.mkdtemp('gdb9')
    tar_path = os.path.join(tmpdir, 'gdb9.tar.gz')
    raw_path = os.path.join(tmpdir, 'gdb9_xyz')
    url = 'https://ndownloader.figshare.com/files/3195389'
    try:
        request.urlretrieve(url, tar_path)
        logging.info("Done.")
    except HTTPError as e:
        # BUGFIX: logging.error("msg:", a, b) is invalid — the message has
        # no %-placeholders for the extra arguments.
        logging.error("HTTP Error: %s %s", e.code, url)
        return False
    except URLError as e:
        logging.error("URL Error: %s %s", e.reason, url)
        return False
    logging.info("Extracting files...")
    tar = tarfile.open(tar_path)
    tar.extractall(raw_path)
    tar.close()
    logging.info("Done.")
    logging.info('Parse xyz files...')
    # Sort numerically by the index embedded in the file name.
    # Raw string avoids the invalid-escape warning for '\D'.
    ordered_files = sorted(os.listdir(raw_path),
                           key=lambda x: (int(re.sub(r'\D', '', x)), x))
    all_atoms = []
    all_properties = []
    for i, xyzfile in enumerate(ordered_files):
        xyzfile = os.path.join(raw_path, xyzfile)
        if (i + 1) % 10000 == 0:
            logging.info('Parsed: {:6d} / 133885'.format(i + 1))
        properties = {}
        tmp = os.path.join(tmpdir, 'tmp.xyz')
        with open(xyzfile, 'r') as f:
            lines = f.readlines()
            # Line 2 carries the scalar properties; convert per self.units.
            l = lines[1].split()[2:]
            for pn, p in zip(self.properties, l):
                properties[pn] = np.array([float(p) * self.units[pn]])
        with open(tmp, "wt") as fout:
            for line in lines:
                # Normalize Fortran-style '*^' float exponents.
                fout.write(line.replace('*^', 'e'))
        with open(tmp, 'r') as f:
            ats = list(read_xyz(f, 0))[0]
        all_atoms.append(ats)
        all_properties.append(properties)
    logging.info('Write atoms to db...')
    self.add_systems(all_atoms, all_properties)
    logging.info('Done.')
    shutil.rmtree(tmpdir)
    return True
def _load_data(self, evilmols=None):
    """Download GDB-9, parse the xyz files and add them to the dataset.

    Args:
        evilmols (np.ndarray, optional): 1-based indices of molecules to
            skip (e.g. the uncharacterized ones).

    Returns:
        bool: True when parsing finished.
    """
    logging.info("Downloading GDB-9 data...")
    tmpdir = tempfile.mkdtemp("gdb9")
    tar_path = os.path.join(tmpdir, "gdb9.tar.gz")
    raw_path = os.path.join(tmpdir, "gdb9_xyz")
    url = "https://ndownloader.figshare.com/files/3195389"
    request.urlretrieve(url, tar_path)
    logging.info("Done.")
    logging.info("Extracting files...")
    tar = tarfile.open(tar_path)
    tar.extractall(raw_path)
    tar.close()
    logging.info("Done.")
    logging.info("Parse xyz files...")
    # Sort numerically by the index embedded in the file name.
    ordered_files = sorted(
        os.listdir(raw_path), key=lambda x: (int(re.sub(r"\D", "", x)), x)
    )
    all_atoms = []
    all_properties = []
    # BUGFIX: np.int was removed in NumPy 1.24; the builtin int is the
    # documented replacement.
    irange = np.arange(len(ordered_files), dtype=int)
    if evilmols is not None:
        # evilmols is 1-based, irange is 0-based.
        irange = np.setdiff1d(irange, evilmols - 1)
    for i in irange:
        xyzfile = os.path.join(raw_path, ordered_files[i])
        if (i + 1) % 10000 == 0:
            logging.info("Parsed: {:6d} / 133885".format(i + 1))
        properties = {}
        tmp = os.path.join(tmpdir, "tmp.xyz")
        with open(xyzfile, "r") as f:
            lines = f.readlines()
            # Line 2 carries the scalar properties; convert per self.units.
            l = lines[1].split()[2:]
            for pn, p in zip(self.available_properties, l):
                properties[pn] = np.array([float(p) * self.units[pn]])
        with open(tmp, "wt") as fout:
            for line in lines:
                # Normalize Fortran-style '*^' float exponents.
                fout.write(line.replace("*^", "e"))
        with open(tmp, "r") as f:
            ats = list(read_xyz(f, 0))[0]
        all_atoms.append(ats)
        all_properties.append(properties)
    logging.info("Write atoms to db...")
    self.add_systems(all_atoms, all_properties)
    logging.info("Done.")
    shutil.rmtree(tmpdir)
    return True
def _load_data(self):
    """Download GDB-9, parse the xyz files and write them into ``qm9.db``
    under ``self.path``.

    Returns:
        bool: True on success, False if the download failed.
    """
    logging.info('Downloading GDB-9 data...')
    tmpdir = tempfile.mkdtemp('gdb9')
    tar_path = os.path.join(tmpdir, 'gdb9.tar.gz')
    raw_path = os.path.join(tmpdir, 'gdb9_xyz')
    url = 'https://ndownloader.figshare.com/files/3195389'
    try:
        request.urlretrieve(url, tar_path)
        logging.info('Done.')
    except HTTPError as e:
        # BUGFIX: logging uses lazy %-formatting; the original passed extra
        # args to a message with no placeholders.
        logging.error('HTTP Error: %s %s', e.code, url)
        return False
    except URLError as e:
        logging.error('URL Error: %s %s', e.reason, url)
        return False
    logging.info('Extracting data from tar file...')
    tar = tarfile.open(tar_path)
    tar.extractall(raw_path)
    tar.close()
    logging.info('Done.')
    logging.info('Parsing xyz files...')
    with connect(os.path.join(self.path, 'qm9.db')) as con:
        # Sort numerically by the index embedded in the file name.
        ordered_files = sorted(os.listdir(raw_path),
                               key=lambda x: (int(re.sub(r'\D', '', x)), x))
        for i, xyzfile in enumerate(ordered_files):
            xyzfile = os.path.join(raw_path, xyzfile)
            if (i + 1) % 10000 == 0:
                logging.info('Parsed: {:6d} / 133885'.format(i + 1))
            properties = {}
            tmp = os.path.join(tmpdir, 'tmp.xyz')
            with open(xyzfile, 'r') as f:
                lines = f.readlines()
                # Line 2 carries the scalar properties.
                l = lines[1].split()[2:]
                for pn, p in zip(self.properties, l):
                    properties[pn] = float(p) * self.units[pn]
            with open(tmp, "wt") as fout:
                for line in lines:
                    # Normalize Fortran-style '*^' float exponents.
                    fout.write(line.replace('*^', 'e'))
            with open(tmp, 'r') as f:
                ats = list(read_xyz(f, 0))[0]
            con.write(ats, data=properties)
    logging.info('Done.')
    shutil.rmtree(tmpdir)
    return True
def parse_extxyz(dbpath, xyzpath):
    r"""Parses file in XYZ format and writes content to sqllite database.

    Args:
        dbpath (str): path to sqllite database.
        xyzpath (str): path to file with xyz file format.
    """
    with connect(dbpath, use_lock_file=False) as conn:
        with open(xyzpath) as f:
            for at in read_xyz(f, index=slice(None)):
                energy = at.get_total_energy()
                # BUGFIX: the original rebound ``f`` (the open file handle)
                # to the forces array inside the loop — a shadowing bug
                # that would break any later use of the handle.
                forces = at.get_forces()
                conn.write(at, data={ExtXYZ.E: energy, ExtXYZ.F: forces})
def _load_data(self, evilmols=None):
    """Parse QM9 xyz files from a fixed local directory and add them to the
    dataset, tagging each atom with its 1-based index.

    Args:
        evilmols (np.ndarray, optional): 1-based indices of molecules to
            skip.

    Returns:
        bool: True when done.
    """
    tmpdir = tempfile.mkdtemp('gdb9')
    raw_path = os.path.join(r'../../database/qm9')
    logging.info('Parse xyz files...')
    # Sort numerically by the index embedded in the file name.
    ordered_files = sorted(os.listdir(raw_path),
                           key=lambda x: (int(re.sub(r'\D', '', x)), x))
    all_atoms = []
    all_properties = []
    # BUGFIX: np.int was removed in NumPy 1.24; builtin int replaces it.
    irange = np.arange(len(ordered_files), dtype=int)
    if evilmols is not None:
        # evilmols is 1-based, irange is 0-based.
        irange = np.setdiff1d(irange, evilmols - 1)
    for i in irange:
        xyzfile = os.path.join(raw_path, ordered_files[i])
        if (i + 1) % 10000 == 0:
            logging.info('Parsed: {:6d}'.format(i + 1))
        properties = {}
        tmp = os.path.join(tmpdir, 'tmp.xyz')
        with open(xyzfile, 'r') as f:
            lines = f.readlines()
            # Line 2 carries the scalar properties (stored unconverted).
            l = lines[1].split()[2:]
            for pn, p in zip(QM9.available_properties, l):
                properties[pn] = np.array([float(p)])  # * self.units[pn]])
        with open(tmp, "wt") as fout:
            have_tag = False
            for line in lines:
                # Normalize Fortran-style '*^' float exponents.
                fout.write(line.replace('*^', 'e'))
                if not have_tag:
                    # The first line holds the atom count; tag atoms 1..N.
                    tags = list(range(1, int(line) + 1))
                    have_tag = True
        with open(tmp, 'r') as f:
            ats = list(read_xyz(f, 0))[0]
        ats.set_tags(tags=tags)
        all_atoms.append(ats)
        all_properties.append(properties)
    logging.info('Write atoms to db...')
    self.add_systems(all_atoms, all_properties)
    logging.info('Done.')
    shutil.rmtree(tmpdir)
    return True
def extxyz_to_db(extxyz_path, db_path):
    r"""Convert an extxyz file into an ASE sqlite database.

    Args:
        extxyz_path (str): path to the extxyz file.
        db_path (str): path to the sqlite database to write.
    """
    with connect(db_path, use_lock_file=False) as conn, open(extxyz_path) as f:
        for at in tqdm(read_xyz(f, index=slice(None)), "creating ase db"):
            # Store forces (when present) plus everything from the
            # structure's info dictionary.
            payload = {}
            if at.has("forces"):
                payload["forces"] = at.get_forces()
            payload.update(at.info)
            conn.write(at, data=payload)
def read_xyz(fileobj, property_names=None, idx=None):
    """Read all frames of an (ext)xyz file and flatten them into arrays.

    Args:
        fileobj (str): path to the xyz file — it is open()'ed internally,
            so a path (not a file handle) is expected here.
        property_names (list, optional): info keys to collect per frame;
            'a' or 'all' collects every key found in a frame's info dict.
        idx (sequence, optional): subset of frame indices to keep.

    Returns:
        tuple: ``(nas, zs, coords, nsheav, props)`` — per-frame atom
        counts, flattened atomic numbers, flattened positions, per-frame
        heavy-atom counts, and the collected property lists.
    """
    props = {}
    zs = []
    coords = []
    nas = []
    nsheav = []
    # Each frame starts with a line containing only the atom count; count
    # those lines to learn the number of molecules.
    # BUGFIX: the counting handle was never closed (resource leak); also
    # use a raw-string regex and the equivalent '\d+'.
    with open(fileobj) as fh:
        nm = len(re.findall(r'^\s*\d+$', fh.read(), re.MULTILINE))
    index = slice(0, nm)
    _ms = []
    for i, mi in enumerate(
            rx.read_xyz(fileobj, index=index,
                        properties_parser=rx.key_val_str_to_dict_regex)):
        _ms.append(mi)
    if idx is not None:
        ms = [_ms[im] for im in idx]
    else:
        ms = _ms
    for mi in ms:
        nas.append(len(mi))
        # Heavy atoms = anything other than hydrogen (Z > 1).
        nsheav.append((mi.numbers > 1).sum())
        zs += list(mi.numbers)
        coords += list(mi.positions)
        if property_names:
            if ('a' in property_names) or ('all' in property_names):
                property_names = list(mi.info.keys())
            for key in property_names:
                # Accumulate one value per frame for each requested key.
                props.setdefault(key, []).append(mi.info[key])
    return np.array(nas, int), np.array(zs, int), np.array(coords), np.array(
        nsheav, int), props
def parse_xyz(tmp_dir):#, dbpath):
    """Parse QM9 xyz files from *tmp_dir*, convert their properties and
    collect neighbor data for a MongoDB collection.

    NOTE(review): ``data`` is built but never written to ``conn`` in the
    visible code — the insert appears to be missing or lives outside this
    view.  Also note that ``tmp.xyz`` is created inside the directory being
    listed, so a re-run may pick it up as input; verify intent.

    Args:
        tmp_dir (str): directory containing the extracted QM9 xyz files.
    """
    client = MongoClient()
    db = client.mydb
    conn = db.my_collection
    prop_names = ['rcA', 'rcB', 'rcC', 'mu', 'alpha', 'h**o', 'lumo',
                  'gap', 'r2', 'zpve', 'energy_U0', 'energy_U',
                  'enthalpy_H', 'free_G', 'Cv']
    # Conversion factors into ASE default units (Angstrom / eV).
    conversions = [1., 1., 1., 1., Bohr ** 3 / Ang ** 3, Hartree / eV,
                   Hartree / eV, Hartree / eV, Bohr ** 2 / Ang ** 2,
                   Hartree / eV, Hartree / eV, Hartree / eV, Hartree / eV,
                   Hartree / eV, 1.]
    for i, xyzfile in enumerate(os.listdir(tmp_dir)):
        xyzfile = os.path.join(tmp_dir, xyzfile)
        if i % 10000 == 0:
            log.info(str(i) + "/133885 parsed.")
        #if i == 500:
        #    break
        properties = {}
        tmp = os.path.join(tmp_dir, 'tmp.xyz')
        with open(xyzfile, 'r') as f:
            lines = f.readlines()
            # Line 2 of a QM9 file carries the scalar properties.
            l = lines[1].split()[2:]
            for pn, p, c in zip(prop_names, l, conversions):
                properties[pn] = float(p) * c
        with open(tmp, 'wt') as fout:
            for line in lines:
                # QM9 writes floats with Fortran-style '*^' exponents.
                fout.write(line.replace('*^', 'e'))
        with open(tmp, 'r') as f:
            atoms = list(read_xyz(f, 0))[0]
            # Neighbor lists within a 20.0 cutoff (units per collect_neighbors).
            idx_ik, seg_i, idx_j, idx_jk, seg_j, offset, ratio_j = collect_neighbors(atoms, 20.)
            data = {'_idx_ik': idx_ik, '_idx_jk': idx_jk, '_idx_j': idx_j,
                    '_seg_i': seg_i, '_seg_j': seg_j, '_offset': offset,
                    '_ratio_j': ratio_j}
def read(filename, index=None, format=None):
    """Read Atoms object(s) from file.

    filename: str
        Name of the file to read from.
    index: int or slice
        If the file contains several configurations, the last configuration
        will be returned by default.  Use index=n to get configuration
        number n (counting from zero).
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be guessed by the *filetype* function.

    Known formats:

    =========================  =============
    format                     short name
    =========================  =============
    GPAW restart-file          gpw
    Dacapo netCDF output file  dacapo
    Old ASE netCDF trajectory  nc
    Virtual Nano Lab file      vnl
    ASE pickle trajectory      traj
    ASE bundle trajectory      bundle
    GPAW text output           gpaw-text
    CUBE file                  cube
    XCrySDen Structure File    xsf
    Dacapo text output         dacapo-text
    XYZ-file                   xyz
    VASP POSCAR/CONTCAR file   vasp
    VASP OUTCAR file           vasp_out
    SIESTA STRUCT file         struct_out
    ABINIT input file          abinit
    V_Sim ascii file           v_sim
    Protein Data Bank          pdb
    CIF-file                   cif
    FHI-aims geometry file     aims
    FHI-aims output file       aims_out
    VTK XML Image Data         vti
    VTK XML Structured Grid    vts
    VTK XML Unstructured Grid  vtu
    TURBOMOLE coord file       tmol
    TURBOMOLE gradient file    tmol-gradient
    exciting input             exi
    AtomEye configuration      cfg
    WIEN2k structure file      struct
    DftbPlus input file        dftb
    CASTEP geom file           cell
    CASTEP output file         castep
    CASTEP trajectory file     geom
    ETSF format                etsf.nc
    DFTBPlus GEN format        gen
    CMR db/cmr-file            db
    CMR db/cmr-file            cmr
    LAMMPS dump file           lammps
    EON reactant.con file      eon
    Gromacs coordinates        gro
    Gaussian com (input) file  gaussian
    Gaussian output file       gaussian_out
    Quantum espresso in file   esp_in
    Quantum espresso out file  esp_out
    Extended XYZ file          extxyz
    NWChem input file          nw
    =========================  =============
    """
    # An explicit '@index' suffix on the filename overrides the index arg.
    if isinstance(filename, str) and ('.json@' in filename or
                                      '.db@' in filename or
                                      filename.startswith('pg://') and
                                      '@' in filename):
        filename, index = filename.rsplit('@', 1)
        if index.isdigit():
            index = int(index)
    else:
        if isinstance(filename, str):
            p = filename.rfind('@')
            if p != -1:
                try:
                    index = string2index(filename[p + 1:])
                except ValueError:
                    pass
                else:
                    filename = filename[:p]
    if isinstance(index, str):
        index = string2index(index)
    if format is None:
        # Guess the format from the file content / extension.
        format = filetype(filename)
    if format.startswith('gpw'):
        # GPAW restart files carry positions, cell and calculator results;
        # reconstruct the Atoms plus a single-point calculator from them.
        import gpaw
        r = gpaw.io.open(filename, 'r')
        positions = r.get('CartesianPositions') * Bohr
        numbers = r.get('AtomicNumbers')
        cell = r.get('UnitCell') * Bohr
        pbc = r.get('BoundaryConditions')
        tags = r.get('Tags')
        magmoms = r.get('MagneticMoments')
        energy = r.get('PotentialEnergy') * Hartree
        if r.has_array('CartesianForces'):
            forces = r.get('CartesianForces') * Hartree / Bohr
        else:
            forces = None
        atoms = Atoms(positions=positions, numbers=numbers, cell=cell,
                      pbc=pbc)
        if tags.any():
            atoms.set_tags(tags)
        if magmoms.any():
            atoms.set_initial_magnetic_moments(magmoms)
        else:
            magmoms = None
        atoms.calc = SinglePointDFTCalculator(atoms, energy=energy,
                                              forces=forces,
                                              magmoms=magmoms)
        kpts = []
        if r.has_array('IBZKPoints'):
            for w, kpt, eps_n, f_n in zip(r.get('IBZKPointWeights'),
                                          r.get('IBZKPoints'),
                                          r.get('Eigenvalues'),
                                          r.get('OccupationNumbers')):
                kpts.append(
                    SinglePointKPoint(w, kpt[0], kpt[1], eps_n[0], f_n[0]))
        atoms.calc.kpts = kpts
        return atoms
    if format in ['json', 'db', 'postgresql']:
        from ase.db.core import connect, dict2atoms
        if index == slice(None, None):
            index = None
        images = [
            dict2atoms(d) for d in connect(filename, format).select(index)
        ]
        if len(images) == 1:
            return images[0]
        else:
            return images
    if index is None:
        index = -1
    # Per-format dispatch; readers are imported lazily to keep import cheap.
    if format == 'castep':
        from ase.io.castep import read_castep
        return read_castep(filename, index)
    if format == 'castep_cell':
        import ase.io.castep
        return ase.io.castep.read_cell(filename, index)
    if format == 'castep_geom':
        import ase.io.castep
        return ase.io.castep.read_geom(filename, index)
    if format == 'exi':
        from ase.io.exciting import read_exciting
        return read_exciting(filename, index)
    if format in ['xyz', 'extxyz']:
        from ase.io.extxyz import read_xyz
        return read_xyz(filename, index)
    if format == 'traj':
        from ase.io.trajectory import read_trajectory
        return read_trajectory(filename, index)
    if format == 'bundle':
        from ase.io.bundletrajectory import read_bundletrajectory
        return read_bundletrajectory(filename, index)
    if format == 'cube':
        from ase.io.cube import read_cube
        return read_cube(filename, index)
    if format == 'nc':
        from ase.io.netcdf import read_netcdf
        return read_netcdf(filename, index)
    if format == 'gpaw-text':
        from ase.io.gpawtext import read_gpaw_text
        return read_gpaw_text(filename, index)
    if format == 'dacapo-text':
        from ase.io.dacapo import read_dacapo_text
        return read_dacapo_text(filename)
    if format == 'dacapo':
        from ase.io.dacapo import read_dacapo
        return read_dacapo(filename)
    if format == 'xsf':
        from ase.io.xsf import read_xsf
        return read_xsf(filename, index)
    if format == 'vasp':
        from ase.io.vasp import read_vasp
        return read_vasp(filename)
    if format == 'vasp_out':
        from ase.io.vasp import read_vasp_out
        return read_vasp_out(filename, index)
    if format == 'abinit':
        from ase.io.abinit import read_abinit
        return read_abinit(filename)
    if format == 'v_sim':
        from ase.io.v_sim import read_v_sim
        return read_v_sim(filename)
    if format == 'mol':
        from ase.io.mol import read_mol
        return read_mol(filename)
    if format == 'pdb':
        from ase.io.pdb import read_pdb
        return read_pdb(filename, index)
    if format == 'cif':
        from ase.io.cif import read_cif
        return read_cif(filename, index)
    if format == 'struct':
        from ase.io.wien2k import read_struct
        return read_struct(filename)
    if format == 'struct_out':
        from ase.io.siesta import read_struct
        return read_struct(filename)
    if format == 'vti':
        from ase.io.vtkxml import read_vti
        return read_vti(filename)
    if format == 'vts':
        from ase.io.vtkxml import read_vts
        return read_vts(filename)
    if format == 'vtu':
        from ase.io.vtkxml import read_vtu
        return read_vtu(filename)
    if format == 'aims':
        from ase.io.aims import read_aims
        return read_aims(filename)
    if format == 'aims_out':
        from ase.io.aims import read_aims_output
        return read_aims_output(filename, index)
    if format == 'iwm':
        from ase.io.iwm import read_iwm
        return read_iwm(filename)
    if format == 'Cmdft':
        from ase.io.cmdft import read_I_info
        return read_I_info(filename)
    if format == 'tmol':
        from ase.io.turbomole import read_turbomole
        return read_turbomole(filename)
    if format == 'tmol-gradient':
        from ase.io.turbomole import read_turbomole_gradient
        return read_turbomole_gradient(filename)
    if format == 'cfg':
        from ase.io.cfg import read_cfg
        return read_cfg(filename)
    if format == 'dftb':
        from ase.io.dftb import read_dftb
        return read_dftb(filename)
    if format == 'sdf':
        from ase.io.sdf import read_sdf
        return read_sdf(filename)
    if format == 'etsf':
        from ase.io.etsf import ETSFReader
        return ETSFReader(filename).read_atoms()
    if format == 'gen':
        from ase.io.gen import read_gen
        return read_gen(filename)
    if format == 'cmr':
        from ase.io.cmr_io import read_db
        return read_db(filename, index)
    if format == 'lammps':
        from ase.io.lammpsrun import read_lammps_dump
        return read_lammps_dump(filename, index)
    if format == 'eon':
        from ase.io.eon import read_reactant_con
        return read_reactant_con(filename)
    if format == 'gromacs':
        from ase.io.gromacs import read_gromacs
        return read_gromacs(filename)
    if format == 'gaussian':
        from ase.io.gaussian import read_gaussian
        return read_gaussian(filename)
    if format == 'gaussian_out':
        from ase.io.gaussian import read_gaussian_out
        return read_gaussian_out(filename, index)
    if format == 'esp_in':
        from ase.io.espresso import read_espresso_in
        return read_espresso_in(filename)
    if format == 'esp_out':
        from ase.io.espresso import read_espresso_out
        return read_espresso_out(filename, index)
    if format == 'nw':
        from ase.io.nwchem import read_nwchem_input
        return read_nwchem_input(filename)
    raise RuntimeError('File format descriptor ' + format +
                       ' not recognized!')
def read(filename, index=None, format=None):
    """Read Atoms object(s) from file.

    filename: str
        Name of the file to read from.
    index: int or slice
        If the file contains several configurations, the last configuration
        will be returned by default.  Use index=n to get configuration
        number n (counting from zero).
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be guessed by the *filetype* function.

    Known formats:

    =========================  =============
    format                     short name
    =========================  =============
    GPAW restart-file          gpw
    Dacapo netCDF output file  dacapo
    Old ASE netCDF trajectory  nc
    Virtual Nano Lab file      vnl
    ASE pickle trajectory      traj
    ASE bundle trajectory      bundle
    GPAW text output           gpaw-text
    CUBE file                  cube
    XCrySDen Structure File    xsf
    Dacapo text output         dacapo-text
    XYZ-file                   xyz
    VASP POSCAR/CONTCAR file   vasp
    VASP OUTCAR file           vasp_out
    VASP XDATCAR file          vasp_xdatcar
    SIESTA STRUCT file         struct_out
    ABINIT input file          abinit
    V_Sim ascii file           v_sim
    Protein Data Bank          pdb
    CIF-file                   cif
    FHI-aims geometry file     aims
    FHI-aims output file       aims_out
    VTK XML Image Data         vti
    VTK XML Structured Grid    vts
    VTK XML Unstructured Grid  vtu
    TURBOMOLE coord file       tmol
    TURBOMOLE gradient file    tmol-gradient
    exciting input             exi
    AtomEye configuration      cfg
    WIEN2k structure file      struct
    DftbPlus input file        dftb
    CASTEP geom file           cell
    CASTEP output file         castep
    CASTEP trajectory file     geom
    ETSF format                etsf.nc
    DFTBPlus GEN format        gen
    CMR db/cmr-file            db
    CMR db/cmr-file            cmr
    LAMMPS dump file           lammps
    EON reactant.con file      eon
    Gromacs coordinates        gro
    Gaussian com (input) file  gaussian
    Gaussian output file       gaussian_out
    Quantum espresso in file   esp_in
    Quantum espresso out file  esp_out
    Extended XYZ file          extxyz
    NWChem input file          nw
    Materials Studio file      xsd
    =========================  =============

    Many formats allow on open file-like object to be passed instead
    of ``filename``. In this case the format cannot be auto-decected,
    so the ``format`` argument should be explicitly given.
    """
    # An explicit '@index' suffix on the filename overrides the index arg.
    if isinstance(filename, str) and (
            '.json@' in filename or
            '.db@' in filename or
            filename.startswith('pg://') and
            '@' in filename):
        filename, index = filename.rsplit('@', 1)
        if index.isdigit():
            index = int(index)
    else:
        if isinstance(filename, str):
            p = filename.rfind('@')
            if p != -1:
                try:
                    index = string2index(filename[p + 1:])
                except ValueError:
                    pass
                else:
                    filename = filename[:p]
    if isinstance(index, str):
        index = string2index(index)
    if format is None:
        # Guess the format from the file content / extension.
        format = filetype(filename)
    if format.startswith('gpw'):
        # GPAW restart files carry positions, cell and calculator results;
        # reconstruct the Atoms plus a single-point calculator from them.
        import gpaw
        r = gpaw.io.open(filename, 'r')
        positions = r.get('CartesianPositions') * Bohr
        numbers = r.get('AtomicNumbers')
        cell = r.get('UnitCell') * Bohr
        pbc = r.get('BoundaryConditions')
        tags = r.get('Tags')
        magmoms = r.get('MagneticMoments')
        energy = r.get('PotentialEnergy') * Hartree
        if r.has_array('CartesianForces'):
            forces = r.get('CartesianForces') * Hartree / Bohr
        else:
            forces = None
        atoms = Atoms(positions=positions, numbers=numbers, cell=cell,
                      pbc=pbc)
        if tags.any():
            atoms.set_tags(tags)
        if magmoms.any():
            atoms.set_initial_magnetic_moments(magmoms)
        else:
            magmoms = None
        atoms.calc = SinglePointDFTCalculator(atoms, energy=energy,
                                              forces=forces,
                                              magmoms=magmoms)
        kpts = []
        if r.has_array('IBZKPoints'):
            for w, kpt, eps_n, f_n in zip(r.get('IBZKPointWeights'),
                                          r.get('IBZKPoints'),
                                          r.get('Eigenvalues'),
                                          r.get('OccupationNumbers')):
                kpts.append(SinglePointKPoint(w, kpt[0], kpt[1],
                                              eps_n[0], f_n[0]))
        atoms.calc.kpts = kpts
        return atoms
    if format in ['json', 'db', 'postgresql']:
        if index == slice(None, None):
            index = None
        from ase.db.core import connect
        images = [row.toatoms()
                  for row in connect(filename, format).select(index)]
        if len(images) == 1:
            return images[0]
        else:
            return images
    if index is None:
        index = -1
    # Per-format dispatch; readers are imported lazily to keep import cheap.
    if format == 'castep':
        from ase.io.castep import read_castep
        return read_castep(filename, index)
    if format == 'castep_cell':
        import ase.io.castep
        return ase.io.castep.read_cell(filename, index)
    if format == 'castep_geom':
        import ase.io.castep
        return ase.io.castep.read_geom(filename, index)
    if format == 'exi':
        from ase.io.exciting import read_exciting
        return read_exciting(filename, index)
    if format in ['xyz', 'extxyz']:
        from ase.io.extxyz import read_xyz
        return read_xyz(filename, index)
    if format == 'traj':
        from ase.io.trajectory import read_trajectory
        return read_trajectory(filename, index)
    if format == 'trj':
        from ase.io.pickletrajectory import read_trajectory
        return read_trajectory(filename, index)
    if format == 'bundle':
        from ase.io.bundletrajectory import read_bundletrajectory
        return read_bundletrajectory(filename, index)
    if format == 'cube':
        from ase.io.cube import read_cube
        return read_cube(filename, index)
    if format == 'nc':
        from ase.io.netcdf import read_netcdf
        return read_netcdf(filename, index)
    if format == 'gpaw-text':
        from ase.io.gpawtext import read_gpaw_text
        return read_gpaw_text(filename, index)
    if format == 'dacapo-text':
        from ase.io.dacapo import read_dacapo_text
        return read_dacapo_text(filename)
    if format == 'dacapo':
        from ase.io.dacapo import read_dacapo
        return read_dacapo(filename)
    if format == 'xsf':
        from ase.io.xsf import read_xsf
        return read_xsf(filename, index)
    if format == 'vasp':
        from ase.io.vasp import read_vasp
        return read_vasp(filename)
    if format == 'vasp_out':
        from ase.io.vasp import read_vasp_out
        return read_vasp_out(filename, index)
    if format == 'vasp_xdatcar':
        from ase.io.vasp import read_vasp_xdatcar
        return read_vasp_xdatcar(filename, index)
    if format == 'abinit':
        from ase.io.abinit import read_abinit
        return read_abinit(filename)
    if format == 'v_sim':
        from ase.io.v_sim import read_v_sim
        return read_v_sim(filename)
    if format == 'mol':
        from ase.io.mol import read_mol
        return read_mol(filename)
    if format == 'pdb':
        from ase.io.pdb import read_pdb
        return read_pdb(filename, index)
    if format == 'cif':
        from ase.io.cif import read_cif
        return read_cif(filename, index)
    if format == 'struct':
        from ase.io.wien2k import read_struct
        return read_struct(filename)
    if format == 'struct_out':
        from ase.io.siesta import read_struct
        return read_struct(filename)
    if format == 'vti':
        from ase.io.vtkxml import read_vti
        return read_vti(filename)
    if format == 'vts':
        from ase.io.vtkxml import read_vts
        return read_vts(filename)
    if format == 'vtu':
        from ase.io.vtkxml import read_vtu
        return read_vtu(filename)
    if format == 'aims':
        from ase.io.aims import read_aims
        return read_aims(filename)
    if format == 'aims_out':
        from ase.io.aims import read_aims_output
        return read_aims_output(filename, index)
    if format == 'iwm':
        from ase.io.iwm import read_iwm
        return read_iwm(filename)
    if format == 'Cmdft':
        from ase.io.cmdft import read_I_info
        return read_I_info(filename)
    if format == 'tmol':
        from ase.io.turbomole import read_turbomole
        return read_turbomole(filename)
    if format == 'tmol-gradient':
        from ase.io.turbomole import read_turbomole_gradient
        return read_turbomole_gradient(filename)
    if format == 'cfg':
        from ase.io.cfg import read_cfg
        return read_cfg(filename)
    if format == 'dftb':
        from ase.io.dftb import read_dftb
        return read_dftb(filename)
    if format == 'sdf':
        from ase.io.sdf import read_sdf
        return read_sdf(filename)
    if format == 'etsf':
        from ase.io.etsf import ETSFReader
        return ETSFReader(filename).read_atoms()
    if format == 'gen':
        from ase.io.gen import read_gen
        return read_gen(filename)
    if format == 'cmr':
        from ase.io.cmr_io import read_db
        return read_db(filename, index)
    if format == 'lammps':
        from ase.io.lammpsrun import read_lammps_dump
        return read_lammps_dump(filename, index)
    if format == 'eon':
        from ase.io.eon import read_reactant_con
        return read_reactant_con(filename)
    if format == 'gromacs':
        from ase.io.gromacs import read_gromacs
        return read_gromacs(filename)
    if format == 'gaussian':
        from ase.io.gaussian import read_gaussian
        return read_gaussian(filename)
    if format == 'gaussian_out':
        from ase.io.gaussian import read_gaussian_out
        return read_gaussian_out(filename, index)
    if format == 'esp_in':
        from ase.io.espresso import read_espresso_in
        return read_espresso_in(filename)
    if format == 'esp_out':
        from ase.io.espresso import read_espresso_out
        return read_espresso_out(filename, index)
    if format == 'nw':
        from ase.io.nwchem import read_nwchem_input
        return read_nwchem_input(filename)
    if format == 'xsd':
        from ase.io.xsd import read_xsd
        return read_xsd(filename)
    raise RuntimeError('File format descriptor ' + format +
                       ' not recognized!')
# Echo the parsed configuration sections for the run log.
print("Parameters for main part")
pprint.pprint(input_main)
print("Parameters for ase part")
pprint.pprint(input_ase)
print("Parameters for solver part")
pprint.pprint(input_sol)
print("Pseudopotentials")
pprint.pprint(pseudopotentials)

### Read/Write file names ###
file_name = input_main["input_xyz_file"]
wfile_name = input_main["output_file_head"]

from ase.io.extxyz import read_xyz, write_xyz

# NOTE(review): the handle passed to read_xyz is never closed — consider a
# `with open(...)` block; left as-is in this doc-only pass.
atoms = read_xyz(open(file_name), index=0)
# Concatenate everything read_xyz yields into a single Atoms object.
atoms_info = Atoms()
for atom in atoms:
    atoms_info += atom
ratoms = atoms_info.get_positions()   # (natoms, 3) Cartesian positions
satoms = atoms_info.symbols
natoms = ratoms.shape[0]

# Slab extent along z (assumes the slab normal is the z axis — confirm).
z_max = np.max(ratoms, axis=0)[2]
z_min = np.min(ratoms, axis=0)[2]
slab_size = z_max - z_min
print('slab_size=', slab_size)

z_margin = input_main["param"]["z_margin"]
z_atoms = ratoms[:, 2]
# Window of width 2*z_margin around the bottom-most atomic layer.
z_bottom_most = [z_min - z_margin, z_min + z_margin]
def unpack_xyz_str_to_results(data):
    """Parse a single-molecule XYZ string and return ``(label, atoms)``.

    Args:
        data (str): xyz-format text for one molecule; its comment line is
            expected to populate ``Atoms.info['label']``.

    Returns:
        tuple: ``(label, at)`` where ``at`` is the parsed structure.
    """
    # BUGFIX: StringIO.StringIO is Python-2 only; io.StringIO is the
    # Python-3 replacement (the rest of this file already uses Python 3
    # APIs such as urllib.request).
    import io
    buffer = io.StringIO(data)
    at = read_xyz(buffer)
    buffer.close()
    label = at.info['label']
    return (label, at)
def _load_data(
        self,
        xyzpath,
        evilmols=None,
):
    """Recursively parse symmetry-labelled xyz files under *xyzpath* and
    add them to the dataset.

    Args:
        xyzpath (str): root directory walked recursively for xyz files.
        evilmols (np.ndarray, optional): 1-based indices of files to skip.

    Returns:
        bool: True when done.
    """
    tmpdir = tempfile.mkdtemp('sym')
    raw_path = os.path.join(xyzpath)
    ordered_files = []
    logging.info('Parse xyz files...')
    for fpathe, dirs, fs in os.walk(raw_path):
        for f in fs:
            ordered_files.append(os.path.join(fpathe, f))
    ordered_files = sorted(ordered_files)
    all_atoms = []
    all_properties = []
    # BUGFIX: np.int was removed in NumPy 1.24; builtin int replaces it.
    irange = np.arange(len(ordered_files), dtype=int)
    if evilmols is not None:
        # evilmols is 1-based, irange is 0-based.
        irange = np.setdiff1d(irange, evilmols - 1)
    # Map symmetry labels to integer codes.
    sym_dict = dict((c, i) for i, c in enumerate(
        ['BU', 'BG', 'AU', 'AG', 'EU', 'EG', 'A"', 'E"', "A'", "E'"]))
    # parse XYZ file
    for i in irange:
        xyzfile = ordered_files[i]
        if (i + 1) % 1000 == 0:
            logging.info('Parsed: {:6d}'.format(i + 1))
        properties = {}
        # Pre-bind so the error report below cannot hit an unbound name.
        info = None
        try:
            with open(xyzfile, 'r') as f:
                lines = f.readlines()
                # Comment line: '|'-separated property fields.
                info = lines[1].strip().split('|')
                for j in range(1, 20):
                    properties[self.required_properties[j - 1]] = np.array(
                        float(info[j])).reshape(1)
                for j in range(20, 32):  # degeneracy
                    properties[self.required_properties[j - 1]] = np.array(
                        int(info[j])).reshape(1)
                for j in range(32, 44):  # symmetry
                    properties[self.required_properties[j - 1]] = np.array(
                        sym_dict[info[j]]).reshape(1)

                def to_idx(idx_str):
                    return int(idx_str[1:])

                # The tag for each atom indicates the index of the atom
                # in its primitive cell.
                tags = list(map(to_idx, info[44].strip().split(' ')))
            tmp = os.path.join(tmpdir, 'tmp.xyz')
            with open(tmp, 'wt') as fout:
                fout.write(lines[0])
                fout.write('**\n')
                for line in lines[2:]:
                    fout.write(line[:line.rfind(' ')] + '\n')  # remove charge
            with open(tmp, 'r') as ftmp:
                ats = list(read_xyz(ftmp, 0))[0]
            ats.set_tags(tags=tags)
            all_atoms.append(ats)
            all_properties.append(properties)
        except Exception:
            # BUGFIX: a bare ``except:`` also swallowed SystemExit /
            # KeyboardInterrupt, and printing ``info`` raised NameError
            # whenever open() itself failed.
            print(xyzfile, info)
    logging.info('Write atoms to db...')
    self.add_systems(all_atoms, all_properties)
    logging.info('Done.')
    shutil.rmtree(tmpdir)
    return True
def load_data(dbpath):
    """Parse locally extracted GDB-9 xyz files (plus per-element charge
    sums) and write them into an ASE database.

    The download/untar steps are intentionally disabled; the data is
    expected to be already extracted under ``..\\data\\gdb9_xyz``.

    Args:
        dbpath (str): path of the ASE db to write.

    Returns:
        bool: True when done.
    """
    print('Downloading GDB-9 data...')
    tmpdir = '..\\data'
    tar_path = os.path.join(tmpdir, 'dsgdb9nsd.xyz.tar.bz2')
    raw_path = os.path.join(tmpdir, 'gdb9_xyz')
    # Download/extraction disabled — data already on disk:
    # url = 'https://ndownloader.figshare.com/files/3195389'
    # urllib.request.urlretrieve(url, tar_path)
    # tarfile.open(tar_path).extractall(raw_path)
    basic_atoms = [
        'Atom_H', 'Atom_C', 'Atom_N', 'Atom_O', 'Atom_F', 'Atom_P',
        'Atom_S', 'Atom_Cl', 'Atom_Br', 'Atom_I'
    ]
    prop_names = [
        'rcA', 'rcB', 'rcC', 'mu', 'alpha', 'h**o', 'lumo', 'gap', 'r2',
        'zpve', 'energy_U0', 'energy_U', 'enthalpy_H', 'free_G', 'Cv'
    ]
    # Conversion factors into ASE default units (Angstrom / eV).
    conversions = [
        1., 1., 1., 1., Bohr**3 / Ang**3, Hartree / eV, Hartree / eV,
        Hartree / eV, Bohr**2 / Ang**2, Hartree / eV, Hartree / eV,
        Hartree / eV, Hartree / eV, Hartree / eV, 1.
    ]
    print('Parse xyz files...')
    with connect(dbpath) as con:
        for i, xyzfile in enumerate(os.listdir(raw_path)):
            # File layout: line 0 = atom count, line 1 = properties, then
            # one "symbol x y z charge" line per atom.
            xyzfile = os.path.join(raw_path, xyzfile)
            if i % 10000 == 0:
                print('Parsed: ' + str(i) + ' / 133885')
            properties = {}
            charges = {a: 0 for a in basic_atoms}
            tmp = os.path.join(tmpdir, 'tmp.xyz')
            with open(xyzfile, 'r') as f:
                lines = f.readlines()
                # Line 2 carries the scalar properties.
                l = lines[1].split()[2:]
                for pn, p, c in zip(prop_names, l, conversions):
                    properties[pn] = float(p) * c
            with open(tmp, "wt") as fout:
                for line in lines:
                    # Normalize Fortran-style '*^' float exponents.
                    fout.write(line.replace('*^', 'e'))
            with open(tmp, 'r') as f:
                lines = f.readlines()
                cnt = int(lines[0])
                # Sum the partial charges per element.
                atoms = lines[2:cnt + 2]
                for atom in atoms:
                    a, _, _, _, c = atom.split()
                    a = 'Atom_' + a
                    charges[a] += float(c)
                properties.update(charges)
                # BUGFIX: readlines() left the handle at EOF, so read_xyz
                # saw an empty stream and list(...)[0] raised IndexError;
                # rewind before handing the handle over.
                f.seek(0)
                ats = list(read_xyz(f, 0))[0]
            con.write(ats, key_value_pairs=properties)
    print('Done.')
    return True