Example #1
0
def read_nwchem(filename):
    """Method to read geometry from a nwchem output

    Every section introduced by a line containing 'XYZ format geometry'
    is parsed; the atoms built from the last such section are returned.
    Symbols starting with 'bq' are rewritten to 'X' before the text is
    handed to read_xyz.

    filename: str or file-like object
        A path (opened and closed here) or an object with readlines().

    Raises ValueError if the input contains no 'XYZ format geometry'
    section.
    """
    opened_here = isinstance(filename, str)
    f = open(filename) if opened_here else filename
    try:
        lines = f.readlines()
    finally:
        # Only close handles we opened; parsing below works on `lines`.
        if opened_here:
            f.close()

    atoms = None
    i = 0
    while i < len(lines):
        if 'XYZ format geometry' in lines[i]:
            # Two lines below the header is the atom count.
            natoms = int(lines[i + 2].split()[0])
            block = []
            # Count line, comment line, then natoms coordinate lines.
            for j in range(2, natoms + 4):
                xyzline = lines[i + j]
                symbol = xyzline.split()[0].strip()
                # replace bq ghost with X: MDTMP can we do better?
                if symbol.startswith('bq'):
                    xyzline = xyzline.replace(symbol, 'X')
                block.append(xyzline)
            atoms = read_xyz(StringIO(''.join(block)))
            i += natoms + 4
        else:
            i += 1

    if atoms is None:
        raise ValueError("no 'XYZ format geometry' section found")

    return atoms
Example #2
0
File: nwchem.py Project: lqcata/ase
def read_nwchem(filename):
    """Method to read geometry from a nwchem output

    Every section introduced by a line containing 'XYZ format geometry'
    is parsed; the atoms built from the last such section are returned.
    Symbols starting with 'bq' are rewritten to 'X' before the text is
    handed to read_xyz.

    filename: str or file-like object
        A path (opened and closed here) or an object with readlines().

    Raises ValueError if the input contains no 'XYZ format geometry'
    section.
    """
    # Accept an already-open file object as well as a path; previously
    # `f` was only bound when a path string was given.
    opened_here = isinstance(filename, str)
    f = open(filename) if opened_here else filename
    try:
        lines = f.readlines()
    finally:
        if opened_here:
            f.close()

    atoms = None
    i = 0
    while i < len(lines):
        if 'XYZ format geometry' in lines[i]:
            # Two lines below the header is the atom count.
            natoms = int(lines[i + 2].split()[0])
            pieces = []
            # Count line, comment line, then natoms coordinate lines.
            for j in range(2, natoms + 4):
                xyzstring = lines[i + j]
                symbol = xyzstring.split()[0].strip()
                # replace bq ghost with X: MDTMP can we do better?
                if symbol.startswith('bq'):
                    xyzstring = xyzstring.replace(symbol, 'X')
                pieces.append(xyzstring)
            atoms = read_xyz(StringIO(''.join(pieces)))
            i += natoms + 4
        else:
            i += 1

    if atoms is None:
        raise ValueError("no 'XYZ format geometry' section found")

    return atoms
Example #3
0
def read_nwchem_input(filename):
    """Method to read geometry from an NWChem input file.

    The geometry region runs from the line after one that starts with
    'geometry' up to the next line that starts with 'end'.  If several
    'geometry' lines are present the last one is used.  The region is
    wrapped in an XYZ header and passed to read_xyz; the cell of the
    result is set to (0, 0, 0).

    filename: str or file-like object
        A path (opened and closed here) or an object with readlines().

    Raises ValueError if no 'geometry' line is found or the region is
    not closed by an 'end' line.
    """
    opened_here = isinstance(filename, str)
    f = open(filename) if opened_here else filename
    try:
        lines = f.readlines()
    finally:
        if opened_here:
            f.close()

    # Find geometry region of input file.
    startline = None
    stopline = 0  # -1 means "inside a geometry region, 'end' not seen yet"
    for index, line in enumerate(lines):
        if line.startswith('geometry'):
            startline = index + 1
            stopline = -1
        elif line.startswith('end') and stopline == -1:
            stopline = index

    if startline is None:
        raise ValueError("no 'geometry' block found")
    if stopline == -1:
        raise ValueError("'geometry' block is not terminated by 'end'")

    # Format and send to read_xyz.
    xyz_text = '%i\n' % (stopline - startline)
    xyz_text += ' geometry\n'
    xyz_text += ''.join(lines[startline:stopline])
    atoms = read_xyz(StringIO(xyz_text))
    atoms.set_cell((0., 0., 0.))  # no unit cell defined

    return atoms
Example #4
0
def read_orca_input(filename):
    """Method to read geometry from an ORCA input file.

    The coordinate block starts after the first line containing both
    'xyz' and '*', and ends at the next line containing '*'.  Blank
    lines inside the block are skipped.  The collected lines are
    wrapped in an XYZ header and passed to read_xyz.

    filename: str or file-like object
        A path (opened and closed here) or an object with readlines().

    Raises ValueError if no block is found or it is not closed by '*'.
    """
    opened_here = isinstance(filename, str)
    f = open(filename) if opened_here else filename
    try:
        lines = f.readlines()
    finally:
        if opened_here:
            f.close()

    # Find geometry region of input file.
    coord_lines = None
    for l, line in enumerate(lines):
        if 'xyz' in line and '*' in line:
            coord_lines = []
            for entry in lines[l + 1:]:
                if '*' in entry:
                    break
                if entry.strip():
                    coord_lines.append(entry)
            else:
                # Ran off the end of the file without a closing '*'.
                raise ValueError("coordinate block is not closed by '*'")
            break

    if coord_lines is None:
        raise ValueError("no '* xyz' coordinate block found")

    xyzstring = str(len(coord_lines)) + '\n\n' + ''.join(coord_lines)
    return read_xyz(StringIO(xyzstring))
Example #5
0
def evaluate_molecules(molecules: List[str],
                       b3lyp_energies: List[float]) -> List[float]:
    """Estimate atomization energies by running the module-level model

    Args:
        molecules ([str]): XYZ-format molecular structures. Assumed to be
            fully-relaxed
        b3lyp_energies ([float]): B3LYP total energies of structures
    Returns:
        ([float]): Estimated G4MP2 atomization energies of molecules
    """

    # Parse the first frame of every XYZ string
    structures = []
    for xyz in molecules:
        structures.append(next(read_xyz(StringIO(xyz))))

    # Turn each structure into a model input
    converter = AtomsConverter(environment)
    model_inputs = []
    for structure in structures:
        model_inputs.append(converter.convert_atoms(structure))

    # Attach the B3LYP energy to each input under the 'u0' key
    for entry, energy in zip(model_inputs, b3lyp_energies):
        entry['u0'] = torch.Tensor(np.expand_dims(energy, 0))

    # Evaluate the model one input at a time and pull out 'y'
    predictions = []
    for entry in model_inputs:
        predicted = model(entry)['y']
        predictions.append(np.squeeze(predicted.cpu().data.numpy()))

    # Convert each prediction to an atomization energy
    energies = []
    for structure, predicted in zip(structures, predictions):
        energies.append(
            compute_atomization_energy(structure, predicted, 'g4mp2'))
    return energies
Example #6
0
def read_nwchem_input(filename):
    """Method to read geometry from an NWChem input file.

    Lines between a line starting with 'geometry' and the following line
    starting with 'end' are taken as the coordinates (the last
    'geometry' line wins if there are several).  They are prefixed with
    an XYZ header and parsed by read_xyz; the resulting cell is set to
    (0, 0, 0).

    filename: str or file-like object
        A path (opened and closed here) or an object with readlines().

    Raises ValueError if the 'geometry' line or its closing 'end' line
    is missing.
    """
    opened_here = isinstance(filename, str)
    f = open(filename) if opened_here else filename
    try:
        lines = f.readlines()
    finally:
        if opened_here:
            f.close()

    # Find geometry region of input file.
    startline = None
    stopline = 0  # -1 marks an open region still waiting for 'end'
    for index, line in enumerate(lines):
        if line.startswith('geometry'):
            startline = index + 1
            stopline = -1
        elif line.startswith('end') and stopline == -1:
            stopline = index

    if startline is None:
        raise ValueError("no 'geometry' block found")
    if stopline == -1:
        raise ValueError("'geometry' block is not terminated by 'end'")

    # Format and send to read_xyz.
    header = '%i\n' % (stopline - startline) + ' geometry\n'
    xyz_text = header + ''.join(lines[startline:stopline])
    atoms = read_xyz(StringIO(xyz_text))
    atoms.set_cell((0., 0., 0.))  # no unit cell defined

    return atoms
Example #7
0
def read_orca(filename):
    """Method to read geometry from a ORCA output

    Uses the first line containing 'CARTESIAN COORDINATES (ANGSTROEM)'.
    The line right after it is skipped, and the following lines are
    collected until a blank (or whitespace-only) line or the end of the
    input.  The collected lines are wrapped in an XYZ header and passed
    to read_xyz.

    filename: str or file-like object
        A path (opened and closed here) or an object with readlines().

    Raises ValueError if the header line is not found.
    """
    opened_here = isinstance(filename, str)
    f = open(filename) if opened_here else filename
    try:
        lines = f.readlines()
    finally:
        if opened_here:
            f.close()

    coord_lines = None
    for l, line in enumerate(lines):
        if 'CARTESIAN COORDINATES (ANGSTROEM)' in line:
            coord_lines = []
            # lines[l + 1] is skipped; coordinates start at l + 2.
            for entry in lines[l + 2:]:
                if not entry.strip():
                    break
                coord_lines.append(entry)
            break

    if coord_lines is None:
        raise ValueError(
            "no 'CARTESIAN COORDINATES (ANGSTROEM)' section found")

    xyzstring = str(len(coord_lines)) + '\n\n' + ''.join(coord_lines)
    return read_xyz(StringIO(xyzstring))
Example #8
0
def evaluate_molecules(molecules: List[str],
                       b3lyp_energies: List[float]) -> List[float]:
    """Estimate atomization energies as B3LYP value plus a predicted delta

    Args:
        molecules ([str]): XYZ-format molecular structures. Assumed to be
            fully-relaxed
        b3lyp_energies ([float]): B3LYP total energies of structures
    Returns:
        ([float]): Estimated G4MP2 atomization energies of molecules
    """

    # Build one Compound per XYZ string
    compounds = []
    for xyz in molecules:
        compounds.append(Compound(StringIO(xyz)))

    # B3LYP atomization energy of each structure
    b3lyp_atomization = []
    for xyz, u0 in zip(molecules, b3lyp_energies):
        structure = next(read_xyz(StringIO(xyz)))
        b3lyp_atomization.append(
            compute_atomization_energy(structure, u0, 'b3lyp'))

    # Generate the FCHL representation of each compound and collect it
    representations = []
    for compound in compounds:
        compound.generate_fchl_representation(max_size)
        representations.append(compound.representation)
    reps = np.array(representations)

    # Predicted difference between B3LYP and G4MP2
    delta = model.predict(reps)

    # Element-wise sum of the two contributions
    return np.add(b3lyp_atomization, delta)
Example #9
0
 def _load_molecule(self, molecule_path):
     """
     Load a molecule from file and store it on ``self.molecule``.

     Args:
         molecule_path (str): Path to molecular geometry
     """
     # NOTE(review): os.path.splitext keeps the leading dot ('.xyz'), so
     # the comparison against 'xyz' never matches and every path goes
     # through read().  Confirm what read_xyz returns in this project
     # before changing the comparison.
     if os.path.splitext(molecule_path)[-1] != 'xyz':
         self.molecule = read(molecule_path)
         return
     self.molecule = read_xyz(molecule_path)
def acetaldehyde():
    """Return the first frame read_xyz yields for the embedded XYZ text."""
    xyz_text = """7
H4 C2 O1
C -0.002945 1.509914 0.008673
C 0.026083 0.003276 -0.037459
O 0.942288 -0.655070 -0.456826
H 0.922788 1.926342 -0.391466
H -0.862015 1.878525 -0.564795
H -0.150506 1.843934 1.042891
H -0.894430 -0.486434 0.357749"""
    frames = read_xyz(StringIO(xyz_text))
    return next(frames)
Example #11
0
def main(filename, source, x=1, y=1, z=1):
    """Attach a cell to the first XYZ frame and write it as a POSCAR.

    Args:
        filename: XYZ file; only its first frame is used.
        source: Either the literal string "NA", in which case a diagonal
            cell built from ``x``, ``y``, ``z`` is used, or a path passed
            to ``vasp.read_vasp`` whose cell is copied.
        x, y, z: Diagonal entries of the cell when ``source`` is "NA".

    Raises:
        Exception: if ``source`` is not "NA" and reading it fails; the
            underlying error is chained as the cause.

    The result is always written to "../POSCAR_ec".
    """
    temp = list(xyz.read_xyz(filename))[0]

    if source == "NA":
        temp.cell = Cell([[x, 0, 0], [0, y, 0], [0, 0, z]])
    else:
        try:
            temp.cell = vasp.read_vasp(source).cell
        except Exception as err:
            # `except Exception` (not a bare except) lets KeyboardInterrupt
            # and SystemExit through; chaining keeps the real reason visible.
            raise Exception("source is neither valid POSCAR nor NA") from err

    vasp.write_vasp("../POSCAR_ec", temp, sort=True, vasp5=True)
Example #12
0
def evaluate_schnet(models: List[Union[TorchMessage, torch.nn.Module, Path]],
                    molecules: List[str],
                    property_name: str,
                    batch_size: int = 64,
                    device: str = 'cpu') -> np.ndarray:
    """Evaluate one or more models on a list of XYZ structures

    Args:
        models: List of models to evaluate. Either a SchNet model or
           the bytes corresponding to a serialized model
        molecules: XYZ-format structures of molecules to be evaluate
        property_name: Name of the property being predicted
        batch_size: Number of molecules to evaluate per batch
        device: Device on which to run the computation
    Returns:
        Stacked predictions, transposed so that models run along the
        last axis
    """

    # The type of the first entry decides how the whole list is unpacked
    first = models[0]
    if isinstance(first, TorchMessage):
        models = [message.get_model(device) for message in models]
    elif isinstance(first, (Path, str)):
        # Load to main memory first
        models = [torch.load(path, map_location='cpu') for path in models]

    with TemporaryDirectory() as workdir:
        # Parse the first frame of every XYZ string
        structures = []
        for xyz in molecules:
            structures.append(next(read_xyz(StringIO(xyz), slice(None))))

        # Store the structures in a database inside the temporary directory
        db_path = os.path.join(workdir, 'run_data.db')
        database = AtomsData(db_path, available_properties=[])
        database.add_systems(structures, [{} for _ in structures])

        # Batched access to the database
        loader = AtomsLoader(database, batch_size=batch_size)

        # One prediction array per model
        per_model = []
        for network in models:
            network.to(device)
            chunks = []
            for batch in loader:
                on_device = {
                    key: tensor.to(device) for key, tensor in batch.items()
                }
                output = network(on_device)
                chunks.append(output[property_name].detach().cpu().numpy())
            per_model.append(np.squeeze(np.concatenate(chunks)))

        return np.vstack(per_model).T
Example #13
0
def read_centers(filename=None):
    """Split the first frame of an XYZ file into 'X' sites and real atoms.

    Returns a tuple ``(wannier_centers, atoms)``: the positions of all
    entries whose symbol is 'X', and an Atoms object built from the
    remaining entries with the cell of the frame that was read.
    """
    frame = list(read_xyz(filename))[0]
    symbols = frame.get_chemical_symbols()
    positions = frame.get_positions()

    # Single pass: 'X' entries are wannier centers, everything else is an atom.
    wannier_centers = []
    asymbols = []
    aposes = []
    for symbol, position in zip(symbols, positions):
        if symbol == 'X':
            wannier_centers.append(position)
        else:
            asymbols.append(symbol)
            aposes.append(position)

    cell = frame.get_cell()
    atoms = Atoms(symbols=asymbols, positions=aposes, cell=cell)

    return wannier_centers, atoms
Example #14
0
def make_schnetpack_data(dataset,
                         dbpath,
                         properties,
                         xyz_col='xyz',
                         conformers=None,
                         overwrite=True):
    """Build an AtomsData database from the rows of a DataFrame

    Args:
        dataset (pd.DataFrame): Dataset to convert
        dbpath (string): Path to database to be saved
        properties ([string]): List of properties to include in the dataset
        conformers (str): Name of column with conformers as xyz
        xyz_col (string): Name of the column with the XYZ data
        overwrite (True): Whether to overwrite the database
    Returns:
        The AtomsData object after all systems were added
    """

    # Remove an existing database file when overwriting is requested
    if os.path.exists(dbpath) and overwrite:
        os.unlink(dbpath)

    # First frame of each XYZ entry
    def _first_frame(xyz):
        return next(read_xyz(StringIO(xyz)))

    atoms = dataset[xyz_col].apply(_first_frame)

    # Property columns are the requested properties minus the XYZ column
    prop_cols = set(properties).difference([xyz_col])
    property_list = []
    for _, row in dataset.iterrows():
        property_list.append({p: np.atleast_1d(row[p]) for p in prop_cols})

    # Conformers are attached per row but are not a required property
    if conformers is not None:
        for record, conformer in zip(property_list, dataset[conformers]):
            record['conformers'] = np.atleast_1d(conformer)

    # Create the database object and fill it
    has_conformers = conformers is not None
    db = AtomsData(dbpath,
                   required_properties=properties,
                   conformers=has_conformers)
    db.add_systems(atoms, property_list)
    return db
Example #15
0
def get_initial_structure(smiles: str) -> Tuple[Atoms, pybel.Molecule]:
    """Build a 3D starting geometry from a SMILES string

    Args:
        smiles: SMILES string
    Returns:
        The parsed atoms object (with ``charge`` and initial charges set
        from the pybel molecule) and the pybel molecule itself
    """

    # Parse the SMILES and generate 3D coordinates
    mol = pybel.readstring("smi", smiles)
    mol.make3D()

    # Round-trip through XYZ text to get the first frame
    xyz_text = mol.write('xyz')
    frames = read_xyz(StringIO(xyz_text), slice(None))
    atoms = next(frames)

    # Copy total and per-atom formal charges over
    atoms.charge = mol.charge
    formal_charges = [atom.formalcharge for atom in mol.atoms]
    atoms.set_initial_charges(formal_charges)

    return atoms, mol
Example #16
0
def run_model(model, data, xyz_col, additional_cols=None, progbar=True):
    """Evaluate a model row by row on XYZ strings stored in a dataframe

    Args:
        model (AtomisticModel): Model to be evaluated
        data (DataFrame): Data to be evaluated
        xyz_col (string): Column containing the XYZ data
        additional_cols ([string]): Any other columns to add to the input (e.g., B3LYP results)
        progbar (boolean): Whether to display a progress bar
    Returns:
        (ndarray) Predictions from the model
    """

    # No extra columns unless the caller asks for them
    if additional_cols is None:
        additional_cols = []

    # Converter from atoms objects to model inputs
    converter = AtomsConverter()

    # Pair each XYZ string with the values of its extra columns
    rows = list(zip(data[xyz_col], data[additional_cols].values))

    results = []
    for xyz, extras in tqdm(rows, disable=not progbar, leave=False):
        # First frame of the XYZ string
        atoms = next(read_xyz(StringIO(xyz)))

        # Model input for this structure
        inputs = converter.convert_atoms(atoms)

        # Extra columns are added under their column name
        for position, col in enumerate(additional_cols):
            inputs[col] = torch.Tensor(np.expand_dims(extras[position], 0))

        # Evaluate and keep the 'y' output as a numpy value
        predicted = model(inputs)['y']
        results.append(np.squeeze(predicted.cpu().data.numpy()))

    return np.array(results)
Example #17
0
def read_gamess_us(filename):
    """Method to read geometry from a GAMESS-US output

    Uses the first coordinate table, identified by its
    'CHARGE  X  Y  Z' column header.  For every following non-blank
    line the first field is the symbol and fields 3-5 are the
    coordinates, which are multiplied by the module-level ``Bohr``
    constant (presumably Bohr -> Angstrom; confirm).  The table ends at
    the first blank line or at the end of the input.

    filename: str or file-like object
        A path (opened and closed here) or an object with readlines().

    Raises ValueError if the table header is not found.
    """
    opened_here = isinstance(filename, str)
    f = open(filename) if opened_here else filename
    try:
        lines = f.readlines()
    finally:
        if opened_here:
            f.close()

    header = 'CHARGE         X                   Y                   Z'
    entries = None
    for l, line in enumerate(lines):
        if header in line:
            entries = []
            for row in lines[l + 1:]:
                fields = row.split()
                if not fields:
                    break
                # A real list is needed here: on Python 3 a bare map()
                # object cannot be indexed or unpacked by position.
                x, y, z = [float(value) * Bohr for value in fields[2:5]]
                entries.append(
                    ' '.join([fields[0], str(x), str(y), str(z)]) + '\n')
            break

    if entries is None:
        raise ValueError("no 'CHARGE X Y Z' coordinate table found")

    xyzstring = str(len(entries)) + '\n\n' + ''.join(entries)
    return read_xyz(StringIO(xyzstring))
Example #18
0
def read_gamess_us_input(filename):
    """Method to read geometry from an GAMESS-US input file.

    Uses the first line containing ' $data'.  The two lines after it
    are skipped (presumably the title and symmetry cards; confirm), and
    the following lines are read as atoms until a line containing
    '$end'.  For each atom line the first field is the symbol and
    fields 3-5 are the coordinates.  Blank lines are skipped.

    filename: str or file-like object
        A path (opened and closed here) or an object with readlines().

    Raises ValueError if ' $data' is not found or the group is not
    closed by '$end'.
    """
    opened_here = isinstance(filename, str)
    f = open(filename) if opened_here else filename
    try:
        lines = f.readlines()
    finally:
        if opened_here:
            f.close()

    # Find geometry region of input file.
    entries = None
    for l, line in enumerate(lines):
        if ' $data' in line:
            entries = []
            for row in lines[l + 3:]:
                if '$end' in row:
                    break
                fields = row.split()
                if not fields:
                    continue
                # A real list is needed here: on Python 3 a bare map()
                # object cannot be indexed or unpacked by position.
                x, y, z = [float(value) for value in fields[2:5]]
                entries.append(
                    ' '.join([fields[0], str(x), str(y), str(z)]) + '\n')
            else:
                raise ValueError("' $data' group is not closed by '$end'")
            break

    if entries is None:
        raise ValueError("no ' $data' group found")

    xyzstring = str(len(entries)) + '\n\n' + ''.join(entries)
    return read_xyz(StringIO(xyzstring))
def get_initial_structure(smiles: str) -> Tuple[Atoms, Dict[int, Set[int]]]:
    """Build a 3D starting geometry and its bond graph from a SMILES string

    Args:
        smiles: SMILES string
    Returns:
        An ASE atoms object and an ``nx.Graph`` with one node per atom
        and one edge per bond; each edge stores ``data={"rotor": ...}``
    """

    # Parse the SMILES and generate 3D coordinates
    mol = pybel.readstring("smi", smiles)
    mol.make3D()

    # Round-trip through XYZ text to get the first frame
    xyz_text = mol.write('xyz')
    atoms = next(read_xyz(StringIO(xyz_text), slice(None)))

    # One node per atom, numbered from zero
    g = nx.Graph()
    atom_count = len(mol.atoms)
    g.add_nodes_from(range(atom_count))

    # Bond indices are shifted down by one to match the node numbering
    for bond in OBMolBondIter(mol.OBMol):
        begin = bond.GetBeginAtomIdx() - 1
        end = bond.GetEndAtomIdx() - 1
        g.add_edge(begin, end, data={"rotor": bond.IsRotor()})
    return atoms, g
Example #20
0
def read_gamess_us(filename):
    """Method to read geometry from a GAMESS-US output

    The first table whose header line contains 'CHARGE  X  Y  Z' is
    used.  Each following non-blank line contributes one atom: field 1
    is the symbol, fields 3-5 are coordinates that get multiplied by the
    module-level ``Bohr`` constant (presumably Bohr -> Angstrom;
    confirm).  A blank line or the end of the input ends the table.

    filename: str or file-like object
        A path (opened and closed here) or an object with readlines().

    Raises ValueError if the table header is not found.
    """
    opened_here = isinstance(filename, str)
    f = open(filename) if opened_here else filename
    try:
        lines = f.readlines()
    finally:
        if opened_here:
            f.close()

    header = 'CHARGE         X                   Y                   Z'
    entries = None
    for l, line in enumerate(lines):
        if header in line:
            entries = []
            for row in lines[l + 1:]:
                fields = row.split()
                if not fields:
                    break
                # Build a list rather than indexing map(): a Python 3
                # map object does not support pos[0]-style access.
                pos = [float(value) * Bohr for value in fields[2:5]]
                x, y, z = pos
                entries.append(
                    ' '.join([fields[0], str(x), str(y), str(z)]) + '\n')
            break

    if entries is None:
        raise ValueError("no 'CHARGE X Y Z' coordinate table found")

    xyzstring = str(len(entries)) + '\n\n' + ''.join(entries)
    return read_xyz(StringIO(xyzstring))
Example #21
0
def read_gamess_us_input(filename):
    """Method to read geometry from an GAMESS-US input file.

    The first line containing ' $data' starts the group.  The next two
    lines are skipped (presumably title and symmetry cards; confirm);
    after that every non-blank line up to the one containing '$end' is
    an atom: field 1 is the symbol, fields 3-5 are the coordinates.

    filename: str or file-like object
        A path (opened and closed here) or an object with readlines().

    Raises ValueError if ' $data' is not found or the group is not
    closed by '$end'.
    """
    opened_here = isinstance(filename, str)
    f = open(filename) if opened_here else filename
    try:
        lines = f.readlines()
    finally:
        if opened_here:
            f.close()

    # Find geometry region of input file.
    entries = None
    for l, line in enumerate(lines):
        if ' $data' in line:
            entries = []
            for row in lines[l + 3:]:
                if '$end' in row:
                    break
                fields = row.split()
                if not fields:
                    continue
                # Build a list rather than indexing map(): a Python 3
                # map object does not support pos[0]-style access.
                pos = [float(value) for value in fields[2:5]]
                x, y, z = pos
                entries.append(
                    ' '.join([fields[0], str(x), str(y), str(z)]) + '\n')
            else:
                raise ValueError("' $data' group is not closed by '$end'")
            break

    if entries is None:
        raise ValueError("no ' $data' group found")

    xyzstring = str(len(entries)) + '\n\n' + ''.join(entries)
    return read_xyz(StringIO(xyzstring))
Example #22
0
def read(filename, index=-1, format=None):
    """Read Atoms object(s) from file.

    filename: str
        Name of the file to read from.  A trailing '@<index>' is split
        off and used as *index* when it parses with string2index.
    index: int or slice
        If the file contains several configurations, the last configuration
        will be returned by default.  Use index=n to get configuration
        number n (counting from zero).  Only the xyz, traj, cube, nc,
        gpaw-text, xsf and babel readers receive *index*; the other
        readers are called with the filename only.
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be guessed by the *filetype* function. If
        it's 'babel', will try to use the OpenBabel library.

    Known formats:

    =========================  ===========
    format                     short name
    =========================  ===========
    GPAW restart-file          gpw
    Dacapo netCDF output file  dacapo
    Old ASE netCDF trajectory  nc
    Virtual Nano Lab file      vnl
    ASE pickle trajectory      traj
    GPAW text output           gpaw-text
    CUBE file                  cube
    XCrySDen Structure File    xsf  
    Dacapo text output         dacapo-text
    XYZ-file                   xyz
    VASP POSCAR/CONTCAR file   vasp
    Protein Data Bank          pdb
    VTK XML Image Data         vti
    VTK XML Structured Grid    vts
    VTK XML Unstructured Grid  vtu
    =========================  ===========

    In addition the short names 'mol', 'cif', 'babel', 'iwm' and 'Cmdft'
    are dispatched to their readers.  'vnl' is listed above but has no
    reader branch in this function.

    Raises RuntimeError if *format* matches none of the branches.
    """
    # Split an optional '@index' suffix off the filename.
    p = filename.rfind('@')
    if p != -1:
        try:
            index = string2index(filename[p + 1:])
        except ValueError:
            # Not an index: the '@' is part of the filename itself.
            pass
        else:
            filename = filename[:p]

    if format is None:
        format = filetype(filename)

    # Readers are imported lazily so that only the backend actually
    # needed for the requested format has to be importable.
    if format.startswith('gpw'):
        import gpaw
        r = gpaw.io.open(filename, 'r')
        positions = r.get('CartesianPositions') * Bohr
        numbers = r.get('AtomicNumbers')
        cell = r.get('UnitCell') * Bohr
        pbc = r.get('BoundaryConditions')
        tags = r.get('Tags')
        magmoms = r.get('MagneticMoments')

        atoms = Atoms(positions=positions,
                      numbers=numbers,
                      cell=cell,
                      pbc=pbc)
        # Tags and magnetic moments are only set when any is non-zero.
        if tags.any():
            atoms.set_tags(tags)
        if magmoms.any():
            atoms.set_initial_magnetic_moments(magmoms)

        return atoms

    if format == 'xyz':
        from ase.io.xyz import read_xyz
        return read_xyz(filename, index)

    if format == 'traj':
        from ase.io.trajectory import read_trajectory
        return read_trajectory(filename, index)

    if format == 'cube':
        from ase.io.cube import read_cube
        return read_cube(filename, index)

    if format == 'nc':
        from ase.io.netcdf import read_netcdf
        return read_netcdf(filename, index)

    if format == 'gpaw-text':
        from ase.io.gpawtext import read_gpaw_text
        return read_gpaw_text(filename, index)

    if format == 'dacapo-text':
        from ase.io.dacapo import read_dacapo_text
        return read_dacapo_text(filename)

    if format == 'dacapo':
        from ase.io.dacapo import read_dacapo
        return read_dacapo(filename)

    if format == 'xsf':
        from ase.io.xsf import read_xsf
        return read_xsf(filename, index)

    if format == 'vasp':
        from ase.io.vasp import read_vasp
        return read_vasp(filename)

    if format == 'mol':
        from ase.io.mol import read_mol
        return read_mol(filename)

    if format == 'pdb':
        from ase.io.pdb import read_pdb
        return read_pdb(filename)

    if format == 'cif':
        from ase.io.cif import read_cif
        return read_cif(filename)

    if format == 'babel':
        from ase.io.babel import read_babel
        return read_babel(filename, index=index)

    if format == 'vti':
        from ase.io.vtkxml import read_vti
        return read_vti(filename)

    if format == 'vts':
        from ase.io.vtkxml import read_vts
        return read_vts(filename)

    if format == 'vtu':
        from ase.io.vtkxml import read_vtu
        return read_vtu(filename)

    if format == 'iwm':
        from ase.io.iwm import read_iwm
        return read_iwm(filename)

    if format == 'Cmdft':
        from ase.io.cmdft import read_I_info
        return read_I_info(filename)

    # Reachable whenever the caller (or filetype) names a format that has
    # no branch above, so say which one it was.
    raise RuntimeError('File format %r is not supported' % (format,))
Example #23
0
    # Get the system information
    host_info = get_platform_info()

    # Set the random seed
    np.random.seed(args.random)
    rng = np.random.RandomState(args.random)

    # Download the QM9 dataset and get the molecule of interest
    qm9_path = get_qm9_path()
    with gzip.open(qm9_path, 'rt') as fp:
        for _, d in zip(range(args.mol), fp):
            pass
        mol_info = json.loads(d)

    # Parse the molecule coordinates into an ASE object
    atoms = next(read_xyz(StringIO(mol_info['xyz'])))

    # Open an experiment directory
    start_time = datetime.utcnow()
    out_dir = os.path.join('runs', f'{start_time.strftime("%d%b%y-%H%M%S")}')
    os.makedirs(out_dir)

    # Save the parameters and host information
    with open(os.path.join(out_dir, 'run_params.json'), 'w') as fp:
        json.dump(run_params, fp, indent=2)
    with open(os.path.join(out_dir, 'host_info.json'), 'w') as fp:
        json.dump(host_info, fp, indent=2)

    # Initialize the ASE calculator
    calc = Psi4(memory='500MB', **_fidelity[args.fidelity])
Example #24
0
    "-f",
    "--format",
    action="store",
    type="string",
    default="xyz",
    help="format of the output file: POSCAR, (xyz in preparation)")

(options, args) = parser.parse_args()

print_debug = False

# `num`, `input_file` and `parser` are defined earlier in the script.
if (num < 2):
    parser.print_help()
else:

    # Read every frame of the trajectory (slice 0..end, step 1).
    file_xyz = read_xyz(input_file, slice(0, None, 1))
    natoms = file_xyz[0].get_number_of_atoms()

    # if possible read unit-cell
    if (os.path.isfile(options.cell)):
        cell = [[], [], []]
        # Context manager so the cell file is closed even if parsing fails.
        with open(options.cell, "r") as f:
            ls = f.read().splitlines()
        # The first three lines each hold one cell vector (three floats).
        for i in range(3):
            l = ls[i].split()
            cell[i] = [float(l[0]), float(l[1]), float(l[2])]
        # Apply the cell and full periodicity to every frame.
        for step in file_xyz:
            step.set_cell(cell)
            step.set_pbc([True, True, True])

    # --------------- Atoms -----------------
# XYZ text for a 10-atom structure (4 C + 6 H); the comment line is empty.
butadiene = """10

C       3.649801161546418      5.442281389577507      3.863313703750026
C       5.051651240044169      5.368220758269772      4.162165876906096
C       5.750174626862403      4.162261915959347      4.240449977068684
C       7.150130182125531      4.155384186721486      4.537328602062397
H       3.218154657585170      4.565210696328925      3.522601038049320
H       3.077656122062729      6.375092902842770      3.826039498180272
H       5.478464901706067      6.370680001794822      4.422235395756437
H       5.320549047980879      3.220584852467720      3.974551561510350
H       7.723359150977955      3.224855971783890      4.574146712279462
H       7.580803493981530      5.034479218283977      4.877211530909463
"""

# The same h is passed to minimal_box and to the GPAW calculator.
h = 0.3
# NOTE(review): StringIO.StringIO looks like the Python 2 module spelling;
# confirm the interpreter before reusing this script.
atoms = Cluster(read_xyz(StringIO.StringIO(butadiene)))
atoms.minimal_box(3., h)
atoms.set_calculator(GPAW(h=h))
# Disabled block (constant-false condition): FIRE relaxation to
# fmax=0.05 followed by writing 'butadiene.xyz'.
if 0:
    dyn = FIRE(atoms)
    dyn.run(fmax=0.05)
    atoms.write('butadiene.xyz')

# Run the vibrations calculation under the name 'fcvib'.
vibname = 'fcvib'
vib = Vibrations(atoms, name=vibname)
vib.run()

# Franck-Condon object built from the same atoms and vibration name.
a = FranckCondon(atoms, vibname, minfreq=250)

# excited state forces
Example #26
0
from ase.io.xyz import read_xyz

from gpaw import GPAW
from gpaw.mixer import Mixer
from gpaw import ConvergenceError
from gpaw.mpi import rank

from gpaw.eigensolvers.rmm_diis import RMM_DIIS

from gpaw import setup_paths

# Use setups from the $PWD and $PWD/.. first
# (both are inserted at position 0, so '../' ends up ahead of '.')
setup_paths.insert(0, '.')
setup_paths.insert(0, '../')

# Structure is read from a fixed path relative to the working directory.
atoms = read_xyz('../Au102_revised.xyz')

prefix = 'Au_cluster'
# Cubic cell of edge L; atoms are centred in it without rescaling.
L = 32.0
atoms.set_cell((L,L,L),scale_atoms=False)
atoms.center()
atoms.set_pbc(1)
# Repetition counts per axis; n and nbands below scale with them.
r = [1, 1, 1]
atoms = atoms.repeat(r)
n = [240 * ri for ri in r]
# nbands (>=1683) is the number of bands per cluster
nbands = 3*6*6*16 # 1728
# Multiply the per-cluster band count by the number of repeated clusters.
for ri in r: nbands = nbands*ri
mixer = Mixer(beta=0.1, nmaxold=5, weight=100.0)
# the next three lines decrease memory usage
# NOTE(review): only the first of those three lines is visible in this excerpt.
es = RMM_DIIS(keep_htpsit=False)
# XYZ text for a 10-atom structure (4 C + 6 H); the comment line is empty.
butadiene = """10

C       3.649801161546418      5.442281389577507      3.863313703750026
C       5.051651240044169      5.368220758269772      4.162165876906096
C       5.750174626862403      4.162261915959347      4.240449977068684
C       7.150130182125531      4.155384186721486      4.537328602062397
H       3.218154657585170      4.565210696328925      3.522601038049320
H       3.077656122062729      6.375092902842770      3.826039498180272
H       5.478464901706067      6.370680001794822      4.422235395756437
H       5.320549047980879      3.220584852467720      3.974551561510350
H       7.723359150977955      3.224855971783890      4.574146712279462
H       7.580803493981530      5.034479218283977      4.877211530909463
"""

# The same h is passed to minimal_box and to the GPAW calculator.
h = 0.3
# NOTE(review): StringIO.StringIO looks like the Python 2 module spelling;
# confirm the interpreter before reusing this script.
atoms = Cluster(read_xyz(StringIO.StringIO(butadiene)))
atoms.minimal_box(3.0, h)
atoms.set_calculator(GPAW(h=h))
# Disabled block (constant-false condition): FIRE relaxation to
# fmax=0.05 followed by writing "butadiene.xyz".
if 0:
    dyn = FIRE(atoms)
    dyn.run(fmax=0.05)
    atoms.write("butadiene.xyz")

# Run the vibrations calculation under the name "fcvib".
vibname = "fcvib"
vib = Vibrations(atoms, name=vibname)
vib.run()

# Franck-Condon object built from the same atoms and vibration name.
a = FranckCondon(atoms, vibname, minfreq=250)

# excited state forces
Example #28
0
def train_schnet(
    model: Union[TorchMessage, torch.nn.Module, Path],
    database: Dict[str, float],
    num_epochs: int,
    reset_weights: bool = True,
    property_name: str = 'output',
    test_set: Optional[List[str]] = None,
    device: str = 'cpu',
    batch_size: int = 32,
    validation_split: float = 0.1,
    bootstrap: bool = False,
    random_state: int = 1,
    learning_rate: float = 1e-3,
    patience: int = None,
    timeout: float = None
) -> Union[Tuple[TorchMessage, pd.DataFrame], Tuple[TorchMessage, pd.DataFrame,
                                                    List[float]]]:
    """Fit a SchNet model to XYZ structures and return it with its training log

    Args:
        model: Network to train: a ``TorchMessage``, a path to a file readable by ``torch.load``,
            or any other object, which is then used as-is
        database: Mapping of XYZ-format structure string to the target value
        num_epochs: Number of epochs handed to the trainer
        reset_weights: If true, call ``reset_parameters`` on every submodule that defines it before training
        property_name: Key under which the target values are stored in the training and validation databases
        test_set: Optional hold-out structures. If given, predictions for them are added to the return value
        device: Device (e.g., 'cuda', 'cpu') used for training
        batch_size: Batch size of the training and validation loaders
        validation_split: Probability with which each entry is placed in the validation set
        bootstrap: If true, resample the training entries with replacement before training
        random_state: Seed of the random generator behind the validation split and the bootstrap sample
        learning_rate: Learning rate given to the Adam optimizer
        patience: Patience of the learning-rate reduction hook. Default: ``num_epochs // 8``
        timeout: Maximum training time in seconds (passed to ``TimeoutHook`` when not None)
    Returns:
        - model: Trained model wrapped in a ``TorchMessage``
        - history: Contents of the ``log.csv`` written during training
        - test_pred: First column of the predictions on ``test_set``; only returned when ``test_set`` is given
    """

    # Obtain a plain Torch module, whichever form the model arrived in
    if isinstance(model, TorchMessage):
        model = model.get_model(device)
    elif isinstance(model, (Path, str)):
        # Load to main memory first
        model = torch.load(model, map_location='cpu')

    # Optionally start over from freshly-initialized weights
    if reset_weights:
        for layer in model.modules():
            if not hasattr(layer, 'reset_parameters'):
                continue
            layer.reset_parameters()

    # Split the database into parallel arrays of structures and targets
    xyz, y = zip(*database.items())
    structures = np.array(xyz)
    targets = np.array(y)

    # Parse each XYZ string into an ase Atoms object (first frame of each)
    parsed = []
    for entry in structures:
        parsed.append(next(read_xyz(StringIO(entry), slice(None))))
    atoms = np.array(parsed)

    # Randomly assign every entry to either the training or the validation set
    rng = np.random.RandomState(random_state)
    in_train = rng.rand(len(structures)) > validation_split
    in_valid = ~in_train
    train_X, train_y = atoms[in_train], targets[in_train]
    valid_X, valid_y = atoms[in_valid], targets[in_valid]

    # Optionally replace the training set by a bootstrap sample of itself
    if bootstrap:
        n_train = len(train_X)
        picks = rng.choice(n_train, size=(n_train, ), replace=True)
        train_X, train_y = train_X[picks], train_y[picks]

    # Everything written during training lives in a throw-away directory
    with TemporaryDirectory() as td:

        def _build_loader(db_name, systems, values, **loader_options):
            """Store the systems in a database file under ``td`` and wrap it in a loader"""
            data = AtomsData(os.path.join(td, db_name),
                             available_properties=[property_name])
            data.add_systems(systems,
                             [{property_name: value} for value in values])
            return AtomsLoader(data, batch_size=batch_size, **loader_options)

        train_loader = _build_loader('train_data.db', train_X, train_y,
                                     shuffle=True)
        valid_loader = _build_loader('valid_data.db', valid_X, valid_y)

        # Optimizer, loss and metric
        # NOTE(review): loss and metric are keyed on the literal 'delta' rather than
        #  on ``property_name`` -- confirm that this is intended
        opt = optim.Adam(model.parameters(), lr=learning_rate)
        loss = trn.build_mse_loss(['delta'])
        metrics = [spk.metrics.MeanSquaredError('delta')]

        # Training hooks: CSV log, learning-rate decay and, optionally, a timeout
        if patience is None:
            patience = num_epochs // 8
        csv_hook = trn.CSVHook(log_path=td, metrics=metrics)
        lr_hook = trn.ReduceLROnPlateauHook(opt,
                                            patience=patience,
                                            factor=0.8,
                                            min_lr=1e-6,
                                            stop_after_min=True)
        hooks = [csv_hook, lr_hook]
        if timeout is not None:
            hooks.append(TimeoutHook(timeout))

        # An interval larger than the number of epochs turns off checkpointing
        trainer = trn.Trainer(model_path=td,
                              model=model,
                              hooks=hooks,
                              loss_fn=loss,
                              optimizer=opt,
                              train_loader=train_loader,
                              validation_loader=valid_loader,
                              checkpoint_interval=num_epochs + 1)
        trainer.train(device, n_epochs=num_epochs)

        # Continue with the best model saved in the temporary directory
        model = torch.load(os.path.join(td, 'best_model'))

        # If desired, evaluate the model on the hold-out set
        test_pred = None if test_set is None else evaluate_schnet(
            [model],
            test_set,
            property_name=property_name,
            batch_size=batch_size,
            device=device)

        # Move the model off of the GPU to save memory
        if 'cuda' in device:
            model.to('cpu')

        # Collect the training log before the directory is removed
        train_results = pd.read_csv(os.path.join(td, 'log.csv'))

        message = TorchMessage(model)
        if test_pred is None:
            return message, train_results
        return message, train_results, test_pred[:, 0].tolist()
Example #29
0
def read(filename, index=-1, format=None):
    """Load one or more Atoms objects from a file.

    filename: str
        File to read.  A trailing ``@<index>`` that *string2index* accepts
        is removed from the name and used as *index*.
    index: int or slice
        Selects the configuration(s) when the file contains several.  By
        default the last configuration is returned; index=n gives
        configuration number n (counting from zero).  A string is
        converted with *string2index*.  Readers that are called with the
        filename only never see this argument.
    format: str
        Name of the file-format.  When omitted, the *filetype* function
        is asked to guess it from *filename*.

    Known formats:

    =========================  ===========
    format                     short name
    =========================  ===========
    GPAW restart-file          gpw
    Dacapo netCDF output file  dacapo
    Old ASE netCDF trajectory  nc
    Virtual Nano Lab file      vnl
    ASE pickle trajectory      traj
    ASE bundle trajectory      bundle
    GPAW text output           gpaw-text
    CUBE file                  cube
    XCrySDen Structure File    xsf
    Dacapo text output         dacapo-text
    XYZ-file                   xyz
    VASP POSCAR/CONTCAR file   vasp
    VASP OUTCAR file           vasp_out
    SIESTA STRUCT file         struct_out
    ABINIT input file          abinit
    V_Sim ascii file           v_sim
    Protein Data Bank          pdb
    CIF-file                   cif
    FHI-aims geometry file     aims
    FHI-aims output file       aims_out
    VTK XML Image Data         vti
    VTK XML Structured Grid    vts
    VTK XML Unstructured Grid  vtu
    TURBOMOLE coord file       tmol
    TURBOMOLE gradient file    tmol-gradient
    exciting input             exi
    AtomEye configuration      cfg
    WIEN2k structure file      struct
    DftbPlus input file        dftb
    CASTEP geom file           cell
    CASTEP output file         castep
    CASTEP trajectory file     geom
    ETSF format                etsf.nc
    DFTBPlus GEN format        gen
    CMR db/cmr-file            db
    CMR db/cmr-file            cmr
    LAMMPS dump file           lammps
    =========================  ===========

    A format that matches none of the branches raises RuntimeError.
    """
    # NOTE(review): the branches below test 'castep_cell', 'castep_geom' and
    # 'etsf' where the table lists 'cell', 'geom' and 'etsf.nc'; 'vnl' and
    # 'cmr' have no branch here, while 'mol', 'iwm', 'Cmdft' and 'sdf' have
    # a branch but no table row -- confirm against filetype().

    # "name@index": the suffix is only split off when it parses as an index.
    if isinstance(filename, str):
        stem, at_sign, suffix = filename.rpartition('@')
        if at_sign:
            try:
                index = string2index(suffix)
            except ValueError:
                pass
            else:
                filename = stem

    if isinstance(index, str):
        index = string2index(index)

    if format is None:
        format = filetype(filename)

    # GPAW restart files are decoded here instead of by a reader function.
    if format.startswith('gpw'):
        import gpaw
        restart = gpaw.io.open(filename, 'r')
        positions = restart.get('CartesianPositions') * Bohr
        numbers = restart.get('AtomicNumbers')
        cell = restart.get('UnitCell') * Bohr
        pbc = restart.get('BoundaryConditions')
        tags = restart.get('Tags')
        magmoms = restart.get('MagneticMoments')
        energy = restart.get('PotentialEnergy') * Hartree

        # Forces are optional in the file
        forces = None
        if restart.has_array('CartesianForces'):
            forces = restart.get('CartesianForces') * Hartree / Bohr

        atoms = Atoms(positions=positions, numbers=numbers, cell=cell, pbc=pbc)
        if tags.any():
            atoms.set_tags(tags)

        # All-zero moments are neither set on the atoms nor passed on
        if magmoms.any():
            atoms.set_initial_magnetic_moments(magmoms)
        else:
            magmoms = None

        atoms.calc = SinglePointCalculator(energy, forces, None, magmoms,
                                           atoms)
        return atoms

    # Readers called as loader(filename, index).  Each module is imported
    # only when its format is requested.
    loader = None
    if format == 'castep':
        from ase.io.castep import read_castep as loader
    elif format == 'castep_cell':
        import ase.io.castep
        loader = ase.io.castep.read_cell
    elif format == 'castep_geom':
        import ase.io.castep
        loader = ase.io.castep.read_geom
    elif format == 'exi':
        from ase.io.exciting import read_exciting as loader
    elif format == 'xyz':
        from ase.io.xyz import read_xyz as loader
    elif format == 'traj':
        from ase.io.trajectory import read_trajectory as loader
    elif format == 'bundle':
        from ase.io.bundletrajectory import read_bundletrajectory as loader
    elif format == 'cube':
        from ase.io.cube import read_cube as loader
    elif format == 'nc':
        from ase.io.netcdf import read_netcdf as loader
    elif format == 'gpaw-text':
        from ase.io.gpawtext import read_gpaw_text as loader
    elif format == 'xsf':
        from ase.io.xsf import read_xsf as loader
    elif format == 'vasp_out':
        from ase.io.vasp import read_vasp_out as loader
    elif format == 'pdb':
        from ase.io.pdb import read_pdb as loader
    elif format == 'cif':
        from ase.io.cif import read_cif as loader
    elif format == 'aims_out':
        from ase.io.aims import read_aims_output as loader
    elif format == 'db':
        from ase.io.cmr_io import read_db as loader
    elif format == 'lammps':
        from ase.io.lammps import read_lammps_dump as loader

    if loader is not None:
        return loader(filename, index)

    # Readers called with the filename only.
    if format == 'dacapo-text':
        from ase.io.dacapo import read_dacapo_text as loader
    elif format == 'dacapo':
        from ase.io.dacapo import read_dacapo as loader
    elif format == 'vasp':
        from ase.io.vasp import read_vasp as loader
    elif format == 'abinit':
        from ase.io.abinit import read_abinit as loader
    elif format == 'v_sim':
        from ase.io.v_sim import read_v_sim as loader
    elif format == 'mol':
        from ase.io.mol import read_mol as loader
    elif format == 'struct':
        from ase.io.wien2k import read_struct as loader
    elif format == 'struct_out':
        from ase.io.siesta import read_struct as loader
    elif format == 'vti':
        from ase.io.vtkxml import read_vti as loader
    elif format == 'vts':
        from ase.io.vtkxml import read_vts as loader
    elif format == 'vtu':
        from ase.io.vtkxml import read_vtu as loader
    elif format == 'aims':
        from ase.io.aims import read_aims as loader
    elif format == 'iwm':
        from ase.io.iwm import read_iwm as loader
    elif format == 'Cmdft':
        from ase.io.cmdft import read_I_info as loader
    elif format == 'tmol':
        from ase.io.turbomole import read_turbomole as loader
    elif format == 'tmol-gradient':
        from ase.io.turbomole import read_turbomole_gradient as loader
    elif format == 'cfg':
        from ase.io.cfg import read_cfg as loader
    elif format == 'dftb':
        from ase.io.dftb import read_dftb as loader
    elif format == 'sdf':
        from ase.io.sdf import read_sdf as loader
    elif format == 'etsf':
        # The ETSF reader is an object, not a function
        from ase.io.etsf import ETSFReader
        return ETSFReader(filename).read_atoms()
    elif format == 'gen':
        from ase.io.gen import read_gen as loader
    else:
        raise RuntimeError('File format descriptor ' + format +
                           ' not recognized!')

    return loader(filename)
Example #30
0
def read(filename, index=-1, format=None):
    """Load one or more Atoms objects from a file.

    filename: str
        File to read.  A trailing ``@<index>`` that *string2index* accepts
        is removed from the name and used as *index*.
    index: int or slice
        Selects the configuration(s) when the file contains several.  By
        default the last configuration is returned; index=n gives
        configuration number n (counting from zero).  A string is
        converted with *string2index*.  Readers that are called with the
        filename only never see this argument.
    format: str
        Name of the file-format.  When omitted, the *filetype* function
        is asked to guess it from *filename*.

    Known formats:

    =========================  ===========
    format                     short name
    =========================  ===========
    GPAW restart-file          gpw
    Dacapo netCDF output file  dacapo
    Old ASE netCDF trajectory  nc
    Virtual Nano Lab file      vnl
    ASE pickle trajectory      traj
    ASE bundle trajectory      bundle
    GPAW text output           gpaw-text
    CUBE file                  cube
    XCrySDen Structure File    xsf
    Dacapo text output         dacapo-text
    XYZ-file                   xyz
    VASP POSCAR/CONTCAR file   vasp
    VASP OUTCAR file           vasp_out
    SIESTA STRUCT file         struct_out
    ABINIT input file          abinit
    V_Sim ascii file           v_sim
    Protein Data Bank          pdb
    CIF-file                   cif
    FHI-aims geometry file     aims
    FHI-aims output file       aims_out
    VTK XML Image Data         vti
    VTK XML Structured Grid    vts
    VTK XML Unstructured Grid  vtu
    TURBOMOLE coord file       tmol
    TURBOMOLE gradient file    tmol-gradient
    exciting input             exi
    AtomEye configuration      cfg
    WIEN2k structure file      struct
    DftbPlus input file        dftb
    CASTEP geom file           cell
    CASTEP output file         castep
    CASTEP trajectory file     geom
    ETSF format                etsf.nc
    DFTBPlus GEN format        gen
    CMR db/cmr-file            db
    CMR db/cmr-file            cmr
    LAMMPS dump file           lammps
    =========================  ===========

    A format that matches none of the branches raises RuntimeError.
    """
    # NOTE(review): the branches below test 'castep_cell', 'castep_geom' and
    # 'etsf' where the table lists 'cell', 'geom' and 'etsf.nc'; 'vnl' and
    # 'cmr' have no branch here, while 'mol', 'iwm', 'Cmdft' and 'sdf' have
    # a branch but no table row -- confirm against filetype().

    # "name@index": the suffix is only split off when it parses as an index.
    if isinstance(filename, str):
        stem, at_sign, suffix = filename.rpartition('@')
        if at_sign:
            try:
                index = string2index(suffix)
            except ValueError:
                pass
            else:
                filename = stem

    if isinstance(index, str):
        index = string2index(index)

    if format is None:
        format = filetype(filename)

    # GPAW restart files are decoded here instead of by a reader function.
    if format.startswith('gpw'):
        import gpaw
        restart = gpaw.io.open(filename, 'r')
        positions = restart.get('CartesianPositions') * Bohr
        numbers = restart.get('AtomicNumbers')
        cell = restart.get('UnitCell') * Bohr
        pbc = restart.get('BoundaryConditions')
        tags = restart.get('Tags')
        magmoms = restart.get('MagneticMoments')
        energy = restart.get('PotentialEnergy') * Hartree

        # Forces are optional in the file
        forces = None
        if restart.has_array('CartesianForces'):
            forces = restart.get('CartesianForces') * Hartree / Bohr

        atoms = Atoms(positions=positions, numbers=numbers, cell=cell, pbc=pbc)
        if tags.any():
            atoms.set_tags(tags)

        # All-zero moments are neither set on the atoms nor passed on
        if magmoms.any():
            atoms.set_initial_magnetic_moments(magmoms)
        else:
            magmoms = None

        atoms.calc = SinglePointCalculator(energy, forces, None, magmoms,
                                           atoms)
        return atoms

    # Readers called as loader(filename, index).  Each module is imported
    # only when its format is requested.
    loader = None
    if format == 'castep':
        from ase.io.castep import read_castep as loader
    elif format == 'castep_cell':
        import ase.io.castep
        loader = ase.io.castep.read_cell
    elif format == 'castep_geom':
        import ase.io.castep
        loader = ase.io.castep.read_geom
    elif format == 'exi':
        from ase.io.exciting import read_exciting as loader
    elif format == 'xyz':
        from ase.io.xyz import read_xyz as loader
    elif format == 'traj':
        from ase.io.trajectory import read_trajectory as loader
    elif format == 'bundle':
        from ase.io.bundletrajectory import read_bundletrajectory as loader
    elif format == 'cube':
        from ase.io.cube import read_cube as loader
    elif format == 'nc':
        from ase.io.netcdf import read_netcdf as loader
    elif format == 'gpaw-text':
        from ase.io.gpawtext import read_gpaw_text as loader
    elif format == 'xsf':
        from ase.io.xsf import read_xsf as loader
    elif format == 'vasp_out':
        from ase.io.vasp import read_vasp_out as loader
    elif format == 'pdb':
        from ase.io.pdb import read_pdb as loader
    elif format == 'cif':
        from ase.io.cif import read_cif as loader
    elif format == 'aims_out':
        from ase.io.aims import read_aims_output as loader
    elif format == 'db':
        from ase.io.cmr_io import read_db as loader
    elif format == 'lammps':
        from ase.io.lammps import read_lammps_dump as loader

    if loader is not None:
        return loader(filename, index)

    # Readers called with the filename only.
    if format == 'dacapo-text':
        from ase.io.dacapo import read_dacapo_text as loader
    elif format == 'dacapo':
        from ase.io.dacapo import read_dacapo as loader
    elif format == 'vasp':
        from ase.io.vasp import read_vasp as loader
    elif format == 'abinit':
        from ase.io.abinit import read_abinit as loader
    elif format == 'v_sim':
        from ase.io.v_sim import read_v_sim as loader
    elif format == 'mol':
        from ase.io.mol import read_mol as loader
    elif format == 'struct':
        from ase.io.wien2k import read_struct as loader
    elif format == 'struct_out':
        from ase.io.siesta import read_struct as loader
    elif format == 'vti':
        from ase.io.vtkxml import read_vti as loader
    elif format == 'vts':
        from ase.io.vtkxml import read_vts as loader
    elif format == 'vtu':
        from ase.io.vtkxml import read_vtu as loader
    elif format == 'aims':
        from ase.io.aims import read_aims as loader
    elif format == 'iwm':
        from ase.io.iwm import read_iwm as loader
    elif format == 'Cmdft':
        from ase.io.cmdft import read_I_info as loader
    elif format == 'tmol':
        from ase.io.turbomole import read_turbomole as loader
    elif format == 'tmol-gradient':
        from ase.io.turbomole import read_turbomole_gradient as loader
    elif format == 'cfg':
        from ase.io.cfg import read_cfg as loader
    elif format == 'dftb':
        from ase.io.dftb import read_dftb as loader
    elif format == 'sdf':
        from ase.io.sdf import read_sdf as loader
    elif format == 'etsf':
        # The ETSF reader is an object, not a function
        from ase.io.etsf import ETSFReader
        return ETSFReader(filename).read_atoms()
    elif format == 'gen':
        from ase.io.gen import read_gen as loader
    else:
        raise RuntimeError('File format descriptor ' + format +
                           ' not recognized!')

    return loader(filename)
Example #31
0
from ase.io.xyz import read_xyz

from gpaw import GPAW
from gpaw.mixer import Mixer
from gpaw import ConvergenceError
from gpaw.mpi import rank

from gpaw.eigensolvers.rmm_diis_old import RMM_DIIS

from gpaw import setup_paths

# Use setups from the $PWD and $PWD/.. first
setup_paths.insert(0, '.')
setup_paths.insert(0, '../')

# Run parameters
prefix = 'Au_cluster'
L = 32.0  # edge length handed to set_cell for all three cell vectors
r = [1, 1, 1]  # repetition counts handed to atoms.repeat
n = [240 * ri for ri in r]

# Read the cluster, place it in the middle of a periodic L x L x L cell,
# then replicate the cell r times along each axis
atoms = read_xyz('../Au102_revised.xyz')
atoms.set_cell((L, L, L), scale_atoms=False)
atoms.center()
atoms.set_pbc(1)
atoms = atoms.repeat(r)

# nbands (>=1683) is the number of bands per cluster
nbands = 3 * 6 * 6 * 16  # 1728
# scale the band count by the number of repeated cells
for ri in r:
    nbands *= ri

mixer = Mixer(beta=0.1, nmaxold=5, weight=100.0)
# the next three lines decrease memory usage
es = RMM_DIIS(keep_htpsit=False)
Example #32
0
natoms = 0

if(num < 2):
    parser.print_help()
else:
    # >>>>>>>>>>>>>>>>>>>>> READ GEOMETRY <<<<<<<<<<<<<<<<<<<<
    if(iformat == "geometry.in"):
        atoms = read_aims(ifile)
#    elif(iformat == "cube"):
#        atoms = read_cube(sys.argv[num-1])
#    elif(iformat == "xsf"):
#        atoms = read_xsf(sys.argv[num-1],read_data=True)
    elif(iformat == "POSCAR"):
        atoms = read_vasp(ifile)
    elif(iformat == "xyz"):
        atoms           = read_xyz(ifile)
        cfile           = options.xyzcell
        ThereIsCellFile = os.path.isfile(cfile)
        if(ThereIsCellFile):
            cell = [[],[],[]]
            f    = open(cfile, "r")
            ls   = f.read().splitlines()
            for i in range(3):
                l = ls[i].split()
                cell[i] = [float(l[0]), float(l[1]), float(l[2])]
            atoms.set_cell(cell)
            atoms.set_pbc([True,True,True])


    if(options.vector == True):
        a = options.atoms