Ejemplo n.º 1
0
def set_multiplicity(mol, multiplicity, charge, radical_map=None):
    """
    Set the multiplicity of `mol` to `multiplicity` and adjust radicals / lone pairs as needed.

    Args:
        mol: The molecule to modify in place. It must expose `multiplicity`, `atoms`,
             `getRadicalCount()`, `toSMILES()` and `toAdjacencyList()`.
        multiplicity (int): The spin multiplicity to assign.
        charge (int): The net charge. A non-zero charge downgrades the final
                      radicals/multiplicity parity mismatch from an error to a warning.
        radical_map (Molecule, optional): A Molecule with the same atom order as `mol`; if given,
                                          it is passed to `set_radicals_by_map()` to set the radicals
                                          (useful if bond orders aren't known for `mol`).

    Raises:
        TypeError: If `radical_map` is given but is not a `Molecule`.
        SpeciesError: If too few radicals can be identified for the requested multiplicity, or if the
                      parity of the radical count contradicts the multiplicity of a neutral species.
    """
    mol.multiplicity = multiplicity
    if radical_map is not None:
        if not isinstance(radical_map, Molecule):
            raise TypeError(
                'radical_map sent to set_multiplicity() has to be a Molecule object. Got {0}'
                .format(type(radical_map)))
        set_radicals_by_map(mol, radical_map)
    radicals = mol.getRadicalCount()
    if mol.multiplicity != radicals + 1:
        # This is not the trivial "multiplicity = number of radicals + 1" case.
        # Either the number of radicals was not identified correctly from the 3D structure
        # (i.e., they should be lone pairs), or their spin isn't determined correctly.
        if mol.multiplicity > radicals + 1:
            # There are sites that should have radicals, but weren't identified as such.
            # Try adding radicals according to missing valences.
            add_rads_by_atom_valance(mol)
            # Re-count after the repair attempt; comparing against the stale count would make
            # the check below identical to the enclosing one, so it would always raise.
            radicals = mol.getRadicalCount()
            if mol.multiplicity > radicals + 1:
                # Still problematic; there's currently no automated solution to this case.
                raise SpeciesError(
                    'A multiplicity of {0} was given, but only {1} radicals were identified. '
                    'Cannot infer 2D graph representation for this species.\nMore info:{2}\n{3}'
                    .format(mol.multiplicity, radicals, mol.toSMILES(),
                            mol.toAdjacencyList()))
        if len(mol.atoms) == 1 and mol.multiplicity == 1 \
                and mol.atoms[0].radicalElectrons == 4:
            # This is a singlet atomic C or Si.
            mol.atoms[0].radicalElectrons = 0
            mol.atoms[0].lonePairs = 2
        if mol.multiplicity < radicals + 1:
            # Make sure all carbene and nitrene sites, if they exist, have a lone pair
            # rather than two unpaired electrons.
            for atom in mol.atoms:
                if atom.radicalElectrons == 2:
                    atom.radicalElectrons = 0
                    atom.lonePairs += 1
    # Final check: an even number of radicals results in an odd multiplicity, and vice versa.
    # The conversions above only remove unpaired electrons in even amounts (4 or 2 per atom),
    # so `radicals` still carries the right parity here.
    if mol.multiplicity % 2 == radicals % 2:
        if not charge:
            raise SpeciesError(
                'Number of radicals ({0}) and multiplicity ({1}) for {2} do not match.\n{3}'
                .format(radicals, mol.multiplicity, mol.toSMILES(),
                        mol.toAdjacencyList()))
        else:
            logging.warning(
                'Number of radicals ({0}) and multiplicity ({1}) for {2} do not match. It might be OK since '
                'this species is charged and charged molecules are currently not percieved well in ARC.'
                '\n{3}'.format(radicals, mol.multiplicity, mol.toSMILES(),
                               mol.toAdjacencyList()))
Ejemplo n.º 2
0
def molecules_from_xyz(xyz, multiplicity=None, charge=0):
    """
    Create RMG Molecule objects from an xyz string, keeping the xyz atom order.

    The approach is based on the MolGraph.perceive_smiles method.

    Args:
        xyz (str): The coordinates in string format. `None` short-circuits to `(None, None)`.
        multiplicity (int, optional): If given, the bond-order molecule is set to this multiplicity.
        charge (int, optional): The net charge, forwarded to `set_multiplicity()`.

    Returns:
        tuple: `(s_mol, b_mol)` where `s_mol` has only single bonds and `b_mol` carries the best guess
               for bond orders. `b_mol` is None if bond orders could not be inferred; both entries are
               None if no 2D graph could be built or the requested multiplicity could not be applied.

    Raises:
        SpeciesError: If `xyz` is neither None nor a string.
    """
    if xyz is None:
        return None, None
    if not isinstance(xyz, (str, unicode)):
        raise SpeciesError('xyz must be a string format, got: {0}'.format(type(xyz)))

    xyz = standardize_xyz_string(xyz)
    coordinates, elements, _, _, _ = get_xyz_matrix(xyz)
    graph = MolGraph(symbols=elements, coords=coordinates)
    if graph.infer_connections():
        # An RMG Molecule with single bonds, atom order corresponds to xyz
        raw_s_mol = graph.to_rmg_mol()
    else:
        raw_s_mol, _ = s_bonds_mol_from_xyz(xyz)
    if raw_s_mol is None:
        logging.error('Could not create a 2D graph representation from xyz:\n{0}'.format(xyz))
        return None, None

    s_mol = update_molecule(raw_s_mol, to_single_bonds=True)

    pybel_mol = xyz_to_pybel_mol(xyz)
    if pybel_mol is None:
        return s_mol, None

    # An RMG Molecule with bond orders, but without preserved atom order
    b_mol = rmg_mol_from_inchi(pybel_to_inchi(pybel_mol))
    if b_mol is None:
        return s_mol, None

    failure_message = 'Cannot infer 2D graph connectivity, failed to set species multiplicity with the ' \
                      'following error:\n{0}'
    if multiplicity is not None:
        try:
            set_multiplicity(b_mol, multiplicity, charge)
        except SpeciesError as e:
            logging.warning(failure_message.format(e.message))
            return None, None

    s_mol.multiplicity = b_mol.multiplicity
    order_atoms(ref_mol=s_mol, mol=b_mol)
    try:
        # use the (now ordered) bond-order molecule as the radical map for the single-bond molecule
        set_multiplicity(s_mol, b_mol.multiplicity, charge, radical_map=b_mol)
    except SpeciesError as e:
        logging.warning(failure_message.format(e.message))
        return s_mol, None
    return s_mol, b_mol
Ejemplo n.º 3
0
def xyz_to_pybel_mol(xyz):
    """
    Build an Open Babel (pybel) molecule object from xyz given in string format.

    Returns None if reading the xyz raises an IOError.
    Raises a SpeciesError if `xyz` is not a string.
    """
    if isinstance(xyz, (str, unicode)):
        try:
            return pybel.readstring('xyz', xyz_string_to_xyz_file_format(xyz))
        except IOError:
            return None
    raise SpeciesError('xyz must be a string format, got: {0}'.format(type(xyz)))
Ejemplo n.º 4
0
def s_bonds_mol_from_xyz(xyz):
    """
    Create a single bonded molecule from xyz using RMG's connectTheDots().

    Args:
        xyz (str): The coordinates, one atom per line as `symbol x y z`. Empty and
                   whitespace-only lines (e.g., trailing blanks or a stray '\\r') are skipped.

    Returns:
        tuple: `(mol, coordinates)` where `mol` is the Molecule with one Atom per parsed line and
               `coordinates` is a list of `[x, y, z]` float lists in the same order.

    Raises:
        SpeciesError: If `xyz` is not a string.
    """
    if not isinstance(xyz, (str, unicode)):
        raise SpeciesError('xyz must be a string format, got: {0}'.format(type(xyz)))
    mol = Molecule()
    coordinates = list()
    for line in xyz.split('\n'):
        tokens = line.split()
        if not tokens:
            # nothing but whitespace on this line; indexing tokens[0] would raise an IndexError
            continue
        atom = Atom(element=str(tokens[0]))
        coordinates.append([float(tokens[1]), float(tokens[2]), float(tokens[3])])
        atom.coords = np.array(coordinates[-1], np.float64)
        mol.addAtom(atom)
    mol.connectTheDots()  # only adds single bonds, but we don't care
    return mol, coordinates
Ejemplo n.º 5
0
def molecules_from_xyz(xyz):
    """
    Create RMG Molecule objects from an xyz string, keeping the xyz atom order.

    The approach is based on the MolGraph.perceive_smiles method.

    Args:
        xyz (str): The coordinates in string format. `None` short-circuits to `(None, None)`.

    Returns:
        tuple: `(s_mol, b_mol)` where `s_mol` has only single bonds and `b_mol` carries the best guess
               for bond orders (None if bond orders could not be inferred). Both entries are None if
               no 2D graph could be built.

    Raises:
        SpeciesError: If `xyz` is neither None nor a string.
    """
    if xyz is None:
        return None, None
    if not isinstance(xyz, (str, unicode)):
        raise SpeciesError('xyz must be a string format, got: {0}'.format(type(xyz)))

    xyz = standardize_xyz_string(xyz)
    coordinates, elements, _, _, _ = get_xyz_matrix(xyz)
    graph = MolGraph(symbols=elements, coords=coordinates)
    if graph.infer_connections():
        # An RMG Molecule with single bonds, atom order corresponds to xyz
        raw_s_mol = graph.to_rmg_mol()
    else:
        raw_s_mol, _ = s_bonds_mol_from_xyz(xyz)
    if raw_s_mol is None:
        logging.error('Could not create a 2D graph representation from xyz:\n{0}'.format(xyz))
        return None, None

    s_mol = update_molecule(raw_s_mol, to_single_bonds=True)

    pybel_mol = xyz_to_pybel_mol(xyz)
    if pybel_mol is None:
        b_mol = None
    else:
        # An RMG Molecule with bond orders, but without preserved atom order
        b_mol = rmg_mol_from_inchi(pybel_to_inchi(pybel_mol))

    # NOTE(review): order_atoms() is called even when b_mol is None; this relies on
    # order_atoms() tolerating `mol=None` - confirm against its implementation.
    order_atoms(ref_mol=s_mol, mol=b_mol)
    return s_mol, b_mol
Ejemplo n.º 6
0
def set_rdkit_dihedrals(conf, rd_mol, index_map, rd_scan, deg_increment=None, deg_abs=None):
    """
    A helper function for setting a dihedral angle on an RDKit conformer.

    `conf` is the RDKit conformer with the current xyz information
    `rd_mol` is the RDKit molecule
    `index_map` is an atom index mapping dictionary, keys are xyz_index, values are rd_index
    `rd_scan` is the torsion scan atom indices corresponding to the RDKit conformer indices
    Either `deg_increment` (relative to the current dihedral) or `deg_abs` (absolute value) must be
    specified; `deg_increment` takes precedence if both are given.
    Returns xyz in an array format (a list of [x, y, z] lists) ordered according to the map,
    the elements in the xyz should be identified by the calling function from the context.
    Raises a SpeciesError if neither `deg_increment` nor `deg_abs` is given.
    """
    if deg_increment is None and deg_abs is None:
        raise SpeciesError('Cannot set dihedral without either a degree increment or an absolute degree')

    a1, a2, a3, a4 = rd_scan[0], rd_scan[1], rd_scan[2], rd_scan[3]
    if deg_increment is None:
        target_deg = deg_abs
    else:
        # offset the increment from the dihedral currently stored in the conformer
        target_deg = rdMT.GetDihedralDeg(conf, a1, a2, a3, a4) + deg_increment
    rdMT.SetDihedralDeg(conf, a1, a2, a3, a4, target_deg)

    positions = (conf.GetAtomPosition(index_map[i]) for i in range(rd_mol.GetNumAtoms()))
    return [[position.x, position.y, position.z] for position in positions]
Ejemplo n.º 7
0
 def _check_job_ess_status(self):
     """
     Check the status of the job ran by the electronic structure software (ESS)
     by parsing its output file (Gaussian, Q-Chem and Molpro are handled).

     Returns one of the following strings, as produced by the code below:
     `done`, `unconverged`, `unconverged SCF`, or `errored: {error type / message}`.
     If `self.software` is none of the three handled packages, nothing is returned (None).

     Raises a SpeciesError if a Q-Chem output reports an invalid charge/multiplicity combination.
     """
     # Discard local copies of the output and orbitals files before (re)reading the output
     if os.path.exists(self.local_path_to_output_file):
         os.remove(self.local_path_to_output_file)
     if os.path.exists(self.local_path_to_orbitals_file):
         os.remove(self.local_path_to_orbitals_file)
     if self.ess_settings['ssh']:
         # presumably fetches a fresh copy of the output file to `local_path_to_output_file`
         self._download_output_file()
     # NOTE(review): if `ess_settings['ssh']` is falsy, the output file was just deleted (or never
     # existed), so the open() below fails unless something else recreates it - confirm intent.
     with open(self.local_path_to_output_file, 'rb') as f:
         lines = f.readlines()
         if self.software == 'gaussian':
             # Look for the normal termination message in the last 19 lines of the file;
             # the `else` clause of the loop only runs if it was not found
             for line in lines[-1:-20:-1]:
                 if 'Normal termination of Gaussian' in line:
                     break
             else:
                 # Scan from the end of the file for the last line that looks like an error
                 for line in lines[::-1]:
                     if 'Error' in line or 'NtrErr' in line or 'Erroneous' in line or 'malloc' in line\
                             or 'galloc' in line:
                         reason = ''
                         if 'l9999.exe' in line or 'l103.exe' in line:
                             return 'unconverged'
                         elif 'l502.exe' in line:
                             return 'unconverged SCF'
                         # NOTE(review): unreachable - 'l103.exe' is already caught by the first
                         # branch above, so this status is never returned
                         elif 'l103.exe' in line:
                             return 'l103 internal coordinate error'
                         elif 'Erroneous write' in line or 'Write error in NtrExt1' in line:
                             reason = 'Ran out of disk space.'
                         elif 'l716.exe' in line:
                             reason = 'Angle in z-matrix outside the allowed range 0 < x < 180.'
                         elif 'l301.exe' in line:
                             reason = 'Input Error. Either charge, multiplicity, or basis set was not specified ' \
                                      'correctly. Or, an atom specified does not match any standard atomic symbol.'
                         elif 'NtrErr Called from FileIO' in line:
                             reason = 'Operation on .chk file was specified, but .chk was not found.'
                         elif 'l101.exe' in line:
                             reason = 'Input Error. The blank line after the coordinate section is missing, ' \
                                      'or charge/multiplicity was not specified correctly.'
                         elif 'l202.exe' in line:
                             reason = 'During the optimization process, either the standard orientation ' \
                                      'or the point group of the molecule has changed.'
                         elif 'l401.exe' in line:
                             reason = 'The projection from the old to the new basis set has failed.'
                         elif 'malloc failed' in line or 'galloc' in line:
                             reason = 'Memory allocation failed (did you ask for too much?)'
                         elif 'A SYNTAX ERROR WAS DETECTED' in line:
                             reason = 'Check .inp carefully for syntax errors in keywords.'
                         # `reason` stays empty if the error line matched none of the known cases
                         return 'errored: {0}; {1}'.format(line, reason)
                 return 'errored: Unknown reason'
             return 'done'
         elif self.software == 'qchem':
             done = False
             error_message = ''
             # Scan from the end of the file; the sign-off line is therefore seen before
             # the optimization messages that precede it in the file
             for line in lines[::-1]:
                 if 'Thank you very much for using Q-Chem' in line:
                     done = True
                 elif 'SCF failed' in line:
                     return 'errored: {0}'.format(line)
                 elif 'error' in line and 'DIIS' not in line:
                     # these are *normal* lines: "SCF converges when DIIS error is below 1.0E-08", or
                     # "Cycle       Energy         DIIS Error"
                     # (the scan continues, so the message kept is the earliest such line in the file)
                     error_message = line
                 elif 'Invalid charge/multiplicity combination' in line:
                     raise SpeciesError('The multiplicity and charge combination for species {0} are wrong.'.format(
                         self.species_name))
                 if 'opt' in self.job_type or 'conformer' in self.job_type or 'ts' in self.job_type:
                     if 'MAXIMUM OPTIMIZATION CYCLES REACHED' in line:
                         return 'errored: unconverged, max opt cycles reached'
                     elif 'OPTIMIZATION CONVERGED' in line and done:  # `done` should already be assigned
                         return 'done'
             if done:
                 return 'done'
             else:
                 if error_message:
                     return 'errored: ' + error_message
                 else:
                     return 'errored: Unknown reason'
         elif self.software == 'molpro':
             # First pass (from the end of the file): termination, convergence and memory messages
             for line in lines[::-1]:
                 if 'molpro calculation terminated' in line.lower()\
                         or 'variable memory released' in line.lower():
                     return 'done'
                 elif 'No convergence' in line:
                     return 'unconverged'
                 elif 'A further' in line and 'Mwords of memory are needed' in line and 'Increase memory to' in line:
                     # e.g.: `A further 246.03 Mwords of memory are needed for the triples to run. Increase memory to 996.31 Mwords.`
                     # the third whitespace-separated token is the additional amount of memory
                     return 'errored: additional memory (mW) required: {0}'.format(line.split()[2])
                 elif 'insufficient memory available - require' in line:
                     # e.g.: `insufficient memory available - require              228765625  have
                     #        62928590
                     #        the request was for real words`
                     # add_mem = (float(line.split()[-2]) - float(prev_line.split()[0])) / 1e6
                     return 'errored: additional memory (mW) required: {0}'.format(float(line.split()[-2]) / 1e6)
             # Second pass: fall back to a generic problem description, if one was printed
             for line in lines[::-1]:
                 if 'the problem occurs' in line:
                     return 'errored: ' + line
             return 'errored: Unknown reason'
Ejemplo n.º 8
0
# Gaussian link markers that may appear on an error "termination" line, checked in this order.
# Each entry is (markers, keywords, error description); GL stands for Gaussian Link.
# GL301 and GL401 get their final keywords/description from the line preceding the termination line.
_GAUSSIAN_LINK_ERRORS = (
    (('l9999.exe', 'link 9999'), ('Unconverged', 'GL9999'), 'Unconverged'),
    (('l101.exe',), ('InputError', 'GL101'),
     'The blank line after the coordinate section is missing, '
     'or charge/multiplicity was not specified correctly.'),
    (('l103.exe',), ('InternalCoordinateError', 'GL103'), 'Internal coordinate error'),
    (('l108.exe',), ('InputError', 'GL108'),
     'There are two blank lines between z-matrix and '
     'the variables, expected only one.'),
    (('l202.exe',), ('OptOrientation', 'GL202'),
     'During the optimization process, either the standard '
     'orientation or the point group of the molecule has changed.'),
    (('l301.exe',), ('GL301',), ''),
    (('l401.exe',), ('GL401',), ''),
    (('l502.exe',), ('SCF', 'GL502'), 'Unconverged SCF.'),
    (('l716.exe',), ('ZMat', 'GL716'), 'Angle in z-matrix outside the allowed range 0 < x < 180.'),
    (('l906.exe',), ('MP2', 'GL906'),
     'The MP2 calculation has failed. It may be related to pseudopotential. '
     'Basis sets (CEP-121G*) that are used with polarization functions, '
     'where no polarization functions actually exist.'),
    (('l913.exe',), ('MaxOptCycles', 'GL913'), 'Maximum optimization cycles reached.'),
)


def _determine_gaussian_status(lines):
    """Parse the lines of a Gaussian output file into a (status, keywords, error, line) tuple."""
    # A normal termination message must appear within the last 19 lines of the file
    for line in lines[-1:-20:-1]:
        if 'Normal termination' in line:
            return 'done', list(), '', ''

    keywords, error, line = list(), '', ''  # `line` must be bound even if the file is empty
    for i, line in enumerate(lines[::-1]):
        if 'termination' in line:
            for markers, link_keywords, link_error in _GAUSSIAN_LINK_ERRORS:
                if any(marker in line for marker in markers):
                    keywords, error = list(link_keywords), link_error
                    break
            if 'GL301' in keywords or 'GL401' in keywords:
                # The line printed just before the termination line tells these cases apart.
                # `i` counts from the end of the file, so that line sits at len(lines) - i - 2;
                # guard against wrapping around to the end of the file with a negative index.
                previous_index = len(lines) - i - 2
                additional_info = lines[previous_index] if previous_index >= 0 else ''
                if 'No data on chk file' in additional_info \
                        or 'Basis set data is not on the checkpoint file' in additional_info:
                    keywords = ['CheckFile']
                    error = additional_info.rstrip()
                elif 'GL301' in keywords:
                    keywords.append('InputError')
                    error = 'Either charge, multiplicity, or basis set was not ' \
                            'specified correctly. Alternatively, a specified atom does not match any ' \
                            'standard atomic symbol.'
                else:
                    keywords.append('BasisSet')
                    error = 'The projection from the old to the new basis set has failed.'
        elif 'Erroneous write' in line or 'Write error in NtrExt1' in line:
            keywords = ['DiskSpace']
            error = 'Ran out of disk space.'
            line = ''
        elif 'NtrErr' in line:
            keywords = ['CheckFile']
            error = 'An operation on the check file was specified, but a .chk was not found or is incomplete.'
            line = ''
        elif 'malloc failed' in line or 'galloc' in line:
            keywords = ['Memory']
            error = 'Memory allocation failed (did you ask for too much?)'
            line = ''
        elif 'PGFIO/stdio: No such file or directory' in line:
            keywords = ['Scratch']
            error = 'Wrongly specified the scratch directory. Correct the "GAUSS_SCRDIR" ' \
                    'variable in the submit script, it should point to an existing directory. ' \
                    'Make sure to add "mkdir -p $GAUSS_SCRDIR" to your submit script.'
            line = ''
        if 'a syntax error was detected' in line.lower():
            keywords = ['Syntax']
            error = 'There was a syntax error in the Gaussian input file. Check your Gaussian input file ' \
                    'template under arc/job/inputs.py. Alternatively, perhaps the level of theory is not ' \
                    'supported by Gaussian in the format it was given.'
            line = ''
        if keywords:
            break
    error = error or 'Gaussian job terminated for an unknown reason. ' \
                     'It is possible there was a server node failure.'
    keywords = keywords or ['Unknown']
    return 'errored', keywords, error, line


def _determine_qchem_status(lines, species_label, job_type):
    """Parse the lines of a Q-Chem output file into a (status, keywords, error, line) tuple."""
    keywords, error, line = list(), '', ''  # `line` must be bound even if the file is empty
    done = False
    terminated_normally = False  # whether the Q-Chem sign-off line was seen (it is at the file's end)
    optimizes_geometry = 'opt' in job_type or 'conformer' in job_type or 'ts' in job_type
    for line in lines[::-1]:
        if 'Thank you very much for using Q-Chem' in line:
            terminated_normally = True
            # If this is an opt job, we must also check that the optimization itself converged
            # (i.e., the max number of cycles hasn't been reached), so don't mark it as done yet.
            if 'opt' not in job_type:
                done = True
                break
        elif 'SCF failed' in line:
            keywords = ['SCF']
            error = 'SCF failed'
            break
        elif 'error' in line and 'DIIS' not in line:
            # these are **normal** lines that we should not capture:
            # "SCF converges when DIIS error is below 1.0E-08", or
            # "Cycle       Energy         DIIS Error"
            keywords = ['SCF', 'DIIS']
            error = 'SCF failed'
            break
        elif 'Invalid charge/multiplicity combination' in line:
            raise SpeciesError(
                'The multiplicity and charge combination for species {0} are wrong.'
                .format(species_label))
        if optimizes_geometry:
            # The scan runs backwards, so the first optimizer verdict found is the most recent one;
            # stop there so that older lines cannot override it.
            if 'MAXIMUM OPTIMIZATION CYCLES REACHED' in line:
                keywords = ['MaxOptCycles']
                error = 'Maximum optimization cycles reached.'
                break
            elif 'OPTIMIZATION CONVERGED' in line and terminated_normally:
                done = True
                break
    if done:
        return 'done', keywords, '', ''
    error = error or 'QChem job terminated for an unknown reason.'
    keywords = keywords or ['Unknown']
    return 'errored', keywords, error, line


def _determine_molpro_status(lines):
    """Parse the lines of a Molpro output file into a (status, keywords, error, line) tuple."""
    keywords, error, line = list(), '', ''  # `line` must be bound even if the file is empty
    for line in lines[::-1]:
        lowered = line.lower()
        if 'molpro calculation terminated' in lowered or 'variable memory released' in lowered:
            return 'done', list(), '', ''
        elif 'No convergence' in line:
            keywords = ['Unconverged']
            error = 'Unconverged'
            break
        elif 'A further' in line and 'Mwords of memory are needed' in line and 'Increase memory to' in line:
            # e.g.: `A further 246.03 Mwords of memory are needed for the triples to run.
            # Increase memory to 996.31 Mwords.` (w/o the line break)
            keywords = ['Memory']
            error = 'Additional memory required: {0} MW'.format(line.split()[2])
            break
        elif 'insufficient memory available - require' in line:
            # e.g.: `insufficient memory available - require              228765625  have
            #        62928590
            #        the request was for real words`
            # add_mem = (float(line.split()[-2]) - float(prev_line.split()[0])) / 1e6
            keywords = ['Memory']
            error = 'Additional memory required: {0} MW'.format(float(line.split()[-2]) / 1e6)
            break
        elif 'the problem occurs' in line:
            keywords = ['Unknown']
            error = 'Unknown'
            break
    error = error or 'Molpro job terminated for an unknown reason.'
    keywords = keywords or ['Unknown']
    return 'errored', keywords, error, line


def determine_ess_status(output_path, species_label, job_type, software=None):
    """
    Determine the reason that caused an ESS job to crash, assign error keywords for troubleshooting.

    Args:
        output_path (str): The path to the ESS output file.
        species_label (str): The species label.
        job_type (str): The job type (e.g., 'opt', 'freq', 'ts', 'sp').
        software (str, optional): The ESS software ('gaussian', 'qchem', or 'molpro').
                                  Determined from the output file via `determine_ess()` if not given.

    Returns:
        tuple: A 4-tuple of
            - status (str): The status. Either 'done' or 'errored'.
            - keywords (list): The standardized error keywords.
            - error (str): A description of the error.
            - line (str): The parsed line from the ESS output file indicating the error.
        None is returned if `software` is none of the supported packages.

    Raises:
        SpeciesError: If a Q-Chem output reports an invalid charge/multiplicity combination.
    """
    if software is None:
        software = determine_ess(log_file=output_path)

    with open(output_path, 'r') as f:
        lines = f.readlines()

    if software == 'gaussian':
        return _determine_gaussian_status(lines)
    if software == 'qchem':
        return _determine_qchem_status(lines, species_label, job_type)
    if software == 'molpro':
        return _determine_molpro_status(lines)
    return None