def get_nb_elec_for_element(symbol: str, mendeleev: dict):
    """Returns the number of electrons for a specific element.

    Parameters
    ----------
    symbol : str
        The atom symbol of the element.
    mendeleev : dict
        Content of AlexGustafsson's Mendeleev Table YAML file, which can be found at https://github.com/AlexGustafsson/molecular-data.
        It is iterated over entry by entry, and each entry is read through its 'symbol' and 'number' keys.

    Returns
    -------
    nb_elec : int
        Number of electrons, i.e. the 'number' value of the first entry whose 'symbol' is equal to the given symbol.

    Raises
    ------
    AbinError
        If no entry of mendeleev matches the given symbol, or if the matching entry has an atomic number equal to 0.
    """

    # 0 doubles as the "not found" marker, since no real element has an atomic number of 0
    nb_elec = 0

    # Scan the mendeleev table and get the atomic number of our atom
    for element in mendeleev:
        if element['symbol'] == symbol:
            nb_elec = element['number']
            # No need to look at the remaining entries once our atom has been found
            break

    if nb_elec == 0:
        raise abin_errors.AbinError(
            "ERROR: There is no atomic number defined for %s in AlexGustafsson's Mendeleev Table YAML file (mendeleev.yml)"
            % symbol)

    return nb_elec
def chains_qchem_render(mendeleev: dict, clusters_cfg: dict, config: dict, file_data: dict, job_specs: dict, misc: dict):
    """Renders the job script and the input file associated with the Q-CHEM program in CHAINS.

    Parameters
    ----------
    mendeleev : dict
        Content of AlexGustafsson's Mendeleev Table YAML file (found at https://github.com/AlexGustafsson/molecular-data).
        Unused in this function.
    clusters_cfg : dict
        Content of the YAML clusters configuration file.
        Read as clusters_cfg[<cluster_name>]['profiles'][<profile>] to get the 'set_env' and 'command' values.
    config : dict
        Content of the YAML configuration file.
        Must contain a "general" block and a "qchem" block (itself containing a "keywords" block).
    file_data : dict
        Information extracted by the scanning function from the geometry file.
        Only the 'atomic_coordinates' key is used here.
    job_specs : dict
        Contains all information related to the job.
    misc : dict
        Contains all the additional variables that did not pertain to the other arguments.
        The 'config_name', 'templates_dir', 'mol_name' and 'code_dir' keys are used here.

    Returns
    -------
    rendered_content : dict
        Dictionary containing the text of all the rendered files in the form of <filename>: <rendered_content>.
    rendered_script : str
        Name of the rendered job script, necessary to launch the job.

    Raises
    ------
    AbinError
        If the "general", "qchem" or "keywords" block is missing (or empty) in the configuration file,
        if the "copy_files" or "benchmark" value is not a boolean,
        or if one of the required keys is missing in the configuration file, the clusters configuration file or the CHAINS configuration file.

    Notes
    -----
    Pay a particular attention to the render_vars dictionaries, they contain all the definitions of the variables appearing in your Jinja templates.
    """

    # ========================================================= #
    #                      Preparation step                     #
    # ========================================================= #

    # Check config file
    # =================

    # Check if a "general" block has been defined in the config file

    if not config.get('general'):
        raise abin_errors.AbinError(
            'ERROR: There is no "general" key defined in the "%s" configuration file.'
            % misc['config_name'])

    # Check if a "qchem" block has been defined in the config file

    if not config.get('qchem'):
        raise abin_errors.AbinError(
            'ERROR: There is no "qchem" key defined in the "%s" configuration file.'
            % misc['config_name'])

    # Check the options defined in the config file

    copy_files = config['qchem'].get('copy_files', False)

    if not isinstance(copy_files, bool):
        raise abin_errors.AbinError(
            'ERROR: The "copy_files" value given in the "qchem" block of the "%s" configuration file is not a boolean (neither "True" nor "False").'
            % misc['config_name'])

    benchmark = config['qchem'].get('benchmark', False)

    if not isinstance(benchmark, bool):
        raise abin_errors.AbinError(
            'ERROR: The "benchmark" value given in the "qchem" block of the "%s" configuration file is not a boolean (neither "True" nor "False").'
            % misc['config_name'])

    # Define the templates
    # ====================

    # Define the names of the templates.

    template_input = "qchem.in.jinja"
    template_script = "qchem_job.sh.jinja"

    # Check if the specified templates exist in the "templates" directory of ABIN LAUNCHER.

    abin_errors.check_abspath(
        os.path.join(misc['templates_dir'], template_input),
        "Jinja template for the qchem input file", "file")
    abin_errors.check_abspath(
        os.path.join(misc['templates_dir'], template_script),
        "Jinja template for the qchem job script", "file")

    # Define rendered files
    # =====================

    # Define the names of the rendered files.

    rendered_input = misc['mol_name'] + ".in"
    rendered_script = "qchem_job.sh"

    # Initialize the dictionary that will be returned by the function

    rendered_content = {}

    # ========================================================= #
    #                  Rendering the input file                 #
    # ========================================================= #

    print("{:<80}".format(
        "\nRendering the jinja template for the qchem input file ... "),
          end="")

    # Defining the Jinja variables
    # ============================

    # Define the memory usage (the static memory is 2% of the total memory, with a floor of 200)

    mem_total = job_specs['cores'] * job_specs['mem_per_cpu']
    mem_static = max(int(0.02 * mem_total), 200)

    # Variables not associated with the config file

    input_render_vars = {
        "mem_total": mem_total,
        "mem_static": mem_static,
        "coordinates": file_data['atomic_coordinates']
    }

    # Variables associated with the "general" block of the config file
    # (error.args[0] is the bare key name, formatting the KeyError itself would add extra quotes around it)

    try:
        input_render_vars.update({
            "charge": config['general']['charge'],
            "multiplicity": config['general']['multiplicity']
        })

    except KeyError as error:
        raise abin_errors.AbinError(
            'ERROR: The "%s" key is missing in the "general" block of the "%s" configuration file.'
            % (error.args[0], misc['config_name'])) from error

    # Check if a "keywords" block has been defined in the "qchem" block of the config file

    if not config['qchem'].get('keywords'):
        raise abin_errors.AbinError(
            'ERROR: There is no "keywords" key in the "qchem" block of the "%s" configuration file.'
            % misc['config_name'])

    # Variables associated with the "keywords" block of the "qchem" block in the config file

    keywords = config['qchem']['keywords']

    try:
        input_render_vars.update({
            "job_type": keywords['job_type'],
            "exchange": keywords['exchange'],
            "basis_set": keywords['basis_set'],
            "scf_algorithm": keywords['scf_algorithm'],
            "max_scf_cycles": keywords['max_scf_cycles'],
            "cis_n_roots": keywords['cis_n_roots'],
            "iqmol_fchk": keywords['iqmol_fchk']
        })

    except KeyError as error:
        raise abin_errors.AbinError(
            'ERROR: The "%s" key is missing in the "keywords" block of the "qchem" block in the "%s" configuration file.'
            % (error.args[0], misc['config_name'])) from error

    # Rendering the file
    # ==================

    rendered_content[rendered_input] = jinja_render(misc['templates_dir'],
                                                    template_input,
                                                    input_render_vars)

    print('%12s' % "[ DONE ]")

    # ========================================================= #
    #                  Rendering the job script                 #
    # ========================================================= #

    # Get the path to the "check_scripts" directory because the job script needs to execute qchem_check.py

    chains_path = os.path.dirname(misc['code_dir'])
    check_script_path = os.path.join(chains_path, "check_scripts")

    # If we need to copy the output files to their respective results directory, load the CHAINS configuration file to get the necessary information

    if copy_files:

        chains_config_file = abin_errors.check_abspath(
            os.path.join(chains_path, "configs", "chains_config.yml"),
            "CHAINS configuration YAML file", "file")

        print("{:<80}".format("\nLoading CHAINS configuration YAML file ..."),
              end="")

        # NOTE(review): yaml.FullLoader accepts more than plain data. This file is a local project file,
        # but yaml.safe_load would be the safer choice if it only holds plain mappings - to be confirmed.
        with open(chains_config_file, 'r') as chains:
            chains_config = yaml.load(chains, Loader=yaml.FullLoader)

        print('%12s' % "[ DONE ]")

    print("{:<80}".format(
        "\nRendering the jinja template for the qchem job script ..."),
          end="")

    # Defining the mandatory Jinja variables
    # ======================================

    # Variables not associated with the config file

    script_render_vars = {
        "mol_name": misc['mol_name'],
        "config_file": misc['config_name'],
        "job_walltime": job_specs['walltime'],
        "job_cores": job_specs['cores'],
        "job_mem_per_cpu": job_specs['mem_per_cpu'],  # in MB
        "partition": job_specs['partition'],
        "chains_dir": chains_path,
        "check_dir": check_script_path,
        "copy_files": copy_files,  # Associated with the config file, but it has already been verified
        "benchmark": benchmark  # Associated with the config file, but it has already been verified
    }

    # Variables associated with the "general" block of the config file

    try:
        script_render_vars.update({
            "user_email": config['general']['user_email'],
            "mail_type": config['general']['mail_type']
        })

    except KeyError as error:
        raise abin_errors.AbinError(
            'ERROR: The "%s" key is missing in the "general" block of the "%s" configuration file.'
            % (error.args[0], misc['config_name'])) from error

    # Variables associated with the clusters configuration file

    try:
        profile_cfg = clusters_cfg[job_specs['cluster_name']]['profiles'][job_specs['profile']]
        script_render_vars.update({
            "set_env": profile_cfg['set_env'],
            "command": profile_cfg['command']
        })

    except KeyError as error:
        raise abin_errors.AbinError(
            'ERROR: The "%s" key is missing in the "%s" profile of the clusters configuration file.'
            % (error.args[0], job_specs['profile'])) from error

    # Defining the specific Jinja variables
    # =====================================

    # Variables specific to the copy_files portion of the template

    if copy_files:

        # Variables not associated with the config file

        script_render_vars.update({"job_script": rendered_script})

        # Variables associated with the CHAINS configuration file

        try:
            if config['qchem']['keywords']['basis_set'].lower() == "def2-tzvp":
                # The output directories are different for the TZVP basis set
                script_render_vars.update({
                    "output_dir": chains_config['output_qchem_tzvp'],
                    "results_dir": chains_config['results_dir_tzvp']
                })
            else:
                script_render_vars.update({
                    "output_dir": chains_config['output_qchem'],
                    "results_dir": chains_config['results_dir']
                })

        except KeyError as error:
            raise abin_errors.AbinError(
                'ERROR: The "%s" key is missing in the CHAINS configuration file (chains_config.yml).'
                % (error.args[0], )) from error

    # Variables specific to the benchmarking template
    # ("job_walltime" and "job_mem_per_cpu" are already part of the mandatory variables defined above)

    if benchmark:

        script_render_vars.update({
            "benchmark_path": "${CECIHOME}/BENCHMARK",
            "prefix": job_specs['profile'] + "_" + job_specs['cluster_name'],
            "profile": job_specs['profile'],
            "cluster_name": job_specs['cluster_name'],
            "jobscale_label": job_specs['scale_label'],
            "scaling_function": job_specs['scaling_fct'],
            "scale_index": job_specs['scale_index']
        })

    # Rendering the file
    # ==================

    rendered_content[rendered_script] = jinja_render(misc['templates_dir'],
                                                     template_script,
                                                     script_render_vars)

    print('%12s' % "[ DONE ]")

    return rendered_content, rendered_script
def chains_gaussian_render(mendeleev: dict, clusters_cfg: dict, config: dict, file_data: dict, job_specs: dict, misc: dict):
    """Renders the job script and the input file associated with the GAUSSIAN program in CHAINS.

    Parameters
    ----------
    mendeleev : dict
        Content of AlexGustafsson's Mendeleev Table YAML file (found at https://github.com/AlexGustafsson/molecular-data).
        Unused in this function.
    clusters_cfg : dict
        Content of the YAML clusters configuration file.
        Read as clusters_cfg[<cluster_name>]['profiles'][<profile>] to get the 'set_env' and 'command' values.
    config : dict
        Content of the YAML configuration file.
        Must contain a "general" block and a "gaussian" block (itself containing a "keywords" block).
    file_data : dict
        Information extracted by the scanning function from the geometry file.
        Only the 'atomic_coordinates' key is used here.
    job_specs : dict
        Contains all information related to the job.
    misc : dict
        Contains all the additional variables that did not pertain to the other arguments.
        The 'config_name', 'templates_dir', 'mol_name' and 'code_dir' keys are used here.

    Returns
    -------
    rendered_content : dict
        Dictionary containing the text of all the rendered files in the form of <filename>: <rendered_content>.
    rendered_script : str
        Name of the rendered job script, necessary to launch the job.

    Raises
    ------
    AbinError
        If the "general", "gaussian" or "keywords" block is missing (or empty) in the configuration file,
        if the "auto_restart", "benchmark" or "copy_files" value is not a boolean,
        if the "ip_calc" value is not one of the supported ones,
        or if one of the required keys is missing in the configuration file, the clusters configuration file or the CHAINS configuration file.

    Notes
    -----
    Pay a particular attention to the render_vars dictionaries, they contain all the definitions of the variables appearing in your Jinja templates.
    """

    # ========================================================= #
    #                      Preparation step                     #
    # ========================================================= #

    # Check config file
    # =================

    # Check if a "general" block has been defined in the config file

    if not config.get('general'):
        raise abin_errors.AbinError(
            'ERROR: There is no "general" key defined in the "%s" configuration file.'
            % misc['config_name'])

    # Check if a "gaussian" block has been defined in the config file

    if not config.get('gaussian'):
        raise abin_errors.AbinError(
            'ERROR: There is no "gaussian" key defined in the "%s" configuration file.'
            % misc['config_name'])

    # Check the options defined in the config file

    auto_restart = config['gaussian'].get('auto_restart', False)

    if not isinstance(auto_restart, bool):
        raise abin_errors.AbinError(
            'ERROR: The "auto_restart" value given in the "gaussian" block of the "%s" configuration file is not a boolean (neither "True" nor "False").'
            % misc['config_name'])

    benchmark = config['gaussian'].get('benchmark', False)

    if not isinstance(benchmark, bool):
        raise abin_errors.AbinError(
            'ERROR: The "benchmark" value given in the "gaussian" block of the "%s" configuration file is not a boolean (neither "True" nor "False").'
            % misc['config_name'])

    copy_files = config['gaussian'].get('copy_files', False)

    if not isinstance(copy_files, bool):
        raise abin_errors.AbinError(
            'ERROR: The "copy_files" value given in the "gaussian" block of the "%s" configuration file is not a boolean (neither "True" nor "False").'
            % misc['config_name'])

    # The ionization potential calculation is only considered when the output files are copied

    if copy_files:

        ip_calc = str(config['gaussian'].get('ip_calc', "no_key")).lower()

        if ip_calc == "no_key":
            # If there was no "ip_calc" key in the config file, use the scale index to define how the ionization potential will be calculated
            if job_specs['scale_index'] > 650:
                ip_calc = "vertical"
            else:
                ip_calc = "adiabatic"

        elif ip_calc not in ["none", "vertical", "adiabatic"]:
            raise abin_errors.AbinError(
                'ERROR: The "ip_calc" value given in the "gaussian" block of the "%s" configuration file is neither "None", "Vertical" nor "Adiabatic" (This is not case sensitive).'
                % misc['config_name'])

    else:
        ip_calc = None

    # Define the templates
    # ====================

    # Define the names of the templates.

    template_input = "gaussian.com.jinja"
    template_script = "gaussian_job.sh.jinja"

    # Check if the specified templates exist in the "templates" directory of ABIN LAUNCHER.

    abin_errors.check_abspath(
        os.path.join(misc['templates_dir'], template_input),
        "Jinja template for the gaussian input file", "file")
    abin_errors.check_abspath(
        os.path.join(misc['templates_dir'], template_script),
        "Jinja template for the gaussian job script", "file")

    # Define rendered files
    # =====================

    # Define the names of the rendered files.

    rendered_input = misc['mol_name'] + ".com"
    rendered_script = "gaussian_job.sh"

    # Initialize the dictionary that will be returned by the function

    rendered_content = {}

    # ========================================================= #
    #                  Rendering the input file                 #
    # ========================================================= #

    print("{:<80}".format(
        "\nRendering the jinja template for the gaussian input file ... "),
          end="")

    # Defining the mandatory Jinja variables
    # ======================================

    # Variables not associated with the config file

    input_render_vars = {
        "mol_name": misc['mol_name'],
        "mem_total": job_specs['cores'] * job_specs['mem_per_cpu'],
        "job_cores": job_specs['cores'],
        "coordinates": file_data['atomic_coordinates'],
        "ip_calc": ip_calc  # Associated with the config file, but it has already been verified
    }

    # Variables associated with the "general" block of the config file
    # (error.args[0] is the bare key name, formatting the KeyError itself would add extra quotes around it)

    try:
        input_render_vars.update({
            "charge": config['general']['charge'],
            "multiplicity": config['general']['multiplicity']
        })

    except KeyError as error:
        raise abin_errors.AbinError(
            'ERROR: The "%s" key is missing in the "general" block of the "%s" configuration file.'
            % (error.args[0], misc['config_name'])) from error

    # Check if a "keywords" block has been defined in the "gaussian" block of the config file

    if not config['gaussian'].get('keywords'):
        raise abin_errors.AbinError(
            'ERROR: There is no "keywords" key in the "gaussian" block of the "%s" configuration file.'
            % misc['config_name'])

    # Variables associated with the "keywords" block of the "gaussian" block in the config file

    keywords = config['gaussian']['keywords']

    try:
        input_render_vars.update({
            "method": keywords['method'],
            "basis_set": keywords['basis_set'],
            "other": keywords['other']
        })

    except KeyError as error:
        raise abin_errors.AbinError(
            'ERROR: The "%s" key is missing in the "keywords" block of the "gaussian" block in the "%s" configuration file.'
            % (error.args[0], misc['config_name'])) from error

    # Defining the specific Jinja variables
    # =====================================

    # Variables specific to the ip_calc == vertical portion of the template

    if ip_calc == "vertical":

        # Determine the new charge and multiplicity of the cation

        charge_cation = int(config['general']['charge']) + 1

        if int(config['general']['multiplicity']) == 1:
            # In the case of a singlet ground state, we break a pair of electrons and thus "create" a new unpaired electron
            multiplicity_cation = 2
        else:
            # We removed one unpaired electron
            multiplicity_cation = int(config['general']['multiplicity']) - 1

        # Variables associated with the config file but they have already been verified

        input_render_vars.update({
            "charge_cation": charge_cation,
            "multiplicity_cation": multiplicity_cation
        })

    # Rendering the file
    # ==================

    rendered_content[rendered_input] = jinja_render(misc['templates_dir'],
                                                    template_input,
                                                    input_render_vars)

    print('%12s' % "[ DONE ]")

    # ========================================================= #
    #                  Rendering the job script                 #
    # ========================================================= #

    # Get the path to the "check_scripts" directory because the job script needs to execute gaussian_check.py

    chains_path = os.path.dirname(misc['code_dir'])
    check_script_path = os.path.join(chains_path, "check_scripts")

    # If we need to copy the output files to their respective results directory (or compute the ionization potentials through the cation), load the CHAINS configuration file to get the necessary information

    if copy_files:

        chains_config_file = abin_errors.check_abspath(
            os.path.join(chains_path, "configs", "chains_config.yml"),
            "CHAINS configuration YAML file", "file")

        print("{:<80}".format("\nLoading CHAINS configuration YAML file ..."),
              end="")

        # NOTE(review): yaml.FullLoader accepts more than plain data. This file is a local project file,
        # but yaml.safe_load would be the safer choice if it only holds plain mappings - to be confirmed.
        with open(chains_config_file, 'r') as chains:
            chains_config = yaml.load(chains, Loader=yaml.FullLoader)

        print('%12s' % "[ DONE ]")

    print("{:<80}".format(
        "\nRendering the jinja template for the gaussian job script ..."),
          end="")

    # Defining the mandatory Jinja variables
    # ======================================

    # Variables not associated with the config file

    script_render_vars = {
        "mol_name": misc['mol_name'],
        "config_file": misc['config_name'],
        "job_walltime": job_specs['walltime'],
        "job_cores": job_specs['cores'],
        "job_mem_per_cpu": job_specs['mem_per_cpu'],  # in MB
        "cluster_name": job_specs['cluster_name'],
        "partition": job_specs['partition'],
        "chains_dir": chains_path,
        "check_dir": check_script_path,
        "auto_restart": auto_restart,  # Associated with the config file, but it has already been verified
        "copy_files": copy_files,  # Associated with the config file, but it has already been verified
        "benchmark": benchmark  # Associated with the config file, but it has already been verified
    }

    # Variables associated with the "general" block of the config file

    try:
        script_render_vars.update({
            "user_email": config['general']['user_email'],
            "mail_type": config['general']['mail_type']
        })

    except KeyError as error:
        raise abin_errors.AbinError(
            'ERROR: The "%s" key is missing in the "general" block of the "%s" configuration file.'
            % (error.args[0], misc['config_name'])) from error

    # Variables associated with the clusters configuration file

    try:
        profile_cfg = clusters_cfg[job_specs['cluster_name']]['profiles'][job_specs['profile']]
        script_render_vars.update({
            "set_env": profile_cfg['set_env'],
            "command": profile_cfg['command']
        })

    except KeyError as error:
        raise abin_errors.AbinError(
            'ERROR: The "%s" key is missing in the "%s" profile of the clusters configuration file.'
            % (error.args[0], job_specs['profile'])) from error

    # Defining the specific Jinja variables
    # =====================================

    # Variables specific to the copy_files portion of the template

    if copy_files:

        # Variables not associated with the config file

        script_render_vars.update({
            "ip_calc": ip_calc,  # Associated with the config file, but it has already been verified
            "job_script": rendered_script
        })

        # Variables associated with the CHAINS configuration file

        try:
            script_render_vars.update({
                "output_dir": chains_config['output_gaussian'],
                "results_dir": chains_config['results_dir']
            })

            # The ionization potentials file is only needed when an ionization potential is actually computed
            if ip_calc in ('vertical', 'adiabatic'):
                script_render_vars.update(
                    {"ip_file": chains_config['ip_file']})

        except KeyError as error:
            raise abin_errors.AbinError(
                'ERROR: The "%s" key is missing in the CHAINS configuration file (chains_config.yml).'
                % (error.args[0], )) from error

    # Variables specific to the benchmarking template
    # ("cluster_name", "job_walltime" and "job_mem_per_cpu" are already part of the mandatory variables defined above)

    if benchmark:

        script_render_vars.update({
            "benchmark_path": "${CECIHOME}/BENCHMARK",
            "prefix": job_specs['profile'] + "_" + job_specs['cluster_name'],
            "profile": job_specs['profile'],
            "jobscale_label": job_specs['scale_label'],
            "scaling_function": job_specs['scaling_fct'],
            "scale_index": job_specs['scale_index']
        })

    # Rendering the file
    # ==================

    rendered_content[rendered_script] = jinja_render(misc['templates_dir'],
                                                     template_script,
                                                     script_render_vars)

    print('%12s' % "[ DONE ]")

    return rendered_content, rendered_script
def xyz_scan(mol_content: list):
    """Scans the content of an XYZ geometry file and extracts the chemical formula and atomic coordinates of the molecule.

    Parameters
    ----------
    mol_content : list
        Content of the XYZ geometry file. Each element of the list is a line of the file.
        The first line must hold the number of atoms; the coordinates are looked for from the third line onwards.

    Returns
    -------
    file_data : dict
        The extracted information of the file, following the pattern { 'chemical_formula' : { }, 'atomic_coordinates' : [ ] }
        'chemical_formula' maps each atom symbol to its number of occurrences and 'atomic_coordinates' lists the matching lines, unmodified.

    Raises
    ------
    AbinError
        If the number of atomic coordinates lines does not match the number of atoms mentioned in the first line of the .xyz file.
    """

    # The first line of the xyz file gives the number of atoms of the molecule

    expected_atoms = int(mol_content[0])

    # Regular expression describing an atomic coordinates line (e.g. 'Si        -0.31438        1.89081        0.00000')
    # See https://docs.python.org/3/library/re.html for details, or paste the raw string on https://regex101.com for an explanation

    coord_line = re.compile(
        r'^\s*(?P<atomSymbol>[a-zA-Z]{1,3})(?:\s+-?\d+\.\d+){3}\s*$')

    # Go through the file, skipping the first two lines since they never contain any coordinates

    formula = {}
    coordinates = []

    for raw_line in mol_content[2:]:

        found = coord_line.match(raw_line)

        # Lines that do not look like atomic coordinates are simply ignored
        if found is None:
            continue

        # Keep the line as is, it will be rendered in the input file later on
        coordinates.append(raw_line)

        # Add one occurrence for this atom type
        symbol = found.group("atomSymbol")
        formula[symbol] = formula.get(symbol, 0) + 1

    # The number of registered lines must be equal to the number of atoms announced in the first line

    nb_coord_lines = len(coordinates)

    if nb_coord_lines != expected_atoms:
        raise abin_errors.AbinError(
            "ERROR: Number of atomic coordinates lines (%s) doesn't match the number of atoms mentioned in the first line of the .xyz file (%s) !"
            % (nb_coord_lines, expected_atoms))

    file_data = {'chemical_formula': formula, 'atomic_coordinates': coordinates}

    # Print a summary table of the chemical formula

    rule = ''.center(35, '-')
    row = "{:<16} {:<18}"

    print("")
    print(rule)
    print(row.format('Atom Type', 'Number of atoms'))
    print(rule)
    for atom, nb_atom in formula.items():
        print(row.format(atom, nb_atom))
    print(rule)
    print(row.format('Total', sum(formula.values())))
    print(rule)

    print("\n%s lines of atomic coordinates have been registered." %
          nb_coord_lines)

    # Scanning complete, now return file_data

    return file_data