def _expand_group(group_str):
    """ Expand a group string into a list of parts.

        If `group_str` matches `group_ptt`, the first capture is cast with
        `ap_cast` into a `(count, part)` pair and `part` is repeated `count`
        times. Otherwise the string is returned unchanged as a one-element
        list.

        :param group_str: the string to expand
        :rtype: list
    """
    # Guard clause: nothing to expand, hand the string back as-is.
    if not apf.has_match(group_ptt, group_str):
        return [group_str]

    count, part = ap_cast(apf.first_capture(group_ptt, group_str))
    return [part] * count
def xyz_string(geo, comment=''):
    """ Write the cartesian geometry to a .xyz string.

        The result has three parts separated by newlines: the atom count,
        the comment, and the geometry string produced by `string(geo)`.

        :param geo: the geometry
        :param comment: text for the second line; must not contain a newline
        :type comment: str
        :rtype: str
    """
    natms = len(_symbols(geo))

    # A newline in the comment would break the fixed line layout.
    assert not apf.has_match(app.NEWLINE, comment)

    return '{:d}\n{:s}\n{:s}'.format(natms, comment, string(geo))
def _check_name_string(output_string):
    """ Check whether the 'MRCC program system' string is in the output.

        :param output_string: string of the program's output
        :type output_string: str
        :return: result of `apf.has_match` for the banner text
    """
    return apf.has_match('MRCC program system', output_string)
def _check_name_string(output_string):
    """ Check whether the '* O R C A *' banner is in the output.

        :param output_string: string of the program's output
        :type output_string: str
        :return: result of `apf.has_match` for the escaped banner text
    """
    # The banner contains '*', so it is escaped before matching.
    banner_ptt = app.escape('* O R C A *')
    return apf.has_match(banner_ptt, output_string)
def is_valid(ick):
    """ Determine if an InChIKey has the proper form.

        The key is checked against `Parse.PATTERN` with `apf.has_match`.

        :param ick: InChIKey
        :type ick: str
        :rtype: bool
    """
    accepted_types = (str, bytes, bytearray)
    assert isinstance(ick, accepted_types)

    matches = apf.has_match(Parse.PATTERN, ick)
    return matches
def _check_name_string(output_str):
    """ Check whether the CFOUR program banner is in the output.

        :param output_str: string of the program's output
        :type output_str: str
        :return: result of `apf.has_match` for the escaped banner text
    """
    # The banner is kept in two pieces; each is escaped on its own and the
    # results are concatenated into one pattern.
    banner_pieces = (
        '* CFOUR Coupled-Cluster techniques ',
        'for Computational Chemistry *',
    )
    banner_ptt = ''.join(app.escape(piece) for piece in banner_pieces)
    return apf.has_match(banner_ptt, output_str)
def collider_enhance_factors(rxn_dstr):
    """ Parses the data string for a reaction in the reactions block
        for the lines containing the names of bath gases and their
        corresponding collision enhancement factors.

        The string is only searched when it contains one of 'LOW', 'TROE',
        'M=' or 'M ='. Lines matching any of 'DUP', 'LOW', 'TROE', 'CHEB',
        'PLOG' or the CHEMKIN_ARROW pattern are skipped. On the remaining
        lines every `name / number /` pair is collected.

        :param rxn_dstr: data string for species in reaction block
        :type rxn_dstr: str
        :return params: Collision enhancement factors for each bath gas,
            or None if the string has no trigger keyword or no factors
            were found
        :rtype: dict[bath name: enhancement factor] or None
    """
    bad_strings = ('DUP', 'LOW', 'TROE', 'CHEB', 'PLOG', CHEMKIN_ARROW)

    species_char = app.one_of_these([
        app.LETTER,
        app.DIGIT,
        app.escape('('),
        app.escape(')'),
        app.UNDERSCORE,
    ])
    species_name = app.one_or_more(species_char)

    # The factor pattern does not depend on the line, so build it once
    # instead of once per line.
    gap = app.zero_or_more(app.SPACE)
    factor_pattern = (
        app.capturing(species_name) + gap +
        app.escape('/') + gap +
        app.capturing(app.NUMBER) + gap +
        app.escape('/') + gap)

    # Only reactions carrying one of these markers are searched at all.
    triggers = ('LOW', 'TROE', 'M=', 'M =')
    if not any(apf.has_match(trigger, rxn_dstr) for trigger in triggers):
        return None

    params = {}
    for line in rxn_dstr.splitlines():
        # Skip keyword lines and the reaction line itself
        if any(apf.has_match(bad, line) for bad in bad_strings):
            continue
        baths = apf.all_captures(factor_pattern, line)
        if baths:
            for bath in baths:
                params[bath[0]] = float(bath[1])

    # If nothing was put into the dictionary, return None
    return params if params else None
def from_xyz_string(xyz_str):
    """ Read a cartesian geometry from a .xyz string.

        The first line must contain an unsigned integer (the atom count) and
        the second line is ignored. The next `natms` lines are joined and
        passed to `from_string` with `angstroms=True, strict=True`.

        :param xyz_str: the .xyz file contents
        :type xyz_str: str
        :return: the geometry returned by `from_string`
    """
    lines = xyz_str.splitlines()

    count_line = lines[0]
    assert apf.has_match(app.UNSIGNED_INTEGER, count_line)
    natms = int(count_line)

    # Coordinates start on the third line (index 2).
    coord_lines = lines[2:natms + 2]
    return from_string('\n'.join(coord_lines), angstroms=True, strict=True)
def collider_enhance_factors(rxn_dstr):
    """ Parses the data string for a reaction in the reactions block
        for the lines containing the names of bath gases and their
        corresponding collision enhancement factors.

        The string is only searched when it contains 'LOW' or 'TROE'.
        Lines matching any of 'DUP', 'LOW', 'TROE', 'CHEB', 'PLOG' or the
        first-line (reaction) pattern are skipped. On the remaining lines
        every `name/number/` pair is collected. Factors found on several
        lines are merged into one dictionary.

        :param rxn_dstr: data string for species in reaction block
        :type rxn_dstr: str
        :return factors: Collision enhancement factors for each bath gas;
            empty if none were found
        :rtype: dict[bath name: enhancement factor]
    """
    first_str = _first_line_pattern(rct_ptt=SPECIES_NAMES_PATTERN,
                                    prd_ptt=SPECIES_NAMES_PATTERN,
                                    param_ptt=COEFF_PATTERN)
    bad_strings = ('DUP', 'LOW', 'TROE', 'CHEB', 'PLOG', first_str)

    species_char = app.one_of_these([
        app.LETTER,
        app.DIGIT,
        app.escape('('),
        app.escape(')'),
        app.UNDERSCORE,
    ])
    species_name = app.one_or_more(species_char)

    # The factor pattern does not depend on the line, so build it once.
    factor_pattern = (
        app.capturing(species_name) +
        app.escape('/') + app.maybe(app.SPACE) +
        app.capturing(app.NUMBER) +
        app.escape('/'))

    factors = {}
    if apf.has_match('LOW', rxn_dstr) or apf.has_match('TROE', rxn_dstr):
        for line in rxn_dstr.splitlines():
            # Skip keyword lines and the reaction line itself
            if any(apf.has_match(bad, line) for bad in bad_strings):
                continue
            baths = apf.all_captures(factor_pattern, line)
            if baths:
                # Accumulate into the same dictionary: the factors may be
                # spread over several lines and must not overwrite the
                # ones already collected from earlier lines.
                for bath in baths:
                    factors[bath[0]] = float(bath[1])

    return factors
def from_string(geo_str, angstroms=True, strict=True):
    """ Read a cartesian geometry from a string.

        Each atom entry is an atom symbol followed by three floats, separated
        by line spaces. The captured values are handed to `from_data`.

        :param geo_str: the geometry string
        :type geo_str: str
        :param angstroms: forwarded to `from_data` as `angstroms`
        :type angstroms: bool
        :param strict: if True, first assert that every line of the
            space-stripped string matches the atom-entry pattern
        :type strict: bool
        :return: the geometry returned by `from_data`
    """
    # symbol + x + y + z, each one captured
    captured_fields = (
        [app.capturing(ATOM_SYMBOL_PATTERN)] + [app.capturing(app.FLOAT)] * 3)
    atom_ptt = app.LINESPACES.join(captured_fields)

    if strict:
        # Every line must be exactly one atom entry, allowing optional
        # line spaces at either end.
        whole_line_ptt = app.maybe(app.LINESPACES).join(
            [app.LINE_START, atom_ptt, app.LINE_END])
        stripped_lines = apf.strip_spaces(geo_str).splitlines()
        assert all(
            apf.has_match(whole_line_ptt, line) for line in stripped_lines)

    captures = apf.all_captures(atom_ptt, geo_str)
    rows = apc.multis(captures, dtypes=(str, float, float, float))

    syms = tuple(row[0] for row in rows)
    xyzs = tuple(row[1:] for row in rows)
    return from_data(syms, xyzs, angstroms=angstroms)
def opt_zmatrix(output_str):
    """ Reads the optimized Z-Matrix from the output file string.

        The Z-matrix layout (symbols, keys, names) is read from the
        'Symbolic Z-matrix:' block and the values from the
        'Optimized Parameters' table. The values are passed to the automol
        constructor with `one_indexed=True, angstrom=True, degree=True`.
        A warning is printed if a nan gradient is found after the
        'Optimized Parameters' header.

        :param output_str: string of the program's output file
        :type output_str: str
        :return: the object built by `automol.zmat.from_data`, or None if
            the layout could not be read
    """
    # Read the matrix layout (read with last=False)
    layout_start_ptt = app.padded(app.NEWLINE).join(
        [app.escape('Symbolic Z-matrix:'), app.LINE, ''])
    symbs, key_mat, name_mat = ar.vmat.read(
        output_str,
        start_ptt=layout_start_ptt,
        symb_ptt=ar.par.Pattern.ATOM_SYMBOL + app.maybe(app.UNSIGNED_INTEGER),
        key_ptt=app.one_of_these([app.UNSIGNED_INTEGER, app.VARIABLE_NAME]),
        line_end_ptt=app.maybe(app.UNSIGNED_INTEGER),
        last=False)

    # Without a complete layout there is nothing to build
    if any(item is None for item in (symbs, key_mat, name_mat)):
        return None

    # Read the values (read with last=True)
    if len(symbs) == 1:
        # Single atom: one row with no values
        val_mat = ((None, None, None), )
    else:
        grad_val = app.one_of_these([app.FLOAT, 'nan', '-nan'])
        values_start_ptt = app.padded(app.NEWLINE).join([
            app.padded('Optimized Parameters', app.NONNEWLINE),
            app.LINE, app.LINE, app.LINE, app.LINE, ''
        ])
        values_sep_ptt = app.maybe(app.LINESPACES).join([
            app.escape('-DE/DX ='),
            grad_val,
            app.escape('!'),
            app.NEWLINE
        ])
        val_dct = ar.setval.read(
            output_str,
            start_ptt=values_start_ptt,
            entry_sep_ptt='',
            entry_start_ptt=app.escape('!'),
            sep_ptt=values_sep_ptt,
            last=True)
        val_mat = ar.setval.convert_dct_to_matrix(val_dct, name_mat)

    # Warn about nan gradients in the text that follows the first
    # 'Optimized Parameters' occurrence
    nan_grad_ptt = app.LINESPACES.join(
        [app.escape('-DE/DX ='), app.one_of_these(['nan', '-nan'])])
    if 'Optimized Parameters' in output_str:
        after_header = output_str.split('Optimized Parameters')[1]
        if apf.has_match(nan_grad_ptt, after_header):
            print('Warning: Bad gradient value (nan)',
                  'in "Optimized Parameters" list.')

    # For the case when variable names are used instead of integer keys:
    # (otherwise, does nothing)
    index_of = {symb: idx for idx, symb in enumerate(symbs)}
    index_of[None] = 0
    key_mat = [
        [key if isinstance(key, numbers.Real) else index_of[key] + 1
         for key in row]
        for row in key_mat
    ]

    # Keep only the leading atom symbol of each symbol entry
    lead_symb_ptt = (
        app.STRING_START + app.capturing(ar.par.Pattern.ATOM_SYMBOL))
    symbs = [apf.first_capture(lead_symb_ptt, symb) for symb in symbs]

    # Call the automol constructor
    return automol.zmat.from_data(
        symbs, key_mat, val_mat, name_mat,
        one_indexed=True, angstrom=True, degree=True)
def opt_zmatrix(output_string):
    """ Get the optimized z-matrix geometry from the output.

        The z-matrix layout (symbols, keys, names) is read from the
        'Symbolic Z-matrix:' block and the values from the
        'Optimized Parameters' table. The values are passed to the automol
        constructor with `one_indexed=True, angstrom=True, degree=True`.
        A warning is printed if a nan gradient is found after the
        'Optimized Parameters' header.

        :param output_string: string of the program's output file
        :type output_string: str
        :return: the object built by `automol.zmatrix.from_data`
    """
    # read the matrix layout (read with last=False)
    layout_start_ptt = app.padded(app.NEWLINE).join(
        [app.escape('Symbolic Z-matrix:'), app.LINE, ''])
    syms, key_mat, name_mat = ar.zmatrix.matrix.read(
        output_string,
        start_ptt=layout_start_ptt,
        sym_ptt=ar.par.Pattern.ATOM_SYMBOL + app.maybe(app.UNSIGNED_INTEGER),
        key_ptt=app.one_of_these([app.UNSIGNED_INTEGER, app.VARIABLE_NAME]),
        line_end_ptt=app.maybe(app.UNSIGNED_INTEGER),
        last=False)

    # read the values (read with last=True)
    if len(syms) == 1:
        # single atom: there are no values to read
        val_dct = {}
    else:
        grad_val = app.one_of_these([app.FLOAT, 'nan', '-nan'])
        values_start_ptt = app.padded(app.NEWLINE).join([
            app.padded('Optimized Parameters', app.NONNEWLINE),
            app.LINE, app.LINE, app.LINE, app.LINE, ''
        ])
        values_sep_ptt = app.maybe(app.LINESPACES).join([
            app.escape('-DE/DX ='),
            grad_val,
            app.escape('!'),
            app.NEWLINE
        ])
        val_dct = ar.zmatrix.setval.read(
            output_string,
            start_ptt=values_start_ptt,
            entry_sep_ptt='',
            entry_start_ptt=app.escape('!'),
            sep_ptt=values_sep_ptt,
            last=True)

    # warn about nan gradients in the text that follows the first
    # 'Optimized Parameters' occurrence
    nan_grad_ptt = app.LINESPACES.join(
        [app.escape('-DE/DX ='), app.one_of_these(['nan', '-nan'])])
    if 'Optimized Parameters' in output_string:
        after_header = output_string.split('Optimized Parameters')[1]
        if apf.has_match(nan_grad_ptt, after_header):
            print('Warning: Bad gradient value (nan)',
                  'in "Optimized Parameters" list.')

    # for the case when variable names are used instead of integer keys:
    # (otherwise, does nothing)
    index_of = {sym: idx for idx, sym in enumerate(syms)}
    index_of[None] = 0
    key_mat = [
        [key if isinstance(key, numbers.Real) else index_of[key] + 1
         for key in row]
        for row in key_mat
    ]

    # keep only the leading atom symbol of each symbol entry
    lead_sym_ptt = (
        app.STRING_START + app.capturing(ar.par.Pattern.ATOM_SYMBOL))
    syms = [apf.first_capture(lead_sym_ptt, sym) for sym in syms]

    # call the automol constructor
    return automol.zmatrix.from_data(
        syms, key_mat, name_mat, val_dct,
        one_indexed=True, angstrom=True, degree=True)
def is_valid(ick):
    """ Is this a valid InChIKey?

        The key is checked against `Parse.PATTERN` with `apf.has_match`.

        :param ick: the InChIKey; must be a str, bytes or bytearray
        :return: result of `apf.has_match`
    """
    accepted_types = (str, bytes, bytearray)
    assert isinstance(ick, accepted_types)

    matches = apf.has_match(Parse.PATTERN, ick)
    return matches