def parse_xyz_from_file(path):
    """
    Parse xyz coordinates from a file. The parsing strategy is chosen by the file extension:
    .xyz - XYZ file
    .gjf - Gaussian input file
    .out or .log - ESS output file (Gaussian, QChem, Molpro)
    other - Molpro or QChem input file

    Args:
        path (str): The path of the file to parse.

    Returns:
        The result of passing the collected coordinates through ``standardize_xyz_string``.

    Raises:
        ParserError: If no coordinate lines were found in a file handled by the
                     "other" (Molpro / QChem input) branch.
    """
    lines = _get_lines_from_file(path)
    file_extension = os.path.splitext(path)[1]

    xyz = None
    relevant_lines = list()

    if file_extension == '.xyz':
        # Drop the first two lines of the file; everything after them is taken as coordinates.
        relevant_lines = lines[2:]
    elif file_extension == '.gjf':
        start_parsing = False
        for line in lines:
            if start_parsing:
                # A blank line (including one holding only whitespace) ends the geometry section.
                if not line.strip():
                    break
                relevant_lines.append(line)
                continue
            splits = line.split()
            if len(splits) == 2:
                # The geometry follows the "<charge> <multiplicity>" line.
                # The charge may carry an explicit sign (e.g., "-1 2"), which str.isdigit() rejects,
                # so strip a single leading sign before testing it.
                charge, multiplicity = splits
                unsigned_charge = charge[1:] if charge[0] in '+-' else charge
                if unsigned_charge.isdigit() and multiplicity.isdigit():
                    start_parsing = True
    elif 'out' in file_extension or 'log' in file_extension:
        # Delegate to the ESS log parser selected for this file.
        log = determine_qm_software(fullpath=path)
        coords, number, _ = log.loadGeometry()
        xyz = get_xyz_string(coords=coords, numbers=number)
    else:
        record = False
        for line in lines:
            # '$end' or '}' stops the scan (also if met before any geometry block was opened).
            if '$end' in line or '}' in line:
                break
            if record and len(line.split()) == 4:
                # Inside the geometry block only four-column lines are kept.
                relevant_lines.append(line)
            elif '$molecule' in line or 'geometry={' in line:
                record = True
        if not relevant_lines:
            raise ParserError('Could not parse xyz coordinates from file {0}'.format(path))

    if xyz is None and relevant_lines:
        xyz = ''.join(relevant_lines)
    # NOTE(review): for an .xyz / .gjf file without coordinate lines, xyz is still None here
    # and is passed on as is - confirm standardize_xyz_string handles that.
    return standardize_xyz_string(xyz)
def test_standardize_xyz_string(self):
    """Check converter.standardize_xyz_string against two fixed input / expected-output pairs"""
    # The literals below are compared verbatim, so their whitespace must not be altered.

    # Case 1: element symbol followed by three coordinates per atom.
    symbol_input = """ C -0.67567701 1.18507660 0.04672449 H -0.25592948 1.62415961 0.92757746 H -2.26870864 1.38030564 0.05865317 O -0.36671999 -0.21081064 0.01630374 H -0.73553821 -0.63718986 0.79332805 C -0.08400571 1.86907236 -1.19973252 H -0.50375517 1.42998100 -2.08057962 H -0.31518819 2.91354759 -1.17697025 H 0.97802159 1.73893214 -1.20769117 O -3.69788377 1.55609096 0.07050345 O -4.28667752 0.37487691 0.04916102 H -4.01978712 -0.12970163 0.82103635 """
    symbol_expected = """ C -0.67567701 1.18507660 0.04672449 H -0.25592948 1.62415961 0.92757746 H -2.26870864 1.38030564 0.05865317 O -0.36671999 -0.21081064 0.01630374 H -0.73553821 -0.63718986 0.79332805 C -0.08400571 1.86907236 -1.19973252 H -0.50375517 1.42998100 -2.08057962 H -0.31518819 2.91354759 -1.17697025 H 0.97802159 1.73893214 -1.20769117 O -3.69788377 1.55609096 0.07050345 O -4.28667752 0.37487691 0.04916102 H -4.01978712 -0.12970163 0.82103635"""

    # Case 2: Gaussian-style table (index, atomic number, a zero column, three coordinates),
    # expected to come back with element symbols instead of the three leading columns.
    gaussian_input = """ 1 8 0 3.132319 0.769111 -0.080869 2 8 0 3.387436 -2.116759 -0.038585 3 6 0 -2.369193 -0.546956 0.566827 4 6 0 -3.153606 0.171059 1.663074 5 6 0 -2.728027 -2.026445 0.459268 6 6 0 2.331560 -1.734235 -0.921481 7 6 0 3.650113 2.049169 0.275835 8 6 0 -0.931216 -0.186900 0.428193 9 6 0 1.352858 -0.755151 -0.308464 10 6 0 1.794338 0.522302 0.098410 11 6 0 0.011593 -1.079560 -0.135497 12 6 0 -0.448289 1.082102 0.804298 13 6 0 0.893169 1.436443 0.649904 14 1 0 -2.891135 -0.053945 -0.499139 15 1 0 2.748799 -1.311472 -1.847528 16 1 0 1.809915 -2.658319 -1.182148 17 1 0 -3.112208 1.258826 1.567630 18 1 0 -4.207732 -0.116551 1.619167 19 1 0 -2.768847 -0.097847 2.656934 20 1 0 -2.294986 -2.598417 1.292175 21 1 0 -3.813897 -2.151504 0.498488 22 1 0 -2.382172 -2.478656 -0.474306 23 1 0 3.525166 2.241957 1.347801 24 1 0 4.712607 2.018400 0.032537 25 1 0 3.166236 2.845374 -0.301663 26 1 0 
-0.305960 -2.070003 -0.442894 27 1 0 -1.122381 1.816000 1.229392 28 1 0 1.217512 2.421293 0.964523 29 1 0 3.889221 -1.315416 0.166971 30 8 0 -3.433048 0.461721 -1.530756 31 8 0 -2.894879 1.761778 -1.591557 32 1 0 -2.124573 1.652495 -2.176005 """
    gaussian_expected = """O 3.132319 0.769111 -0.080869 O 3.387436 -2.116759 -0.038585 C -2.369193 -0.546956 0.566827 C -3.153606 0.171059 1.663074 C -2.728027 -2.026445 0.459268 C 2.331560 -1.734235 -0.921481 C 3.650113 2.049169 0.275835 C -0.931216 -0.186900 0.428193 C 1.352858 -0.755151 -0.308464 C 1.794338 0.522302 0.098410 C 0.011593 -1.079560 -0.135497 C -0.448289 1.082102 0.804298 C 0.893169 1.436443 0.649904 H -2.891135 -0.053945 -0.499139 H 2.748799 -1.311472 -1.847528 H 1.809915 -2.658319 -1.182148 H -3.112208 1.258826 1.567630 H -4.207732 -0.116551 1.619167 H -2.768847 -0.097847 2.656934 H -2.294986 -2.598417 1.292175 H -3.813897 -2.151504 0.498488 H -2.382172 -2.478656 -0.474306 H 3.525166 2.241957 1.347801 H 4.712607 2.018400 0.032537 H 3.166236 2.845374 -0.301663 H -0.305960 -2.070003 -0.442894 H -1.122381 1.816000 1.229392 H 1.217512 2.421293 0.964523 H 3.889221 -1.315416 0.166971 O -3.433048 0.461721 -1.530756 O -2.894879 1.761778 -1.591557 H -2.124573 1.652495 -2.176005"""

    # Run the cases in order; the first mismatch fails the test.
    cases = (
        (symbol_input, symbol_expected),
        (gaussian_input, gaussian_expected),
    )
    for raw_xyz, expected_xyz in cases:
        self.assertEqual(converter.standardize_xyz_string(raw_xyz), expected_xyz)