def read_reference_data(f):  # noqa C901
    """Extract geometries, atom types, energies and forces from an open file.

    The input is scanned line by line. Nothing is parsed until a line that
    contains both 'The structure contains' and 'atoms, and a total of' has
    supplied the number of atoms per geometry (its fourth whitespace-separated
    token). After that, three marker lines arm the parser:

    * 'Energy and forces in a compact form:' -- the sixth token of the next
      line is stored as one energy.
    * 'Total atomic forces (unitary forces cleaned) [eV/Ang]:' -- tokens 3-5
      of each following line are stored as a force vector, up to and
      including the line whose second token equals the atom count.
    * 'Atomic structure (and velocities) as used in the preceding time step:'
      -- each following line that has a token 'atom' contributes tokens 2-4
      as coordinates, until as many such lines as there are atoms were read.
      For the first geometry only, token 5 is additionally mapped through
      ``io._z_str_to_z_dict`` to build the atom type list.

    NOTE(review): the marker strings look like FHI-aims output -- confirm.

    Parameters
    ----------
    f : file object
        Iterable of text lines that also offers ``close()``. It is closed
        before this function returns or raises.

    Returns
    -------
    tuple
        ``(R, z, E, F)`` as numpy arrays. ``R`` and ``F`` are reshaped to
        ``(-1, n_atoms, 3)``; ``E`` and ``F`` are multiplied by the eV to
        kcal/mol factor defined below.

    Raises
    ------
    ValueError
        If the atom count line never shows up in the input, because the
        parsed values cannot be arranged per geometry without it.
    """

    # Factor applied to energies and forces (reported to the user below as a
    # conversion from eV to kcal/mol).
    eV_to_kcalmol = 0.036749326 / 0.0015946679

    progress_fmt = "\rNumber geometries found so far: {:>7d}"

    e_next, f_next, geo_next = False, False, False
    n_atoms = None
    a_count = 0  # atoms collected for the geometry currently being read
    R, z, E, F = [], [], [], []

    geo_idx = 0
    try:
        for line in f:
            if not n_atoms:
                # Still waiting for the header that announces the atom count.
                if (
                    'The structure contains' in line
                    and 'atoms, and a total of' in line
                ):
                    n_atoms = int(line.split()[3])
                    print('Number atoms per geometry: {:>7d}'.format(n_atoms))
                continue

            cols = line.split()
            if e_next:
                E.append(float(cols[5]))
                e_next = False
            elif f_next:
                F.append(list(map(float, cols[2:5])))
                # The second token is the 1-based atom index; the force block
                # is over once the last atom was read.
                if int(cols[1]) == n_atoms:
                    f_next = False
            elif geo_next:
                if 'atom' in cols:
                    a_count += 1
                    R.append(list(map(float, cols[1:4])))

                    # Atom types are only collected from the first geometry.
                    if geo_idx == 0:
                        z.append(io._z_str_to_z_dict[cols[4]])

                    if a_count == n_atoms:
                        geo_next = False
                        geo_idx += 1
                        # Report progress once per 1000 geometries, at the
                        # moment the counter changes, instead of on every
                        # input line read while it sits on a multiple.
                        if geo_idx % 1000 == 0:
                            sys.stdout.write(progress_fmt.format(geo_idx))
                            sys.stdout.flush()
            elif 'Energy and forces in a compact form:' in line:
                e_next = True
            elif 'Total atomic forces (unitary forces cleaned) [eV/Ang]:' in line:
                f_next = True
            elif (
                'Atomic structure (and velocities) as used in the preceding time step:'
                in line
            ):
                geo_next = True
                a_count = 0

        sys.stdout.write(progress_fmt.format(geo_idx))
        sys.stdout.flush()

        if not n_atoms:
            # Without the atom count the reshape below would fail with an
            # unhelpful message; fail with a description of the cause.
            raise ValueError(
                'Number of atoms per geometry not found in input: no line '
                'containing \'The structure contains\' and '
                '\'atoms, and a total of\' was encountered.'
            )

        print(
            '\n'
            + ui.info_str('[INFO]')
            + ' Energies and forces have been converted from eV to kcal/mol(/Ang)'
        )

        R = np.array(R).reshape(-1, n_atoms, 3)
        z = np.array(z)
        E = np.array(E) * eV_to_kcalmol
        F = np.array(F).reshape(-1, n_atoms, 3) * eV_to_kcalmol
    finally:
        # Release the handle on every exit path, not only on success.
        f.close()

    return (R, z, E, F)
# Gather the fields of the extracted subset. 'name' and 'theory' are taken
# from the source dataset and cast to str; R, E and F come from the
# surrounding code (not visible in this fragment).
base_vars = dict(
    type='d',
    name=dataset['name'].astype(str),
    theory=dataset['theory'].astype(str),
    z=dataset['z'],
    R=R,
    E=E,
    F=F,
)
# The fingerprint is computed before the 'md5' entry itself is stored.
base_vars['md5'] = io.dataset_md5(base_vars)

# Output file: '<stem of the dataset file name>_<s>.npz'.
subset_file_name = '%s_%s.npz' % (
    os.path.splitext(os.path.basename(dataset_path))[0],
    s,
)
file_exists = os.path.isfile(subset_file_name)

if file_exists and not args.overwrite:
    # Refuse to clobber an existing file unless overwriting was requested:
    # tell the user how to re-run, then stop the script.
    exists_msg = ' %s dataset \'%s\' already exists.' % (
        s.capitalize(),
        subset_file_name,
    )
    rerun_hint = '\n Run \'python %s -o %s %s\' to overwrite.\n' % (
        os.path.basename(__file__),
        model_path,
        dataset_path,
    )
    print(ui.warn_str('[WARN]') + exists_msg + rerun_hint)
    sys.exit()

if file_exists:
    # Only reachable with the overwrite flag set.
    print(ui.info_str('[INFO]') + ' Overwriting existing model file.')

np.savez_compressed(subset_file_name, **base_vars)
ui.progr_toggle(
    is_done=True,
    disp_str='Extracted %s dataset saved to \'%s\'' % (s, subset_file_name),
)
'--overwrite', dest='overwrite', action='store_true', help='overwrite existing dataset file') args = parser.parse_args() geometries = args.geometries forces = args.forces energies = args.energies energy_col = args.energy_col name = os.path.splitext(os.path.basename(geometries.name))[0] dataset_file_name = name + '.npz' dataset_exists = os.path.isfile(dataset_file_name) if dataset_exists and args.overwrite: print ui.info_str('[INFO]') + ' Overwriting existing dataset file.' if not dataset_exists or args.overwrite: print 'Writing dataset to \'%s\'...' % dataset_file_name else: sys.exit( ui.fail_str('[FAIL]') + ' Dataset \'%s\' already exists.' % dataset_file_name) print 'Reading geometries...' R, z = read_concat_xyz(geometries) print 'Reading forces...' F, _ = read_concat_xyz(forces) print 'Reading energies from column %d...' % energy_col E = read_out_file(energies, energy_col)
parser.add_argument( '-o', '--overwrite', dest='overwrite', action='store_true', help='overwrite existing dataset file', ) args = parser.parse_args() dataset = args.dataset name = os.path.splitext(os.path.basename(dataset.name))[0] dataset_file_name = name + '.npz' dataset_exists = os.path.isfile(dataset_file_name) if dataset_exists and args.overwrite: print(ui.info_str('[INFO]') + ' Overwriting existing dataset file.') if not dataset_exists or args.overwrite: print('Writing dataset to \'%s\'...' % dataset_file_name) else: sys.exit( ui.fail_str('[FAIL]') + ' Dataset \'%s\' already exists.' % dataset_file_name) R, z, E, F = read_reference_data(dataset) # Prune all arrays to same length. n_mols = min(min(R.shape[0], F.shape[0]), E.shape[0]) if n_mols != R.shape[0] or n_mols != F.shape[0] or n_mols != E.shape[0]: print( ui.warn_str('[WARN]') + ' Incomplete output detected: Final dataset was pruned to %d points.' %
help = 'path to dataset file') parser.add_argument('-o', '--overwrite', dest='overwrite', action='store_true', help='overwrite existing xyz dataset file') args = parser.parse_args() dataset_path, dataset = args.dataset name = os.path.splitext(os.path.basename(dataset_path))[0] dataset_file_name = name + '.xyz' xyz_exists = os.path.isfile(dataset_file_name) if xyz_exists and args.overwrite: print ui.info_str('[INFO]') + ' Overwriting existing xyz dataset file.' if not xyz_exists or args.overwrite: print 'Writing dataset to \'%s\'...' % dataset_file_name else: sys.exit( ui.fail_str('[FAIL]') + ' Dataset \'%s\' already exists.' % dataset_file_name) z = dataset['z'] R = dataset['R'] F = dataset['F'] E = dataset['E'] n = R.shape[0] try: with open(dataset_file_name, 'w') as f: