def read_reference_data(f):  # noqa C901
    """Parse geometries, energies and forces from a calculation log.

    The log is scanned line by line. Nothing is extracted until the header
    line stating the number of atoms has been seen. After that, three marker
    lines switch the parser into the matching section:

    * ``Energy and forces in a compact form:`` -- the sixth
      whitespace-separated field of the next line is stored as an energy.
    * ``Total atomic forces (unitary forces cleaned) [eV/Ang]:`` -- fields
      three to five of each following line are stored as a force vector,
      up to and including the line whose second field equals the atom count.
    * ``Atomic structure (and velocities) as used in the preceding time
      step:`` -- fields two to four of each following line containing the
      token ``atom`` are stored as a position, until as many such lines as
      there are atoms were read. For the first geometry only, the fifth
      field is additionally mapped through ``io._z_str_to_z_dict``.

    The marker strings suggest FHI-aims molecular dynamics output; that is
    an assumption, the code itself only relies on the strings above.

    Parameters
    ----------
    f : iterable of str
        Open text file (or any line iterator with a ``close()`` method).
        It is closed by this function, also when parsing fails.

    Returns
    -------
    tuple
        ``(R, z, E, F)`` as NumPy arrays: positions reshaped to
        ``(-1, n_atoms, 3)``, the looked-up per-atom values of the first
        geometry, energies multiplied by the eV -> kcal/mol factor, and
        forces reshaped to ``(-1, n_atoms, 3)`` and multiplied by the same
        factor.
    """
    # NOTE(review): this ratio evaluates to ~23.045, whereas the commonly
    # quoted eV -> kcal/mol factor is ~23.061; confirm the constants.
    eV_to_kcalmol = 0.036749326 / 0.0015946679

    progress_fmt = "\rNumber geometries found so far: {:>7d}"

    e_next, f_next, geo_next = False, False, False
    n_atoms = None
    # Bound up front so the counter exists on every path (it is reset each
    # time a geometry section starts).
    a_count = 0
    R, z, E, F = [], [], [], []

    geo_idx = 0
    try:
        for line in f:
            if not n_atoms:
                # Still looking for the header that announces the atom count.
                if ('The structure contains' in line
                        and 'atoms,  and a total of' in line):
                    n_atoms = int(line.split()[3])
                    print('Number atoms per geometry:      {:>7d}'.format(
                        n_atoms))
                continue

            cols = line.split()
            if e_next:
                E.append(float(cols[5]))
                e_next = False
            elif f_next:
                atom_no = int(cols[1])
                F.append(list(map(float, cols[2:5])))
                if atom_no == n_atoms:
                    # Last atom of this force table.
                    f_next = False
            elif geo_next:
                if 'atom' in cols:
                    a_count += 1
                    R.append(list(map(float, cols[1:4])))

                    # The composition is only recorded once, from the first
                    # geometry.
                    if geo_idx == 0:
                        z.append(io._z_str_to_z_dict[cols[4]])

                    if a_count == n_atoms:
                        geo_next = False
                        geo_idx += 1
                        # Report each milestone once, when it is reached,
                        # instead of re-writing the same message for every
                        # subsequent input line.
                        if geo_idx % 1000 == 0:
                            sys.stdout.write(progress_fmt.format(geo_idx))
                            sys.stdout.flush()
            elif 'Energy and forces in a compact form:' in line:
                e_next = True
            elif ('Total atomic forces (unitary forces cleaned) [eV/Ang]:'
                  in line):
                f_next = True
            elif ('Atomic structure (and velocities) as used in the '
                  'preceding time step:' in line):
                geo_next = True
                a_count = 0
    finally:
        # Release the handle even if a malformed line aborts the parse.
        f.close()

    sys.stdout.write(progress_fmt.format(geo_idx))
    sys.stdout.flush()
    print('\n' + ui.info_str('[INFO]') +
          ' Energies and forces have been converted from eV to kcal/mol(/Ang)')

    R = np.array(R).reshape(-1, n_atoms, 3)
    z = np.array(z)
    E = np.array(E) * eV_to_kcalmol
    F = np.array(F).reshape(-1, n_atoms, 3) * eV_to_kcalmol

    return (R, z, E, F)
    # NOTE(review): this is a fragment of a routine whose header is not visible
    # here; `dataset`, `s`, `args`, `dataset_path` and `model_path` are presumably
    # bound earlier in that routine -- confirm against the full file.

    # Contents of the extracted subset: name, theory and z are copied from the
    # source dataset, R, E and F are the arrays currently in scope.
    base_vars = {
        'type': 'd',
        'name': dataset['name'].astype(str),
        'theory': dataset['theory'].astype(str),
        'z': dataset['z'],
        'R': R,
        'E': E,
        'F': F,
    }
    # Presumably a digest over the entries assembled above (computed before the
    # 'md5' key itself is added) -- verify against io.dataset_md5.
    base_vars['md5'] = io.dataset_md5(base_vars)

    # Output name is '<stem of dataset_path>_<s>.npz'; only the basename is used,
    # so the path is relative to the current working directory.
    subset_file_name = '%s_%s.npz' % (
        os.path.splitext(os.path.basename(dataset_path))[0],
        s,
    )
    file_exists = os.path.isfile(subset_file_name)
    # NOTE(review): the message says "model file" although a dataset file is
    # written below -- possibly a copy/paste leftover.
    if file_exists and args.overwrite:
        print(ui.info_str('[INFO]') + ' Overwriting existing model file.')
    if not file_exists or args.overwrite:
        np.savez_compressed(subset_file_name, **base_vars)
        ui.progr_toggle(is_done=True, disp_str='Extracted %s dataset saved to \'%s\'' % (s, subset_file_name))
    else:
        # Refuse to clobber an existing file and show the command that would.
        print(
            ui.warn_str('[WARN]')
            + ' %s dataset \'%s\' already exists.' % (s.capitalize(), subset_file_name)
            + '\n       Run \'python %s -o %s %s\' to overwrite.\n'
            % (os.path.basename(__file__), model_path, dataset_path)
        )
        # Called without an argument, so the process exits with status 0.
        sys.exit()
# Example #3
# 0
                    '--overwrite',
                    dest='overwrite',
                    action='store_true',
                    help='overwrite existing dataset file')
# Parse the command line and pull out the three input files plus the index of
# the column that holds the energies.
args = parser.parse_args()
geometries = args.geometries
forces = args.forces
energies = args.energies
energy_col = args.energy_col

# The dataset is named after the geometries file and written to the current
# working directory (only the basename is kept).
name = os.path.splitext(os.path.basename(geometries.name))[0]
dataset_file_name = name + '.npz'

# Refuse to overwrite an existing dataset unless -o/--overwrite was given.
# print() is called with a single parenthesised argument throughout, which
# behaves the same as a Python 2 print statement and is valid Python 3.
dataset_exists = os.path.isfile(dataset_file_name)
if dataset_exists and args.overwrite:
    print(ui.info_str('[INFO]') + ' Overwriting existing dataset file.')
if not dataset_exists or args.overwrite:
    print('Writing dataset to \'%s\'...' % dataset_file_name)
else:
    sys.exit(
        ui.fail_str('[FAIL]') +
        ' Dataset \'%s\' already exists.' % dataset_file_name)

print('Reading geometries...')
R, z = read_concat_xyz(geometries)

# The second return value of the forces file is discarded.
print('Reading forces...')
F, _ = read_concat_xyz(forces)

print('Reading energies from column %d...' % energy_col)
E = read_out_file(energies, energy_col)
# Register the overwrite switch, then parse the command line.
parser.add_argument(
    '-o', '--overwrite',
    dest='overwrite', action='store_true',
    help='overwrite existing dataset file',
)
args = parser.parse_args()
dataset = args.dataset

# The output file is the input's basename with its extension swapped for
# '.npz'.
name = os.path.splitext(os.path.basename(dataset.name))[0]
dataset_file_name = name + '.npz'

# Bail out early if the target exists and overwriting was not requested;
# otherwise announce what is about to happen.
dataset_exists = os.path.isfile(dataset_file_name)
if dataset_exists and not args.overwrite:
    sys.exit(
        ui.fail_str('[FAIL]') +
        ' Dataset \'%s\' already exists.' % dataset_file_name)
if dataset_exists:
    print(ui.info_str('[INFO]') + ' Overwriting existing dataset file.')
print('Writing dataset to \'%s\'...' % dataset_file_name)

# Parse the reference calculation output into the four dataset arrays.
R, z, E, F = read_reference_data(dataset)

# Prune all arrays to same length.
n_mols = min(min(R.shape[0], F.shape[0]), E.shape[0])
if n_mols != R.shape[0] or n_mols != F.shape[0] or n_mols != E.shape[0]:
    print(
        ui.warn_str('[WARN]') +
        ' Incomplete output detected: Final dataset was pruned to %d points.' %
# Example #5
# 0
        help  = 'path to dataset file')
parser.add_argument('-o',
                    '--overwrite',
                    dest='overwrite',
                    action='store_true',
                    help='overwrite existing xyz dataset file')

args = parser.parse_args()
# The 'dataset' argument yields a pair: the path given on the command line
# and the dataset object that is indexed by key below.
dataset_path, dataset = args.dataset

# The xyz file is named after the dataset file and written to the current
# working directory (only the basename is kept).
name = os.path.splitext(os.path.basename(dataset_path))[0]
dataset_file_name = name + '.xyz'

# Refuse to overwrite an existing file unless -o/--overwrite was given.
# print() is called with a single parenthesised argument, which behaves the
# same as a Python 2 print statement and is valid Python 3.
xyz_exists = os.path.isfile(dataset_file_name)
if xyz_exists and args.overwrite:
    print(ui.info_str('[INFO]') + ' Overwriting existing xyz dataset file.')
if not xyz_exists or args.overwrite:
    print('Writing dataset to \'%s\'...' % dataset_file_name)
else:
    sys.exit(
        ui.fail_str('[FAIL]') +
        ' Dataset \'%s\' already exists.' % dataset_file_name)

z = dataset['z']
R = dataset['R']
F = dataset['F']
E = dataset['E']
# Size of the first axis of R -- presumably the number of geometries.
n = R.shape[0]

try:
    with open(dataset_file_name, 'w') as f: