예제 #1
0
def read_nonstd_ext_xyz(f):
    """Parse a non-standard extended XYZ stream into raw arrays.

    The file is read as fixed-size records of ``n_atoms + 2`` lines
    (atom count, comment/energy line, then one line per atom).  Returns a
    tuple ``(R, z, E, F)`` where ``R`` and ``F`` have shape
    ``(n_geometries, n_atoms, 3)``, ``z`` holds the atomic numbers of the
    first molecule, and ``E`` is ``None`` when no comment line parsed as a
    float.  The handle ``f`` is closed before returning.
    """
    n_atoms = None

    R, z, E, F = [], [], [], []
    for idx, raw in enumerate(f):
        raw = raw.strip()

        # The very first line fixes the per-geometry atom count.
        if not n_atoms:
            n_atoms = int(raw)
            print('Number atoms per geometry: {:,}'.format(n_atoms))

        # geo_idx: which geometry we are in; rel: line offset inside it.
        geo_idx, rel = divmod(idx, n_atoms + 2)

        if rel == 1:
            # Comment line: record it as an energy only if it is a float.
            try:
                E.append(float(raw))
            except ValueError:
                pass

        if rel >= 2:
            fields = raw.split()
            R.append([float(c) for c in fields[1:4]])
            if geo_idx == 0:  # element symbols come from the first molecule only
                z.append(io._z_str_to_z_dict[fields[0]])
            F.append([float(c) for c in fields[4:7]])

        if geo_idx % 1000 == 0:
            sys.stdout.write('\rNumber geometries found so far: {:,}'.format(geo_idx))
            sys.stdout.flush()
    sys.stdout.write('\rNumber geometries found so far: {:,}'.format(geo_idx))
    sys.stdout.flush()
    print()

    R = np.array(R).reshape(-1, n_atoms, 3)
    z = np.array(z)
    E = np.array(E) if E else None
    F = np.array(F).reshape(-1, n_atoms, 3)

    # Every geometry must carry forces; otherwise abort with an error.
    if F.shape[0] != R.shape[0]:
        sys.exit(
            ui.color_str('[FAIL]', fore_color=ui.RED, bold=True)
            + ' Force labels are missing from dataset or are incomplete!'
        )

    f.close()
    return (R, z, E, F)
예제 #2
0
    '--overwrite',
    dest='overwrite',
    action='store_true',
    help='overwrite existing xyz dataset file',
)

# Parse CLI arguments; `args.dataset` appears to be a (path, data-mapping)
# pair — presumably produced by an argparse type converter; confirm
# against the parser setup above.
args = parser.parse_args()
dataset_path, dataset = args.dataset

# Output xyz file is named after the dataset's base name.
name = os.path.splitext(os.path.basename(dataset_path))[0]
dataset_file_name = name + '.xyz'

# Refuse to clobber an existing file unless --overwrite was given.
xyz_exists = os.path.isfile(dataset_file_name)
if xyz_exists and args.overwrite:
    print(
        ui.color_str('[INFO]', bold=True) +
        ' Overwriting existing xyz dataset file.')
if not xyz_exists or args.overwrite:
    print(
        ui.color_str('[INFO]', bold=True) +
        ' Writing dataset to \'{}\'...'.format(dataset_file_name))
else:
    sys.exit(
        ui.color_str('[FAIL]', fore_color=ui.RED, bold=True) +
        ' Dataset \'{}\' already exists.'.format(dataset_file_name))

# Arrays needed for the xyz export: positions, atomic numbers, forces.
R = dataset['R']
z = dataset['z']
F = dataset['F']

# Lattice vectors are optional in the dataset.
lattice = dataset['lattice'] if 'lattice' in dataset else None
예제 #3
0
파일: models.py 프로젝트: fonsecag/MLFF
def sgdml_all_default(train_indices, args):
    """Run the default sGDML training assistant end to end.

    Mirrors the ``sgdml all`` workflow: task creation, training,
    validation, hyper-parameter selection, and testing.

    Parameters
    ----------
    train_indices : array-like, int, or None
        Explicit dataset indices to train on.  When given (and not a plain
        int), every task file created in step 1 is rewritten to use
        exactly these points.
    args : dict
        Keyword arguments forwarded to the ``sgdml.cli`` entry points;
        must contain at least 'dataset', 'valid_dataset', 'test_dataset',
        'n_test', 'overwrite', 'model_file', 'max_processes', 'use_torch'.

    Returns
    -------
    None
    """
    from sgdml.cli import create, train, validate, select, test
    from sgdml.utils import ui, io

    ui.print_step_title("STEP 1", "Cross-validation task creation")
    task_dir = create(**args)
    dataset = args["dataset"][1]

    if (train_indices is not None) and not isinstance(train_indices, int):
        # Patch every generated task file so it trains on the
        # caller-supplied indices and the matching data slices.
        R_train = dataset["R"][train_indices]
        F_train = dataset["F"][train_indices]
        E_train = dataset["E"][train_indices]

        for file in os.listdir(task_dir):
            if file.endswith(".npz"):
                name = os.path.join(task_dir, file)
                a = dict(np.load(name, allow_pickle=True))
                a["R_train"] = R_train
                a["F_train"] = F_train
                if "E_train" in a:
                    a["E_train"] = E_train
                a["idxs_train"] = train_indices
                np.savez_compressed(name, **a)

    ui.print_step_title("STEP 2", "Training")
    task_dir_arg = io.is_dir_with_file_type(task_dir, "task")
    args["task_dir"] = task_dir_arg
    model_dir_or_file_path = train(**args)

    ui.print_step_title("STEP 3", "Validation")
    model_dir_arg = io.is_dir_with_file_type(model_dir_or_file_path,
                                             "model",
                                             or_file=True)

    valid_dataset = args["valid_dataset"]
    validate(
        model_dir_arg,
        valid_dataset,
        overwrite=False,
        max_processes=args["max_processes"],
        use_torch=args["use_torch"],
    )

    ui.print_step_title("STEP 4", "Hyper-parameter selection")
    model_file_name = select(model_dir_arg, args["overwrite"],
                             args["max_processes"], args["model_file"])

    ui.print_step_title("STEP 5", "Testing")
    model_dir_arg = io.is_dir_with_file_type(model_file_name,
                                             "model",
                                             or_file=True)
    test_dataset = args["test_dataset"]

    test(
        model_dir_arg,
        test_dataset,
        args["n_test"],
        overwrite=False,
        max_processes=args["max_processes"],
        use_torch=args["use_torch"],
    )

    # Fixed typo in user-facing message ("sucessfully" -> "successfully").
    print("\n" + ui.color_str(
        "  DONE  ", fore_color=ui.BLACK, back_color=ui.GREEN, bold=True) +
          " Training assistant finished successfully.")
    print("         This is your model file: '{}'".format(model_file_name))

    # NOTE(review): deletes a module-level name `glob` if one exists.  The
    # intent is unclear from here (presumably cleanup of an earlier global
    # handle) — behavior kept as-is.
    if "glob" in globals():
        global glob
        del glob
예제 #4
0
    '-o',
    '--overwrite',
    dest='overwrite',
    action='store_true',
    help='overwrite existing dataset file',
)
args = parser.parse_args()
# `args.dataset` appears to be an open file object (it has `.name`) —
# presumably an argparse FileType argument; confirm against parser setup.
dataset = args.dataset

# Output npz file is named after the input file's base name.
name = os.path.splitext(os.path.basename(dataset.name))[0]
dataset_file_name = name + '.npz'

# Refuse to clobber an existing file unless -o/--overwrite was given.
dataset_exists = os.path.isfile(dataset_file_name)
if dataset_exists and args.overwrite:
    print(
        ui.color_str('[INFO]', bold=True) +
        ' Overwriting existing dataset file.')
if not dataset_exists or args.overwrite:
    print('Writing dataset to \'{}\'...'.format(dataset_file_name))
else:
    sys.exit(
        ui.color_str('[FAIL]', fore_color=ui.RED, bold=True) +
        ' Dataset \'{}\' already exists.'.format(dataset_file_name))

# Read all geometries from the input file (`read` imported elsewhere;
# index=':' selects every frame).
mols = read(dataset.name, index=':')

# Placeholders filled in later by the (out-of-view) conversion code.
lattice, R, z, E, F = None, None, None, None, None

calc = mols[0].get_calculator()

print("\rNumber geometries found: {:,}\n".format(len(mols)))
예제 #5
0
    '-o',
    '--overwrite',
    dest='overwrite',
    action='store_true',
    help='overwrite existing dataset file',
)
args = parser.parse_args()
# `args.dataset` appears to be an open file object (it has `.name`) —
# presumably an argparse FileType argument; confirm against parser setup.
dataset = args.dataset


# Output npz file is named after the input file's base name.
name = os.path.splitext(os.path.basename(dataset.name))[0]
dataset_file_name = name + '.npz'

# Refuse to clobber an existing file unless -o/--overwrite was given.
dataset_exists = os.path.isfile(dataset_file_name)
if dataset_exists and args.overwrite:
    print(ui.color_str('[INFO]', bold=True) + ' Overwriting existing dataset file.')
if not dataset_exists or args.overwrite:
    print('Writing dataset to \'{}\'...'.format(dataset_file_name))
else:
    sys.exit(
        ui.color_str('[FAIL]', fore_color=ui.RED, bold=True)
        + ' Dataset \'{}\' already exists.'.format(dataset_file_name)
    )

# Placeholders filled in later by the (out-of-view) conversion code.
lattice, R, z, E, F = None, None, None, None, None

# Read all geometries (`read` imported elsewhere; index=':' selects every
# frame).  A present calculator marks extended-xyz input.
mols = read(dataset.name, index=':')
calc = mols[0].get_calculator()
is_extxyz = calc is not None
if is_extxyz:
예제 #6
0
def sgdml_all_default(train_indices, args):
    """Run the default sGDML training assistant end to end.

    Mirrors the ``sgdml all`` workflow: task creation, training,
    validation, hyper-parameter selection, and testing.

    Parameters
    ----------
    train_indices : array-like, int, or None
        Explicit dataset indices to train on.  When given (and not a plain
        int), every task file created in step 1 is rewritten to use
        exactly these points.
    args : dict
        Keyword arguments forwarded to the ``sgdml.cli`` entry points;
        must contain at least 'dataset', 'valid_dataset', 'test_dataset',
        'n_test', 'overwrite', 'model_file', 'max_processes', 'use_torch'.

    Returns
    -------
    None
    """
    from sgdml.cli import create, train, validate, select, test
    from sgdml.utils import ui, io

    ui.print_step_title('STEP 1', 'Cross-validation task creation')
    task_dir = create(**args)
    dataset = args['dataset'][1]

    if (train_indices is not None) and not isinstance(train_indices, int):
        # Patch every generated task file so it trains on the
        # caller-supplied indices and the matching data slices.
        R_train = dataset['R'][train_indices]
        F_train = dataset['F'][train_indices]
        E_train = dataset['E'][train_indices]

        for file in os.listdir(task_dir):
            if file.endswith('.npz'):
                name = os.path.join(task_dir, file)
                a = dict(np.load(name, allow_pickle=True))
                a['R_train'] = R_train
                a['F_train'] = F_train
                if 'E_train' in a:
                    # BUG FIX: was `a['E_train'] = R_train`, which wrote
                    # the position slice into the energy labels.
                    a['E_train'] = E_train
                a['idxs_train'] = train_indices
                np.savez_compressed(name, **a)

    ui.print_step_title('STEP 2', 'Training')
    task_dir_arg = io.is_dir_with_file_type(task_dir, 'task')
    args['task_dir'] = task_dir_arg
    model_dir_or_file_path = train(**args)

    ui.print_step_title('STEP 3', 'Validation')
    model_dir_arg = io.is_dir_with_file_type(model_dir_or_file_path,
                                             'model',
                                             or_file=True)

    valid_dataset = args['valid_dataset']
    validate(
        model_dir_arg,
        valid_dataset,
        overwrite=False,
        max_processes=args['max_processes'],
        use_torch=args['use_torch'],
    )

    ui.print_step_title('STEP 4', 'Hyper-parameter selection')
    model_file_name = select(model_dir_arg, args['overwrite'],
                             args['max_processes'], args['model_file'])

    ui.print_step_title('STEP 5', 'Testing')
    model_dir_arg = io.is_dir_with_file_type(model_file_name,
                                             'model',
                                             or_file=True)
    test_dataset = args['test_dataset']

    test(
        model_dir_arg,
        test_dataset,
        args['n_test'],
        overwrite=False,
        max_processes=args['max_processes'],
        use_torch=args['use_torch'],
    )

    # Fixed typo in user-facing message ("sucessfully" -> "successfully").
    print('\n' + ui.color_str(
        '  DONE  ', fore_color=ui.BLACK, back_color=ui.GREEN, bold=True) +
          ' Training assistant finished successfully.')
    print('         This is your model file: \'{}\''.format(model_file_name))

    # NOTE(review): deletes a module-level name `glob` if one exists.  The
    # intent is unclear from here (presumably cleanup of an earlier global
    # handle) — behavior kept as-is.
    if "glob" in globals():
        global glob
        del glob