def read_nonstd_ext_xyz(f):
    """Parse a non-standard extended-XYZ file into raw arrays.

    Expected layout per geometry: one line with the atom count, one
    comment line that may hold the energy, then ``n_atoms`` lines of
    ``<element> x y z fx fy fz``. The atom count is read once from the
    first line and assumed constant for the whole file.

    Parameters
    ----------
    f : file object
        Open handle to the xyz file; it is closed before returning.

    Returns
    -------
    tuple
        ``(R, z, E, F)``: positions and forces with shape
        ``(n_geometries, n_atoms, 3)``, atomic numbers ``z`` of the
        first geometry, and energies ``E`` (``None`` when no numeric
        comment lines were found). Exits the process if the number of
        force rows does not match the number of geometries.
    """
    n_atoms = None

    R, z, E, F = [], [], [], []
    for i, line in enumerate(f):
        line = line.strip()
        # 'is None' (not truthiness) so the count is read exactly once,
        # even in the degenerate case of a zero atom count.
        if n_atoms is None:
            n_atoms = int(line)
            print('Number atoms per geometry: {:,}'.format(n_atoms))

        # Each geometry spans n_atoms + 2 lines (count line + comment line).
        file_i, line_i = divmod(i, n_atoms + 2)

        if line_i == 1:  # comment line: may carry the energy
            try:
                e = float(line)
            except ValueError:
                pass  # energies are optional; ignore non-numeric comments
            else:
                E.append(e)

        if line_i >= 2:  # atom line: element, position, force
            # Split only where the columns are actually needed.
            cols = line.split()
            R.append(list(map(float, cols[1:4])))
            if file_i == 0:  # first molecule defines the atom types
                z.append(io._z_str_to_z_dict[cols[0]])
            F.append(list(map(float, cols[4:7])))

        if file_i % 1000 == 0:
            sys.stdout.write(
                '\rNumber geometries found so far: {:,}'.format(file_i)
            )
            sys.stdout.flush()
    sys.stdout.write('\rNumber geometries found so far: {:,}'.format(file_i))
    sys.stdout.flush()
    print()

    R = np.array(R).reshape(-1, n_atoms, 3)
    z = np.array(z)
    E = None if not E else np.array(E)
    F = np.array(F).reshape(-1, n_atoms, 3)

    if F.shape[0] != R.shape[0]:
        sys.exit(
            ui.color_str('[FAIL]', fore_color=ui.RED, bold=True)
            + ' Force labels are missing from dataset or are incomplete!'
        )

    f.close()
    return (R, z, E, F)
# NOTE(review): this chunk begins inside a parser.add_argument(...) call
# whose opening line is outside this view.
    '--overwrite',
    dest='overwrite',
    action='store_true',
    help='overwrite existing xyz dataset file',
)
args = parser.parse_args()

# args.dataset is a (path, loaded-dataset-dict) pair; presumably produced
# by an argparse type= converter — TODO confirm against the parser setup.
dataset_path, dataset = args.dataset

# Output file: same base name as the input dataset, with an .xyz extension.
name = os.path.splitext(os.path.basename(dataset_path))[0]
dataset_file_name = name + '.xyz'

xyz_exists = os.path.isfile(dataset_file_name)
if xyz_exists and args.overwrite:
    print(
        ui.color_str('[INFO]', bold=True)
        + ' Overwriting existing xyz dataset file.')
if not xyz_exists or args.overwrite:
    print(
        ui.color_str('[INFO]', bold=True)
        + ' Writing dataset to \'{}\'...'.format(dataset_file_name))
else:
    # Refuse to clobber an existing file unless --overwrite was given.
    sys.exit(
        ui.color_str('[FAIL]', fore_color=ui.RED, bold=True)
        + ' Dataset \'{}\' already exists.'.format(dataset_file_name))

# Pull the raw arrays out of the dataset; 'lattice' is optional.
R = dataset['R']
z = dataset['z']
F = dataset['F']
lattice = dataset['lattice'] if 'lattice' in dataset else None
def sgdml_all_default(train_indices, args):
    """Run the default sGDML pipeline: create -> train -> validate -> select -> test.

    Parameters
    ----------
    train_indices : array-like of int, int, or None
        Explicit training-set indices. When given (and not a plain int),
        every freshly created task file is patched in place so that
        R_train/F_train/E_train come from these indices instead of the
        sampler's choice.
    args : dict
        Keyword arguments forwarded to the sgdml.cli steps; read keys
        include 'dataset', 'valid_dataset', 'test_dataset', 'n_test',
        'overwrite', 'max_processes', 'use_torch', 'model_file'.
        Mutated: 'task_dir' is overwritten with the created task dir.
    """
    from sgdml.cli import create, train, validate, select, test
    from sgdml.utils import ui, io

    ui.print_step_title("STEP 1", "Cross-validation task creation")
    task_dir = create(**args)

    # args["dataset"] is a (path, dict) pair; take the loaded dict.
    dataset = args["dataset"][1]
    if (train_indices is not None) and not (type(train_indices) == int):
        # CHANGE TRAINING INDICES
        # AND RELATED ARRAYS
        R_train = dataset["R"][train_indices]
        F_train = dataset["F"][train_indices]
        E_train = dataset["E"][train_indices]
        # Rewrite every task file produced by create() so training uses
        # exactly the requested indices.
        for file in os.listdir(task_dir):
            if file.endswith(".npz"):
                name = os.path.join(task_dir, file)
                a = dict(np.load(name, allow_pickle=True))
                a["R_train"] = R_train
                a["F_train"] = F_train
                if "E_train" in a:
                    a["E_train"] = E_train
                a["idxs_train"] = train_indices
                np.savez_compressed(name, **a)

    ui.print_step_title("STEP 2", "Training")
    task_dir_arg = io.is_dir_with_file_type(task_dir, "task")
    args["task_dir"] = task_dir_arg
    model_dir_or_file_path = train(**args)

    ui.print_step_title("STEP 3", "Validation")
    model_dir_arg = io.is_dir_with_file_type(
        model_dir_or_file_path, "model", or_file=True)
    valid_dataset = args["valid_dataset"]
    validate(
        model_dir_arg,
        valid_dataset,
        overwrite=False,
        max_processes=args["max_processes"],
        use_torch=args["use_torch"],
    )

    ui.print_step_title("STEP 4", "Hyper-parameter selection")
    model_file_name = select(
        model_dir_arg, args["overwrite"], args["max_processes"],
        args["model_file"])

    ui.print_step_title("STEP 5", "Testing")
    model_dir_arg = io.is_dir_with_file_type(
        model_file_name, "model", or_file=True)
    test_dataset = args["test_dataset"]
    test(
        model_dir_arg,
        test_dataset,
        args["n_test"],
        overwrite=False,
        max_processes=args["max_processes"],
        use_torch=args["use_torch"],
    )
    print("\n" + ui.color_str(
        " DONE ", fore_color=ui.BLACK, back_color=ui.GREEN, bold=True)
        + " Training assistant finished sucessfully.")
    print(" This is your model file: '{}'".format(model_file_name))

    # NOTE(review): deletes a module-level 'glob' binding if one exists —
    # presumably to undo an import made elsewhere; confirm still needed.
    if "glob" in globals():
        global glob
        del glob
# NOTE(review): this chunk begins inside a parser.add_argument(...) call
# whose opening line is outside this view.
    '-o',
    '--overwrite',
    dest='overwrite',
    action='store_true',
    help='overwrite existing dataset file',
)
args = parser.parse_args()
dataset = args.dataset  # has a .name attribute holding the input path

# Output file: same base name as the input, with an .npz extension.
name = os.path.splitext(os.path.basename(dataset.name))[0]
dataset_file_name = name + '.npz'

dataset_exists = os.path.isfile(dataset_file_name)
if dataset_exists and args.overwrite:
    print(
        ui.color_str('[INFO]', bold=True)
        + ' Overwriting existing dataset file.')
if not dataset_exists or args.overwrite:
    print('Writing dataset to \'{}\'...'.format(dataset_file_name))
else:
    # Refuse to clobber an existing file unless --overwrite was given.
    sys.exit(
        ui.color_str('[FAIL]', fore_color=ui.RED, bold=True)
        + ' Dataset \'{}\' already exists.'.format(dataset_file_name))

# Read every geometry from the input file (index=':' selects all frames).
mols = read(dataset.name, index=':')

lattice, R, z, E, F = None, None, None, None, None

# Calculator attached to the first geometry; presumably used further down
# to extract energies/forces — TODO confirm against the following code.
calc = mols[0].get_calculator()

print("\rNumber geometries found: {:,}\n".format(len(mols)))
# NOTE(review): this chunk begins inside a parser.add_argument(...) call
# whose opening line is outside this view, and ends on an 'if' whose body
# is also outside this view.
    '-o',
    '--overwrite',
    dest='overwrite',
    action='store_true',
    help='overwrite existing dataset file',
)
args = parser.parse_args()
dataset = args.dataset  # has a .name attribute holding the input path

# Output file: same base name as the input, with an .npz extension.
name = os.path.splitext(os.path.basename(dataset.name))[0]
dataset_file_name = name + '.npz'

dataset_exists = os.path.isfile(dataset_file_name)
if dataset_exists and args.overwrite:
    print(ui.color_str('[INFO]', bold=True)
          + ' Overwriting existing dataset file.')
if not dataset_exists or args.overwrite:
    print('Writing dataset to \'{}\'...'.format(dataset_file_name))
else:
    # Refuse to clobber an existing file unless --overwrite was given.
    sys.exit(
        ui.color_str('[FAIL]', fore_color=ui.RED, bold=True)
        + ' Dataset \'{}\' already exists.'.format(dataset_file_name)
    )

lattice, R, z, E, F = None, None, None, None, None

# Read every geometry from the input file (index=':' selects all frames).
mols = read(dataset.name, index=':')

# An attached calculator marks the file as extended-xyz with embedded
# results; the branch body continues beyond this view.
calc = mols[0].get_calculator()
is_extxyz = calc is not None
if is_extxyz:
def sgdml_all_default(train_indices, args):
    """Run the default sGDML pipeline: create -> train -> validate -> select -> test.

    Parameters
    ----------
    train_indices : array-like of int, int, or None
        Explicit training-set indices. When given (and not a plain int),
        every freshly created task file is patched in place so that
        R_train/F_train/E_train come from these indices instead of the
        sampler's choice.
    args : dict
        Keyword arguments forwarded to the sgdml.cli steps; read keys
        include 'dataset', 'valid_dataset', 'test_dataset', 'n_test',
        'overwrite', 'max_processes', 'use_torch', 'model_file'.
        Mutated: 'task_dir' is overwritten with the created task dir.
    """
    from sgdml.cli import create, train, validate, select, test
    from sgdml.utils import ui, io

    ui.print_step_title('STEP 1', 'Cross-validation task creation')
    task_dir = create(**args)

    # args['dataset'] is a (path, dict) pair; take the loaded dict.
    dataset = args['dataset'][1]
    if (train_indices is not None) and not isinstance(train_indices, int):
        # CHANGE TRAINING INDICES
        # AND RELATED ARRAYS
        R_train = dataset['R'][train_indices]
        F_train = dataset['F'][train_indices]
        E_train = dataset['E'][train_indices]
        # Rewrite every task file produced by create() so training uses
        # exactly the requested indices.
        for file in os.listdir(task_dir):
            if file.endswith('.npz'):
                name = os.path.join(task_dir, file)
                a = dict(np.load(name, allow_pickle=True))
                a['R_train'] = R_train
                a['F_train'] = F_train
                if 'E_train' in a:
                    # BUG FIX: energies were overwritten with positions
                    # (R_train); store the sliced energies instead.
                    a['E_train'] = E_train
                a['idxs_train'] = train_indices
                np.savez_compressed(name, **a)

    ui.print_step_title('STEP 2', 'Training')
    task_dir_arg = io.is_dir_with_file_type(task_dir, 'task')
    args['task_dir'] = task_dir_arg
    model_dir_or_file_path = train(**args)

    ui.print_step_title('STEP 3', 'Validation')
    model_dir_arg = io.is_dir_with_file_type(
        model_dir_or_file_path, 'model', or_file=True)
    valid_dataset = args['valid_dataset']
    validate(
        model_dir_arg,
        valid_dataset,
        overwrite=False,
        max_processes=args['max_processes'],
        use_torch=args['use_torch'],
    )

    ui.print_step_title('STEP 4', 'Hyper-parameter selection')
    model_file_name = select(
        model_dir_arg, args['overwrite'], args['max_processes'],
        args['model_file'])

    ui.print_step_title('STEP 5', 'Testing')
    model_dir_arg = io.is_dir_with_file_type(
        model_file_name, 'model', or_file=True)
    test_dataset = args['test_dataset']
    test(
        model_dir_arg,
        test_dataset,
        args['n_test'],
        overwrite=False,
        max_processes=args['max_processes'],
        use_torch=args['use_torch'],
    )
    # Typo fixed in the user-facing message ('sucessfully').
    print('\n' + ui.color_str(
        ' DONE ', fore_color=ui.BLACK, back_color=ui.GREEN, bold=True)
        + ' Training assistant finished successfully.')
    print(' This is your model file: \'{}\''.format(model_file_name))

    # NOTE(review): deletes a module-level 'glob' binding if one exists —
    # presumably to undo an import made elsewhere; confirm still needed.
    if "glob" in globals():
        global glob
        del glob