Exemple #1
0
def test_radial_gaussian():
    """Compare the analytical gaussian projection with the radial-grid one.

    A water molecule is converged with pyscf. Its density is projected once
    from the density matrix (``'grid': 'analytical'``) and once from the
    density evaluated on the integration grid (``'grid': 'radial'``). The test
    passes when the norms of the two ``'X'`` coefficient sets agree.
    """
    from pyscf import dft, gto

    def to_preprocessor(instructions):
        # Wrap the instructions into a full pyscf configuration and hand back
        # the resulting preprocessor section.
        return ConfigFile({
            "engine": {
                "application": 'pyscf'
            },
            "preprocessor": instructions
        })['preprocessor']

    mol = gto.M(atom='O  0  0  0; H  0 1 0 ; H 0 0 1', basis='6-31g')
    mf = dft.RKS(mol)
    mf.grids.level = 5
    mf.kernel()

    # --- Projection computed from the density matrix ---
    analytical_instructions = to_preprocessor({
        "basis": {
            "file": os.path.join(test_dir, "basis-test")
        },
        'projector': 'gaussian',
        'grid': 'analytical'
    })
    print(analytical_instructions)
    analytical_projector = xc.projector.DensityProjector(
        mol=mol, basis_instructions=analytical_instructions)
    coeff_analytical = analytical_projector.get_basis_rep(mf.make_rdm1())

    # --- Projection computed from the density on the integration grid ---
    radial_instructions = to_preprocessor({
        "basis": {
            "file": os.path.join(test_dir, "basis-test"),
            'sigma': 20
        },
        'projector': 'gaussian',
        'grid': 'radial'
    })
    rho = pyscf.dft.numint.get_rho(mf._numint, mol, mf.make_rdm1(), mf.grids)
    radial_projector = xc.projector.DensityProjector(
        basis_instructions=radial_instructions,
        grid_coords=mf.grids.coords,
        grid_weights=mf.grids.weights)
    atom_positions = np.array([[0, 0, 0], [0, 1, 0], [0, 0, 1]]) / Bohr
    coeff_grid = radial_projector.get_basis_rep(rho, atom_positions,
                                                ['X', 'X', 'X'])

    norm_analytical = np.linalg.norm(coeff_analytical['X'])
    norm_grid = np.linalg.norm(coeff_grid['X'])
    assert np.allclose(norm_analytical, norm_grid)
Exemple #2
0
def plot_basis(basis):
    """ Plot the radial functions of a basis described in a .json file.

    ``basis`` is handed to ``ConfigFile``; its ``'preprocessor'`` section
    configures the projector. For every entry of ``projector.basis`` whose key
    has length one, the radial functions are drawn on 500 points between 0 and
    the largest ``'r_o'`` value, one colour per ``l``, and the figure is shown.
    """
    config = ConfigFile(basis)
    projector = xc.projector.DensityProjector(
        unitcell=np.eye(3),
        grid=np.ones(3),
        basis_instructions=config['preprocessor'])

    for spec in projector.basis:
        if len(spec) != 1:
            continue

        spec_basis = projector.basis[spec]
        if isinstance(spec_basis, list):
            r_max = np.max([
                np.max(cutoff) for entry in spec_basis
                for cutoff in entry['r_o']
            ])
        else:
            r_max = np.max(spec_basis['r_o'])
        r = torch.from_numpy(np.linspace(0, r_max, 500))

        W = projector.get_W(spec_basis)
        radials = projector.radials(r, spec_basis, W=W)
        for l, rad in enumerate(radials):
            curves = rad if isinstance(rad, list) else [rad]
            colour = 'C{}'.format(l)
            for ir, rl in enumerate(curves):
                if ir:
                    plt.plot(r, rl, color=colour)
                else:
                    # Only the first curve of each l carries the legend entry.
                    plt.plot(r,
                             rl,
                             label='l = {}'.format(l),
                             color=colour)
        plt.legend()
        plt.show()
Exemple #3
0
def test_gaussian_projector(torch=''):
    """Regression test for the gaussian projector on a euclidean grid.

    The density read from ``h2o.RHO`` is projected onto the ``basis-test``
    basis and compared, key by key, with the pickled reference stored in
    ``h2o_gaussian_rep.pckl``. The ``torch`` argument is not used in the body.
    """
    rho_path = os.path.join(test_dir, 'h2o.RHO')
    rho, unitcell, grid = xc.utils.SiestaDensityGetter(
        binary=True).get_density(rho_path)

    config = ConfigFile({
        "engine": {
            "application": 'siesta'
        },
        "preprocessor": {
            "basis": {
                "file": os.path.join(test_dir, "basis-test"),
                'sigma': 2
            },
            'projector': 'gaussian',
            'grid': 'euclidean'
        }
    })
    basis_instructions = config['preprocessor']

    density_projector = xc.projector.DensityProjector(
        unitcell=unitcell, grid=grid, basis_instructions=basis_instructions)

    atom_coords = [[0.0, 0.0, 0.0], [-0.75846035, -0.59257417, 0.0],
                   [0.75846035, -0.59257417, 0.0]]
    positions = np.array(atom_coords) / xc.constants.Bohr

    basis_rep = density_projector.get_basis_rep(rho,
                                                positions=positions,
                                                species=['X', 'X', 'X'])

    reference_path = os.path.join(test_dir, 'h2o_gaussian_rep.pckl')
    with open(reference_path, 'rb') as file:
        ref = pickle.load(file)

    for spec in basis_rep:
        computed, expected = basis_rep[spec], ref[spec]
        assert np.allclose(computed, expected)
Exemple #4
0
def test_radial_projector():
    """Regression test for the 'ortho' projector on a radial (pyscf) grid.

    A PBE calculation of water provides the density on the integration grid,
    which is projected with separate basis settings for O and H. The result is
    compared with the pickled reference ``h2o_rad.pckl``; when the module-level
    flag ``save_test_radial_projector`` is set, the reference is rewritten from
    the current result first.
    """
    from pyscf import dft, gto

    mol = gto.M(atom='O  0  0  0; H  0 1 0 ; H 0 0 1', basis='6-31g*')
    mf = dft.RKS(mol)
    mf.xc = 'PBE'
    mf.grids.level = 5
    mf.kernel()

    rho = pyscf.dft.numint.get_rho(mf._numint, mol, mf.make_rdm1(), mf.grids)

    oxygen_basis = {'n': 2, 'l': 3, 'r_o': 1}
    hydrogen_basis = {'n': 2, 'l': 2, 'r_o': 1.5}
    config = ConfigFile({
        "engine": {
            "application": 'pyscf'
        },
        "preprocessor": {
            'basis': {
                'O': oxygen_basis,
                'H': hydrogen_basis
            },
            'projector': 'ortho',
            'grid': 'radial'
        }
    })
    basis_instructions = config['preprocessor']

    density_projector = xc.projector.DensityProjector(
        grid_coords=mf.grids.coords,
        grid_weights=mf.grids.weights,
        basis_instructions=basis_instructions)

    atom_coords = [[0.0, 0.0, 0.0], [0, 1, 0.0], [0, 0, 1]]
    positions = np.array(atom_coords) / xc.constants.Bohr

    basis_rep = density_projector.get_basis_rep(rho,
                                                positions=positions,
                                                species=['O', 'H', 'H'])

    reference_path = os.path.join(test_dir, 'h2o_rad.pckl')
    if save_test_radial_projector:
        # Refresh the stored reference from the current result.
        with open(reference_path, 'wb') as file:
            pickle.dump(basis_rep, file)
    with open(reference_path, 'rb') as file:
        basis_rep_ref = pickle.load(file)

    for spec in basis_rep:
        computed, expected = basis_rep[spec], basis_rep_ref[spec]
        assert np.allclose(computed, expected)
Exemple #5
0
def test_pre():
    """Run the engine and the pre-processing driver for grad = 0, 1 and 2.

    The driver data is copied into a scratch directory, the engine is run once
    and the densities are projected three times: with the configuration as
    shipped, then with ``'grad'`` set to 1 and to 2. The last axis of the
    stored density data must double, respectively quadruple, relative to the
    first projection.
    """
    tmp_dir = test_dir + '/driver_data_tmp'

    # Best-effort removal of leftovers from an earlier, aborted run.
    shutil.rmtree(tmp_dir, ignore_errors=True)

    os.chdir(test_dir)
    shcopytree(test_dir + '/driver_data', tmp_dir)
    # Captured after the chdir above, so the test finishes in test_dir
    # (unchanged from the previous behaviour).
    cwd = os.getcwd()
    os.chdir(tmp_dir)

    try:
        run_engine_driver('benzene_small.traj',
                          'pre_rad.json',
                          workdir='workdir_engine')

        pre_driver('benzene_small.traj', 'workdir_engine', 'pre_rad.json',
                   'data.hdf5/test/test')

        for grad, dest in ((1, 'data.hdf5/test/test1'),
                           (2, 'data.hdf5/test/test2')):
            pre = ConfigFile('pre_rad.json')
            pre['preprocessor']['grad'] = grad
            # Close the file before pre_driver reads it back, so the new
            # content is guaranteed to be on disk.
            with open('pre_rad.json', 'w') as config_file:
                config_file.write(json.dumps(pre.__dict__))
            pre_driver('benzene_small.traj', 'workdir_engine', 'pre_rad.json',
                       dest)

        with h5py.File('data.hdf5', 'r') as f:
            # Each loop keeps the data of the last key found in the group.
            for hashkey in f['/test/test/density']:
                data0 = f['/test/test/density/' + hashkey][:]
            for hashkey in f['/test/test1/density']:
                data1 = f['/test/test1/density/' + hashkey][:]
            for hashkey in f['/test/test2/density']:
                data2 = f['/test/test2/density/' + hashkey][:]

        assert data0.shape[-1] * 2 == data1.shape[-1]
        assert data0.shape[-1] * 4 == data2.shape[-1]
    finally:
        # Leave the scratch directory and remove it even when the test fails.
        os.chdir(cwd)
        shutil.rmtree(tmp_dir)
0
def get_grid_cv(hdf5, preprocessor, inputfile, spec_agnostic=False):
    """Build a ``GridSearchCV`` around the default pipeline.

    Parameters
    ----------
    hdf5: list
        ``hdf5[0]`` is the path of the HDF5 file, ``hdf5[1]`` a dataset path or
        a list of them. A single path is wrapped into a list in place.
    preprocessor: str or config object
        A string is loaded with ``ConfigFile``; anything else is used as is.
        When it is falsy, a minimal basis is generated from the species found
        in the datasets.
    inputfile: str
        Path to a JSON file; the keys ``'hyperparameters'``, ``'cv'``,
        ``'n_jobs'`` and ``'verbose'`` are read from it.
    spec_agnostic: bool
        Forwarded to ``get_default_pipeline``.

    Returns
    -------
    GridSearchCV
        Unfitted search object wrapping the pipeline under the step name 'ml'.
    """
    pre = ConfigFile(preprocessor) if isinstance(preprocessor, str) else preprocessor

    with open(inputfile, 'r') as input_handle:
        inp = json.load(input_handle)

    with h5py.File(hdf5[0], 'r') as datafile:
        if not isinstance(hdf5[1], list):
            hdf5[1] = [hdf5[1]]

        all_species = [
            ''.join(find_attr_in_tree(datafile, dataset, 'species'))
            for dataset in hdf5[1]
        ]

    if pre:
        basis = pre['preprocessor']
        symmetrizer_type = pre.get('symmetrizer_type', 'trace')
    else:
        # NOTE(review): iterating the joined string yields single characters,
        # so a multi-letter species label would be split — confirm intended.
        basis = {spec: {'n': 1, 'l': 1, 'r_o': 1} for spec in ''.join(all_species)}
        basis.update({'extension': 'RHOXC'})
        # No configuration to consult: fall back to the default symmetrizer.
        symmetrizer_type = 'trace'

    pipeline = get_default_pipeline(basis,
                                    all_species,
                                    symmetrizer_type=symmetrizer_type,
                                    spec_agnostic=spec_agnostic)

    if 'hyperparameters' in inp:
        hyper = inp['hyperparameters']
    else:
        print('No hyperparameters specified, fitting default pipeline to data')
        # Previously `hyper` stayed unbound here and the next line raised a
        # NameError. Assumes to_full_hyperparameters maps an empty mapping to
        # an empty grid — TODO confirm.
        hyper = {}

    hyper = to_full_hyperparameters(hyper, pipeline.get_params())

    cv = inp.get('cv', 2)
    n_jobs = inp.get('n_jobs', 1)
    verbose = inp.get('verbose', 1)

    pipe = Pipeline([('ml', pipeline)])
    grid_cv = GridSearchCV(pipe, hyper, cv=cv, n_jobs=n_jobs, refit=True, verbose=verbose, return_train_score=True)
    return grid_cv
Exemple #7
0
    def __init__(self, path):
        """Load the basis stored next to the model, then initialise the parent.

        If the directory ``path`` contains a file named ``bas.json``, its
        content is wrapped into a pyscf configuration and the resulting
        ``'preprocessor'`` section is stored in ``self.basis``. Without such a
        file ``self.basis`` is not set here.
        """
        for model_path in glob(path + '/*'):
            if os.path.basename(model_path) != 'bas.json':
                continue
            # Context manager so the handle is closed deterministically.
            with open(model_path, 'r') as basis_file:
                basis_spec = json.load(basis_file)

            self.basis = ConfigFile({
                'preprocessor': basis_spec,
                'engine': {
                    'application': 'pyscf'
                }
            })['preprocessor']

        super().__init__(path)
Exemple #8
0
def run_engine_driver(xyz, preprocessor, workdir='.tmp/'):
    """Run the electronic-structure engine on all structures read from ``xyz``.

    The configuration is loaded from ``preprocessor``. The ``'application'``
    entry is removed from its ``'engine'`` section (default ``'siesta'``) and
    selects the engine; the remaining section is passed on as keyword
    arguments. ``workdir/results.traj`` is copied to ``./results.traj``, and
    the working directory is deleted when it is the default ``'.tmp/'``.
    """
    pre = make_nested_absolute(ConfigFile(preprocessor))

    try:
        os.mkdir(workdir)
    except FileExistsError:
        # An already existing directory is simply reused.
        pass

    structures = read(xyz, ':')
    # Popped before the engine section is fetched below, so the keyword
    # arguments no longer contain 'application'.
    application = pre['engine'].pop('application', 'siesta')
    n_workers = pre.get('n_workers', 1)
    engine_kwargs = pre.get('engine', {})
    driver(structures,
           application,
           workdir=workdir,
           nworkers=n_workers,
           kwargs=engine_kwargs)

    results_path = workdir + '/results.traj'
    shutil.copy(results_path, './results.traj')
    if workdir == '.tmp/':
        shutil.rmtree(workdir)
Exemple #9
0
def test_gaussian_serialized():
    """Regression test for the serialized form of the gaussian projector.

    The projector built for the ``h2o.RHO`` density is serialized with
    ``serialize_projector``; the resulting ``'X'`` basis and projector models
    are evaluated atom by atom on torch inputs, and the coefficients are
    compared with the pickled reference ``h2o_gaussian_rep.pckl``.
    """
    rho, unitcell, grid = xc.utils.SiestaDensityGetter(
        binary=True).get_density(os.path.join(test_dir, 'h2o.RHO'))

    config = ConfigFile({
        "engine": {
            "application": 'siesta'
        },
        "preprocessor": {
            "basis": {
                "file": os.path.join(test_dir, "basis-test"),
                'sigma': 2
            },
            'projector': 'gaussian',
            'grid': 'euclidean'
        }
    })
    basis_instructions = config['preprocessor']

    density_projector = xc.projector.DensityProjector(
        unitcell=unitcell, grid=grid, basis_instructions=basis_instructions)

    basis_models, projector_models = xc.ml.pipeline.serialize_projector(
        density_projector)

    # Convert every input to torch; my_box is built from the numpy grid
    # before `grid` itself is rebound to a tensor.
    my_box = torch.Tensor([[0, grid[i]] for i in range(3)])
    unitcell = torch.from_numpy(unitcell).double()
    grid = torch.from_numpy(grid).double()
    atom_coords = np.array([[0.0, 0.0, 0.0], [-0.75846035, -0.59257417, 0.0],
                            [0.75846035, -0.59257417, 0.0]])
    positions = torch.from_numpy(atom_coords / xc.constants.Bohr)
    rho = torch.from_numpy(rho)

    def project_atom(pos):
        # Evaluate the basis at this atom, then project the density onto it.
        rad, ang, box = basis_models['X'](pos, unitcell, grid, my_box)
        projected = projector_models['X'](rho, pos, unitcell, grid, rad, ang,
                                          box)
        return projected.detach().numpy()

    coeffs = [project_atom(pos) for pos in positions]
    basis_rep = {'X': np.array(coeffs)}

    with open(os.path.join(test_dir, 'h2o_gaussian_rep.pckl'), 'rb') as file:
        ref = pickle.load(file)

    for spec, computed in basis_rep.items():
        assert np.allclose(computed, ref[spec])
Exemple #10
0
def basis_to_hash(basis):
    """
    Convert a given basis to a unique identifier

    Parameters
    ----------

    basis: dict or object providing ``get_hash()``
        Contains the basis like so : {'species1': {'n': 1, 'l': 2}...}

    Returns
    -------

    hash: str
        Encoding of the basis set
    """
    try:
        digest = basis.get_hash()
    except AttributeError:
        # No get_hash(): wrap the basis into a ConfigFile, which provides one.
        digest = ConfigFile({"preprocessor": basis}).get_hash()
    return digest
Exemple #11
0
def merge_data_driver(file, base, ref, out, optE0=False, pre=''):
    """Merge the baseline and the reference datasets of an HDF5 file.

    Parameters
    ----------
    file: str
        Path of the HDF5 file; opened in append mode.
    base, ref:
        Baseline and reference dataset selections, forwarded to ``merge_sets``
        (and to ``opt_E0`` when ``optE0`` is set).
    out: str
        Prefix of the merged groups: ``out + '/base'`` and ``out + '/ref'``.
    optE0: bool
        If true, ``E0`` is obtained from ``opt_E0``; otherwise a warning is
        printed and an empty mapping is used.
    pre: str
        Optional configuration path. When given, the hash of its ``'basis'``
        entry is passed to ``merge_sets`` for the baseline data.
    """
    if pre:
        pre = ConfigFile(pre)
        # NOTE(review): other drivers in this code base hash
        # pre['preprocessor'] — confirm that 'basis' is the intended key here.
        basis_key = basis_to_hash(pre['basis'])
    else:
        basis_key = None

    # Context manager so the file is flushed and closed even on errors.
    with h5py.File(file, 'a') as datafile:
        if optE0:
            E0 = opt_E0(datafile, base, ref)
        else:
            print('Warning: E0 is not being optimized for merged dataset. Might produce '
                  'unexpected behavior')
            # Previously E0 stayed unbound here and the call below raised a
            # NameError. Assumes merge_sets accepts an empty mapping as
            # "no offsets" — TODO confirm.
            E0 = {}

        merge_sets(datafile, base, basis_key, new_name=out + '/base', E0=E0)

        # The reference data is merged with all offsets set to zero.
        for key in E0:
            E0[key] = 0

        merge_sets(datafile, ref, None, new_name=out + '/ref', E0=E0)
Exemple #12
0
def sample_driver(preprocessor, size, hdf5, dest='sample.npy', cutoff=0.0):
    """ Given a dataset, perform sampling in feature space

    Parameters
    ----------
    preprocessor: str
        Path of the configuration file, loaded with ``ConfigFile``.
    size:
        Passed to ``SampleSelector``.
    hdf5: sequence
        ``hdf5[0]`` is the path of the HDF5 file, ``hdf5[1]`` the dataset
        selection (used both as baseline and as reference in ``load_sets``).
    dest: str
        File the flattened sample is saved to with ``np.save``.
    cutoff: float
        Forwarded to ``load_sets``.
    """
    pre = make_nested_absolute(ConfigFile(preprocessor))
    basis = pre['preprocessor']
    basis_key = basis_to_hash(basis)
    symmetrizer_type = pre.get('symmetrizer_type', 'trace')

    # Keep the file open only while it is needed and close it on every path.
    with h5py.File(hdf5[0], 'r') as datafile:
        data = load_sets(datafile, hdf5[1], hdf5[1], basis_key, cutoff)
        species = [''.join(find_attr_in_tree(datafile, hdf5[1], 'species'))]

        sampler_pipeline = get_default_pipeline(basis,
                                                species,
                                                symmetrizer_type=symmetrizer_type,
                                                pca_threshold=1)

        # Rebuild as a plain Pipeline and replace its last step by the sampler.
        sampler_pipeline = Pipeline(sampler_pipeline.steps)
        sampler_pipeline.steps[-1] = ('sampler', SampleSelector(size))
        sampler_pipeline.fit(data)
        sample = sampler_pipeline.predict(data)

    np.save(dest, np.array(sample).flatten())
Exemple #13
0
def pre_driver(xyz, srcdir, preprocessor, dest='.tmp/'):
    """ Preprocess electron densities obtained from electronic structure
    calculations

    Parameters
    ----------
    xyz: str
        Path of the structure file, read with ``read(xyz, ':')``.
    srcdir: str
        Directory handed to ``get_preprocessor``.
    preprocessor: str
        Path of the configuration file. For gaussian projectors this file is
        rewritten on disk with the resolved basis.
    dest: str
        Either a directory that receives one ``<hash>.npy`` file per basis, or
        a string containing ``'hdf5'`` of the form ``file/system/method``. In
        the latter case the data is staged in ``.tmp`` and added to the HDF5
        file; ``.tmp`` is removed afterwards unless it already existed.
    """
    preprocessor_path = preprocessor
    pre = make_nested_absolute(ConfigFile(preprocessor))

    atoms = read(xyz, ':')

    projection_step = get_preprocessor(pre, atoms, srcdir)

    to_hdf5 = 'hdf5' in dest
    if to_hdf5:
        dest_split = dest.split('/')
        # Pad with empty strings so a missing system/method still unpacks.
        file, system, method = dest_split + [''] * (3 - len(dest_split))
        workdir = '.tmp'
        delete_workdir = True
    else:
        workdir = dest
        delete_workdir = False

    try:
        os.mkdir(workdir)
    except FileExistsError:
        # Never delete a directory this call did not create.
        delete_workdir = False

    basis_grid = get_basis_grid(pre)['preprocessor__basis_instructions']

    for basis_instr in basis_grid:
        projection_step.basis_instructions = basis_instr
        if basis_instr.get('projector', 'ortho') == 'gaussian':
            if isinstance(basis_instr['basis'], dict):
                try:
                    bas = basis_instr['basis']['file']
                except KeyError:
                    bas = basis_instr['basis']['name']
            else:
                bas = basis_instr['basis']
            real_basis = get_real_basis(atoms,
                                        bas,
                                        spec_agnostic=basis_instr.get(
                                            'spec_agnostic', False))
            for key in real_basis:
                basis_instr[key] = real_basis[key]
            pre.update({'preprocessor': basis_instr})
            # Persist the resolved basis; the context manager guarantees the
            # content is flushed before anyone reads the file again.
            with open(preprocessor_path, 'w') as config_file:
                config_file.write(json.dumps(pre.__dict__))

        filename = os.path.join(workdir, basis_to_hash(basis_instr) + '.npy')
        data = projection_step.fit_transform(None)
        np.save(filename, data)
        if to_hdf5:
            add_data_driver(hdf5=file,
                            system=system,
                            method=method,
                            density=filename,
                            add=[],
                            traj=xyz,
                            override=True)

            # Explicit append mode: h5py >= 3 opens files read-only by
            # default, which would make the attribute update fail.
            with h5py.File(file, 'a') as f:
                f[system].attrs.update({'species': projection_step.species_string})
    if delete_workdir:
        shutil.rmtree(workdir)
Exemple #14
0
def test_jacobs_projector(rad_type, grid_type):
    """Check that a density stacked twice is projected identically twice.

    The density (from SIESTA for a euclidean grid, from pyscf otherwise) is
    stacked with itself and projected with ``'grad': 1``. For every key of the
    result, the first and the second half of the last axis must agree; for the
    'ortho' projector the same is checked after symmetrization.
    """
    # Not used further below.
    projector_type = rad_type + grid_type

    atom_coords = [[0.0, 0.0, 0.0], [-0.75846035, -0.59257417, 0.0],
                   [0.75846035, -0.59257417, 0.0]]
    positions = np.array(atom_coords) / xc.constants.Bohr

    application = 'siesta' if grid_type == 'euclidean' else 'pyscf'

    if rad_type != 'ortho':
        basis_instructions = {
            "projector": rad_type,
            "grid": grid_type,
            "basis": {
                "file": os.path.join(test_dir, "basis-test"),
                "sigma": 2
            },
            'grad': 1
        }
    else:
        basis_instructions = {
            'basis': {
                'n': 2,
                'l': 3,
                'r_o': 1
            },
            'projector': rad_type,
            'grid': grid_type,
            'grad': 1
        }

    config = ConfigFile({
        "engine": {
            "application": application
        },
        "preprocessor": basis_instructions
    })
    basis_instructions = config['preprocessor']
    print(basis_instructions)

    if grid_type != 'radial':
        # Density on a regular grid, read from the SIESTA output.
        density_getter = xc.utils.SiestaDensityGetter(binary=True)
        rho, unitcell, grid = density_getter.get_density(
            os.path.join(test_dir, 'h2o.RHO'))
        density_projector = xc.projector.DensityProjector(
            unitcell=unitcell,
            grid=grid,
            basis_instructions=basis_instructions)
    else:
        # Density on the pyscf integration grid.
        from pyscf import dft, gto
        mol = gto.M(atom='O  0  0  0; H  0 1 0 ; H 0 0 1', basis='6-31g*')
        mf = dft.RKS(mol)
        mf.xc = 'PBE'
        mf.grids.level = 5
        mf.kernel()
        rho = pyscf.dft.numint.get_rho(mf._numint, mol, mf.make_rdm1(),
                                       mf.grids)
        print('Rho shape', rho.shape)
        print('Weights shape', mf.grids.weights.shape)
        density_projector = xc.projector.DensityProjector(
            grid_coords=mf.grids.coords,
            grid_weights=mf.grids.weights,
            basis_instructions=basis_instructions)

    rho = np.stack([rho, rho])

    basis_rep = density_projector.get_basis_rep(rho,
                                                positions=positions,
                                                species=['X', 'X', 'X'])
    for _, val in basis_rep.items():
        half = val.shape[-1] // 2
        first_half, second_half = val[..., :half], val[..., half:]
        assert np.allclose(first_half, second_half)

    if rad_type == 'ortho':
        sym = xc.symmetrizer.Symmetrizer({
            'symmetrizer_type': 'trace',
            'basis': basis_instructions
        })
        D = sym.get_symmetrized(basis_rep)['X']
        half = D.shape[-1] // 2
        assert np.allclose(D[:, :half], D[:, half:])
Exemple #15
0
def fit_driver(preprocessor, hyper, hdf5=None, sets='', sample='', cutoff=0.0, model='', hyperopt=False):
    """ Fits a NXCPipeline to the provided data

    Parameters
    ----------
    preprocessor: str
        Path of the configuration file, loaded with ``ConfigFile``.
    hyper: str
        Path of the JSON input file handed to ``get_grid_cv``.
    hdf5: list
        ``[file, baseline sets, reference sets]``; replaced by the result of
        ``parse_sets_input(sets)`` when ``sets`` is given. Baseline paths are
        stripped of a leading ``*`` in place.
    sets: str
        Path of a sets file; takes precedence over ``hdf5``.
    sample: str
        Optional ``.npy`` file with the indices of the data points to use.
    cutoff: float
        Forwarded to ``load_sets``.
    model: str
        Optional path of a pretrained model whose steps replace the trailing
        steps of the new pipeline. Disables ``hyperopt``.
    hyperopt: bool
        If true, the grid search is fitted instead of the single pipeline and
        ``best_params.json`` and ``cv_results.csv`` are written.

    Returns
    -------
    dict
        Keys ``'mean deviation'``, ``'rmse'``, ``'mae'`` and ``'max'``, each
        rounded to four decimals. ``'rmse'`` is computed as the standard
        deviation of the deviations, ``'mae'`` and ``'max'`` from the absolute
        deviations after removing their mean. The fitted model is saved under
        ``best_model``.
    """
    inputfile = hyper
    if sets != '':
        hdf5 = parse_sets_input(sets)

    pre = make_nested_absolute(ConfigFile(preprocessor))
    # The hash is taken before the basis is possibly expanded below.
    basis_key = basis_to_hash(pre['preprocessor'])
    if 'gaussian' in pre['preprocessor'].get('projector_type', 'ortho')\
        and pre['preprocessor'].get('spec_agnostic', False):
        pre['preprocessor'].update(get_real_basis(None, pre['preprocessor']['X']['basis'], True))

    # A * in hdf5 (if sets != '') indicates that the predictions of a pretrained
    # model should be subtracted from the stored baseline energies
    # This is relevant for self-consistent training
    apply_to = []
    for pidx, path in enumerate(hdf5[1]):
        if path[0] == '*':
            apply_to.append(pidx)
            hdf5[1][pidx] = path[1:]

    grid_cv = get_grid_cv(hdf5, pre, inputfile, spec_agnostic=pre['preprocessor'].get('spec_agnostic', False))

    # Configure the plain estimator with the first value of every grid entry.
    new_model = grid_cv.estimator
    param_grid = grid_cv.param_grid
    param_grid = {key: param_grid[key][0] for key in param_grid}
    new_model.set_params(**param_grid)

    if model:
        hyperopt = False
        new_model.steps[-1][1].steps[2:] = xc.ml.network.load_pipeline(model).steps

    # The data is modified in place further down (+=, shuffle) although the
    # file is read-only, so it is held in memory and the file can be closed
    # right after loading.
    with h5py.File(hdf5[0], 'r') as datafile:
        data = load_sets(datafile, hdf5[1], hdf5[2], basis_key, cutoff)

    if model:
        for set_idx in apply_to:
            selection = (data[:, 0] == set_idx)
            prediction = new_model.predict(data)[set_idx][:, 0]
            print('Dataset {} old STD: {}'.format(set_idx, np.std(data[selection][:, -1])))
            data[selection, -1] += prediction
            print('Dataset {} new STD: {}'.format(set_idx, np.std(data[selection][:, -1])))

    if sample != '':
        sample = np.load(sample)
        data = data[sample]
        print("Using sample of size {}".format(len(sample)))

    np.random.shuffle(data)
    estimator = grid_cv if hyperopt else new_model

    real_targets = np.array(data[:, -1]).real.flatten()

    estimator.fit(data)

    dev = estimator.predict(data)[0].flatten() - real_targets
    dev0 = np.abs(dev - np.mean(dev))
    results = {
        'mean deviation': np.mean(dev).round(4),
        'rmse': np.std(dev).round(4),
        'mae': np.mean(dev0).round(4),
        'max': np.max(dev0).round(4)
    }

    if hyperopt:
        bp = estimator.best_params_
        # Strip the 'ml__' step prefix added for the grid search.
        bp = {key[len('ml__'):]: bp[key] for key in bp}
        with open('best_params.json', 'w') as params_file:
            params_file.write(json.dumps({'hyperparameters': bp}, indent=4))
        pd.DataFrame(estimator.cv_results_).to_csv('cv_results.csv')
        best_estimator = estimator.best_estimator_.steps[-1][1].start_at(2)
        best_estimator.save('best_model', True)
    else:
        estimator = estimator.steps[-1][1]
        estimator.start_at(2).save('best_model', True)
    return results
Exemple #16
0
def sc_driver(xyz,
              preprocessor,
              hyper,
              data='',
              maxit=5,
              tol=0.0005,
              sets='',
              nozero=False,
              model0='',
              hyperopt=False,
              keep_itdata=False):
    """ Self-consistent training loop: alternate SCF runs and model fits.

    All work happens in a sub-directory ``sc`` of the current directory.
    Iteration 0 runs the engine (with ``model0`` if given), projects the
    densities, stores energies and fits a first model. Each further iteration
    serializes the latest ``best_model``, reruns the engine with it, projects,
    evaluates and refits, until the fit and SCF 'mae' differ by at most ``tol``
    or ``maxit`` iterations are done. If ``testing.xyz`` or ``testing.traj``
    exists, the final model is afterwards evaluated in a ``testing`` directory.

    Parameters
    ----------
    xyz: str
        Path of the training structures; made absolute.
    preprocessor: str
        Path of the configuration file; copied to ``sc/pre.json``.
    hyper: str
        Path of the hyperparameter file; copied to ``sc/hyper.json``.
    data: str
        Not used in the body.
    maxit: int
        Maximum number of iterations after iteration 0.
    tol: float
        Convergence threshold on ``abs(fit mae - SCF mae)``.
    sets: str
        Optional file whose content is appended to ``sets.inp``.
    nozero: bool
        If true, ``zero=0`` is passed to ``add_data_driver``, otherwise
        ``zero=None``.
    model0: str
        Optional pretrained model; serialized to ``model0.jit``, so it must
        not itself be named ``model0.jit``.
    hyperopt: bool
        Forwarded to the first ``fit_driver`` call only.
    keep_itdata: bool
        If true, every iteration is stored under its own ``system/it<n>``;
        otherwise ``system/it0`` is overwritten each time.

    NOTE(review): the files written with ``open(...).write(...)`` below are
    never closed explicitly; the code relies on the interpreter closing them.
    """

    xyz = os.path.abspath(xyz)
    pre = make_nested_absolute(ConfigFile(preprocessor))
    engine_kwargs = pre.get('engine', {})
    if sets:
        sets = os.path.abspath(sets)

    # ============ Start from pre-trained model ================
    # serialize it for self-consistent deployment but keep original version
    # to continue training it
    model0_orig = ''
    if model0:
        if model0 == 'model0.jit':
            raise Exception('Please choose a different name/path for model0 as it' +\
            ' model0.jit would be overwritten by this routine')
        serialize(in_path=model0, jit_path='model0.jit', as_radial=False)

        model0_orig = model0
        model0_orig = os.path.abspath(model0_orig)

        model0 = 'model0.jit'
        model0 = os.path.abspath(model0)

        # Entries of the 'engine' section override the 'nxc' default set here.
        engine_kwargs = {'nxc': model0}
        engine_kwargs.update(pre.get('engine', {}))

    # If not nozero, automatically aligns energies between reference and
    # baseline data by removing mean deviation
    if nozero:
        E0 = 0
    else:
        E0 = None

    #============= Iteration 0 =================
    # Initial self-consistent calculation either with model0 or baseline method only
    # if no neuralxc model is provided. Hyperparameter optimization is done in the
    # first fit and its result is kept for subsequent iterations.
    print('\n====== Iteration 0 ======')
    print('\nRunning SCF calculations ...')
    print('-----------------------------\n')
    mkdir('sc')
    shcopy(preprocessor, 'sc/pre.json')
    shcopy(hyper, 'sc/hyper.json')
    os.chdir('sc')

    iteration = 0
    # A leading * marks the baseline set whose energies fit_driver corrects
    # with the predictions of the pretrained model.
    if model0:
        open('sets.inp', 'w').write('data.hdf5 \n *system/it{} \t system/ref'.format(iteration))
    else:
        open('sets.inp', 'w').write('data.hdf5 \n system/it{} \t system/ref'.format(iteration))

    if sets:
        open('sets.inp', 'a').write('\n' + open(sets, 'r').read())
    mkdir('workdir')
    # NOTE(review): 'application' is looked up in the 'preprocessor' section
    # here, whereas run_engine_driver reads it from 'engine' — confirm which
    # section is intended.
    driver(read(xyz, ':'),
           pre['preprocessor'].get('application', 'siesta'),
           workdir='workdir',
           nworkers=pre.get('n_workers', 1),
           kwargs=engine_kwargs)
    print('\nProjecting onto basis ...')
    print('-----------------------------\n')
    pre_driver(xyz, 'workdir', preprocessor='pre.json', dest='data.hdf5/system/it{}'.format(iteration))
    add_data_driver(hdf5='data.hdf5',
                    system='system',
                    method='it0',
                    add=['energy'],
                    traj='workdir/results.traj',
                    override=True,
                    zero=E0)
    add_data_driver(hdf5='data.hdf5', system='system', method='ref', add=['energy'], traj=xyz, override=True, zero=E0)
    print('\nBaseline accuracy')
    print('-----------------------------\n')
    statistics_sc = \
    eval_driver(hdf5=['data.hdf5','system/it{}'.format(iteration),
            'system/ref'])

    open('statistics_sc', 'w').write(json.dumps(statistics_sc))
    print('\nFitting initial ML model ...')
    print('-----------------------------\n')
    statistics_fit = fit_driver(preprocessor='pre.json',
                                hyper='hyper.json',
                                model=model0_orig,
                                sets='sets.inp',
                                hyperopt=hyperopt)

    open('statistics_fit', 'w').write(json.dumps(statistics_fit))

    #=================== Iterations > 0 ==============
    # it_label is pre-set so that it is defined for the testing stage even
    # when the loop body never runs (maxit < 1).
    it_label = 1
    for it_label in range(1, maxit + 1):
        if keep_itdata:
            iteration = it_label
        else:
            iteration = 0

        print('\n\n====== Iteration {} ======'.format(it_label))
        open('sets.inp', 'w').write('data.hdf5 \n *system/it{} \t system/ref'.format(iteration))

        if sets:
            open('sets.inp', 'a').write('\n' + open(sets, 'r').read())
        mkdir('workdir')

        # Freeze the model of the previous fit and serialize it for the engine.
        shcopytreedel('best_model', 'model_it{}'.format(it_label))
        serialize('model_it{}'.format(it_label), 'model_it{}.jit'.format(it_label),
                  'radial' in pre['preprocessor'].get('projector_type', 'ortho'))

        # NOTE(review): the relative model path presumably resolves from a
        # directory two levels below 'sc' in which the engine runs — confirm.
        engine_kwargs = {'nxc': '../../model_it{}.jit'.format(it_label), 'skip_calculated': False}
        engine_kwargs.update(pre.get('engine', {}))

        print('\nRunning SCF calculations ...')
        print('-----------------------------\n')
        driver(read(xyz, ':'),
               pre['preprocessor'].get('application', 'siesta'),
               workdir='workdir',
               nworkers=pre.get('n_workers', 1),
               kwargs=engine_kwargs)

        print('\nProjecting onto basis...')
        print('-----------------------------\n')
        pre_driver(xyz, 'workdir', preprocessor='pre.json', dest='data.hdf5/system/it{}'.format(iteration))

        add_data_driver(hdf5='data.hdf5',
                        system='system',
                        method='it{}'.format(iteration),
                        add=['energy'],
                        traj='workdir/results.traj',
                        override=True,
                        zero=E0)
        print('\nResults')
        print('-----------------------------\n')

        statistics_sc = \
        eval_driver(hdf5=['data.hdf5','system/it{}'.format(iteration),
                'system/ref'], printout=False)

        # statistics_sc holds one JSON document per line, one per iteration;
        # it is read back below to print the table of all iterations so far.
        open('statistics_sc', 'a').write('\n' + json.dumps(statistics_sc))
        open('model_it{}/statistics_sc'.format(it_label), 'w').write('\n' + json.dumps(statistics_sc))
        results_df = pd.DataFrame([json.loads(line) for line in open('statistics_sc','r')])
        results_df.index.name = 'Iteration'
        print(results_df.to_markdown())
        print('')

        statistics_fit = fit_driver(preprocessor='pre.json', hyper='hyper.json', model='best_model', sets='sets.inp')
        open('statistics_fit', 'a').write('\n' + json.dumps(statistics_fit))

        # Converged once the fit error and the self-consistent error agree.
        if abs(statistics_fit['mae'] - statistics_sc['mae']) <= tol:
            print('=============== Self consistent training converged ============')
            break

    os.chdir('..')
    print('====== Testing ======\n')
    # The existence check is done from the parent of 'sc', whereas the stored
    # path is relative to the 'testing' directory entered further down.
    testfile = ''
    if os.path.isfile('testing.xyz'):
        testfile = '../testing.xyz'
    if os.path.isfile('testing.traj'):
        testfile = '../testing.traj'

    if testfile:
        mkdir('testing')

        # The .format() call has no effect: the string has no placeholder.
        shcopy('sc/data.hdf5'.format(iteration), 'testing/data.hdf5')
        shcopytree('sc/model_it{}.jit'.format(it_label), 'testing/nxc.jit')
        shcopytree('sc/model_it{}'.format(it_label), 'final_model/')
        shcopytree('sc/model_it{}.jit'.format(it_label), 'final_model.jit/')
        os.chdir('testing')
        mkdir('workdir')
        engine_kwargs = {'nxc': '../../nxc.jit'}
        engine_kwargs.update(pre.get('engine', {}))
        driver(read(testfile, ':'),
               pre['preprocessor'].get('application', 'siesta'),
               workdir='workdir',
               nworkers=pre.get('n_workers', 1),
               kwargs=engine_kwargs)
        add_data_driver(hdf5='data.hdf5',
                        system='system',
                        method='testing/ref',
                        add=['energy'],
                        traj=testfile,
                        override=True,
                        zero=E0)
        add_data_driver(hdf5='data.hdf5',
                        system='system',
                        method='testing/nxc',
                        add=['energy'],
                        traj='workdir/results.traj',
                        override=True,
                        zero=E0)

        print('\nTest results...')
        print('-----------------------------\n')

        statistics_test = eval_driver(hdf5=['data.hdf5', 'system/testing/nxc', 'system/testing/ref'])
        open('statistics_test', 'w').write(json.dumps(statistics_test))
        os.chdir('..')
    else:
        print('testing.traj or testing.xyz not found.')