def run_amp(fin):
    images = ase.io.read(fin, index=':')
    print(len(images))
    calc = Amp(descriptor=Gaussian(),
               model=NeuralNetwork(hiddenlayers=(10, 10, 10)))
    # Combine convergence and force settings in a single LossFunction;
    # assigning two separate LossFunction objects would discard the first.
    calc.model.lossfunction = LossFunction(convergence={'energy_rmse': 0.001},
                                           force_coefficient=-0.1)
    calc.train(images=images, overwrite=True)
def calc_train_images(images, HL, E_conv, f_conv, ncore):
    Hidden_Layer = tuple(HL)
    print("Hidden Layer: {}".format(Hidden_Layer))
    print("Energy convergence: {}".format(E_conv))
    calc = Amp(descriptor=Gaussian(),
               model=NeuralNetwork(hiddenlayers=Hidden_Layer),
               cores=ncore)
    if f_conv <= 0.0:
        calc.model.lossfunction = LossFunction(convergence={'energy_rmse': E_conv})
    else:
        calc.model.lossfunction = LossFunction(convergence={'energy_rmse': E_conv,
                                                            'force_rmse': f_conv})
    #calc.model.lossfunction = LossFunction(force_coefficient=-0.1)
    calc.train(images=images, overwrite=True)
    return
def train_data(images, setup_only=False):
    label = 'nodeplot_test/calc'
    train_images = images
    calc = Amp(descriptor=Gaussian(),
               model=NeuralNetwork(hiddenlayers=(5, 5)),
               label=label,
               cores=1)
    loss = LossFunction(convergence={'energy_rmse': 0.02, 'force_rmse': 0.02})
    calc.model.lossfunction = loss
    if not setup_only:
        calc.train(images=train_images)
        for image in train_images:
            print("energy =", calc.get_potential_energy(image))
            print("forces =", calc.get_forces(image))
    else:
        images = hash_images(train_images)
        calc.descriptor.calculate_fingerprints(images=images,
                                               log=calc._log,
                                               parallel={'cores': 1},
                                               calculate_derivatives=False)
        calc.model.fit(trainingimages=images,
                       descriptor=calc.descriptor,
                       log=calc._log,
                       parallel={'cores': 1},
                       only_setup=True)
        return calc
def create_calc(self, label, dblabel):
    amp_label = os.path.join(self.calc_dir, label)
    amp_dblabel = os.path.join(self.calc_dir, dblabel)
    amp_name = amp_label + ".amp"
    if not os.path.exists(amp_name):
        print("Creating calculator {}...".format(amp_name))
        loss_function = LossFunction(
            convergence=self.convergence,
            energy_coefficient=self.energy_coefficient,
            force_coefficient=self.force_coefficient,
            overfit=self.overfit,
        )
        model = NeuralNetwork(
            hiddenlayers=self.hidden_layers,
            activation=self.activation,
            lossfunction=loss_function,
            weights=None,
            scalings=None,
            prescale=True,
        )
        descriptor = Gaussian(cutoff=self.cutoff, Gs=self.Gs, fortran=True)
        calc = Amp(descriptor=descriptor,
                   model=model,
                   label=amp_label,
                   dblabel=amp_dblabel)
        return calc
    else:
        print("Calculator {} already exists!".format(amp_name))
        calc = Amp.load(amp_name, label=amp_label, dblabel=amp_dblabel)
        return calc
def re_train_images(images, HL, E_conv):
    Hidden_Layer = tuple(HL)
    print("Hidden Layer: {}".format(Hidden_Layer))
    print("Energy convergence: {}".format(E_conv))
    calc = Amp.load("amp.amp", cores=20)
    calc.model.lossfunction = LossFunction(convergence={'energy_rmse': E_conv})
    calc.train(images=images, overwrite=True)
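# A minimal usage sketch for re_train_images (an illustration, not part of the
# original snippet). It assumes a hypothetical 'retrain.traj' trajectory on
# disk; the HL argument is only printed here, because the architecture is
# already fixed by the loaded "amp.amp" potential.
import ase.io

images = ase.io.read('retrain.traj', index=':')  # hypothetical file name
re_train_images(images, HL=[10, 10], E_conv=0.001)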
def train_test():
    label = 'train_test_g5/calc'
    train_images = generate_data(2)
    elements = ['Pt', 'Cu']
    G = make_symmetry_functions(elements=elements, type='G2',
                                etas=np.logspace(np.log10(0.05),
                                                 np.log10(5.), num=4))
    G += make_symmetry_functions(elements=elements, type='G5',
                                 etas=[0.005],
                                 zetas=[1., 4.],
                                 gammas=[+1., -1.])
    G = {element: G for element in elements}
    calc = Amp(descriptor=Gaussian(Gs=G),
               model=NeuralNetwork(hiddenlayers=(3, 3)),
               label=label,
               cores=1)
    loss = LossFunction(convergence={'energy_rmse': 0.02, 'force_rmse': 0.03})
    calc.model.lossfunction = loss
    calc.train(images=train_images)
    for image in train_images:
        print("energy = %s" % str(calc.get_potential_energy(image)))
        print("forces = %s" % str(calc.get_forces(image)))

    # Test that we can re-load this calculator and call it again.
    del calc
    calc2 = Amp.load(label + '.amp')
    for image in train_images:
        print("energy = %s" % str(calc2.get_potential_energy(image)))
        print("forces = %s" % str(calc2.get_forces(image)))
def train_images(images, HL, E_conv):
    Hidden_Layer = tuple(HL)
    calc = Amp(descriptor=Gaussian(),
               model=NeuralNetwork(hiddenlayers=Hidden_Layer))
    calc.model.lossfunction = LossFunction(convergence={'energy_rmse': E_conv})
    #calc.model.lossfunction = LossFunction(force_coefficient=-0.1)
    calc.train(images=images, overwrite=True)
def train_model(energy_coeff=1.0, force_training=False, force_coeff=0.2):
    shuffle(image_list)  # "Load balancing"
    if force_training:  # slow train with energy and forces
        MLIP.model.lossfunction = LossFunction(energy_coefficient=energy_coeff,
                                               force_coefficient=force_coeff,
                                               convergence={'energy_rmse': 0.1,
                                                            'force_rmse': 0.6})
    else:
        # overfit is a regularization keyword, not a convergence criterion,
        # so it is passed on its own rather than inside the convergence dict.
        MLIP.model.lossfunction = LossFunction(convergence={'energy_rmse': 0.001},
                                               force_coefficient=None,
                                               overfit=overfit_penalty)
    MLIP.train(images=image_list)
    MLIP.save(potential_file, overwrite=True)
def calc_train_images(images, HL, E_conv, f_conv, f_coeff, ncore):
    Hidden_Layer = tuple(HL)
    print("Hidden Layer: {}".format(Hidden_Layer))
    print("Energy convergence: {}".format(E_conv))
    cores = {'localhost': ncore}  # 'localhost' suppresses SSH communication between nodes
    calc = Amp(descriptor=Gaussian(),
               model=NeuralNetwork(hiddenlayers=Hidden_Layer),
               cores=cores)
    if f_conv <= 0.0:
        convergence = {'energy_rmse': E_conv}
    else:
        convergence = {'energy_rmse': E_conv, 'force_rmse': f_conv}
    calc.model.lossfunction = LossFunction(convergence=convergence,
                                           force_coefficient=f_coeff)
    #calc.model.lossfunction = LossFunction(force_coefficient=-0.1)
    calc.train(images=images, overwrite=True)
    return
def test():
    """FingerprintPlot test."""
    generate_data(2, filename='fpplot-training.traj')
    calc = Amp(descriptor=Gaussian(),
               model=NeuralNetwork(),
               label='fpplot-test')
    calc.model.lossfunction = LossFunction(convergence={'energy_rmse': 1.00,
                                                        'force_rmse': 1.00})
    calc.train(images='fpplot-training.traj')

    images = ase.io.Trajectory('fpplot-training.traj')
    fpplot = FingerprintPlot(calc)
    fpplot(images)
    fpplot(images, overlay=images[0])
    fpplot(images, overlay=[images[1][2], images[0][-1]])
def calc_train_images(images, HL, E_conv, f_conv, f_coeff, ncore, amp_pot=None):
    Hidden_Layer = tuple(HL)
    print("Hidden Layer: {}".format(Hidden_Layer))
    print("Energy convergence: {}".format(E_conv))
    cores = {'localhost': ncore}  # 'localhost' suppresses SSH communication between nodes
    ### Load an existing "amp.amp" potential if one is given,
    ### otherwise build a fresh calculator.
    if amp_pot:
        calc = Amp.load(amp_pot)
    else:
        calc = Amp(descriptor=Gaussian(),
                   model=NeuralNetwork(hiddenlayers=Hidden_Layer),
                   cores=cores)
    ### Global Search in Param Space
    Annealer(calc=calc, images=images, Tmax=20, Tmin=1, steps=4000)
    if f_conv <= 0.0:
        E_maxresid = E_conv * 3
        #convergence = {'energy_rmse': E_conv}
        convergence = {'energy_rmse': E_conv, 'energy_maxresid': E_maxresid}
    else:
        convergence = {'energy_rmse': E_conv, 'force_rmse': f_conv}
    calc.model.lossfunction = LossFunction(convergence=convergence,
                                           force_coefficient=f_coeff)  # setting
    calc.train(images=images, overwrite=True)
    return
def train_test():
    """Gaussian/Neural train test."""
    label = 'train_test/calc'
    train_images = generate_data(2)
    calc = Amp(descriptor=Gaussian(),
               model=NeuralNetwork(hiddenlayers=(3, 3)),
               label=label,
               cores=1)
    loss = LossFunction(convergence={'energy_rmse': 0.02, 'force_rmse': 0.03})
    calc.model.lossfunction = loss
    calc.train(images=train_images)
    for image in train_images:
        print("energy = %s" % str(calc.get_potential_energy(image)))
        print("forces = %s" % str(calc.get_forces(image)))

    # Test that we can re-load this calculator and call it again.
    del calc
    calc2 = Amp.load(label + '.amp')
    for image in train_images:
        print("energy = %s" % str(calc2.get_potential_energy(image)))
        print("forces = %s" % str(calc2.get_forces(image)))
def non_periodic_0th_bfgs_step_test():
    """Gaussian/Neural training non-periodic standard test.

    Compares results to that expected from separate mathematica
    calculations.
    """
    images = [Atoms(symbols='PdOPd2',
                    pbc=np.array([False, False, False], dtype=bool),
                    cell=np.array([[1., 0., 0.], [0., 1., 0.], [0., 0., 1.]]),
                    positions=np.array([[0., 0., 0.], [0., 2., 0.],
                                        [0., 0., 3.], [1., 0., 0.]])),
              Atoms(symbols='PdOPd2',
                    pbc=np.array([False, False, False], dtype=bool),
                    cell=np.array([[1., 0., 0.], [0., 1., 0.], [0., 0., 1.]]),
                    positions=np.array([[0., 1., 0.], [1., 2., 1.],
                                        [-1., 1., 2.], [1., 3., 2.]])),
              Atoms(symbols='PdO',
                    pbc=np.array([False, False, False], dtype=bool),
                    cell=np.array([[1., 0., 0.], [0., 1., 0.], [0., 0., 1.]]),
                    positions=np.array([[2., 1., -1.], [1., 2., 1.]])),
              Atoms(symbols='Pd2O',
                    pbc=np.array([False, False, False], dtype=bool),
                    cell=np.array([[1., 0., 0.], [0., 1., 0.], [0., 0., 1.]]),
                    positions=np.array([[-2., -1., -1.], [1., 2., 1.],
                                        [3., 4., 4.]])),
              Atoms(symbols='Cu',
                    pbc=np.array([False, False, False], dtype=bool),
                    cell=np.array([[1., 0., 0.], [0., 1., 0.], [0., 0., 1.]]),
                    positions=np.array([[0., 0., 0.]]))]

    for image in images:
        image.set_calculator(EMT())
        image.get_potential_energy(apply_constraint=False)
        image.get_forces(apply_constraint=False)

    # Parameters
    Gs = {'O': [{'type': 'G2', 'element': 'Pd', 'eta': 0.8},
                {'type': 'G4', 'elements': ['Pd', 'Pd'],
                 'eta': 0.2, 'gamma': 0.3, 'zeta': 1},
                {'type': 'G4', 'elements': ['O', 'Pd'],
                 'eta': 0.3, 'gamma': 0.6, 'zeta': 0.5}],
          'Pd': [{'type': 'G2', 'element': 'Pd', 'eta': 0.2},
                 {'type': 'G4', 'elements': ['Pd', 'Pd'],
                  'eta': 0.9, 'gamma': 0.75, 'zeta': 1.5},
                 {'type': 'G4', 'elements': ['O', 'Pd'],
                  'eta': 0.4, 'gamma': 0.3, 'zeta': 4}],
          'Cu': [{'type': 'G2', 'element': 'Cu', 'eta': 0.8},
                 {'type': 'G4', 'elements': ['Cu', 'O'],
                  'eta': 0.2, 'gamma': 0.3, 'zeta': 1},
                 {'type': 'G4', 'elements': ['Cu', 'Cu'],
                  'eta': 0.3, 'gamma': 0.6, 'zeta': 0.5}]}

    hiddenlayers = {'O': (2,), 'Pd': (2,), 'Cu': (2,)}

    weights = OrderedDict(
        [('O', OrderedDict([(1, np.matrix([[-2.0, 6.0], [3.0, -3.0],
                                           [1.5, -0.9], [-2.5, -1.5]])),
                            (2, np.matrix([[5.5], [3.6], [1.4]]))])),
         ('Pd', OrderedDict([(1, np.matrix([[-1.0, 3.0], [2.0, 4.2],
                                            [1.0, -0.7], [-3.0, 2.0]])),
                             (2, np.matrix([[4.0], [0.5], [3.0]]))])),
         ('Cu', OrderedDict([(1, np.matrix([[0.0, 1.0], [-1.0, -2.0],
                                            [2.5, -1.9], [-3.5, 0.5]])),
                             (2, np.matrix([[0.5], [1.6], [-1.4]]))]))])

    scalings = OrderedDict(
        [('O', OrderedDict([('intercept', -2.3), ('slope', 4.5)])),
         ('Pd', OrderedDict([('intercept', 1.6), ('slope', 2.5)])),
         ('Cu', OrderedDict([('intercept', -0.3), ('slope', -0.5)]))])

    # Correct values
    if aseversion < 12:  # EMT values have changed from the 3.12.0 version
        ref_loss = 7144.8107853579895
        ref_energyloss = (24.318837496016506 ** 2.) * 5
        ref_forceloss = (144.70282477494519 ** 2.) * 5
        ref_dloss_dparameters = np.array(
            [0, 0, 0, 0, 0, 0,
             0.01374139170953901, 0.36318423812749656, 0.028312691567496464,
             0.6012336354445753, 0.9659002689921986, -1.289777005924742,
             -0.5718960934643078, -2.642566722179569, -1.196039924610482,
             0, 0, -2.72563797131018, -0.9080181024866707,
             -0.7739948323226851, -0.29157894253717415, -2.0599829042717404,
             -0.6156374289895887, -0.006086517460749253, -0.829678548408266,
             0.0008092646745710161, 0.04161302703491613,
             0.0034264690790135606, -0.957800456897051,
             -0.006281929606579444, -0.2883588477371198, -4.245777410962108,
             -4.3174120941045535, -8.02385959091948, -3.240512651984099,
             -27.289862194988853, -26.8177742762544, -82.45107056051073,
             -80.68167683508715])
        ref_energy_maxresid = 54.21915548269209
        ref_force_maxresid = 791.6736436232306
    else:
        ref_loss = 7144.807220773296
        ref_energyloss = (24.318829702548342 ** 2.) * 5
        ref_forceloss = (144.70279593472887 ** 2.) * 5
        ref_dloss_dparameters = np.array(
            [0, 0, 0, 0, 0, 0,
             0.01374139170953901, 0.36318423812749656, 0.028312691567496464,
             0.6012336354445753, 0.9659002689921986, -1.2897765357544038,
             -0.5718958286530584, -2.642565840915077, -1.1960394346870424,
             0, 0, -2.7256370964673238, -0.9080177898160631,
             -0.7739945904033205, -0.29157882294526083, -2.0599825024556027,
             -0.6156371996742152, -0.006086514109432934, -0.8296782839032163,
             0.0008092653341775424, 0.04161306816722683,
             0.0034264692325982156, -0.9578001030483714,
             -0.006281927374160914, -0.28835874344086, -4.245775886469167,
             -4.317410633818672, -8.02385959091948, -3.240512651984099,
             -27.289853042932705, -26.81776520493048, -82.45104200076496,
             -80.68164887277251])
        ref_energy_maxresid = 54.21913802238612
        ref_force_maxresid = 791.6734866205463

    # Testing pure-python and fortran versions of Gaussian-neural on
    # different numbers of processes
    for fortran in [False, True]:
        for cores in range(1, 6):
            label = 'train-nonperiodic/%s-%i' % (fortran, cores)
            print(label)
            calc = Amp(descriptor=Gaussian(cutoff=6.5,
                                           Gs=Gs,
                                           fortran=fortran),
                       model=NeuralNetwork(hiddenlayers=hiddenlayers,
                                           weights=weights,
                                           scalings=scalings,
                                           activation='sigmoid',
                                           regressor=regressor,
                                           fortran=fortran),
                       label=label,
                       dblabel=label,
                       cores=cores)

            lossfunction = LossFunction(convergence=convergence)
            calc.model.lossfunction = lossfunction
            calc.train(images=images)

            diff = abs(calc.model.lossfunction.loss - ref_loss)
            print("diff at 204 =", diff)
            assert (diff < 10. ** (-10.)), \
                'Calculated value of loss function is wrong!'
            diff = abs(calc.model.lossfunction.energy_loss - ref_energyloss)
            assert (diff < 10. ** (-10.)), \
                'Calculated value of energy per atom RMSE is wrong!'
            diff = abs(calc.model.lossfunction.force_loss - ref_forceloss)
            assert (diff < 10 ** (-10.)), \
                'Calculated value of force RMSE is wrong!'
            diff = abs(calc.model.lossfunction.energy_maxresid -
                       ref_energy_maxresid)
            assert (diff < 10. ** (-10.)), \
                'Calculated value of energy per atom max residual is wrong!'
            diff = abs(calc.model.lossfunction.force_maxresid -
                       ref_force_maxresid)
            assert (diff < 10 ** (-10.)), \
                'Calculated value of force max residual is wrong!'
            for _ in range(len(ref_dloss_dparameters)):
                diff = abs(calc.model.lossfunction.dloss_dparameters[_] -
                           ref_dloss_dparameters[_])
                assert (diff < 10 ** (-12.)), \
                    'Calculated value of loss function derivative is wrong!'

            dblabel = label
            secondlabel = '_' + label

            calc = Amp(descriptor=Gaussian(cutoff=6.5,
                                           Gs=Gs,
                                           fortran=fortran),
                       model=NeuralNetwork(hiddenlayers=hiddenlayers,
                                           weights=weights,
                                           scalings=scalings,
                                           activation='sigmoid',
                                           regressor=regressor,
                                           fortran=fortran),
                       label=secondlabel,
                       dblabel=dblabel,
                       cores=cores)

            lossfunction = LossFunction(convergence=convergence)
            calc.model.lossfunction = lossfunction
            calc.train(images=images)

            diff = abs(calc.model.lossfunction.loss - ref_loss)
            assert (diff < 10. ** (-10.)), \
                'Calculated value of loss function is wrong!'
            diff = abs(calc.model.lossfunction.energy_loss - ref_energyloss)
            assert (diff < 10. ** (-10.)), \
                'Calculated value of energy per atom RMSE is wrong!'
            diff = abs(calc.model.lossfunction.force_loss - ref_forceloss)
            assert (diff < 10 ** (-10.)), \
                'Calculated value of force RMSE is wrong!'
            diff = abs(calc.model.lossfunction.energy_maxresid -
                       ref_energy_maxresid)
            assert (diff < 10. ** (-10.)), \
                'Calculated value of energy per atom max residual is wrong!'
            diff = abs(calc.model.lossfunction.force_maxresid -
                       ref_force_maxresid)
            assert (diff < 10 ** (-10.)), \
                'Calculated value of force max residual is wrong!'
            for _ in range(len(ref_dloss_dparameters)):
                diff = abs(calc.model.lossfunction.dloss_dparameters[_] -
                           ref_dloss_dparameters[_])
                assert (diff < 10 ** (-12.)), \
                    'Calculated value of loss function derivative is wrong!'
timestep=timestep,
)

label = "energy-trained"
dblabel = label + "-train"
calc = trn.create_calc(label=label, dblabel=dblabel)
ann = Annealer(calc=calc, images=train_traj, Tmax=20, Tmin=1, steps=2000,
               train_forces=False)
amp_name = trn.train_calc(calc, train_traj)

label = os.path.join("calcs", "force-trained")
dblabel = label + "-train"
calc = Amp.load(amp_name, label=label, dblabel=dblabel)
convergence = {"energy_rmse": 1e-16,
               "force_rmse": 1e-16,
               "max_steps": max_steps}
loss_function = LossFunction(
    convergence=convergence,
    energy_coefficient=1.0,
    force_coefficient=0.1,
    overfit=overfit,
)
calc.model.lossfunction = loss_function
amp_name = trn.train_calc(calc, train_force_traj)
def plot_sensitivity(load,
                     images,
                     d=0.0001,
                     label='sensitivity',
                     dblabel=None,
                     plotfile=None,
                     overwrite=False,
                     energy_coefficient=1.0,
                     force_coefficient=0.04):
    """Returns the plot of the loss function in terms of perturbed parameters.

    Takes the load file and images. Any other keyword taken by the Amp
    calculator can be fed to this class also.

    Parameters
    ----------
    load : str
        Path for loading an existing ".amp" file. Should be fed like
        'load="filename.amp"'.
    images : list or str
        List of ASE atoms objects with positions, symbols, energies, and
        forces in ASE format. This can also be the path to an ASE
        trajectory (.traj) or database (.db) file. Energies can be obtained
        from any reference, e.g. DFT calculations.
    d : float
        The amount of perturbation in each parameter.
    label : str
        Default prefix/location used for all files.
    dblabel : str
        Optional separate prefix/location of database files, including
        fingerprints, fingerprint primes, and neighborlists, to avoid
        calculating them. If not supplied, just uses the value from label.
    plotfile : Object
        File for the plot.
    overwrite : bool
        If a plot or a script containing the values is found, overwrite it.
    energy_coefficient : float
        Coefficient of energy loss in the total loss function.
    force_coefficient : float
        Coefficient of force loss in the total loss function.
    """
    from amp.model import LossFunction

    calc = Amp.load(file=load)

    if plotfile is None:
        plotfile = make_filename(label, '-plot.pdf')

    if (not overwrite) and os.path.exists(plotfile):
        raise IOError('File exists: %s.\nIf you want to overwrite,'
                      ' set overwrite=True or manually delete.' % plotfile)

    calc.dblabel = label if dblabel is None else dblabel

    if force_coefficient == 0.:
        calculate_derivatives = False
    else:
        calculate_derivatives = True

    calc._log('\nAmp sensitivity analysis started. ' + now() + '\n')
    calc._log('Descriptor: %s' % calc.descriptor.__class__.__name__)
    calc._log('Model: %s' % calc.model.__class__.__name__)

    images = hash_images(images)

    calc._log('\nDescriptor\n==========')
    calc.descriptor.calculate_fingerprints(
        images=images,
        parallel=calc._parallel,
        log=calc._log,
        calculate_derivatives=calculate_derivatives)

    vector = calc.model.vector.copy()

    lossfunction = LossFunction(energy_coefficient=energy_coefficient,
                                force_coefficient=force_coefficient,
                                parallel=calc._parallel)
    calc.model.lossfunction = lossfunction

    # Set up local loss function.
    calc.model.lossfunction.attach_model(
        calc.model,
        fingerprints=calc.descriptor.fingerprints,
        fingerprintprimes=calc.descriptor.fingerprintprimes,
        images=images)

    originalloss = calc.model.lossfunction.get_loss(vector,
                                                    lossprime=False)['loss']

    calc._log('\n Perturbing parameters...', tic='perturb')

    allparameters = []
    alllosses = []
    num_parameters = len(vector)

    for count in range(num_parameters):
        calc._log('parameter %i out of %i' % (count + 1, num_parameters))
        parameters = []
        losses = []
        # Parameter is perturbed by -d and the loss function calculated.
        vector[count] -= d
        parameters.append(vector[count])
        perturbedloss = calc.model.lossfunction.get_loss(
            vector, lossprime=False)['loss']
        losses.append(perturbedloss)

        vector[count] += d
        parameters.append(vector[count])
        losses.append(originalloss)

        # Parameter is perturbed by +d and the loss function calculated.
        vector[count] += d
        parameters.append(vector[count])
        perturbedloss = calc.model.lossfunction.get_loss(
            vector, lossprime=False)['loss']
        losses.append(perturbedloss)

        allparameters.append(parameters)
        alllosses.append(losses)
        # Returning back to the original value.
        vector[count] -= d

    calc._log('...parameters perturbed and loss functions calculated',
              toc='perturb')

    calc._log('Plotting loss function vs perturbed parameters...',
              tic='plot')

    with PdfPages(plotfile) as pdf:
        count = 0
        for parameter in vector:
            fig = pyplot.figure()
            ax = fig.add_subplot(111)
            ax.plot(allparameters[count],
                    alllosses[count],
                    marker='o', linestyle='--', color='b')
            xmin = allparameters[count][0] - \
                0.1 * (allparameters[count][-1] - allparameters[count][0])
            xmax = allparameters[count][-1] + \
                0.1 * (allparameters[count][-1] - allparameters[count][0])
            ymin = min(alllosses[count]) - \
                0.1 * (max(alllosses[count]) - min(alllosses[count]))
            ymax = max(alllosses[count]) + \
                0.1 * (max(alllosses[count]) - min(alllosses[count]))
            ax.set_xlim([xmin, xmax])
            ax.set_ylim([ymin, ymax])
            ax.set_xlabel('parameter no %i' % count)
            ax.set_ylabel('loss function')
            pdf.savefig(fig)
            pyplot.close(fig)
            count += 1

    calc._log(' ...loss functions plotted.', toc='plot')
def calc_rmse(calc_paths,
              images,
              cores=None,
              dblabel=None,
              energy_coefficient=1.0,
              force_coefficient=0.04):
    """Calculates energy and force RMSEs for a set of Amp calculators.

    All calculators must have the same descriptors and models.

    Parameters
    ----------
    calc_paths : list
        List of paths for loading existing ".amp" files.
    images : list or str
        List of ASE atoms objects with positions, symbols, energies, and
        forces in ASE format. This can also be the path to an ASE
        trajectory (.traj) or database (.db) file. Energies can be obtained
        from any reference, e.g. DFT calculations.
    cores : int
        Can specify cores to use for parallel processing; if None, will
        determine from environment.
    dblabel : str
        Optional separate prefix/location of database files, including
        fingerprints, fingerprint primes, and neighborlists, to avoid
        calculating them. If not supplied, just uses the value from label.
    energy_coefficient : float
        Coefficient of energy loss in the total loss function.
    force_coefficient : float
        Coefficient of force loss in the total loss function.
    """
    from amp.model import LossFunction

    calcs = []
    calc = Amp.load(file=calc_paths[0], cores=cores, dblabel=dblabel)
    calcs.append(calc)
    for i in range(1, len(calc_paths)):
        calcs.append(Amp.load(file=calc_paths[i], cores=cores,
                              dblabel=dblabel, logging=False))
        calc._log('Loaded file: %s' % calc_paths[i])

    if force_coefficient == 0.:
        calculate_derivatives = False
        convergence = {'energy_rmse': 0.001}
    else:
        calculate_derivatives = True
        convergence = {'energy_rmse': 0.001, 'force_rmse': 0.01}
    # Setting the convergence is a kludgy way to keep LossFunction.__init__()
    # from resetting the force_coefficient to 0.

    calc._log('\nAmp calc_rmse started. ' + now() + '\n')
    calc._log('Descriptor: %s' % calc.descriptor.__class__.__name__)
    calc._log('Model: %s' % calc.model.__class__.__name__)

    images = hash_images(images)

    calc._log('\nDescriptor\n==========')
    calc.descriptor.calculate_fingerprints(
        images=images,
        parallel=calc._parallel,
        log=calc._log,
        calculate_derivatives=calculate_derivatives)

    lossfunction = LossFunction(energy_coefficient=energy_coefficient,
                                force_coefficient=force_coefficient,
                                parallel=calc._parallel,
                                raise_ConvergenceOccurred=False,
                                convergence=convergence)
    calc.model.lossfunction = lossfunction

    if force_coefficient == 0.:
        calc.model.lossfunction.attach_model(
            calc.model,
            log=calc._log,
            fingerprints=calc.descriptor.fingerprints,
            images=images)
    else:
        calc.model.lossfunction.attach_model(
            calc.model,
            log=calc._log,
            fingerprints=calc.descriptor.fingerprints,
            fingerprintprimes=calc.descriptor.fingerprintprimes,
            images=images)

    steps, loss, energy_loss, force_loss = [], [], [], []
    energy_maxresid, force_maxresid, energy_rmse, force_rmse = [], [], [], []
    for i in range(len(calc_paths)):
        steps.append(int(os.path.basename(calc_paths[i])[:-4]))
        vector = calcs[i].model.vector.copy()
        results = calc.model.lossfunction.get_loss(
            vector, lossprime=calculate_derivatives)
        loss.append(results['loss'])
        energy_loss.append(results['energy_loss'])
        force_loss.append(results['force_loss'])
        energy_maxresid.append(results['energy_maxresid'])
        force_maxresid.append(results['force_maxresid'])
        energy_rmse.append(np.sqrt(energy_loss[i] / len(images)))
        if force_coefficient == 0.:
            calc._log('%5i %19s %12.4e %10.4e %10.4e' %
                      (steps[i], now(), loss[i], energy_rmse[i],
                       energy_maxresid[i]))
        else:
            force_rmse.append(np.sqrt(force_loss[i] / len(images)))
            calc._log('%5i %19s %12.4e %10.4e %10.4e %10.4e %10.4e' %
                      (steps[i], now(), loss[i], energy_rmse[i],
                       energy_maxresid[i], force_rmse[i],
                       force_maxresid[i]))

    data = {}
    data['steps'] = steps
    data['loss'] = loss
    data['energy_loss'] = energy_loss
    data['force_loss'] = force_loss
    data['energy_maxresid'] = energy_maxresid
    data['force_maxresid'] = force_maxresid
    data['energy_rmse'] = energy_rmse
    if force_coefficient != 0.:
        data['force_rmse'] = force_rmse
    return data
def test():
    """Gaussian/Neural numeric-analytic consistency."""
    images = generate_data()
    regressor = Regressor(optimizer='BFGS')

    _G = make_symmetry_functions(type='G2', etas=[0.05, 5.],
                                 elements=['Cu', 'Pt'])
    _G += make_symmetry_functions(type='G4', etas=[0.005],
                                  zetas=[1., 4.], gammas=[1.],
                                  elements=['Cu', 'Pt'])
    Gs = {'Cu': _G, 'Pt': _G}
    calc = Amp(descriptor=Gaussian(Gs=Gs),
               model=NeuralNetwork(hiddenlayers=(2, 1),
                                   regressor=regressor,
                                   randomseed=42),
               cores=1)

    step = 0
    for d in [None, 0.00001]:
        for fortran in [True, False]:
            for cores in [1, 2]:
                step += 1
                label = \
                    'numeric_analytic_test/analytic-%s-%i' % (fortran, cores) \
                    if d is None \
                    else 'numeric_analytic_test/numeric-%s-%i' \
                    % (fortran, cores)
                print(label)

                loss = LossFunction(convergence={'energy_rmse': 10 ** 10,
                                                 'force_rmse': 10 ** 10},
                                    d=d)
                calc.set_label(label)
                calc.dblabel = 'numeric_analytic_test/analytic-True-1'
                calc.model.lossfunction = loss
                calc.descriptor.fortran = fortran
                calc.model.fortran = fortran
                calc.cores = cores

                calc.train(images=images)

                if step == 1:
                    ref_energies = []
                    ref_forces = []
                    for image in images:
                        ref_energies += [calc.get_potential_energy(image)]
                        ref_forces += [calc.get_forces(image)]
                    ref_dloss_dparameters = \
                        calc.model.lossfunction.dloss_dparameters
                else:
                    energies = []
                    forces = []
                    for image in images:
                        energies += [calc.get_potential_energy(image)]
                        forces += [calc.get_forces(image)]
                    dloss_dparameters = \
                        calc.model.lossfunction.dloss_dparameters

                    for image_no in range(2):
                        diff = abs(energies[image_no] -
                                   ref_energies[image_no])
                        assert (diff < 10. ** (-13.)), \
                            'The calculated value of energy of image %i is ' \
                            'wrong!' % (image_no + 1)
                        for atom_no in range(len(images[0])):
                            for i in range(3):
                                diff = abs(forces[image_no][atom_no][i] -
                                           ref_forces[image_no][atom_no][i])
                                assert (diff < 10. ** (-10.)), \
                                    'The calculated %i force of atom %i of ' \
                                    'image %i is wrong!' \
                                    % (i, atom_no, image_no + 1)
                    # Checks analytical and numerical dloss_dparameters
                    for _ in range(len(ref_dloss_dparameters)):
                        diff = abs(dloss_dparameters[_] -
                                   ref_dloss_dparameters[_])
                        assert (diff < 10 ** (-10.)), \
                            'The calculated value of loss function ' \
                            'derivative is wrong!'

    # Checks analytical and numerical forces
    forces = []
    for image in images:
        image.set_calculator(calc)
        forces += [calc.calculate_numerical_forces(image, d=d)]
    for atom_no in range(len(images[0])):
        for i in range(3):
            diff = abs(forces[image_no][atom_no][i] -
                       ref_forces[image_no][atom_no][i])
            print('{:3d} {:1d} {:7.1e}'.format(atom_no, i, diff))
            assert (diff < 10. ** (-6.)), \
                'The calculated %i force of atom %i of ' \
                'image %i is wrong! (Diff = %f)' \
                % (i, atom_no, image_no + 1, diff)
def train_amp(baseframe=200,
              traj='ethane.traj',
              convergence={'energy_rmse': 0.25, 'force_rmse': 0.5},
              elements=['C', 'H', 'O'],
              cores=4):
    """Gaussian/tflow train test."""
    p = ple()
    label = 'amp'
    all_images = Trajectory(traj)
    nimg, mean_e = get_mean_energy(all_images)

    G = make_symmetry_functions(elements=elements, type='G2',
                                etas=np.logspace(np.log10(0.05),
                                                 np.log10(5.), num=4))
    G += make_symmetry_functions(elements=elements, type='G5',
                                 etas=[0.005],
                                 zetas=[1., 4.],
                                 gammas=[+1., -1.])
    G = {element: G for element in elements}  # Gs=G

    if not isfile('amp.amp'):
        # print('\nset up calculator ...\n')
        calc = Amp(descriptor=Gaussian(mode='atom-centered', Gs=G),
                   model=NeuralNetwork(hiddenlayers=(1024, 1024, 1024, 512,
                                                     512, 256, 256, 256,
                                                     256, 128, 128),
                                       convergenceCriteria=convergence,
                                       activation='tanh',
                                       energy_coefficient=1.0,
                                       force_coefficient=None,
                                       optimizationMethod='ADAM',
                                       parameters={'energyMeanScale': mean_e},
                                       maxTrainingEpochs=100000),
                   label=label,
                   cores=cores)  # 'l-BFGS-b' or 'ADAM'
        trained_images = [all_images[j] for j in range(0, baseframe)]
        calc.train(overwrite=True, images=trained_images)
        del calc
    else:
        calc = Amp.load('amp.amp')
        calc.model.parameters['convergence'] = convergence
        calc.model.lossfunction = LossFunction(convergence=convergence)
        trained_images = [all_images[j] for j in range(0, baseframe)]
        calc.train(overwrite=True, images=trained_images)
        del calc

    edfts, eamps, eamps_ = [], [], []
    dolabel = True
    basestep = int(baseframe / tframe)

    system('epstopdf energies.eps')
    p.scatter(x, edft, eamp, eamp_, dolabel=dolabel)
    p.plot()
    plot_energies(edfts, eamps, eamp_=eamps_)
    system('epstopdf energies_scatter.eps')
def test():
    """Gaussian/Neural training.

    Checks consistency of pure-python and fortran versions.
    """
    images = make_images()

    convergence = {'energy_rmse': 10. ** 10.,
                   'energy_maxresid': 10. ** 10.,
                   'force_rmse': 10. ** 10.,
                   'force_maxresid': 10. ** 10.}

    regressor = Regressor(optimizer='BFGS')

    count = 0
    for fortran in [False, True]:
        for cores in range(1, 2):
            string = 'consistgauss/%s-%i'
            label = string % (fortran, cores)
            calc = Amp(descriptor=Gaussian(cutoff=cutoff,
                                           Gs=Gs,
                                           fortran=fortran),
                       model=NeuralNetwork(hiddenlayers=hiddenlayers,
                                           weights=weights,
                                           scalings=scalings,
                                           activation=activation,
                                           fprange=fingerprints_range,
                                           regressor=regressor),
                       label=label,
                       cores=1)

            lossfunction = LossFunction(convergence=convergence)
            calc.model.lossfunction = lossfunction
            calc.train(images=images)

            if count == 0:
                ref_loss = calc.model.lossfunction.loss
                ref_energy_loss = calc.model.lossfunction.energy_loss
                ref_force_loss = calc.model.lossfunction.force_loss
                ref_dloss_dparameters = \
                    calc.model.lossfunction.dloss_dparameters
            else:
                assert (abs(calc.model.lossfunction.loss -
                            ref_loss) < 10. ** (-10.)), \
                    '''Loss function value for %r fortran, and %i cores is
                    not consistent with the value of python version on
                    single core.''' % (fortran, cores)

                assert (abs(calc.model.lossfunction.energy_loss -
                            ref_energy_loss) < 10. ** (-9.)), \
                    '''Energy rmse value for %r fortran, and %i cores is not
                    consistent with the value of python version on single
                    core.''' % (fortran, cores)

                assert (abs(calc.model.lossfunction.force_loss -
                            ref_force_loss) < 10. ** (-9.)), \
                    '''Force rmse value for %r fortran, and %i cores is not
                    consistent with the value of python version on single
                    core.''' % (fortran, cores)

                for _ in range(len(ref_dloss_dparameters)):
                    assert (abs(calc.model.lossfunction.dloss_dparameters[_] -
                                ref_dloss_dparameters[_]) < 10. ** (-10.)), \
                        '''Derivative of the cost function for %r fortran, and
                        %i cores is not consistent with the value of python
                        version on single core.''' % (fortran, cores)

            count = count + 1
from ase.calculators.emt import EMT

initial = 'initial.traj'
final = 'final.traj'

Gs = None
n = 5
cutoff = 6.5

amp_calc = Amp(descriptor=Gaussian(cutoff=cutoff, fortran=True, Gs=Gs),
               model=NeuralNetwork(hiddenlayers=(n, n),
                                   fortran=True,
                                   checkpoints=None))
convergence = {'energy_rmse': 0.0001, 'force_rmse': 0.01}
amp_calc.model.lossfunction = LossFunction(convergence=convergence)

dft_calc = EMT()

neb = accelerate_neb(initial=initial, final=final, tolerance=0.05,
                     maxiter=200, fmax=0.05, ifmax=1., step=2.,
                     metric='fmax')
neb.initialize(calc=dft_calc, amp_calc=amp_calc, climb=False,
               intermediates=5)
neb.accelerate()
#import os
#from ase import Atoms, Atom, units
#import ase.io
from amp import Amp
from amp.descriptor.gaussian import Gaussian
from amp.model.neuralnetwork import NeuralNetwork
from amp.model import LossFunction

calc = Amp(descriptor=Gaussian(),
           model=NeuralNetwork(hiddenlayers=(10, 10, 10)))
calc.model.lossfunction = LossFunction(convergence={'energy_rmse': 0.02,
                                                    'force_rmse': 0.03})
calc.train(images='geoopt_LCAO.traj')
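# A minimal follow-up sketch (not part of the original script): once training
# has converged, the saved potential can be reloaded and used as an ASE
# calculator on the same trajectory. 'amp.amp' is the default output name
# used by the other snippets here; adjust it if a different label was set.
import ase.io
from amp import Amp

calc = Amp.load('amp.amp')
for image in ase.io.read('geoopt_LCAO.traj', index=':'):
    image.set_calculator(calc)
    print("energy =", image.get_potential_energy())
    print("forces =", image.get_forces())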
images = [atoms]
g2_etas = [0.005]
g2_rs_s = [0] * 4
g4_etas = [0.005]
g4_zetas = [1., 4.]
g4_gammas = [1., -1.]
cutoff = 4
make_amp_descriptors_simple_nn(images, g2_etas, g2_rs_s,
                               g4_etas, g4_zetas, g4_gammas, cutoff)
G = make_symmetry_functions(elements=elements, type='G2', etas=g2_etas)
# Add Rs parameter (0.0 for default) to comply with my version of AMP
#for g in G:
#    g['Rs'] = 0.0
G += make_symmetry_functions(elements=elements, type='G4',
                             etas=g4_etas,
                             zetas=g4_zetas,
                             gammas=g4_gammas)
calc = Amp(descriptor=Gaussian(Gs=G, cutoff=4.),
           cores=ncores,
           model=NeuralNetwork(hiddenlayers=hiddenlayers))
calc.model.lossfunction = LossFunction(convergence=convergence,
                                       force_coefficient=0.001)
calc.train(images=images)
def train_rnn(baseframe=100,
              tframe=8,
              total_step=10,
              traj='ethane.traj',
              convergence={'energy_rmse': 0.25, 'force_rmse': 0.5},
              elements=['C', 'H', 'O'],
              hiddenlayers=(64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64),
              optim='ADAM',
              cores=4):
    """Gaussian/tflow train test."""
    p = ple()
    label = 'amp'
    all_images = Trajectory(traj)
    nimg, mean_e = get_mean_energy(all_images)

    G = make_symmetry_functions(elements=elements, type='G2',
                                etas=np.logspace(np.log10(0.05),
                                                 np.log10(5.), num=4))
    G += make_symmetry_functions(elements=elements, type='G5',
                                 etas=[0.005],
                                 zetas=[1., 4.],
                                 gammas=[+1., -1.])
    G = {element: G for element in elements}  # Gs=G

    if not isfile('amp.amp'):
        print('\nset up calculator ...\n')
        calc = Amp(descriptor=Gaussian(mode='atom-centered', Gs=G),
                   model=NeuralNetwork(hiddenlayers=hiddenlayers,
                                       convergenceCriteria=convergence,
                                       activation='tanh',
                                       energy_coefficient=1.0,
                                       force_coefficient=None,
                                       optimizationMethod=optim,
                                       parameters={'energyMeanScale': mean_e},
                                       maxTrainingEpochs=100000),
                   label=label,
                   cores=cores)  # 'l-BFGS-b' or 'ADAM'
        trained_images = [all_images[j] for j in range(0, baseframe)]
        calc.train(overwrite=True, images=trained_images)
        del calc
    else:
        calc = Amp.load('amp.amp')
        calc.model.parameters['convergence'] = convergence
        calc.model.lossfunction = LossFunction(convergence=convergence)
        trained_images = [all_images[j] for j in range(0, baseframe)]
        calc.train(overwrite=True, images=trained_images)
        del calc

    tstep = int((nimg - baseframe) / tframe)
    if total_step > tstep:
        total_step = tstep
        print('Max train cycle of %d is allowed.' % total_step)

    edfts, eamps, eamps_ = [], [], []
    dolabel = True
    basestep = int(baseframe / tframe)

    for step in range(basestep, total_step + basestep):
        new_images = [all_images[j]
                      for j in range(0 + step * tframe,
                                     tframe + step * tframe)]
        trained_images.extend(new_images)

        x, edft, eamp, eamp_ = [], [], [], []
        ii = step * tframe

        # ----- test -----
        calc1 = Amp.load('amp.amp')
        for i, image in enumerate(new_images):
            x.append(ii)
            eamp_.append(calc1.get_potential_energy(image))
            eamps_.append(eamp_[-1])
            edft.append(image.get_potential_energy())
            edfts.append(edft[-1])
            ii += 1
        del calc1

        # ----- train -----
        calc = Amp.load('amp.amp')
        calc.model.lossfunction = LossFunction(convergence=convergence)
        # calc.model.convergenceCriteria = convergence
        calc.train(overwrite=True, images=trained_images)
        del calc

        # ----- test -----
        calc2 = Amp.load('amp.amp')
        print('\n---- current training result ---- \n')
        for i, image in enumerate(new_images):
            eamp.append(calc2.get_potential_energy(image))
            eamps.append(eamp[-1])
            print("energy(AMP) = %f energy(DFT) = %f" % (eamp[-1], edft[i]))
            # print("forces = %s" % str(calc2.get_forces(image)))
        del calc2

        plot_energies(edfts, eamps, eamp_=None)
        system('epstopdf energies.eps')

        p.scatter(x, edft, eamp, eamp_, dolabel=dolabel)
        if dolabel:
            dolabel = False
        p.plot()
        system('epstopdf energies_scatter.eps')
def periodic_0th_bfgs_step_test():
    """Gaussian/Neural training periodic standard test.

    Compares results to that expected from separate mathematica
    calculations.
    """
    # Making the list of images
    images = [Atoms(symbols='PdOPd',
                    pbc=np.array([True, False, False], dtype=bool),
                    cell=np.array([[2., 0., 0.], [0., 2., 0.], [0., 0., 2.]]),
                    positions=np.array([[0.5, 1., 0.5], [1., 0.5, 1.],
                                        [1.5, 1.5, 1.5]])),
              Atoms(symbols='PdO',
                    pbc=np.array([True, True, False], dtype=bool),
                    cell=np.array([[2., 0., 0.], [0., 2., 0.], [0., 0., 2.]]),
                    positions=np.array([[0.5, 1., 0.5], [1., 0.5, 1.]])),
              Atoms(symbols='Cu',
                    pbc=np.array([True, True, False], dtype=bool),
                    cell=np.array([[1.8, 0., 0.], [0., 1.8, 0.],
                                   [0., 0., 1.8]]),
                    positions=np.array([[0., 0., 0.]]))]

    for image in images:
        image.set_calculator(EMT())
        image.get_potential_energy(apply_constraint=False)
        image.get_forces(apply_constraint=False)

    # Parameters
    Gs = {'O': [{'type': 'G2', 'element': 'Pd', 'eta': 0.8},
                {'type': 'G4', 'elements': ['O', 'Pd'],
                 'eta': 0.3, 'gamma': 0.6, 'zeta': 0.5}],
          'Pd': [{'type': 'G2', 'element': 'Pd', 'eta': 0.2},
                 {'type': 'G4', 'elements': ['Pd', 'Pd'],
                  'eta': 0.9, 'gamma': 0.75, 'zeta': 1.5}],
          'Cu': [{'type': 'G2', 'element': 'Cu', 'eta': 0.8},
                 {'type': 'G4', 'elements': ['Cu', 'Cu'],
                  'eta': 0.3, 'gamma': 0.6, 'zeta': 0.5}]}

    hiddenlayers = {'O': (2,), 'Pd': (2,), 'Cu': (2,)}

    weights = OrderedDict(
        [('O', OrderedDict([(1, np.matrix([[-2.0, 6.0], [3.0, -3.0],
                                           [1.5, -0.9]])),
                            (2, np.matrix([[5.5], [3.6], [1.4]]))])),
         ('Pd', OrderedDict([(1, np.matrix([[-1.0, 3.0], [2.0, 4.2],
                                            [1.0, -0.7]])),
                             (2, np.matrix([[4.0], [0.5], [3.0]]))])),
         ('Cu', OrderedDict([(1, np.matrix([[0.0, 1.0], [-1.0, -2.0],
                                            [2.5, -1.9]])),
                             (2, np.matrix([[0.5], [1.6], [-1.4]]))]))])

    scalings = OrderedDict(
        [('O', OrderedDict([('intercept', -2.3), ('slope', 4.5)])),
         ('Pd', OrderedDict([('intercept', 1.6), ('slope', 2.5)])),
         ('Cu', OrderedDict([('intercept', -0.3), ('slope', -0.5)]))])

    # Correct values
    if aseversion < 12:  # EMT values have changed from the 3.12.0 version
        ref_loss = 8004.292841411172
        ref_energyloss = (43.7360019403031 ** 2.) * 3
        ref_forceloss = (137.40994760947325 ** 2.) * 3
        ref_dloss_dparameters = np.array(
            [0.08141668748130322, 0.03231235582925534, 0.04388650395738586,
             0.017417514465922313, 0.028431276597563077, 0.011283700608814465,
             0.0941695726576061, -0.12322258890990219, 0.12679918754154568,
             63.53960075374332, 0.01624770019548904, -86.6263955859162,
             -0.01777752828707744, 86.22415217526024, 0.017745913074496918,
             104.58358033298292, -96.73280209888215, -99.09843648905876,
             -8.302880631972338, -1.2590007162074357, 8.302877346883133,
             1.25875988418134, -8.302866610678247, -1.2563833805675353,
             28.324298392680998, 28.093155094726413, -29.37874455931869,
             -11.247473567044866, 11.119951466664787, -87.08582317481387,
             -20.939485239182346, -125.73267675705365, -35.138524407482116])
    else:
        ref_loss = 8004.287750978173
        ref_energyloss = (43.73598563177581 ** 2.) * 3
        ref_forceloss = (137.409923023214 ** 2.) * 3
        ref_dloss_dparameters = np.array(
            [0.08141663280688925, 0.03231233413027478, 0.043886474485922956,
             0.01741750276939638, 0.02843125750487539, 0.011283693031378718,
             0.09416950941914284, -0.12322250616122936, 0.1267991023910503,
             63.53958764057119, 0.016247696749304368, -86.62637753054923,
             -0.01777752451341436, 86.22413420485914, 0.01774590930723711,
             104.58353326982777, -96.73275667196937, -99.09839026204304,
             -8.302877823431269, -1.2590002903842232, 8.302874538343092,
             1.2587594584335775, -8.302863802141216, -1.2563829555383859,
             28.32428881173613, 28.093145591893936, -29.37873462156934,
             -11.24746601393696, 11.11994399919284, -87.08579155328007,
             -20.93947792122797, -125.73262989900473, -35.13850819392253])

    # Testing pure-python and fortran versions of Gaussian-neural on
    # different numbers of processes
    for fortran in [False, True]:
        for cores in range(1, 4):
            label = 'train-periodic/%s-%i' % (fortran, cores)
            print(label)
            calc = Amp(descriptor=Gaussian(cutoff=4.,
                                           Gs=Gs,
                                           fortran=fortran),
                       model=NeuralNetwork(hiddenlayers=hiddenlayers,
                                           weights=weights,
                                           scalings=scalings,
                                           activation='tanh',
                                           regressor=regressor,
                                           fortran=fortran),
                       label=label,
                       dblabel=label,
                       cores=cores)

            lossfunction = LossFunction(convergence=convergence)
            calc.model.lossfunction = lossfunction
            calc.train(images=images)

            diff = abs(calc.model.lossfunction.loss - ref_loss)
            print("diff at 414 =", diff)
            assert (diff < 10. ** (-10.)), \
                'Calculated value of loss function is wrong!'
            diff = abs(calc.model.lossfunction.energy_loss - ref_energyloss)
            assert (diff < 10. ** (-10.)), \
                'Calculated value of energy per atom RMSE is wrong!'
            diff = abs(calc.model.lossfunction.force_loss - ref_forceloss)
            assert (diff < 10 ** (-9.)), \
                'Calculated value of force RMSE is wrong!'
            for _ in range(len(ref_dloss_dparameters)):
                diff = abs(calc.model.lossfunction.dloss_dparameters[_] -
                           ref_dloss_dparameters[_])
                assert (diff < 10 ** (-10.)), \
                    'Calculated value of loss function derivative is wrong!'

            dblabel = label
            secondlabel = '_' + label

            calc = Amp(descriptor=Gaussian(cutoff=4.,
                                           Gs=Gs,
                                           fortran=fortran),
                       model=NeuralNetwork(hiddenlayers=hiddenlayers,
                                           weights=weights,
                                           scalings=scalings,
                                           activation='tanh',
                                           regressor=regressor,
                                           fortran=fortran),
                       label=secondlabel,
                       dblabel=dblabel,
                       cores=cores)

            lossfunction = LossFunction(convergence=convergence)
            calc.model.lossfunction = lossfunction
            calc.train(images=images)

            diff = abs(calc.model.lossfunction.loss - ref_loss)
            assert (diff < 10. ** (-10.)), \
                'Calculated value of loss function is wrong!'
            diff = abs(calc.model.lossfunction.energy_loss - ref_energyloss)
            assert (diff < 10. ** (-10.)), \
                'Calculated value of energy per atom RMSE is wrong!'
            diff = abs(calc.model.lossfunction.force_loss - ref_forceloss)
            assert (diff < 10 ** (-9.)), \
                'Calculated value of force RMSE is wrong!'
            for _ in range(len(ref_dloss_dparameters)):
                diff = abs(calc.model.lossfunction.dloss_dparameters[_] -
                           ref_dloss_dparameters[_])
                assert (diff < 10 ** (-10.)), \
                    'Calculated value of loss function derivative is wrong!'
elements = ['O', 'Ru']
Gs = make_symmetry_functions(elements)

filehandle = open('nodes', 'r')
linelist = filehandle.readlines()
filehandle.close()

cores = {}
for i in range(len(linelist)):
    node = linelist[i][0:5]
    cores[node] = 32

train_images = io.read(
    '/work/common/hxin_lab/jiamin/non_adiabatic/Langevin/Training_3nd/trajs/fingerprints/identified.traj',
    index=':')
print(len(train_images))

#calc = Amp(descriptor=Gaussian(Gs=Gs, elements=['O', 'Ru']),
#           model=NeuralNetwork(hiddenlayers=(70, 70)), cores=32)
convergence = {'energy_rmse': 0.001,
               'energy_maxresid': None,
               'force_rmse': 0.005,
               'force_maxresid': None}
lossfunction = LossFunction(convergence=convergence)
calc = Amp.load('./amp-checkpoint.amp', cores=cores)
#calc = Amp.load('/work/common/hxin_lab/jiamin/non_adiabatic/Langevin/Training_2nd/sym70_2L70/amp-checkpoint.amp', cores=cores)
calc.model.lossfunction = lossfunction
calc.train(images=train_images)
def train_images(images):
    calc = Amp(descriptor=Gaussian(),
               model=NeuralNetwork(hiddenlayers=(10, 10, 10)))
    # Combine convergence and force settings in a single LossFunction;
    # a second assignment would replace, not extend, the first one.
    calc.model.lossfunction = LossFunction(convergence={'energy_rmse': 0.001},
                                           force_coefficient=-0.1)
    calc.train(images=images, overwrite=True)
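# A minimal usage sketch (an assumption, not part of the snippets above): once
# train_images() has finished, the resulting "amp.amp" potential can drive a
# structure relaxation through ASE's standard optimizers. 'structure.traj'
# is a hypothetical input file.
import ase.io
from ase.optimize import BFGS
from amp import Amp

atoms = ase.io.read('structure.traj')
atoms.set_calculator(Amp.load('amp.amp'))
opt = BFGS(atoms, trajectory='relax.traj')
opt.run(fmax=0.05)  # relax until forces fall below 0.05 eV/Angstrom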