def run(self, ref_data=None, restart=None):
    """
    Runs the gradient optimization.

    Ensure that the attributes in __init__ are set as you desire before
    using this function.

    Returns
    -------
    `datatypes.FF` (or subclass)
        Contains the best parameters.
    """
    # We need reference data if you didn't provide it.
    if ref_data is None:
        ref_data = opt.return_ref_data(self.args_ref)

    # We need the initial FF data.
    if self.ff.data is None:
        logger.log(20, '~~ GATHERING INITIAL FF DATA ~~'.rjust(79, '~'))
        # Is opt.Optimizer.ff_lines used anymore?
        self.ff.export_ff()
        self.ff.data = calculate.main(self.args_ff)
        # Not 100% sure if this is necessary, but it certainly doesn't hurt.
        compare.correlate_energies(ref_data, self.ff.data)

    r_dict = compare.data_by_type(ref_data)
    c_dict = compare.data_by_type(self.ff.data)
    r_dict, c_dict = compare.trim_data(r_dict, c_dict)
    if self.ff.score is None:
        # Already zeroed reference and correlated the energies.
        self.ff.score = compare.compare_data(r_dict, c_dict)

    data_types = []
    for typ in r_dict:
        data_types.append(typ)
    data_types.sort()

    logger.log(20, '~~ GRADIENT OPTIMIZATION ~~'.rjust(79, '~'))
    logger.log(20, 'INIT FF SCORE: {}'.format(self.ff.score))
    opt.pretty_ff_results(self.ff, level=20)

    logger.log(20, '~~ CENTRAL DIFFERENTIATION ~~'.rjust(79, '~'))
    if restart:
        par_file = restart
        logger.log(20, ' -- Restarting gradient from central '
                   'differentiation file {}.'.format(par_file))
    else:
        # We need a file to hold the differentiated parameter data.
        par_files = glob.glob(os.path.join(self.direc, 'par_diff_???.txt'))
        if par_files:
            par_files.sort()
            most_recent_par_file = par_files[-1]
            most_recent_par_file = most_recent_par_file.split('/')[-1]
            most_recent_num = most_recent_par_file[9:12]
            num = int(most_recent_num) + 1
            par_file = 'par_diff_{:03d}.txt'.format(num)
        else:
            par_file = 'par_diff_001.txt'
        logger.log(20, ' -- Generating central differentiation '
                   'file {}.'.format(par_file))
        f = open(os.path.join(self.direc, par_file), 'w')
        csv_writer = csv.writer(f)
        # Row 1 - Labels
        # Row 2 - Weights
        # Row 3 - Reference data values
        # Row 4 - Initial FF data values
        ## Deprecated - TR
        # csv_writer.writerow([x.lbl for x in ref_data])
        # csv_writer.writerow([x.wht for x in ref_data])
        # csv_writer.writerow([x.val for x in ref_data])
        # csv_writer.writerow([x.val for x in self.ff.data])
        writerows = [[], [], [], []]
        for data_type in data_types:
            writerows[0].extend([x.lbl for x in r_dict[data_type]])
            writerows[1].extend([x.wht for x in r_dict[data_type]])
            writerows[2].extend([x.val for x in r_dict[data_type]])
            writerows[3].extend([x.val for x in c_dict[data_type]])
        for row in writerows:
            csv_writer.writerow(row)

        logger.log(20, '~~ DIFFERENTIATING PARAMETERS ~~'.rjust(79, '~'))
        # Save many FFs, each with their own parameter sets.
        ffs = opt.differentiate_ff(self.ff)

        logger.log(20,
                   '~~ SCORING DIFFERENTIATED PARAMETERS ~~'.rjust(79, '~'))
        for ff in ffs:
            ff.export_ff(lines=self.ff.lines)
            logger.log(20, ' -- Calculating {}.'.format(ff))
            data = calculate.main(self.args_ff)
            # Deprecated
            # ff.score = compare.compare_data(ref_data, data)
            c_data = compare.data_by_type(data)
            r_dict, c_data = compare.trim_data(r_dict, c_data)
            ff.score = compare.compare_data(r_dict, c_data)
            opt.pretty_ff_results(ff)
            # Write the data rather than storing it in memory. For large
            # parameter sets, this could consume GBs of memory otherwise!
            # csv_writer.writerow([x.val for x in data])
            row = []
            for data_type in data_types:
                row.extend([x.val for x in c_data[data_type]])
            csv_writer.writerow(row)
        f.close()

        # Make sure we have derivative information. Used for NR.
        #
        # The derivatives are useful for checking up on the progress of the
        # optimization and for deciding which parameters to use in a
        # subsequent simplex optimization.
        #
        # Still need a way to do this with the restart file.
        opt.param_derivs(self.ff, ffs)

    # Calculate the Jacobian, residual vector, matrix A and vector b.
    # These aren't needed if you're only doing Newton-Raphson.
    if self.do_lstsq or self.do_lagrange or self.do_levenberg or \
            self.do_svd:
        logger.log(20, '~~ JACOBIAN AND RESIDUAL VECTOR ~~'.rjust(79, '~'))
        # Setup the residual vector.
        # Deprecated - TR
        # num_d = len(ref_data)
        num_d = 0
        for datatype in r_dict:
            num_d += len(r_dict[datatype])
        resid = np.empty((num_d, 1), dtype=float)
        # Deprecated - TR
        # for i in xrange(0, num_d):
        #     resid[i, 0] = ref_data[i].wht * \
        #         (ref_data[i].val - self.ff.data[i].val)
        count = 0
        for data_type in data_types:
            for r, c in zip(r_dict[data_type], c_dict[data_type]):
                resid[count, 0] = r.wht * (r.val - c.val)
                count += 1
        # logger.log(5, 'RESIDUAL VECTOR:\n{}'.format(resid))
        logger.log(20, ' -- Formed {} residual vector.'.format(resid.shape))
        # Setup the Jacobian.
        num_p = len(self.ff.params)
        # Maybe should be a part of the Jacobian function.
        jacob = np.empty((num_d, num_p), dtype=float)
        jacob = return_jacobian(jacob, os.path.join(self.direc, par_file))
        # logger.log(5, 'JACOBIAN:\n{}'.format(jacob))
        logger.log(20, ' -- Formed {} Jacobian.'.format(jacob.shape))
        ma = jacob.T.dot(jacob)
        vb = jacob.T.dot(resid)
        # We need these for most optimization methods.
        logger.log(5, ' MATRIX A AND VECTOR B '.center(79, '-'))
        # logger.log(5, 'A:\n{}'.format(ma))
        # logger.log(5, 'b:\n{}'.format(vb))

    # Start coming up with new parameter sets.
    if self.do_newton and not restart:
        logger.log(20, '~~ NEWTON-RAPHSON ~~'.rjust(79, '~'))
        # Moved the derivative section outside of here.
        changes = do_newton(self.ff.params,
                            radii=self.newton_radii,
                            cutoffs=self.newton_cutoffs)
        cleanup(self.new_ffs, self.ff, changes)
    if self.do_lstsq:
        logger.log(20, '~~ LEAST SQUARES ~~'.rjust(79, '~'))
        changes = do_lstsq(ma, vb,
                           radii=self.lstsq_radii,
                           cutoffs=self.lstsq_cutoffs)
        cleanup(self.new_ffs, self.ff, changes)
    if self.do_lagrange:
        logger.log(20, '~~ LAGRANGE ~~'.rjust(79, '~'))
        for factor in sorted(self.lagrange_factors):
            changes = do_lagrange(ma, vb, factor,
                                  radii=self.lagrange_radii,
                                  cutoffs=self.lagrange_cutoffs)
            cleanup(self.new_ffs, self.ff, changes)
    if self.do_levenberg:
        logger.log(20, '~~ LEVENBERG ~~'.rjust(79, '~'))
        for factor in sorted(self.levenberg_factors):
            changes = do_levenberg(ma, vb, factor,
                                   radii=self.levenberg_radii,
                                   cutoffs=self.levenberg_cutoffs)
            cleanup(self.new_ffs, self.ff, changes)
    if self.do_svd:
        logger.log(20, '~~ SINGULAR VALUE DECOMPOSITION ~~'.rjust(79, '~'))
        # J = U . s . VT
        mu, vs, mvt = return_svd(jacob)
        logger.log(1, '>>> mu.shape: {}'.format(mu.shape))
        logger.log(1, '>>> vs.shape: {}'.format(vs.shape))
        logger.log(1, '>>> mvt.shape: {}'.format(mvt.shape))
        logger.log(1, '>>> vb.shape: {}'.format(vb.shape))
        if self.svd_factors:
            changes = do_svd_w_thresholds(mu, vs, mvt, resid,
                                          self.svd_factors,
                                          radii=self.svd_radii,
                                          cutoffs=self.svd_cutoffs)
        else:
            changes = do_svd_wo_thresholds(mu, vs, mvt, resid,
                                           radii=self.svd_radii,
                                           cutoffs=self.svd_cutoffs)
        cleanup(self.new_ffs, self.ff, changes)

    # Report how many trial FFs were generated.
    logger.log(20, ' -- Generated {} trial force field(s).'.format(
        len(self.new_ffs)))

    # If there are any trials, test them.
    if self.new_ffs:
        logger.log(20, '~~ EVALUATING TRIAL FF(S) ~~'.rjust(79, '~'))
        for ff in self.new_ffs:
            data = opt.cal_ff(ff, self.args_ff, parent_ff=self.ff)
            # Shouldn't need to zero anymore.
            # Deprecated
            # ff.score = compare.compare_data(ref_data, data)
            c_data = compare.data_by_type(data)
            r_dict, c_data = compare.trim_data(r_dict, c_data)
            ff.score = compare.compare_data(r_dict, c_data)
            opt.pretty_ff_results(ff)
        self.new_ffs = sorted(self.new_ffs, key=lambda x: x.score)
        # Check for improvement.
        if self.new_ffs[0].score < self.ff.score:
            ff = self.new_ffs[0]
            logger.log(
                20,
                '~~ GRADIENT FINISHED WITH IMPROVEMENTS ~~'.rjust(79, '~'))
            opt.pretty_ff_results(self.ff, level=20)
            opt.pretty_ff_results(ff, level=20)
            # Copy parameter derivatives from original FF to save time in
            # case we move onto simplex immediately after this.
            copy_derivs(self.ff, ff)
        else:
            ff = self.ff
    else:
        ff = self.ff
    ff.export_ff(ff.path)
    return ff
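# The run() above hands the normal-equation pieces (ma = J^T J and
# vb = J^T r) to do_lstsq() to obtain a trial parameter step. As a rough,
# hedged illustration of that linear algebra only -- not a copy of this
# project's do_lstsq() -- a plain unconstrained solve could look like the
# sketch below. The helper name _least_squares_step is hypothetical.

import numpy as np


def _least_squares_step(jacob, resid):
    """Illustrative solve of (J^T J) dp = J^T r for a parameter step.

    jacob -- (num_d, num_p) Jacobian of the FF data w.r.t. the parameters.
    resid -- (num_d, 1) weighted residual vector, wht * (ref - calc).
    """
    ma = jacob.T.dot(jacob)  # matrix A
    vb = jacob.T.dot(resid)  # vector b
    # np.linalg.lstsq is more forgiving than a direct inverse when A is
    # close to singular.
    dp, _, _, _ = np.linalg.lstsq(ma, vb, rcond=None)
    return dp.flatten()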
def run(self, ref_data=None):
    # We need reference data if you didn't provide it.
    if ref_data is None:
        ref_data = opt.return_ref_data(self.args_ref)

    # We need the initial FF data.
    if self.ff.data is None:
        logger.log(20, '~~ GATHERING INITIAL FF DATA ~~'.rjust(79, '~'))
        # Check whether this is efficient with the ff_lines.
        self.ff.export_ff()
        self.ff.data = calculate.main(self.args_ff)
        # We could do this, but the zeroing of energies has already been
        # done.
        # self.ff.score = compare.compare_data(ref_data, self.ff.data)
        # So instead we do this.
        compare.correlate_energies(ref_data, self.ff.data)
    if self.ff.score is None:
        # Already zeroed reference and correlated the energies.
        self.ff.score = compare.calculate_score(ref_data, self.ff.data)
    logger.log(20, 'INITIAL FF SCORE: {}'.format(self.ff.score))

    logger.log(20, '~~ GRADIENT OPTIMIZATION ~~'.rjust(79, '~'))
    # We need a file to hold the differentiated parameter data.
    par_files = glob.glob(os.path.join(self.direc, 'par_diff_???.txt'))
    if par_files:
        par_files.sort()
        most_recent_par_file = par_files[-1]
        most_recent_par_file = most_recent_par_file.split('/')[-1]
        most_recent_num = most_recent_par_file[9:12]
        num = int(most_recent_num) + 1
        par_file = 'par_diff_{:03d}.txt'.format(num)
    else:
        par_file = 'par_diff_001.txt'
    f = open(os.path.join(self.direc, par_file), 'w')
    csv_writer = csv.writer(f)
    # Row 1 - Labels
    # Row 2 - Weights
    # Row 3 - Reference data values
    # Row 4 - Initial FF data values
    csv_writer.writerow([x.lbl for x in ref_data])
    csv_writer.writerow([x.wht for x in ref_data])
    csv_writer.writerow([x.val for x in ref_data])
    csv_writer.writerow([x.val for x in self.ff.data])

    logger.log(20, '~~ DIFFERENTIATING PARAMETERS ~~'.rjust(79, '~'))
    # Setup the residual vector.
    # Perhaps move this closer to the Jacobian section.
    num_d = len(ref_data)
    resid = np.empty((num_d, 1), dtype=float)
    for i in xrange(0, num_d):
        resid[i, 0] = ref_data[i].wht * (ref_data[i].val -
                                         self.ff.data[i].val)
    logger.log(5, 'RESIDUAL VECTOR:\n{}'.format(resid))
    logger.log(20, ' -- Formed {} residual vector.'.format(resid.shape))

    # Save many FFs, each with their own parameter sets.
    ffs = opt.differentiate_ff(self.ff)
    logger.log(20, '~~ SCORING DIFFERENTIATED PARAMETERS ~~'.rjust(79, '~'))
    for ff in ffs:
        ff.export_ff(lines=self.ff.lines)
        logger.log(20, ' -- Calculating {}.'.format(ff))
        data = calculate.main(self.args_ff)
        compare.correlate_energies(ref_data, data)
        ff.score = compare.calculate_score(ref_data, data)
        opt.pretty_ff_results(ff)
        # Write the data rather than storing it in memory. For large
        # parameter sets, this could consume GBs of memory otherwise!
        csv_writer.writerow([x.val for x in data])
    f.close()

    # Calculate the Jacobian, residual vector, matrix A and vector b.
    # These aren't needed if you're only doing Newton-Raphson.
    if self.do_lstsq or self.do_lagrange or self.do_levenberg or \
            self.do_svd:
        logger.log(20, '~~ JACOBIAN AND RESIDUAL VECTOR ~~'.rjust(79, '~'))
        # Setup the Jacobian.
        num_p = len(self.ff.params)
        # Maybe should be a part of the Jacobian function.
        jacob = np.empty((num_d, num_p), dtype=float)
        jacob = return_jacobian(jacob, os.path.join(self.direc, par_file))
        ma = jacob.T.dot(jacob)
        vb = jacob.T.dot(resid)
        # We need these for most optimization methods.
        logger.log(5, ' MATRIX A AND VECTOR B '.center(79, '-'))
        logger.log(5, 'A:\n{}'.format(ma))
        logger.log(5, 'b:\n{}'.format(vb))

    # Start coming up with new parameter sets.
    if self.do_newton:
        logger.log(20, '~~ NEWTON-RAPHSON ~~'.rjust(79, '~'))
        # Make sure we have derivative information.
        if self.ff.params[0].d1 is None:
            opt.param_derivs(self.ff, ffs)
        changes = do_newton(self.ff.params,
                            radii=self.newton_radii,
                            cutoffs=self.newton_cutoffs)
        cleanup(self.new_ffs, self.ff, changes)
    if self.do_lstsq:
        logger.log(20, '~~ LEAST SQUARES ~~'.rjust(79, '~'))
        changes = do_lstsq(ma, vb,
                           radii=self.lstsq_radii,
                           cutoffs=self.lstsq_cutoffs)
        cleanup(self.new_ffs, self.ff, changes)
    if self.do_lagrange:
        logger.log(20, '~~ LAGRANGE ~~'.rjust(79, '~'))
        for factor in sorted(self.lagrange_factors):
            changes = do_lagrange(ma, vb, factor,
                                  radii=self.lagrange_radii,
                                  cutoffs=self.lagrange_cutoffs)
            cleanup(self.new_ffs, self.ff, changes)
    if self.do_levenberg:
        logger.log(20, '~~ LEVENBERG ~~'.rjust(79, '~'))
        for factor in sorted(self.levenberg_factors):
            changes = do_levenberg(ma, vb, factor,
                                   radii=self.levenberg_radii,
                                   cutoffs=self.levenberg_cutoffs)
            cleanup(self.new_ffs, self.ff, changes)
    if self.do_svd:
        logger.log(20, '~~ SINGULAR VALUE DECOMPOSITION ~~'.rjust(79, '~'))
        mu, vs, mv = return_svd(ma)
        if self.svd_factors:
            changes = do_svd_w_thresholds(mu, vs, mv, vb, self.svd_factors,
                                          radii=self.svd_radii,
                                          cutoffs=self.svd_cutoffs)
        else:
            changes = do_svd_wo_thresholds(mu, vs, mv, vb,
                                           radii=self.svd_radii,
                                           cutoffs=self.svd_cutoffs)
        cleanup(self.new_ffs, self.ff, changes)

    # Report how many trial FFs were generated.
    logger.log(20, ' -- Generated {} trial force field(s).'.format(
        len(self.new_ffs)))

    # If there are any trials, test them.
    if self.new_ffs:
        logger.log(20, '~~ EVALUATING TRIAL FF(S) ~~'.rjust(79, '~'))
        for ff in self.new_ffs:
            data = opt.cal_ff(ff, self.args_ff, parent_ff=self.ff)
            ff.score = compare.compare_data(ref_data, data, zero=False)
            opt.pretty_ff_results(ff)
        self.new_ffs = sorted(self.new_ffs, key=lambda x: x.score)
        # Check for improvement.
        if self.new_ffs[0].score < self.ff.score:
            ff = self.new_ffs[0]
            logger.log(
                20,
                '~~ GRADIENT FINISHED WITH IMPROVEMENTS ~~'.rjust(79, '~'))
            opt.pretty_ff_results(self.ff, level=20)
            opt.pretty_ff_results(ff, level=20)
        else:
            ff = self.ff
    else:
        ff = self.ff
    return ff
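# do_lagrange() and do_levenberg() above are each tried with a series of
# damping factors. The textbook idea behind such factors (Levenberg /
# Marquardt-style damping) is to solve (A + factor * I) dp = b, where a
# larger factor gives a smaller, more conservative parameter step. The
# sketch below shows only that generic form under that assumption; it is
# not a copy of this project's implementation, and the helper name
# _damped_step is hypothetical.

import numpy as np


def _damped_step(ma, vb, factor):
    """Illustrative damped solve of the normal equations.

    ma     -- (num_p, num_p) matrix A = J^T J.
    vb     -- (num_p, 1) vector b = J^T r.
    factor -- non-negative damping factor added to the diagonal of A.
    """
    damped = ma + factor * np.eye(ma.shape[0])
    dp = np.linalg.solve(damped, vb)
    return dp.flatten()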
def run(self, ref_data=None, restart=None):
    # We need reference data if you didn't provide it.
    if ref_data is None:
        ref_data = opt.return_ref_data(self.args_ref)

    # We need the initial FF data.
    if self.ff.data is None:
        logger.log(20, '~~ GATHERING INITIAL FF DATA ~~'.rjust(79, '~'))
        # Check whether this is efficient with the ff_lines.
        self.ff.export_ff()
        self.ff.data = calculate.main(self.args_ff)
        # We could do this, but the zeroing of energies has already been
        # done.
        # self.ff.score = compare.compare_data(ref_data, self.ff.data)
        # So instead we do this.
        compare.correlate_energies(ref_data, self.ff.data)
    if self.ff.score is None:
        # Already zeroed reference and correlated the energies.
        self.ff.score = compare.calculate_score(ref_data, self.ff.data)
    logger.log(20, 'INITIAL FF SCORE: {}'.format(self.ff.score))

    logger.log(20, '~~ GRADIENT OPTIMIZATION ~~'.rjust(79, '~'))
    # We need a file to hold the differentiated parameter data.
    logger.log(20, '~~ CENTRAL DIFFERENTIATION ~~'.rjust(79, '~'))
    if restart:
        par_file = restart
        logger.log(20, ' -- Restarting gradient from central '
                   'differentiation file {}.'.format(par_file))
    else:
        par_files = glob.glob(os.path.join(self.direc, 'par_diff_???.txt'))
        if par_files:
            par_files.sort()
            most_recent_par_file = par_files[-1]
            most_recent_par_file = most_recent_par_file.split('/')[-1]
            most_recent_num = most_recent_par_file[9:12]
            num = int(most_recent_num) + 1
            par_file = 'par_diff_{:03d}.txt'.format(num)
        else:
            par_file = 'par_diff_001.txt'
        logger.log(20, ' -- Generating central differentiation '
                   'file {}.'.format(par_file))
        f = open(os.path.join(self.direc, par_file), 'w')
        csv_writer = csv.writer(f)
        # Row 1 - Labels
        # Row 2 - Weights
        # Row 3 - Reference data values
        # Row 4 - Initial FF data values
        csv_writer.writerow([x.lbl for x in ref_data])
        csv_writer.writerow([x.wht for x in ref_data])
        csv_writer.writerow([x.val for x in ref_data])
        csv_writer.writerow([x.val for x in self.ff.data])

        logger.log(20, '~~ DIFFERENTIATING PARAMETERS ~~'.rjust(79, '~'))
        # Save many FFs, each with their own parameter sets.
        ffs = opt.differentiate_ff(self.ff)
        logger.log(20,
                   '~~ SCORING DIFFERENTIATED PARAMETERS ~~'.rjust(79, '~'))
        for ff in ffs:
            ff.export_ff(lines=self.ff.lines)
            logger.log(20, ' -- Calculating {}.'.format(ff))
            data = calculate.main(self.args_ff)
            compare.correlate_energies(ref_data, data)
            ff.score = compare.calculate_score(ref_data, data)
            opt.pretty_ff_results(ff)
            # Write the data rather than storing it in memory. For large
            # parameter sets, this could consume GBs of memory otherwise!
            csv_writer.writerow([x.val for x in data])
        f.close()

    # Calculate the Jacobian, residual vector, matrix A and vector b.
    # These aren't needed if you're only doing Newton-Raphson.
    if self.do_lstsq or self.do_lagrange or self.do_levenberg or \
            self.do_svd:
        logger.log(20, '~~ JACOBIAN AND RESIDUAL VECTOR ~~'.rjust(79, '~'))
        # Setup the residual vector.
        num_d = len(ref_data)
        resid = np.empty((num_d, 1), dtype=float)
        for i in xrange(0, num_d):
            resid[i, 0] = ref_data[i].wht * (ref_data[i].val -
                                             self.ff.data[i].val)
        # logger.log(5, 'RESIDUAL VECTOR:\n{}'.format(resid))
        logger.log(20, ' -- Formed {} residual vector.'.format(resid.shape))
        # Setup the Jacobian.
        num_p = len(self.ff.params)
        # Maybe should be a part of the Jacobian function.
        jacob = np.empty((num_d, num_p), dtype=float)
        jacob = return_jacobian(jacob, os.path.join(self.direc, par_file))
        # logger.log(5, 'JACOBIAN:\n{}'.format(jacob))
        logger.log(20, ' -- Formed {} Jacobian.'.format(jacob.shape))
        ma = jacob.T.dot(jacob)
        vb = jacob.T.dot(resid)
        # We need these for most optimization methods.
        logger.log(5, ' MATRIX A AND VECTOR B '.center(79, '-'))
        # logger.log(5, 'A:\n{}'.format(ma))
        # logger.log(5, 'b:\n{}'.format(vb))

    # Start coming up with new parameter sets.
    if self.do_newton and not restart:
        logger.log(20, '~~ NEWTON-RAPHSON ~~'.rjust(79, '~'))
        # Make sure we have derivative information.
        if self.ff.params[0].d1 is None:
            opt.param_derivs(self.ff, ffs)
        changes = do_newton(self.ff.params,
                            radii=self.newton_radii,
                            cutoffs=self.newton_cutoffs)
        cleanup(self.new_ffs, self.ff, changes)
    if self.do_lstsq:
        logger.log(20, '~~ LEAST SQUARES ~~'.rjust(79, '~'))
        changes = do_lstsq(ma, vb,
                           radii=self.lstsq_radii,
                           cutoffs=self.lstsq_cutoffs)
        cleanup(self.new_ffs, self.ff, changes)
    if self.do_lagrange:
        logger.log(20, '~~ LAGRANGE ~~'.rjust(79, '~'))
        for factor in sorted(self.lagrange_factors):
            changes = do_lagrange(ma, vb, factor,
                                  radii=self.lagrange_radii,
                                  cutoffs=self.lagrange_cutoffs)
            cleanup(self.new_ffs, self.ff, changes)
    if self.do_levenberg:
        logger.log(20, '~~ LEVENBERG ~~'.rjust(79, '~'))
        for factor in sorted(self.levenberg_factors):
            changes = do_levenberg(ma, vb, factor,
                                   radii=self.levenberg_radii,
                                   cutoffs=self.levenberg_cutoffs)
            cleanup(self.new_ffs, self.ff, changes)
    if self.do_svd:
        logger.log(20, '~~ SINGULAR VALUE DECOMPOSITION ~~'.rjust(79, '~'))
        # J = U . s . VT
        mu, vs, mvt = return_svd(jacob)
        logger.log(1, '>>> mu.shape: {}'.format(mu.shape))
        logger.log(1, '>>> vs.shape: {}'.format(vs.shape))
        logger.log(1, '>>> mvt.shape: {}'.format(mvt.shape))
        logger.log(1, '>>> vb.shape: {}'.format(vb.shape))
        if self.svd_factors:
            changes = do_svd_w_thresholds(mu, vs, mvt, resid,
                                          self.svd_factors,
                                          radii=self.svd_radii,
                                          cutoffs=self.svd_cutoffs)
        else:
            changes = do_svd_wo_thresholds(mu, vs, mvt, resid,
                                           radii=self.svd_radii,
                                           cutoffs=self.svd_cutoffs)
        cleanup(self.new_ffs, self.ff, changes)

    # Report how many trial FFs were generated.
    logger.log(20, ' -- Generated {} trial force field(s).'.format(
        len(self.new_ffs)))

    # If there are any trials, test them.
    if self.new_ffs:
        logger.log(20, '~~ EVALUATING TRIAL FF(S) ~~'.rjust(79, '~'))
        for ff in self.new_ffs:
            data = opt.cal_ff(ff, self.args_ff, parent_ff=self.ff)
            # Shouldn't need to zero anymore.
            ff.score = compare.compare_data(ref_data, data)
            opt.pretty_ff_results(ff)
        self.new_ffs = sorted(self.new_ffs, key=lambda x: x.score)
        # Check for improvement.
        if self.new_ffs[0].score < self.ff.score:
            ff = self.new_ffs[0]
            logger.log(
                20,
                '~~ GRADIENT FINISHED WITH IMPROVEMENTS ~~'.rjust(79, '~'))
            opt.pretty_ff_results(self.ff, level=20)
            opt.pretty_ff_results(ff, level=20)
            # Copy parameter derivatives from original FF to save time in
            # case we move onto simplex immediately after this.
            copy_derivs(self.ff, ff)
        else:
            ff = self.ff
    else:
        ff = self.ff
    return ff
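# The run() above factors the Jacobian as J = U . s . VT via return_svd()
# and combines the factors with the residual vector in
# do_svd_w_thresholds() / do_svd_wo_thresholds(). A common way to use such
# a factorization is a truncated pseudoinverse: zero the singular values
# below a threshold, then form dp = V . diag(1/s) . U^T . r. The sketch
# below shows only that generic technique under that assumption; it is not
# this project's implementation, and the helper name _truncated_svd_step
# is hypothetical.

import numpy as np


def _truncated_svd_step(jacob, resid, threshold):
    """Illustrative truncated-SVD solve for a parameter step.

    jacob     -- (num_d, num_p) Jacobian.
    resid     -- (num_d, 1) weighted residual vector.
    threshold -- singular values below this are discarded.
    """
    mu, vs, mvt = np.linalg.svd(jacob, full_matrices=False)
    # Invert only the singular values at or above the threshold.
    inv_s = np.array([1.0 / s if s >= threshold else 0.0 for s in vs])
    dp = mvt.T.dot(np.diag(inv_s)).dot(mu.T).dot(resid)
    return dp.flatten()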