Beispiel #1
0
    def run(self, ref_data=None, restart=None):
        """
        Runs the gradient optimization.

        Ensure that the attributes in __init__ are set as you desire before
        using this function.

        Parameters
        ----------
        ref_data : optional
            Reference data. When None, it is obtained from
            `opt.return_ref_data(self.args_ref)`.
        restart : str, optional
            Name of an existing central differentiation file, joined onto
            `self.direc` when the Jacobian is read. When given, the
            parameters are not differentiated again and the Newton-Raphson
            step is skipped, because parameter derivatives are only computed
            as part of the differentiation.

        Returns
        -------
        `datatypes.FF` (or subclass)
            Contains the best parameters: the lowest scoring trial FF if its
            score is lower than that of `self.ff`, otherwise `self.ff`. The
            returned FF is exported to its own path before returning.
        """
        # We need reference data if you didn't provide it.
        if ref_data is None:
            ref_data = opt.return_ref_data(self.args_ref)

        # We need the initial FF data.
        if self.ff.data is None:
            logger.log(20, '~~ GATHERING INITIAL FF DATA ~~'.rjust(79, '~'))
            # Is opt.Optimizer.ff_lines used anymore?
            self.ff.export_ff()
            self.ff.data = calculate.main(self.args_ff)
            # Not 100% sure if this is necessary, but it certainly doesn't hurt.
            compare.correlate_energies(ref_data, self.ff.data)
        r_dict = compare.data_by_type(ref_data)
        c_dict = compare.data_by_type(self.ff.data)
        r_dict, c_dict = compare.trim_data(r_dict, c_dict)
        if self.ff.score is None:
            # Already zeroed reference and correlated the energies.
            self.ff.score = compare.compare_data(r_dict, c_dict)
        # A fixed, sorted ordering of the data types keeps the columns of the
        # differentiation file and the rows of the residual vector aligned.
        data_types = sorted(r_dict)
        logger.log(20, '~~ GRADIENT OPTIMIZATION ~~'.rjust(79, '~'))
        logger.log(20, 'INIT FF SCORE: {}'.format(self.ff.score))
        opt.pretty_ff_results(self.ff, level=20)

        logger.log(20, '~~ CENTRAL DIFFERENTIATION ~~'.rjust(79, '~'))
        if restart:
            par_file = restart
            logger.log(
                20, '  -- Restarting gradient from central '
                'differentiation file {}.'.format(par_file))
        else:
            # We need a file to hold the differentiated parameter data.
            par_files = glob.glob(os.path.join(self.direc, 'par_diff_???.txt'))
            if par_files:
                par_files.sort()
                # basename rather than split('/') so that this also works
                # where the path separator is not '/'.
                most_recent_par_file = os.path.basename(par_files[-1])
                # Characters 9-11 of 'par_diff_NNN.txt' are the counter.
                most_recent_num = most_recent_par_file[9:12]
                num = int(most_recent_num) + 1
                par_file = 'par_diff_{:03d}.txt'.format(num)
            else:
                par_file = 'par_diff_001.txt'
            logger.log(
                20, '  -- Generating central differentiation '
                'file {}.'.format(par_file))
            # The context manager guarantees the file is flushed and closed
            # even if one of the calculations below raises.
            with open(os.path.join(self.direc, par_file), 'w') as f:
                csv_writer = csv.writer(f)
                # Row 1 - Labels
                # Row 2 - Weights
                # Row 3 - Reference data values
                # Row 4 - Initial FF data values
                writerows = [[], [], [], []]
                for data_type in data_types:
                    writerows[0].extend([x.lbl for x in r_dict[data_type]])
                    writerows[1].extend([x.wht for x in r_dict[data_type]])
                    writerows[2].extend([x.val for x in r_dict[data_type]])
                    writerows[3].extend([x.val for x in c_dict[data_type]])
                for row in writerows:
                    csv_writer.writerow(row)
                logger.log(
                    20, '~~ DIFFERENTIATING PARAMETERS ~~'.rjust(79, '~'))
                # Save many FFs, each with their own parameter sets.
                ffs = opt.differentiate_ff(self.ff)
                logger.log(
                    20,
                    '~~ SCORING DIFFERENTIATED PARAMETERS ~~'.rjust(79, '~'))
                for ff in ffs:
                    ff.export_ff(lines=self.ff.lines)
                    logger.log(20, '  -- Calculating {}.'.format(ff))
                    data = calculate.main(self.args_ff)
                    c_data = compare.data_by_type(data)
                    r_dict, c_data = compare.trim_data(r_dict, c_data)
                    ff.score = compare.compare_data(r_dict, c_data)
                    opt.pretty_ff_results(ff)
                    # Write the data rather than storing it in memory. For
                    # large parameter sets, this could consume GBs of memory
                    # otherwise!
                    row = []
                    for data_type in data_types:
                        row.extend([x.val for x in c_data[data_type]])
                    csv_writer.writerow(row)

            # Make sure we have derivative information. Used for NR.
            #
            # The derivatives are useful for checking up on the progress of the
            # optimization and for deciding which parameters to use in a
            # subsequent simplex optimization.
            #
            # Still need a way to do this with the restart file.
            opt.param_derivs(self.ff, ffs)

        # Calculate the Jacobian, residual vector, matrix A and vector b.
        # These aren't needed if you're only doing Newton-Raphson.
        if self.do_lstsq or self.do_lagrange or self.do_levenberg or \
                self.do_svd:
            logger.log(20, '~~ JACOBIAN AND RESIDUAL VECTOR ~~'.rjust(79, '~'))
            # Setup the residual vector: one weighted difference per
            # reference data point.
            num_d = sum(len(r_dict[data_type]) for data_type in r_dict)
            resid = np.empty((num_d, 1), dtype=float)
            count = 0
            for data_type in data_types:
                for r, c in zip(r_dict[data_type], c_dict[data_type]):
                    resid[count, 0] = r.wht * (r.val - c.val)
                    count += 1
            # logger.log(5, 'RESIDUAL VECTOR:\n{}'.format(resid))
            logger.log(20,
                       '  -- Formed {} residual vector.'.format(resid.shape))
            # Setup the Jacobian.
            num_p = len(self.ff.params)
            # Maybe should be a part of the Jacobian function.
            jacob = np.empty((num_d, num_p), dtype=float)
            jacob = return_jacobian(jacob, os.path.join(self.direc, par_file))
            # logger.log(5, 'JACOBIAN:\n{}'.format(jacob))
            logger.log(20, '  -- Formed {} Jacobian.'.format(jacob.shape))
            ma = jacob.T.dot(jacob)
            vb = jacob.T.dot(resid)
            # We need these for most optimization methods.
            logger.log(5, ' MATRIX A AND VECTOR B '.center(79, '-'))
            # logger.log(5, 'A:\n{}'.format(ma))
            # logger.log(5, 'b:\n{}'.format(vb))
        # Start coming up with new parameter sets.
        # Newton-Raphson needs the derivatives from opt.param_derivs, which
        # are not computed when restarting.
        if self.do_newton and not restart:
            logger.log(20, '~~ NEWTON-RAPHSON ~~'.rjust(79, '~'))
            # Moved the derivative section outside of here.
            changes = do_newton(self.ff.params,
                                radii=self.newton_radii,
                                cutoffs=self.newton_cutoffs)
            cleanup(self.new_ffs, self.ff, changes)
        if self.do_lstsq:
            logger.log(20, '~~ LEAST SQUARES ~~'.rjust(79, '~'))
            changes = do_lstsq(ma,
                               vb,
                               radii=self.lstsq_radii,
                               cutoffs=self.lstsq_cutoffs)
            cleanup(self.new_ffs, self.ff, changes)
        if self.do_lagrange:
            logger.log(20, '~~ LAGRANGE ~~'.rjust(79, '~'))
            for factor in sorted(self.lagrange_factors):
                changes = do_lagrange(ma,
                                      vb,
                                      factor,
                                      radii=self.lagrange_radii,
                                      cutoffs=self.lagrange_cutoffs)
                cleanup(self.new_ffs, self.ff, changes)
        if self.do_levenberg:
            logger.log(20, '~~ LEVENBERG ~~'.rjust(79, '~'))
            for factor in sorted(self.levenberg_factors):
                changes = do_levenberg(ma,
                                       vb,
                                       factor,
                                       radii=self.levenberg_radii,
                                       cutoffs=self.levenberg_cutoffs)
                cleanup(self.new_ffs, self.ff, changes)
        if self.do_svd:
            logger.log(20, '~~ SINGULAR VALUE DECOMPOSITION ~~'.rjust(79, '~'))
            # J = U . s . VT
            mu, vs, mvt = return_svd(jacob)
            logger.log(1, '>>> mu.shape: {}'.format(mu.shape))
            logger.log(1, '>>> vs.shape: {}'.format(vs.shape))
            logger.log(1, '>>> mvt.shape: {}'.format(mvt.shape))
            logger.log(1, '>>> vb.shape: {}'.format(vb.shape))
            if self.svd_factors:
                changes = do_svd_w_thresholds(mu,
                                              vs,
                                              mvt,
                                              resid,
                                              self.svd_factors,
                                              radii=self.svd_radii,
                                              cutoffs=self.svd_cutoffs)
            else:
                changes = do_svd_wo_thresholds(mu,
                                               vs,
                                               mvt,
                                               resid,
                                               radii=self.svd_radii,
                                               cutoffs=self.svd_cutoffs)
            cleanup(self.new_ffs, self.ff, changes)
        # Report how many trial FFs were generated.
        logger.log(
            20, '  -- Generated {} trial force field(s).'.format(
                len(self.new_ffs)))
        # If there are any trials, test them.
        if self.new_ffs:
            logger.log(20, '~~ EVALUATING TRIAL FF(S) ~~'.rjust(79, '~'))
            for ff in self.new_ffs:
                data = opt.cal_ff(ff, self.args_ff, parent_ff=self.ff)
                # Shouldn't need to zero anymore.
                c_data = compare.data_by_type(data)
                r_dict, c_data = compare.trim_data(r_dict, c_data)
                ff.score = compare.compare_data(r_dict, c_data)
                opt.pretty_ff_results(ff)
            self.new_ffs = sorted(self.new_ffs, key=lambda x: x.score)
            # Check for improvement.
            if self.new_ffs[0].score < self.ff.score:
                ff = self.new_ffs[0]
                logger.log(
                    20,
                    '~~ GRADIENT FINISHED WITH IMPROVEMENTS ~~'.rjust(79, '~'))
                opt.pretty_ff_results(self.ff, level=20)
                opt.pretty_ff_results(ff, level=20)
                # Copy parameter derivatives from original FF to save time in
                # case we move onto simplex immediately after this.
                copy_derivs(self.ff, ff)
            else:
                ff = self.ff
        else:
            ff = self.ff
        ff.export_ff(ff.path)
        return ff
Beispiel #2
0
 def run(self, ref_data=None):
     """
     Runs the gradient optimization.

     Parameters
     ----------
     ref_data : optional
         Reference data. When None, it is obtained from
         `opt.return_ref_data(self.args_ref)`.

     Returns
     -------
     The lowest scoring trial FF if its score is lower than that of
     `self.ff`, otherwise `self.ff`.
     """
     # We need reference data if you didn't provide it.
     if ref_data is None:
         ref_data = opt.return_ref_data(self.args_ref)
     # We need the initial FF data.
     if self.ff.data is None:
         logger.log(20, '~~ GATHERING INITIAL FF DATA ~~'.rjust(79, '~'))
         # Check whether this is efficient with the ff_lines.
         self.ff.export_ff()
         self.ff.data = calculate.main(self.args_ff)
         # We could do this, but the zeroing of energies has already been
         # done.
         # self.ff.score = compare.compare_data(ref_data, self.ff.data)
         # So instead we do this.
         compare.correlate_energies(ref_data, self.ff.data)
     if self.ff.score is None:
         # Already zeroed reference and correlated the energies.
         self.ff.score = compare.calculate_score(ref_data, self.ff.data)
         # Log the score that was just stored on the FF.
         logger.log(20, 'INITIAL FF SCORE: {}'.format(self.ff.score))
     logger.log(20, '~~ GRADIENT OPTIMIZATION ~~'.rjust(79, '~'))
     # We need a file to hold the differentiated parameter data.
     par_files = glob.glob(os.path.join(self.direc, 'par_diff_???.txt'))
     if par_files:
         par_files.sort()
         # basename rather than split('/') so that this also works where
         # the path separator is not '/'.
         most_recent_par_file = os.path.basename(par_files[-1])
         # Characters 9-11 of 'par_diff_NNN.txt' are the counter.
         most_recent_num = most_recent_par_file[9:12]
         num = int(most_recent_num) + 1
         par_file = 'par_diff_{:03d}.txt'.format(num)
     else:
         par_file = 'par_diff_001.txt'
     # The context manager guarantees the file is flushed and closed even
     # if one of the calculations below raises.
     with open(os.path.join(self.direc, par_file), 'w') as f:
         csv_writer = csv.writer(f)
         # Row 1 - Labels
         # Row 2 - Weights
         # Row 3 - Reference data values
         # Row 4 - Initial FF data values
         csv_writer.writerow([x.lbl for x in ref_data])
         csv_writer.writerow([x.wht for x in ref_data])
         csv_writer.writerow([x.val for x in ref_data])
         csv_writer.writerow([x.val for x in self.ff.data])
         logger.log(20, '~~ DIFFERENTIATING PARAMETERS ~~'.rjust(79, '~'))
         # Setup the residual vector: one weighted difference per reference
         # data point.
         # Perhaps move this closer to the Jacobian section.
         num_d = len(ref_data)
         resid = np.empty((num_d, 1), dtype=float)
         for i, ref in enumerate(ref_data):
             resid[i, 0] = ref.wht * (ref.val - self.ff.data[i].val)
         logger.log(5, 'RESIDUAL VECTOR:\n{}'.format(resid))
         logger.log(20, '  -- Formed {} residual vector.'.format(resid.shape))
         # Save many FFs, each with their own parameter sets.
         ffs = opt.differentiate_ff(self.ff)
         logger.log(
             20, '~~ SCORING DIFFERENTIATED PARAMETERS ~~'.rjust(79, '~'))
         for ff in ffs:
             ff.export_ff(lines=self.ff.lines)
             logger.log(20, '  -- Calculating {}.'.format(ff))
             data = calculate.main(self.args_ff)
             compare.correlate_energies(ref_data, data)
             ff.score = compare.calculate_score(ref_data, data)
             opt.pretty_ff_results(ff)
             # Write the data rather than storing it in memory. For large
             # parameter sets, this could consume GBs of memory otherwise!
             csv_writer.writerow([x.val for x in data])
     # Calculate the Jacobian, residual vector, matrix A and vector b.
     # These aren't needed if you're only doing Newton-Raphson.
     if self.do_lstsq or self.do_lagrange or self.do_levenberg or \
             self.do_svd:
         logger.log(20, '~~ JACOBIAN AND RESIDUAL VECTOR ~~'.rjust(79, '~'))
         # Setup the Jacobian.
         num_p = len(self.ff.params)
         # Maybe should be a part of the Jacobian function.
         jacob = np.empty((num_d, num_p), dtype=float)
         jacob = return_jacobian(jacob, os.path.join(self.direc, par_file))
         ma = jacob.T.dot(jacob)
         vb = jacob.T.dot(resid)
         # We need these for most optimization methods.
         logger.log(5, ' MATRIX A AND VECTOR B '.center(79, '-'))
         logger.log(5, 'A:\n{}'.format(ma))
         logger.log(5, 'b:\n{}'.format(vb))
     # Start coming up with new parameter sets.
     if self.do_newton:
         logger.log(20, '~~ NEWTON-RAPHSON ~~'.rjust(79, '~'))
         # Make sure we have derivative information.
         if self.ff.params[0].d1 is None:
             opt.param_derivs(self.ff, ffs)
         changes = do_newton(self.ff.params,
                             radii=self.newton_radii,
                             cutoffs=self.newton_cutoffs)
         cleanup(self.new_ffs, self.ff, changes)
     if self.do_lstsq:
         logger.log(20, '~~ LEAST SQUARES ~~'.rjust(79, '~'))
         changes = do_lstsq(ma, vb,
                            radii=self.lstsq_radii,
                            cutoffs=self.lstsq_cutoffs)
         cleanup(self.new_ffs, self.ff, changes)
     if self.do_lagrange:
         logger.log(20, '~~ LAGRANGE ~~'.rjust(79, '~'))
         for factor in sorted(self.lagrange_factors):
             changes = do_lagrange(ma, vb, factor,
                                   radii=self.lagrange_radii,
                                   cutoffs=self.lagrange_cutoffs)
             cleanup(self.new_ffs, self.ff, changes)
     if self.do_levenberg:
         logger.log(20, '~~ LEVENBERG ~~'.rjust(79, '~'))
         for factor in sorted(self.levenberg_factors):
             changes = do_levenberg(ma, vb, factor,
                                    radii=self.levenberg_radii,
                                    cutoffs=self.levenberg_cutoffs)
             cleanup(self.new_ffs, self.ff, changes)
     if self.do_svd:
         logger.log(20, '~~ SINGULAR VALUE DECOMPOSITION ~~'.rjust(79, '~'))
         mu, vs, mv = return_svd(ma)
         if self.svd_factors:
             changes = do_svd_w_thresholds(mu, vs, mv, vb, self.svd_factors,
                                           radii=self.svd_radii,
                                           cutoffs=self.svd_cutoffs)
         else:
             changes = do_svd_wo_thresholds(mu, vs, mv, vb,
                                            radii=self.svd_radii,
                                            cutoffs=self.svd_cutoffs)
         cleanup(self.new_ffs, self.ff, changes)
     # Report how many trial FFs were generated.
     logger.log(20, '  -- Generated {} trial force field(s).'.format(
             len(self.new_ffs)))
     # If there are any trials, test them.
     if self.new_ffs:
         logger.log(20, '~~ EVALUATING TRIAL FF(S) ~~'.rjust(79, '~'))
         for ff in self.new_ffs:
             data = opt.cal_ff(ff, self.args_ff, parent_ff=self.ff)
             ff.score = compare.compare_data(ref_data, data, zero=False)
             opt.pretty_ff_results(ff)
         self.new_ffs = sorted(
             self.new_ffs, key=lambda x: x.score)
         # Check for improvement.
         if self.new_ffs[0].score < self.ff.score:
             ff = self.new_ffs[0]
             logger.log(
                 20, '~~ GRADIENT FINISHED WITH IMPROVEMENTS ~~'.rjust(
                     79, '~'))
             opt.pretty_ff_results(self.ff, level=20)
             opt.pretty_ff_results(ff, level=20)
         else:
             ff = self.ff
     else:
         ff = self.ff
     return ff
Beispiel #3
0
    def run(self, ref_data=None, restart=None):
        """
        Runs the gradient optimization.

        Ensure that the attributes in __init__ are set as you desire before
        using this function.

        Parameters
        ----------
        ref_data : optional
            Reference data. When None, it is obtained from
            `opt.return_ref_data(self.args_ref)`.
        restart : str, optional
            Name of an existing central differentiation file, joined onto
            `self.direc` when the Jacobian is read. When given, the
            parameters are not differentiated again and the Newton-Raphson
            step is skipped, because parameter derivatives are only computed
            as part of the differentiation.

        Returns
        -------
        `datatypes.FF` (or subclass)
            Contains the best parameters: the lowest scoring trial FF if its
            score is lower than that of `self.ff`, otherwise `self.ff`.
        """
        # We need reference data if you didn't provide it.
        if ref_data is None:
            ref_data = opt.return_ref_data(self.args_ref)

        # We need the initial FF data.
        if self.ff.data is None:
            logger.log(20, '~~ GATHERING INITIAL FF DATA ~~'.rjust(79, '~'))
            # Is opt.Optimizer.ff_lines used anymore?
            self.ff.export_ff()
            self.ff.data = calculate.main(self.args_ff)
            # Not 100% sure if this is necessary, but it certainly doesn't hurt.
            compare.correlate_energies(ref_data, self.ff.data)
        r_dict = compare.data_by_type(ref_data)
        c_dict = compare.data_by_type(self.ff.data)
        r_dict, c_dict = compare.trim_data(r_dict, c_dict)
        if self.ff.score is None:
            # Already zeroed reference and correlated the energies.
            self.ff.score = compare.compare_data(r_dict, c_dict)
        # A fixed, sorted ordering of the data types keeps the columns of the
        # differentiation file and the rows of the residual vector aligned.
        data_types = sorted(r_dict)
        logger.log(20, '~~ GRADIENT OPTIMIZATION ~~'.rjust(79, '~'))
        logger.log(20, 'INIT FF SCORE: {}'.format(self.ff.score))
        opt.pretty_ff_results(self.ff, level=20)

        logger.log(20, '~~ CENTRAL DIFFERENTIATION ~~'.rjust(79, '~'))
        if restart:
            par_file = restart
            logger.log(20, '  -- Restarting gradient from central '
                       'differentiation file {}.'.format(par_file))
        else:
            # We need a file to hold the differentiated parameter data.
            par_files = glob.glob(os.path.join(self.direc, 'par_diff_???.txt'))
            if par_files:
                par_files.sort()
                # basename rather than split('/') so that this also works
                # where the path separator is not '/'.
                most_recent_par_file = os.path.basename(par_files[-1])
                # Characters 9-11 of 'par_diff_NNN.txt' are the counter.
                most_recent_num = most_recent_par_file[9:12]
                num = int(most_recent_num) + 1
                par_file = 'par_diff_{:03d}.txt'.format(num)
            else:
                par_file = 'par_diff_001.txt'
            logger.log(20, '  -- Generating central differentiation '
                       'file {}.'.format(par_file))
            # The context manager guarantees the file is flushed and closed
            # even if one of the calculations below raises.
            with open(os.path.join(self.direc, par_file), 'w') as f:
                csv_writer = csv.writer(f)
                # Row 1 - Labels
                # Row 2 - Weights
                # Row 3 - Reference data values
                # Row 4 - Initial FF data values
                writerows = [[], [], [], []]
                for data_type in data_types:
                    writerows[0].extend([x.lbl for x in r_dict[data_type]])
                    writerows[1].extend([x.wht for x in r_dict[data_type]])
                    writerows[2].extend([x.val for x in r_dict[data_type]])
                    writerows[3].extend([x.val for x in c_dict[data_type]])
                for row in writerows:
                    csv_writer.writerow(row)
                logger.log(
                    20, '~~ DIFFERENTIATING PARAMETERS ~~'.rjust(79, '~'))
                # Save many FFs, each with their own parameter sets.
                ffs = opt.differentiate_ff(self.ff)
                logger.log(20, '~~ SCORING DIFFERENTIATED PARAMETERS ~~'.rjust(
                    79, '~'))
                for ff in ffs:
                    ff.export_ff(lines=self.ff.lines)
                    logger.log(20, '  -- Calculating {}.'.format(ff))
                    data = calculate.main(self.args_ff)
                    c_data = compare.data_by_type(data)
                    r_dict, c_data = compare.trim_data(r_dict, c_data)
                    ff.score = compare.compare_data(r_dict, c_data)
                    opt.pretty_ff_results(ff)
                    # Write the data rather than storing it in memory. For
                    # large parameter sets, this could consume GBs of memory
                    # otherwise!
                    row = []
                    for data_type in data_types:
                        row.extend([x.val for x in c_data[data_type]])
                    csv_writer.writerow(row)

            # Make sure we have derivative information. Used for NR.
            #
            # The derivatives are useful for checking up on the progress of the
            # optimization and for deciding which parameters to use in a
            # subsequent simplex optimization.
            #
            # Still need a way to do this with the restart file.
            opt.param_derivs(self.ff, ffs)

        # Calculate the Jacobian, residual vector, matrix A and vector b.
        # These aren't needed if you're only doing Newton-Raphson.
        if self.do_lstsq or self.do_lagrange or self.do_levenberg or \
                self.do_svd:
            logger.log(20, '~~ JACOBIAN AND RESIDUAL VECTOR ~~'.rjust(79, '~'))
            # Setup the residual vector: one weighted difference per
            # reference data point.
            num_d = sum(len(r_dict[data_type]) for data_type in r_dict)
            resid = np.empty((num_d, 1), dtype=float)
            count = 0
            for data_type in data_types:
                for r, c in zip(r_dict[data_type], c_dict[data_type]):
                    resid[count, 0] = r.wht * (r.val - c.val)
                    count += 1
            # logger.log(5, 'RESIDUAL VECTOR:\n{}'.format(resid))
            logger.log(
                20, '  -- Formed {} residual vector.'.format(resid.shape))
            # Setup the Jacobian.
            num_p = len(self.ff.params)
            # Maybe should be a part of the Jacobian function.
            jacob = np.empty((num_d, num_p), dtype=float)
            jacob = return_jacobian(jacob, os.path.join(self.direc, par_file))
            # logger.log(5, 'JACOBIAN:\n{}'.format(jacob))
            logger.log(20, '  -- Formed {} Jacobian.'.format(jacob.shape))
            ma = jacob.T.dot(jacob)
            vb = jacob.T.dot(resid)
            # We need these for most optimization methods.
            logger.log(5, ' MATRIX A AND VECTOR B '.center(79, '-'))
            # logger.log(5, 'A:\n{}'.format(ma))
            # logger.log(5, 'b:\n{}'.format(vb))
        # Start coming up with new parameter sets.
        # Newton-Raphson needs the derivatives from opt.param_derivs, which
        # are not computed when restarting.
        if self.do_newton and not restart:
            logger.log(20, '~~ NEWTON-RAPHSON ~~'.rjust(79, '~'))
            # Moved the derivative section outside of here.
            changes = do_newton(self.ff.params,
                                radii=self.newton_radii,
                                cutoffs=self.newton_cutoffs)
            cleanup(self.new_ffs, self.ff, changes)
        if self.do_lstsq:
            logger.log(20, '~~ LEAST SQUARES ~~'.rjust(79, '~'))
            changes = do_lstsq(ma, vb,
                               radii=self.lstsq_radii,
                               cutoffs=self.lstsq_cutoffs)
            cleanup(self.new_ffs, self.ff, changes)
        if self.do_lagrange:
            logger.log(20, '~~ LAGRANGE ~~'.rjust(79, '~'))
            for factor in sorted(self.lagrange_factors):
                changes = do_lagrange(ma, vb, factor,
                                      radii=self.lagrange_radii,
                                      cutoffs=self.lagrange_cutoffs)
                cleanup(self.new_ffs, self.ff, changes)
        if self.do_levenberg:
            logger.log(20, '~~ LEVENBERG ~~'.rjust(79, '~'))
            for factor in sorted(self.levenberg_factors):
                changes = do_levenberg(ma, vb, factor,
                                       radii=self.levenberg_radii,
                                       cutoffs=self.levenberg_cutoffs)
                cleanup(self.new_ffs, self.ff, changes)
        if self.do_svd:
            logger.log(20, '~~ SINGULAR VALUE DECOMPOSITION ~~'.rjust(79, '~'))
            # J = U . s . VT
            mu, vs, mvt = return_svd(jacob)
            logger.log(1, '>>> mu.shape: {}'.format(mu.shape))
            logger.log(1, '>>> vs.shape: {}'.format(vs.shape))
            logger.log(1, '>>> mvt.shape: {}'.format(mvt.shape))
            logger.log(1, '>>> vb.shape: {}'.format(vb.shape))
            if self.svd_factors:
                changes = do_svd_w_thresholds(mu, vs, mvt, resid,
                                              self.svd_factors,
                                              radii=self.svd_radii,
                                              cutoffs=self.svd_cutoffs)
            else:
                changes = do_svd_wo_thresholds(mu, vs, mvt, resid,
                                               radii=self.svd_radii,
                                               cutoffs=self.svd_cutoffs)
            cleanup(self.new_ffs, self.ff, changes)
        # Report how many trial FFs were generated.
        logger.log(20, '  -- Generated {} trial force field(s).'.format(
                len(self.new_ffs)))
        # If there are any trials, test them.
        if self.new_ffs:
            logger.log(20, '~~ EVALUATING TRIAL FF(S) ~~'.rjust(79, '~'))
            for ff in self.new_ffs:
                data = opt.cal_ff(ff, self.args_ff, parent_ff=self.ff)
                # Shouldn't need to zero anymore.
                c_data = compare.data_by_type(data)
                r_dict, c_data = compare.trim_data(r_dict, c_data)
                ff.score = compare.compare_data(r_dict, c_data)
                opt.pretty_ff_results(ff)
            self.new_ffs = sorted(
                self.new_ffs, key=lambda x: x.score)
            # Check for improvement.
            if self.new_ffs[0].score < self.ff.score:
                ff = self.new_ffs[0]
                logger.log(
                    20, '~~ GRADIENT FINISHED WITH IMPROVEMENTS ~~'.rjust(
                        79, '~'))
                opt.pretty_ff_results(self.ff, level=20)
                opt.pretty_ff_results(ff, level=20)
                # Copy parameter derivatives from original FF to save time in
                # case we move onto simplex immediately after this.
                copy_derivs(self.ff, ff)
            else:
                ff = self.ff
        else:
            ff = self.ff
        return ff
Beispiel #4
0
 def run(self, ref_data=None, restart=None):
     # We need reference data if you didn't provide it.
     if ref_data is None:
         ref_data = opt.return_ref_data(self.args_ref)
     # We need the initial FF data.
     if self.ff.data is None:
         logger.log(20, '~~ GATHERING INITIAL FF DATA ~~'.rjust(79, '~'))
         # Check whether this is efficient with the ff_lines.
         self.ff.export_ff()
         self.ff.data = calculate.main(self.args_ff)
         # We could do this, but the zeroing of energies has already been
         # done.
         # self.ff.score = compare.compare_data(ref_data, self.ff.data)
         # So instead we do this.
         compare.correlate_energies(ref_data, self.ff.data)
     if self.ff.score is None:
         # Already zeroed reference and correlated the energies.
         self.ff.score = compare.calculate_score(ref_data, self.ff.data)
         logger.log(20, 'INITIAL FF SCORE: {}'.format(self.ff.score))
     logger.log(20, '~~ GRADIENT OPTIMIZATION ~~'.rjust(79, '~'))
     # We need a file to hold the differentiated parameter data.
     logger.log(20, '~~ CENTRAL DIFFERENTIATION ~~'.rjust(79, '~'))
     if restart:
         par_file = restart
         logger.log(
             20, '  -- Restarting gradient from central '
             'differentiation file {}.'.format(par_file))
     else:
         par_files = glob.glob(os.path.join(self.direc, 'par_diff_???.txt'))
         if par_files:
             par_files.sort()
             most_recent_par_file = par_files[-1]
             most_recent_par_file = most_recent_par_file.split('/')[-1]
             most_recent_num = most_recent_par_file[9:12]
             num = int(most_recent_num) + 1
             par_file = 'par_diff_{:03d}.txt'.format(num)
         else:
             par_file = 'par_diff_001.txt'
         logger.log(
             20, '  -- Generating central differentiation '
             'file {}.'.format(par_file))
         f = open(os.path.join(self.direc, par_file), 'w')
         csv_writer = csv.writer(f)
         # Row 1 - Labels
         # Row 2 - Weights
         # Row 3 - Reference data values
         # Row 4 - Initial FF data values
         csv_writer.writerow([x.lbl for x in ref_data])
         csv_writer.writerow([x.wht for x in ref_data])
         csv_writer.writerow([x.val for x in ref_data])
         csv_writer.writerow([x.val for x in self.ff.data])
         logger.log(20, '~~ DIFFERENTIATING PARAMETERS ~~'.rjust(79, '~'))
         # Save many FFs, each with their own parameter sets.
         ffs = opt.differentiate_ff(self.ff)
         logger.log(
             20, '~~ SCORING DIFFERENTIATED PARAMETERS ~~'.rjust(79, '~'))
         for ff in ffs:
             ff.export_ff(lines=self.ff.lines)
             logger.log(20, '  -- Calculating {}.'.format(ff))
             data = calculate.main(self.args_ff)
             compare.correlate_energies(ref_data, data)
             ff.score = compare.calculate_score(ref_data, data)
             opt.pretty_ff_results(ff)
             # Write the data rather than storing it in memory. For large parameter
             # sets, this could consume GBs of memory otherwise!
             csv_writer.writerow([x.val for x in data])
         f.close()
     # Calculate the Jacobian, residual vector, matrix A and vector b.
     # These aren't needed if you're only doing Newton-Raphson.
     if self.do_lstsq or self.do_lagrange or self.do_levenberg or \
             self.do_svd:
         logger.log(20, '~~ JACOBIAN AND RESIDUAL VECTOR ~~'.rjust(79, '~'))
         # Setup the residual vector.
         num_d = len(ref_data)
         resid = np.empty((num_d, 1), dtype=float)
         for i in xrange(0, num_d):
             resid[i, 0] = ref_data[i].wht * (ref_data[i].val -
                                              self.ff.data[i].val)
         # logger.log(5, 'RESIDUAL VECTOR:\n{}'.format(resid))
         logger.log(20,
                    '  -- Formed {} residual vector.'.format(resid.shape))
         # Setup the Jacobian.
         num_p = len(self.ff.params)
         # Maybe should be a part of the Jacobian function.
         jacob = np.empty((num_d, num_p), dtype=float)
         jacob = return_jacobian(jacob, os.path.join(self.direc, par_file))
         # logger.log(5, 'JACOBIAN:\n{}'.format(jacob))
         logger.log(20, '  -- Formed {} Jacobian.'.format(jacob.shape))
         ma = jacob.T.dot(jacob)
         vb = jacob.T.dot(resid)
         # We need these for most optimization methods.
         logger.log(5, ' MATRIX A AND VECTOR B '.center(79, '-'))
         # logger.log(5, 'A:\n{}'.format(ma))
         # logger.log(5, 'b:\n{}'.format(vb))
     # Start coming up with new parameter sets.
     if self.do_newton and not restart:
         logger.log(20, '~~ NEWTON-RAPHSON ~~'.rjust(79, '~'))
         # Make sure we have derivative information.
         if self.ff.params[0].d1 is None:
             opt.param_derivs(self.ff, ffs)
         changes = do_newton(self.ff.params,
                             radii=self.newton_radii,
                             cutoffs=self.newton_cutoffs)
         cleanup(self.new_ffs, self.ff, changes)
     if self.do_lstsq:
         logger.log(20, '~~ LEAST SQUARES ~~'.rjust(79, '~'))
         changes = do_lstsq(ma,
                            vb,
                            radii=self.lstsq_radii,
                            cutoffs=self.lstsq_cutoffs)
         cleanup(self.new_ffs, self.ff, changes)
     if self.do_lagrange:
         logger.log(20, '~~ LAGRANGE ~~'.rjust(79, '~'))
         for factor in sorted(self.lagrange_factors):
             changes = do_lagrange(ma,
                                   vb,
                                   factor,
                                   radii=self.lagrange_radii,
                                   cutoffs=self.lagrange_cutoffs)
             cleanup(self.new_ffs, self.ff, changes)
     if self.do_levenberg:
         logger.log(20, '~~ LEVENBERG ~~'.rjust(79, '~'))
         for factor in sorted(self.levenberg_factors):
             changes = do_levenberg(ma,
                                    vb,
                                    factor,
                                    radii=self.levenberg_radii,
                                    cutoffs=self.levenberg_cutoffs)
             cleanup(self.new_ffs, self.ff, changes)
     if self.do_svd:
         logger.log(20, '~~ SINGULAR VALUE DECOMPOSITION ~~'.rjust(79, '~'))
         # J = U . s . VT
         mu, vs, mvt = return_svd(jacob)
         logger.log(1, '>>> mu.shape: {}'.format(mu.shape))
         logger.log(1, '>>> vs.shape: {}'.format(vs.shape))
         logger.log(1, '>>> mvt.shape: {}'.format(mvt.shape))
         logger.log(1, '>>> vb.shape: {}'.format(vb.shape))
         if self.svd_factors:
             changes = do_svd_w_thresholds(mu,
                                           vs,
                                           mvt,
                                           resid,
                                           self.svd_factors,
                                           radii=self.svd_radii,
                                           cutoffs=self.svd_cutoffs)
         else:
             changes = do_svd_wo_thresholds(mu,
                                            vs,
                                            mvt,
                                            resid,
                                            radii=self.svd_radii,
                                            cutoffs=self.svd_cutoffs)
         cleanup(self.new_ffs, self.ff, changes)
     # Report how many trial FFs were generated.
     logger.log(
         20, '  -- Generated {} trial force field(s).'.format(
             len(self.new_ffs)))
     # If there are any trials, test them.
     if self.new_ffs:
         logger.log(20, '~~ EVALUATING TRIAL FF(S) ~~'.rjust(79, '~'))
         for ff in self.new_ffs:
             data = opt.cal_ff(ff, self.args_ff, parent_ff=self.ff)
             # Shouldn't need to zero anymore.
             ff.score = compare.compare_data(ref_data, data)
             opt.pretty_ff_results(ff)
         self.new_ffs = sorted(self.new_ffs, key=lambda x: x.score)
         # Check for improvement.
         if self.new_ffs[0].score < self.ff.score:
             ff = self.new_ffs[0]
             logger.log(
                 20,
                 '~~ GRADIENT FINISHED WITH IMPROVEMENTS ~~'.rjust(79, '~'))
             opt.pretty_ff_results(self.ff, level=20)
             opt.pretty_ff_results(ff, level=20)
             # Copy parameter derivatives from original FF to save time in
             # case we move onto simplex immediately after this.
             copy_derivs(self.ff, ff)
         else:
             ff = self.ff
     else:
         ff = self.ff
     return ff