Example #1
0
def locate_user():
    """Check the pending voice recording against the entered user's password.

    Reads the user name from ``username_Entry``. When that name is a key of
    ``users``, the pending recording (``temp_sample_width`` / ``temp_data``)
    is written to ``attempt.wav``, processed with ``convert.process_audio``
    and compared to the stored data with ``compare.compare_data``. The
    outcome is written to ``action_label['text']``.

    Whatever the outcome, the pending recording is cleared and ``loginWin``
    is destroyed before returning.
    """
    # Only the names that are rebound here need a ``global`` declaration.
    global user_name, temp_sample_width, temp_data
    user_name = username_Entry.get()
    if user_name in users:
        password_data = users[user_name]

        path = 'attempt.wav'
        rd.record_to_file(temp_sample_width, temp_data, path)
        attempt_data = convert.process_audio(path)

        if compare.compare_data(password_data, attempt_data):
            # str() is required here just as in the denied branch below;
            # concatenating a numeric correctness raises TypeError.
            action_label['text'] = (
                'You have been granted access to the profile of ' +
                user_name + '\n correctness: ' + str(compare.correctness))
        else:
            action_label['text'] = (
                'The password had a correctness value of ' +
                str(compare.correctness) + '\n password denied.')
    else:
        action_label['text'] = 'Incorrect username, try again.'

    # resets the recorded data
    temp_sample_width = None
    temp_data = None

    loginWin.destroy()
Example #2
0
File: loop.py Project: Q2MM/q2mm
    def opt_loop(self):
        """
        Repeatedly run the loop's optimization commands until convergence.

        Each cycle runs `self.loop_lines` through `self.run_loop_input` and
        computes the relative score change,
        (last_score - new_score) / last_score. Cycling continues while that
        change is greater than `self.convergence`.

        Updates the user with logs on the optimization score changes. Backs up
        the FF after each loop cycle as the next free `mm3_NNN.fld` file in
        `self.direc`.

        Returns
        -------
        The FF held in `self.ff` after the last cycle, after calling
        `value_at_limits()` on each of its parameters.
        """
        change = None
        last_score = None
        # This additional check ensures that the code won't crash if the user
        # forgets to add a COMP command in the loop input file.
        if self.ff.score is None:
            logger.warning(
                '  -- No existing FF score! Please ensure use of COMP in the '
                'input file! Calculating FF score automatically to compensate.')
            self.ff.score = compare.compare_data(
                self.ref_data, self.ff.data)
        while last_score is None \
                or change is None \
                or change > self.convergence:
            self.cycle_num += 1
            last_score = self.ff.score
            self.ff = self.run_loop_input(
                self.loop_lines, score=self.ff.score)
            logger.log(1, '>>> last_score: {}'.format(last_score))
            logger.log(1, '>>> self.ff.score: {}'.format(self.ff.score))
            if last_score:
                change = (last_score - self.ff.score) / last_score
            else:
                # The relative change is undefined for a previous score of
                # zero. Report no change rather than dividing by zero.
                change = 0.
            pretty_loop_summary(
                self.cycle_num, self.ff.score, change)
            # MM3* specific. Will have to be changed soon to allow for expansion
            # into other FF software packages.
            mm3_files = sorted(
                glob.glob(os.path.join(self.direc, 'mm3_???.fld')))
            if mm3_files:
                # basename() strips the directory whatever separator the
                # platform uses; splitting on '/' only works for POSIX paths.
                most_recent_mm3_file = os.path.basename(mm3_files[-1])
                # Characters 4-6 are the three wildcard characters of the
                # 'mm3_???.fld' pattern, i.e. the running number.
                num = int(most_recent_mm3_file[4:7]) + 1
                mm3_file = 'mm3_{:03d}.fld'.format(num)
            else:
                mm3_file = 'mm3_001.fld'
            mm3_file = os.path.join(self.direc, mm3_file)
            self.ff.export_ff(path=mm3_file)
            logger.log(20, '  -- Wrote best FF to {}'.format(mm3_file))
        for param in self.ff.params:
            param.value_at_limits()
        return self.ff
Example #3
0
File: loop.py Project: v3op01/q2mm
    def opt_loop(self):
        """
        Cycle through the loop's optimization methods until converged.

        Will continue to run the commands in `self.loop_lines` (through
        `self.run_loop_input`) until the relative score improvement of a
        cycle, (last_score - new_score) / last_score, is no longer greater
        than `self.convergence`.

        Updates the user with logs on the optimization score changes. Backs up
        the FF after each loop cycle by exporting it to the next unused
        `mm3_NNN.fld` file name in `self.direc`.

        Returns
        -------
        `self.ff` as left by the final cycle; `value_at_limits()` is called on
        each of its parameters before returning.
        """
        change = None
        last_score = None
        # This additional check ensures that the code won't crash if the user
        # forgets to add a COMP command in the loop input file.
        if self.ff.score is None:
            logger.warning(
                '  -- No existing FF score! Please ensure use of COMP in the '
                'input file! Calculating FF score automatically to compensate.'
            )
            self.ff.score = compare.compare_data(self.ref_data, self.ff.data)
        while last_score is None \
                or change is None \
                or change > self.convergence:
            self.cycle_num += 1
            last_score = self.ff.score
            self.ff = self.run_loop_input(self.loop_lines, score=self.ff.score)
            logger.log(1, '>>> last_score: {}'.format(last_score))
            logger.log(1, '>>> self.ff.score: {}'.format(self.ff.score))
            # A previous score of zero makes the relative change undefined;
            # treat it as "no change" instead of raising ZeroDivisionError.
            if last_score:
                change = (last_score - self.ff.score) / last_score
            else:
                change = 0.
            pretty_loop_summary(self.cycle_num, self.ff.score, change)
            # MM3* specific. Will have to be changed soon to allow for expansion
            # into other FF software packages.
            mm3_files = glob.glob(os.path.join(self.direc, 'mm3_???.fld'))
            if mm3_files:
                mm3_files.sort()
                # Use basename() rather than split('/') so the directory part
                # is removed regardless of the platform's path separator.
                most_recent_mm3_file = os.path.basename(mm3_files[-1])
                # Slice [4:7] is the '???' part of the 'mm3_???.fld' pattern.
                most_recent_num = most_recent_mm3_file[4:7]
                mm3_file = 'mm3_{:03d}.fld'.format(int(most_recent_num) + 1)
            else:
                mm3_file = 'mm3_001.fld'
            mm3_file = os.path.join(self.direc, mm3_file)
            self.ff.export_ff(path=mm3_file)
            logger.log(20, '  -- Wrote best FF to {}'.format(mm3_file))
        for param in self.ff.params:
            param.value_at_limits()
        return self.ff
Example #4
0
    def run(self, r_data=None):
        """
        Once all attributes are setup as you so desire, run this method to
        optimize the parameters.

        Builds a set of trial FFs (forward differentiated FFs plus a copy of
        the initial FF), then repeatedly replaces the worst one using
        reflection, expansion, contraction or massive contraction until
        `self.max_cycles` cycles have run or no improvement has been seen for
        3 cycles per trial parameter.

        Parameters
        ----------
        r_data : optional
            Reference data handed to `compare.compare_data`. When `None`, it
            is obtained from `opt.return_ref_data(self.args_ref)`.

        Returns
        -------
        `datatypes.FF` (or subclass)
            Contains the best parameters.

        Raises
        ------
        opt.OptError
            With weighted reflection enabled, when the score differences to
            the worst trial FF sum to zero.
        """
        if r_data is None:
            r_data = opt.return_ref_data(self.args_ref)

        if self.ff.score is None:
            logger.log(20, '~~ CALCULATING INITIAL FF SCORE ~~'.rjust(79, '~'))
            self.ff.export_ff()
            # Could store data on self.ff.data if we wanted. Not necessary for
            # simplex. If simplex yielded no improvements, it would return this
            # FF, and then we might want the data such that we don't have to
            # recalculate it in gradient. Let's hope simplex generally yields
            # improvements.
            data = calculate.main(self.args_ff)
            self.ff.score = compare.compare_data(r_data, data)
        else:
            logger.log(20, '  -- Reused existing score and data for initial FF.')

        logger.log(20, '~~ SIMPLEX OPTIMIZATION ~~'.rjust(79, '~'))
        logger.log(20, 'INIT FF SCORE: {}'.format(self.ff.score))
        opt.pretty_ff_results(self.ff, level=20)

        if self.max_params and len(self.ff.params) > self.max_params:
            logger.log(20, '  -- More parameters than the maximum allowed.')
            logger.log(5, 'CURRENT PARAMS: {}'.format(len(self.ff.params)))
            logger.log(5, 'MAX PARAMS: {}'.format(self.max_params))
            # Here we select the parameters that have the lowest 2nd
            # derivatives.

            # THIS IS SCHEDULED FOR CHANGING. THIS IS ACTUALLY NOT A GOOD
            # CRITERION FOR PARAMETER SELECTION.
            if self.ff.params[0].d1:
                logger.log(15, '  -- Reusing existing parameter derivatives.')
                # Differentiate all parameters forward. Yes, I know this is
                # counter-intuitive because we are going to only use subset of
                # the forward differentiated FFs. However, this is very
                # computationally inexpensive because we're not scoring them
                # now. We will remove the forward differentiated FFs we don't
                # want before scoring.
                ffs = opt.differentiate_ff(self.ff, central=False)
            else:
                logger.log(15, '  -- Calculating new parameter derivatives.')
                # Do central differentiation so we can calculate derivatives.
                # Another option would be to write code to determine
                # derivatives only from forward differentiation.
                ffs = opt.differentiate_ff(self.ff, central=True)
                # We have to score to get the derivatives.
                for ff in ffs:
                    ff.export_ff(lines=self.ff_lines)
                    logger.log(20, '  -- Calculating {}.'.format(ff))
                    data = calculate.main(self.args_ff)
                    ff.score = compare.compare_data(r_data, data)
                    opt.pretty_ff_results(ff)
                # Add the derivatives to your original FF.
                opt.param_derivs(self.ff, ffs)
                # Only keep the forward differentiated FFs.
                ffs = opt.extract_forward(ffs)
                logger.log(5, '  -- Keeping {} forward differentiated '
                           'FFs.'.format(len(ffs)))

            # This sorts the parameters based upon their 2nd derivative.
            # It keeps the ones with lowest 2nd derivatives.

            # SCHEDULED FOR CHANGES. NOT A GOOD SORTING CRITERION.
            params = select_simp_params_on_derivs(
                self.ff.params, max_params=self.max_params)
            # From the entire list of forward differentiated FFs, pick
            # out the ones that have the lowest 2nd derivatives.
            self.new_ffs = opt.extract_ff_by_params(ffs, params)
            logger.log(1, '>>> len(self.new_ffs): {}'.format(len(self.new_ffs)))

            # Reduce number of parameters.
            # Will need an option that's not MM3* specific in the future.
            # A parameter is identified by its (row, column) pair. Testing
            # rows and columns independently would also keep an unselected
            # parameter that shares its row with one selected parameter and
            # its column with another.
            selected = [(x.mm3_row, x.mm3_col) for x in params]
            for ff in self.new_ffs:
                ff.params = [param for param in ff.params
                             if (param.mm3_row, param.mm3_col) in selected]
            # Make a copy of your original FF that has less parameters. The
            # copy's own parameters are filtered; reusing the parameters of a
            # differentiated FF would give the copy shifted values and make it
            # share parameter objects with that FF.
            ff_copy = copy.deepcopy(self.ff)
            ff_copy.params = [param for param in ff_copy.params
                              if (param.mm3_row, param.mm3_col) in selected]
        else:
            # In this case it's simple. Just forward differentiate each
            # parameter.
            self.new_ffs = opt.differentiate_ff(self.ff, central=False)
            logger.log(1, '>>> len(self.new_ffs): {}'.format(len(self.new_ffs)))
            # Still make that FF copy.
            ff_copy = copy.deepcopy(self.ff)
        # Double check and make sure they're all scored.
        for ff in self.new_ffs:
            if ff.score is None:
                ff.export_ff(lines=self.ff_lines)
                logger.log(20, '  -- Calculating {}.'.format(ff))
                data = calculate.main(self.args_ff)
                ff.score = compare.compare_data(r_data, data)
                opt.pretty_ff_results(ff)
        # Add your copy of the original FF to the forward differentiated FFs.
        self.new_ffs = sorted(self.new_ffs + [ff_copy], key=lambda x: x.score)
        # Allow 3 cycles w/o change for each parameter present. Remember that
        # the initial FF was added here, hence the minus one.
        self._max_cycles_wo_change = 3 * (len(self.new_ffs) - 1)
        wrapper = textwrap.TextWrapper(width=79)
        # Shows all FFs parameters.
        opt.pretty_ff_params(self.new_ffs)
        # Bound up front so the final summary also works when the loop below
        # does not run a single cycle.
        best_ff = self.new_ffs[0]
        # Start the simplex cycles.
        current_cycle = 0
        cycles_wo_change = 0
        while current_cycle < self.max_cycles \
                and cycles_wo_change < self._max_cycles_wo_change:
            current_cycle += 1
            last_best = self.new_ffs[0].score
            best_ff = self.new_ffs[0]
            logger.log(20, '~~ START SIMPLEX CYCLE {} ~~'.format(
                    current_cycle).rjust(79, '~'))
            logger.log(20, 'ORDERED FF SCORES:')
            logger.log(20, wrapper.fill('{}'.format(
                    ' '.join('{:15.4f}'.format(x.score) for x in self.new_ffs))))

            # !!! FOR TESTING !!!

            # Write the best and worst FFs to some other directory. Then
            # write the worst FF to optimization working directory. Then
            # raise opt.OptError. The worst FF should be overwritten by
            # the best FF afterwards.

            # if current_cycle == 5:
            #     self.new_ffs[-1].export_ff(
            #         path='ref_methanol_flds/mm3_worst.fld',
            #         lines=self.ff.lines)
            #     self.new_ffs[0].export_ff(
            #         path='ref_methanol_flds/mm3_best.fld',
            #         lines=self.ff.lines)
            #     self.new_ffs[-1].export_ff(
            #         path='ref_methanol/mm3.fld',
            #         lines=self.ff.lines)
            #     raise opt.OptError

            # !!! END TESTING !!!

            inv_ff = self.ff.__class__()
            if self.do_weighted_reflection:
                inv_ff.method = 'WEIGHTED INVERSION'
            else:
                inv_ff.method = 'INVERSION'
            inv_ff.params = copy.deepcopy(best_ff.params)
            ref_ff = self.ff.__class__()
            ref_ff.method = 'REFLECTION'
            ref_ff.params = copy.deepcopy(best_ff.params)
            # Need score difference sum for weighted inversion.
            # Calculate this value before going into loop.
            if self.do_weighted_reflection:
                # If zero, should break.
                score_diff_sum = sum([x.score - self.new_ffs[-1].score
                                      for x in self.new_ffs[:-1]])
                if score_diff_sum == 0.:
                    logger.warning(
                        'No difference between force field scores. '
                        'Exiting simplex.')
                    # We want to raise opt.OptError such that
                    # opt.catch_run_errors will write the best FF obtained thus
                    # far.
                    raise opt.OptError(
                        'No difference between force field scores. '
                        'Exiting simplex.')
            # The inversion point is the (optionally score-weighted) mean of
            # every trial FF except the worst one; the reflection mirrors the
            # worst FF through that point.
            for i in range(len(best_ff.params)):
                if self.do_weighted_reflection:
                    inv_val = (
                        sum([x.params[i].value *
                             (x.score - self.new_ffs[-1].score)
                             for x in self.new_ffs[:-1]])
                        / score_diff_sum)
                else:
                    inv_val = (
                        sum([x.params[i].value for x in self.new_ffs[:-1]])
                        /
                        len(self.new_ffs[:-1]))
                inv_ff.params[i].value = inv_val
                ref_ff.params[i].value = (
                    2 * inv_val - self.new_ffs[-1].params[i].value)
            # The inversion point does not need to be scored.
            # Calculate score for reflected parameters.
            self.ff.export_ff(self.ff.path, params=ref_ff.params)
            data = calculate.main(self.args_ff)
            ref_ff.score = compare.compare_data(r_data, data)
            opt.pretty_ff_results(ref_ff)
            if ref_ff.score < self.new_ffs[0].score:
                logger.log(20, '~~ ATTEMPTING EXPANSION ~~'.rjust(79, '~'))
                exp_ff = self.ff.__class__()
                exp_ff.method = 'EXPANSION'
                exp_ff.params = copy.deepcopy(best_ff.params)
                for i in range(len(self.new_ffs[0].params)):
                    exp_ff.params[i].value = (
                        3 * inv_ff.params[i].value -
                        2 * self.new_ffs[-1].params[i].value)
                self.ff.export_ff(self.ff.path, exp_ff.params)
                data = calculate.main(self.args_ff)
                exp_ff.score = compare.compare_data(r_data, data)
                opt.pretty_ff_results(exp_ff)
                if exp_ff.score < ref_ff.score:
                    self.new_ffs[-1] = exp_ff
                    logger.log(
                        20, '  -- Expansion succeeded. Keeping expanded '
                        'parameters.')
                else:
                    self.new_ffs[-1] = ref_ff
                    logger.log(
                        20, '  -- Expansion failed. Keeping reflected parameters.')
            elif ref_ff.score < self.new_ffs[-2].score:
                logger.log(20, '  -- Keeping reflected parameters.')
                self.new_ffs[-1] = ref_ff
            else:
                logger.log(20, '~~ ATTEMPTING CONTRACTION ~~'.rjust(79, '~'))
                con_ff = self.ff.__class__()
                con_ff.method = 'CONTRACTION'
                con_ff.params = copy.deepcopy(best_ff.params)
                for i in range(len(best_ff.params)):
                    if ref_ff.score > self.new_ffs[-1].score:
                        con_val = (
                            (inv_ff.params[i].value +
                             self.new_ffs[-1].params[i].value) / 2)
                    else:
                        con_val = (
                            (3 * inv_ff.params[i].value -
                             self.new_ffs[-1].params[i].value) / 2)
                    con_ff.params[i].value = con_val
                self.ff.export_ff(self.ff.path, params=con_ff.params)
                data = calculate.main(self.args_ff)
                con_ff.score = compare.compare_data(r_data, data)
                opt.pretty_ff_results(con_ff)
                # This change was made to reflect the 1998 Q2MM publication.
                # if con_ff.score < self.new_ffs[-1].score:
                if con_ff.score < self.new_ffs[-2].score:
                    logger.log(20, '  -- Contraction succeeded.')
                    self.new_ffs[-1] = con_ff
                elif self.do_massive_contraction:
                    logger.log(
                        20, '~~ DOING MASSIVE CONTRACTION ~~'.rjust(79, '~'))
                    # Move every FF except the best halfway towards the best
                    # FF, then rescore it.
                    for ff in self.new_ffs[1:]:
                        for i in range(len(best_ff.params)):
                            ff.params[i].value = (
                                (ff.params[i].value +
                                 self.new_ffs[0].params[i].value) / 2)
                        self.ff.export_ff(self.ff.path, params=ff.params)
                        data = calculate.main(self.args_ff)
                        ff.score = compare.compare_data(r_data, data)
                        ff.method += ' MC'
                        opt.pretty_ff_results(ff)
                else:
                    logger.log(
                        20, '  -- Contraction failed. Keeping parmaeters '
                        'anyway.')
                    self.new_ffs[-1] = con_ff
            self.new_ffs = sorted(self.new_ffs, key=lambda x: x.score)
            if self.new_ffs[0].score < last_best:
                cycles_wo_change = 0
            else:
                cycles_wo_change += 1
                logger.log(20, '  -- {} cycles without improvement out of {} '
                           'allowed.'.format(
                        cycles_wo_change, self._max_cycles_wo_change))
            best_ff = self.new_ffs[0]
            logger.log(20, 'BEST:')
            opt.pretty_ff_results(self.new_ffs[0], level=20)
            logger.log(20, '~~ END SIMPLEX CYCLE {} ~~'.format(
                    current_cycle).rjust(79, '~'))
        if best_ff.score < self.ff.score:
            logger.log(20, '~~ SIMPLEX FINISHED WITH IMPROVEMENTS ~~'.rjust(
                    79, '~'))
            best_ff = restore_simp_ff(best_ff, self.ff)
        else:
            logger.log(20, '~~ SIMPLEX FINISHED WITHOUT IMPROVEMENTS ~~'.rjust(
                    79, '~'))
        opt.pretty_ff_results(self.ff, level=20)
        opt.pretty_ff_results(best_ff, level=20)
        logger.log(20, '  -- Writing best force field from simplex.')
        best_ff.export_ff(best_ff.path)
        return best_ff
Example #5
0
 def run_loop_input(self, lines, score=None):
     """
     Execute the commands of a loop input, one line at a time.

     The first whitespace-separated word of each line selects the action:

     DIR   - set `self.direc` to the second word.
     FFLD  - `read`: load an MM3 FF from the named file in `self.direc`;
             `write`: export `self.ff` to the named file in `self.direc`.
     PARM  - trim `self.ff.params` using the named file in `self.direc`.
     LOOP  - collect the following lines up to a line starting with END and
             run them in a new `Loop` until its convergence (second word)
             is met.
     RDAT  - compute reference data (remaining words replace
             `self.args_ref`).
     CDAT  - compute FF data (remaining words replace `self.args_ff`).
     COMP  - score `self.ff`; `-o FILE` and `-p` also pretty print the
             comparison.
     GRAD  - run the gradient optimizer on `self.ff`.
     SIMP  - run the simplex optimizer on `self.ff`.
     WGHT  - set `co.WEIGHTS[<second word>]` to the third word as a float.

     Blank lines are skipped and lines starting with any other word are
     ignored.

     Parameters
     ----------
     lines : iterable of str
         The input lines.
     score : optional
         Accepted for callers that pass it; not read by this method.

     Returns
     -------
     `self.ff` once every line has been processed.

     Raises
     ------
     StopIteration
         If a LOOP command is not closed by an END line.
     """
     lines_iterator = iter(lines)
     for line in lines_iterator:
         cols = line.split()
         if not cols:
             # Nothing to dispatch on; indexing cols[0] would raise.
             continue
         command = cols[0]
         if command == 'DIR':
             self.direc = cols[1]
         elif command == 'FFLD':
             # Import FF data.
             if cols[1] == 'read':
                 self.ff = datatypes.MM3(os.path.join(self.direc, cols[2]))
                 self.ff.import_ff()
                 self.ff.method = 'READ'
                 with open(os.path.join(self.direc, cols[2]), 'r') as f:
                     self.ff.lines = f.readlines()
             # Export FF data.
             elif cols[1] == 'write':
                 self.ff.export_ff(os.path.join(self.direc, cols[2]))
         # Trim parameters.
         elif command == 'PARM':
             logger.log(20, '~~ SELECTING PARAMETERS ~~'.rjust(79, '~'))
             self.ff.params = parameters.trim_params_by_file(
                 self.ff.params, os.path.join(self.direc, cols[1]))
         elif command == 'LOOP':
             # Read lines that will be looped over. The shared iterator is
             # advanced here so the outer loop resumes after END. next() is
             # the spelling that works on both Python 2.6+ and Python 3.
             inner_loop_lines = []
             line = next(lines_iterator)
             # Slicing keeps a blank line from raising IndexError.
             while line.split()[:1] != ['END']:
                 inner_loop_lines.append(line)
                 line = next(lines_iterator)
             # Make loop object and populate attributes.
             loop = Loop()
             loop.convergence = float(cols[1])
             loop.direc = self.direc
             loop.ff = self.ff
             loop.args_ff = self.args_ff
             loop.args_ref = self.args_ref
             loop.ref_data = self.ref_data
             loop.loop_lines = inner_loop_lines
             # Log commands.
             pretty_loop_input(inner_loop_lines,
                               name='OPTIMIZATION LOOP',
                               score=self.ff.score)
             # Run inner loop.
             self.ff = loop.opt_loop()
         # Note: Probably want to update this to append the directory given
         #       by the new DIR command.
         elif command == 'RDAT':
             logger.log(20,
                        '~~ CALCULATING REFERENCE DATA ~~'.rjust(79, '~'))
             if len(cols) > 1:
                 self.args_ref = ' '.join(cols[1:]).split()
             self.ref_data = opt.return_ref_data(self.args_ref)
         elif command == 'CDAT':
             logger.log(20, '~~ CALCULATING FF DATA ~~'.rjust(79, '~'))
             if len(cols) > 1:
                 self.args_ff = ' '.join(cols[1:]).split()
             self.ff.data = calculate.main(self.args_ff)
         elif command == 'COMP':
             self.ff.score = compare.compare_data(self.ref_data,
                                                  self.ff.data)
             if '-o' in cols:
                 compare.pretty_data_comp(
                     self.ref_data, self.ff.data,
                     os.path.join(self.direc, cols[cols.index('-o') + 1]))
             if '-p' in cols:
                 compare.pretty_data_comp(self.ref_data, self.ff.data)
         elif command == 'GRAD':
             grad = gradient.Gradient(direc=self.direc,
                                      ff=self.ff,
                                      ff_lines=self.ff.lines,
                                      args_ff=self.args_ff)
             self.ff = grad.run(ref_data=self.ref_data)
         elif command == 'SIMP':
             simp = simplex.Simplex(direc=self.direc,
                                    ff=self.ff,
                                    ff_lines=self.ff.lines,
                                    args_ff=self.args_ff)
             self.ff = simp.run(r_data=self.ref_data)
         elif command == 'WGHT':
             data_type = cols[1]
             co.WEIGHTS[data_type] = float(cols[2])
     return self.ff
Example #6
0
    def run(self, r_data=None):
        """
        Once all attributes are setup as you so desire, run this method to
        optimize the parameters.

        Returns
        -------
        `datatypes.FF` (or subclass)
            Contains the best parameters.
        """
        if r_data is None:
            r_data = opt.return_ref_data(self.args_ref)
        logger.log(20, '~~ SIMPLEX OPTIMIZATION ~~'.rjust(79, '~'))
        # Here we don't actually need the database connection/force field data.
        # We only need the score.
        if self.ff.score is None:
            logger.log(20, '~~ CALCULATING INITIAL FF SCORE ~~'.rjust(79, '~'))
            self.ff.export_ff()
            # I could store this object to prevent on self.ff to prevent garbage
            # collection. Would be nice if simplex was followed by gradient,
            # which needs that information, and if simplex yielded no
            # improvements. At most points in the optimization, this is probably
            # too infrequent for it to be worth the memory, but it might be nice
            # once the parameters are close to convergence.
            data = calculate.main(self.args_ff)
            self.ff.score = compare.compare_data(r_data, data, zero=False)
            logger.log(20, 'INITIAL FF SCORE: {}'.format(self.ff.score))
        else:
            logger.log(15, '  -- Reused existing score and data for initial FF.')
        logger.log(15, 'INIT FF SCORE: {}'.format(self.ff.score))
        ffs = opt.differentiate_ff(self.ff)
        for ff in ffs:
            ff.export_ff(lines=self.ff_lines)
            logger.log(20, '  -- Calculating {}.'.format(ff))
            data = calculate.main(self.args_ff)
            ff.score = compare.compare_data(r_data, data, zero=False)
            opt.pretty_ff_results(ff)
        if self.max_params and len(self.ff.params) > self.max_params:
            simp_params = reduce_num_simp_params(
                self.ff, ffs, max_params=self.max_params)
            self.new_ffs = reduce_num_simp_ffs(
                ffs, simp_params)
        else:
            self.new_ffs = ffs
        self.new_ffs = sorted(self.new_ffs + [self.ff], key=lambda x: x.score)
        wrapper = textwrap.TextWrapper(width=79)
        logger.log(20, 'ORDERED FF SCORES:')
        logger.log(20, wrapper.fill('{}'.format(
                ' '.join('{:15.4f}'.format(x.score) for x in self.new_ffs))))
        # Shows all FFs parameters.
        opt.pretty_ff_params(self.new_ffs)
        # Start the simplex cycles.
        current_cycle = 0
        cycles_wo_change = 0
        while current_cycle < self.max_cycles \
                and cycles_wo_change < self.max_cycles_wo_change:
            current_cycle += 1
            last_best = self.new_ffs[0].score
            best_ff = self.new_ffs[0]
            logger.log(20, '~~ START SIMPLEX CYCLE {} ~~'.format(
                    current_cycle).rjust(79, '~'))
            inv_ff = self.ff.__class__()
            if self.do_weighted_reflection:
                inv_ff.method = 'WEIGHTED INVERSION'
            else:
                inv_ff.method = 'INVERSION'
            inv_ff.params = copy.deepcopy(best_ff.params)
            ref_ff = self.ff.__class__()
            ref_ff.method = 'REFLECTION'
            ref_ff.params = copy.deepcopy(best_ff.params)
            for i in xrange(0, len(best_ff.params)):
                if self.do_weighted_reflection:
                    try:
                        inv_val = (
                            sum([x.params[i].value *
                                 (x.score - self.new_ffs[-1].score)
                                 for x in self.new_ffs[:-1]])
                            / 
                            sum([x.score - self.new_ffs[-1].score
                                 for x in self.new_ffs[:-1]]))
                    except ZeroDivisionError:
                        logger.warning(
                            'Attempted to divide by zero while calculating the '
                            'weighted simplex inversion point. All penalty '
                            'function scores for the trial force fields are '
                            'numerically equivalent.')
                        # Breaking should just exit the while loop. Should still
                        # give you the best force field determined thus far.
                        break
                else:
                    inv_val = (
                        sum([x.params[i].value for x in self.new_ffs[:-1]])
                        /
                        len(self.new_ffs[:-1]))
                inv_ff.params[i].value = inv_val
                ref_ff.params[i].value = (
                    2 * inv_val - self.new_ffs[-1].params[i].value)
            # Calculate score for inverted parameters.
            self.ff.export_ff(self.ff.path, params=inv_ff.params)
            data = calculate.main(self.args_ff)
            inv_ff.score = compare.compare_data(r_data, data, zero=False)
            opt.pretty_ff_results(inv_ff)
            # Calculate score for reflected parameters.
            self.ff.export_ff(self.ff.path, params=ref_ff.params)
            data = calculate.main(self.args_ff)
            ref_ff.score = compare.compare_data(r_data, data, zero=False)
            opt.pretty_ff_results(ref_ff)
            if ref_ff.score < self.new_ffs[0].score:
                logger.log(20, '~~ ATTEMPTING EXPANSION ~~'.rjust(79, '~'))
                exp_ff = self.ff.__class__()
                exp_ff.method = 'EXPANSION'
                exp_ff.params = copy.deepcopy(best_ff.params)
                for i in xrange(0, len(self.new_ffs[0].params)):
                    exp_ff.params[i].value = (
                        3 * inv_ff.params[i].value -
                        2 * self.new_ffs[-1].params[i].value)
                self.ff.export_ff(self.ff.path, exp_ff.params)
                data = calculate.main(self.args_ff)
                exp_ff.score = compare.compare_data(r_data, data, zero=False)
                opt.pretty_ff_results(exp_ff)
                if exp_ff.score < ref_ff.score:
                    self.new_ffs[-1] = exp_ff
                    logger.log(
                        20, '  -- Expansion succeeded. Keeping expanded '
                        'parameters.')
                else:
                    self.new_ffs[-1] = ref_ff
                    logger.log(
                        20, '  -- Expansion failed. Keeping reflected parameters.')
            elif ref_ff.score < self.new_ffs[-2].score:
                logger.log(20, '  -- Keeping reflected parameters.')
                self.new_ffs[-1] = ref_ff
            else:
                logger.log(20, '~~ ATTEMPTING CONTRACTION ~~'.rjust(79, '~'))
                con_ff = self.ff.__class__()
                con_ff.method = 'CONTRACTION'
                con_ff.params = copy.deepcopy(best_ff.params)
                for i in xrange(0, len(best_ff.params)):
                    if ref_ff.score > self.new_ffs[-1].score:
                        con_val = (
                            (inv_ff.params[i].value +
                             self.new_ffs[-1].params[i].value) / 2)
                    else:
                        con_val = (
                            (3 * inv_ff.params[i].value -
                             self.new_ffs[-1].params[i].value) / 2)
                    con_ff.params[i].value = con_val
                self.ff.export_ff(self.ff.path, params=con_ff.params)
                data = calculate.main(self.args_ff)
                con_ff.score = compare.compare_data(r_data, data, zero=False)
                opt.pretty_ff_results(con_ff)
                if con_ff.score < self.new_ffs[-2].score:
                    self.new_ffs[-1] = con_ff
                elif self.do_massive_contraction:
                    logger.log(
                        20, '~~ DOING MASSIVE CONTRACTION ~~'.rjust(79, '~'))
                    for ff_num, ff in enumerate(self.new_ffs[1:]):
                        for i in xrange(0, len(best_ff.params)):
                            ff.params[i].value = (
                                (ff.params[i].value +
                                 self.new_ffs[0].params[i].value) / 2)
                        self.ff.export_ff(self.ff.path, params=ff.params)
                        data = calculate.main(self.args_ff)
                        ff.score = compare.compare_data(r_data, data, zero=False)
                        ff.method += ' MC'
                        opt.pretty_ff_results(ff)
                else:
                    logger.log(20, '  -- Contraction failed.')
            self.new_ffs = sorted(self.new_ffs, key=lambda x: x.score)
            if self.new_ffs[0].score < last_best:
                cycles_wo_change = 0
            else:
                cycles_wo_change += 1
                logger.log(20, '  -- {} cycles without change.'.format(
                        cycles_wo_change))
            best_ff = self.new_ffs[0]
            logger.log(20, 'BEST:')
            opt.pretty_ff_results(self.new_ffs[0], level=20)
            logger.log(20, '~~ END SIMPLEX CYCLE {} ~~'.format(
                    current_cycle).rjust(79, '~'))
        if best_ff.score < self.ff.score:
            logger.log(20, '~~ SIMPLEX FINISHED WITH IMPROVEMENTS ~~'.rjust(
                    79, '~'))
            best_ff = restore_simp_ff(best_ff, self.ff)
        else:
            logger.log(20, '~~ SIMPLEX FINISHED WITHOUT IMPROVEMENTS ~~'.rjust(
                    79, '~'))
        opt.pretty_ff_results(self.ff, level=20)
        opt.pretty_ff_results(best_ff, level=20)
        logger.log(20, '  -- Writing best force field from simplex.')
        best_ff.export_ff(best_ff.path)
        return best_ff
Example #7
0
    def run(self, ref_data=None, restart=None):
        """
        Runs the gradient optimization.

        Ensure that the attributes in __init__ are set as you desire before
        using this function.

        Parameters
        ----------
        ref_data : optional
            Reference data. When None, it is produced by
            `opt.return_ref_data(self.args_ref)`.
        restart : string, optional
            Name of an existing central differentiation file located in
            `self.direc`. When given, the parameters are not differentiated
            or scored again, and Newton-Raphson is skipped because the
            parameter derivatives it relies on are only computed during
            differentiation.

        Returns
        -------
        `datatypes.FF` (or subclass)
            Contains the best parameters. This is the lowest scoring trial FF
            if it scores lower than `self.ff`; otherwise it is `self.ff`. The
            returned FF is exported to its own path before returning.
        """
        # We need reference data if you didn't provide it.
        if ref_data is None:
            ref_data = opt.return_ref_data(self.args_ref)

        # We need the initial FF data.
        if self.ff.data is None:
            logger.log(20, '~~ GATHERING INITIAL FF DATA ~~'.rjust(79, '~'))
            # Is opt.Optimizer.ff_lines used anymore?
            self.ff.export_ff()
            self.ff.data = calculate.main(self.args_ff)
            # Not 100% sure if this is necessary, but it certainly doesn't hurt.
            compare.correlate_energies(ref_data, self.ff.data)
        r_dict = compare.data_by_type(ref_data)
        c_dict = compare.data_by_type(self.ff.data)
        r_dict, c_dict = compare.trim_data(r_dict, c_dict)
        if self.ff.score is None:
            # Already zeroed reference and correlated the energies.
            self.ff.score = compare.compare_data(r_dict, c_dict)
        # A fixed (sorted) order of data types keeps the columns of the
        # differentiation file and the rows of the residual vector aligned.
        data_types = sorted(r_dict)
        logger.log(20, '~~ GRADIENT OPTIMIZATION ~~'.rjust(79, '~'))
        logger.log(20, 'INIT FF SCORE: {}'.format(self.ff.score))
        opt.pretty_ff_results(self.ff, level=20)

        logger.log(20, '~~ CENTRAL DIFFERENTIATION ~~'.rjust(79, '~'))
        if restart:
            par_file = restart
            logger.log(
                20, '  -- Restarting gradient from central '
                'differentiation file {}.'.format(par_file))
        else:
            # We need a file to hold the differentiated parameter data.
            par_files = glob.glob(os.path.join(self.direc, 'par_diff_???.txt'))
            if par_files:
                # Zero padded numbers sort lexicographically, so the maximum
                # is the most recent file. basename() keeps this independent
                # of the platform's path separator.
                most_recent_par_file = os.path.basename(max(par_files))
                # Characters 9-11 hold the number in 'par_diff_NNN.txt'.
                num = int(most_recent_par_file[9:12]) + 1
                par_file = 'par_diff_{:03d}.txt'.format(num)
            else:
                par_file = 'par_diff_001.txt'
            logger.log(
                20, '  -- Generating central differentiation '
                'file {}.'.format(par_file))
            # The context manager guarantees the file is closed even if one
            # of the FF calculations below raises.
            with open(os.path.join(self.direc, par_file), 'w') as f:
                csv_writer = csv.writer(f)
                # Row 1 - Labels
                # Row 2 - Weights
                # Row 3 - Reference data values
                # Row 4 - Initial FF data values
                header_rows = [[], [], [], []]
                for data_type in data_types:
                    header_rows[0].extend(
                        [x.lbl for x in r_dict[data_type]])
                    header_rows[1].extend(
                        [x.wht for x in r_dict[data_type]])
                    header_rows[2].extend(
                        [x.val for x in r_dict[data_type]])
                    header_rows[3].extend(
                        [x.val for x in c_dict[data_type]])
                csv_writer.writerows(header_rows)
                logger.log(
                    20, '~~ DIFFERENTIATING PARAMETERS ~~'.rjust(79, '~'))
                # Save many FFs, each with their own parameter sets.
                ffs = opt.differentiate_ff(self.ff)
                logger.log(
                    20,
                    '~~ SCORING DIFFERENTIATED PARAMETERS ~~'.rjust(79, '~'))
                for ff in ffs:
                    ff.export_ff(lines=self.ff.lines)
                    logger.log(20, '  -- Calculating {}.'.format(ff))
                    data = calculate.main(self.args_ff)
                    c_data = compare.data_by_type(data)
                    r_dict, c_data = compare.trim_data(r_dict, c_data)
                    ff.score = compare.compare_data(r_dict, c_data)
                    opt.pretty_ff_results(ff)
                    # Write the data rather than storing it in memory. For
                    # large parameter sets, this could consume GBs of memory
                    # otherwise!
                    row = []
                    for data_type in data_types:
                        row.extend([x.val for x in c_data[data_type]])
                    csv_writer.writerow(row)

            # Make sure we have derivative information. Used for NR.
            #
            # The derivatives are useful for checking up on the progress of the
            # optimization and for deciding which parameters to use in a
            # subsequent simplex optimization.
            #
            # Still need a way to do this with the restart file.
            opt.param_derivs(self.ff, ffs)

        # Calculate the Jacobian, residual vector, matrix A and vector b.
        # These aren't needed if you're only doing Newton-Raphson.
        if self.do_lstsq or self.do_lagrange or self.do_levenberg or \
                self.do_svd:
            logger.log(20, '~~ JACOBIAN AND RESIDUAL VECTOR ~~'.rjust(79, '~'))
            # Setup the residual vector.
            num_d = 0
            for datatype in r_dict:
                num_d += len(r_dict[datatype])
            resid = np.empty((num_d, 1), dtype=float)
            count = 0
            for data_type in data_types:
                for r, c in zip(r_dict[data_type], c_dict[data_type]):
                    resid[count, 0] = r.wht * (r.val - c.val)
                    count += 1
            # logger.log(5, 'RESIDUAL VECTOR:\n{}'.format(resid))
            logger.log(20,
                       '  -- Formed {} residual vector.'.format(resid.shape))
            # Setup the Jacobian.
            num_p = len(self.ff.params)
            # Maybe should be a part of the Jacobian function.
            jacob = np.empty((num_d, num_p), dtype=float)
            jacob = return_jacobian(jacob, os.path.join(self.direc, par_file))
            # logger.log(5, 'JACOBIAN:\n{}'.format(jacob))
            logger.log(20, '  -- Formed {} Jacobian.'.format(jacob.shape))
            ma = jacob.T.dot(jacob)
            vb = jacob.T.dot(resid)
            # We need these for most optimization methods.
            logger.log(5, ' MATRIX A AND VECTOR B '.center(79, '-'))
            # logger.log(5, 'A:\n{}'.format(ma))
            # logger.log(5, 'b:\n{}'.format(vb))
        # Start coming up with new parameter sets.
        if self.do_newton and not restart:
            logger.log(20, '~~ NEWTON-RAPHSON ~~'.rjust(79, '~'))
            # Moved the derivative section outside of here.
            changes = do_newton(self.ff.params,
                                radii=self.newton_radii,
                                cutoffs=self.newton_cutoffs)
            cleanup(self.new_ffs, self.ff, changes)
        if self.do_lstsq:
            logger.log(20, '~~ LEAST SQUARES ~~'.rjust(79, '~'))
            changes = do_lstsq(ma,
                               vb,
                               radii=self.lstsq_radii,
                               cutoffs=self.lstsq_cutoffs)
            cleanup(self.new_ffs, self.ff, changes)
        if self.do_lagrange:
            logger.log(20, '~~ LAGRANGE ~~'.rjust(79, '~'))
            for factor in sorted(self.lagrange_factors):
                changes = do_lagrange(ma,
                                      vb,
                                      factor,
                                      radii=self.lagrange_radii,
                                      cutoffs=self.lagrange_cutoffs)
                cleanup(self.new_ffs, self.ff, changes)
        if self.do_levenberg:
            logger.log(20, '~~ LEVENBERG ~~'.rjust(79, '~'))
            for factor in sorted(self.levenberg_factors):
                changes = do_levenberg(ma,
                                       vb,
                                       factor,
                                       radii=self.levenberg_radii,
                                       cutoffs=self.levenberg_cutoffs)
                cleanup(self.new_ffs, self.ff, changes)
        if self.do_svd:
            logger.log(20, '~~ SINGULAR VALUE DECOMPOSITION ~~'.rjust(79, '~'))
            # J = U . s . VT
            mu, vs, mvt = return_svd(jacob)
            logger.log(1, '>>> mu.shape: {}'.format(mu.shape))
            logger.log(1, '>>> vs.shape: {}'.format(vs.shape))
            logger.log(1, '>>> mvt.shape: {}'.format(mvt.shape))
            logger.log(1, '>>> vb.shape: {}'.format(vb.shape))
            if self.svd_factors:
                changes = do_svd_w_thresholds(mu,
                                              vs,
                                              mvt,
                                              resid,
                                              self.svd_factors,
                                              radii=self.svd_radii,
                                              cutoffs=self.svd_cutoffs)
            else:
                changes = do_svd_wo_thresholds(mu,
                                               vs,
                                               mvt,
                                               resid,
                                               radii=self.svd_radii,
                                               cutoffs=self.svd_cutoffs)
            cleanup(self.new_ffs, self.ff, changes)
        # Report how many trial FFs were generated.
        logger.log(
            20, '  -- Generated {} trial force field(s).'.format(
                len(self.new_ffs)))
        # If there are any trials, test them.
        if self.new_ffs:
            logger.log(20, '~~ EVALUATING TRIAL FF(S) ~~'.rjust(79, '~'))
            for ff in self.new_ffs:
                data = opt.cal_ff(ff, self.args_ff, parent_ff=self.ff)
                # Shouldn't need to zero anymore.
                c_data = compare.data_by_type(data)
                r_dict, c_data = compare.trim_data(r_dict, c_data)
                ff.score = compare.compare_data(r_dict, c_data)
                opt.pretty_ff_results(ff)
            self.new_ffs = sorted(self.new_ffs, key=lambda x: x.score)
            # Check for improvement.
            if self.new_ffs[0].score < self.ff.score:
                ff = self.new_ffs[0]
                logger.log(
                    20,
                    '~~ GRADIENT FINISHED WITH IMPROVEMENTS ~~'.rjust(79, '~'))
                opt.pretty_ff_results(self.ff, level=20)
                opt.pretty_ff_results(ff, level=20)
                # Copy parameter derivatives from original FF to save time in
                # case we move onto simplex immediately after this.
                copy_derivs(self.ff, ff)
            else:
                ff = self.ff
        else:
            ff = self.ff
        ff.export_ff(ff.path)
        return ff
Example #8
0
File: loop.py Project: Q2MM/q2mm
 def run_loop_input(self, lines, score=None):
     """
     Executes the commands of a loop input, one command per line.

     The first whitespace separated word of each line selects the command:

     DIR   - Sets the working directory (`self.direc`).
     FFLD  - `read` imports a force field, `write` exports the current one.
     PARM  - Trims the FF parameters using a parameter file.
     LOOP  - Collects the lines up to the next END and runs them as an
             inner optimization loop with the given convergence.
     RDAT  - Calculates the reference data.
     CDAT  - Calculates the FF data.
     COMP  - Scores the FF against the reference data. `-o FILE` and `-p`
             are forwarded to `compare.compare_data` as `output` and
             `doprint`.
     GRAD  - Runs a gradient optimization. Each further column has the
             form `method=arg,arg,...` where method is one of lstsq,
             newton, levenberg, lagrange or svd, and each arg is `True`,
             `False`, `radii[a/b/...]`, `cutoff[a/b]` or (levenberg,
             lagrange and svd only) `factor[a/b/...]`. `[None]` stores
             None for that setting.
     SIMP  - Runs a simplex optimization. Accepts `max_params=N`.
     WGHT  - Sets `co.WEIGHTS[data_type]`.
     STEP  - Sets `co.STEPS[param_type]`.

     Blank lines are ignored.

     Parameters
     ----------
     lines : iterable of strings
         The commands to execute, in order.
     score : optional
         Not used by this method.

     Returns
     -------
     The force field stored on `self.ff` once all lines are consumed.
     """
     grad_methods = ('lstsq', 'newton', 'levenberg', 'lagrange', 'svd')
     # Only these methods take a list of factors.
     factor_methods = ('levenberg', 'lagrange', 'svd')

     def bracket_items(arg):
         # Raw '/' separated strings found between the square brackets.
         return re.search(r"\[(.+)\]", arg).group(1).split('/')

     def to_floats(items):
         # A lone 'None' inside the brackets means "no value". The split
         # above yields a list, so it must be compared against a list.
         if items == ['None']:
             return None
         return [float(item) for item in items]

     def configure_gradient(grad, col):
         # Applies one `method=arg,arg,...` column to the gradient object.
         # Gradient.run reads the plural attribute names (`*_cutoffs`,
         # `*_factors`), so those are the ones that are set here.
         for method in grad_methods:
             if method in col:
                 break
         else:
             raise Exception("'{}' : Not Recognized".format(col))
         for arg in col.split('=')[1].split(','):
             if arg == 'True':
                 setattr(grad, 'do_' + method, True)
             elif arg == 'False':
                 setattr(grad, 'do_' + method, False)
             if 'radii' in arg:
                 setattr(grad, method + '_radii',
                         to_floats(bracket_items(arg)))
             if 'cutoff' in arg:
                 items = bracket_items(arg)
                 # Cutoffs are a (lower, upper) pair.
                 if items != ['None'] and len(items) != 2:
                     raise Exception(
                         "Cutoff values must be between two numbers.")
                 setattr(grad, method + '_cutoffs', to_floats(items))
             if 'factor' in arg and method in factor_methods:
                 setattr(grad, method + '_factors',
                         to_floats(bracket_items(arg)))

     lines_iterator = iter(lines)
     for line in lines_iterator:
         cols = line.split()
         if not cols:
             # Nothing to do for an empty line.
             continue
         command = cols[0]
         if command == 'DIR':
             self.direc = cols[1]
         elif command == 'FFLD':
             # Import FF data.
             if cols[1] == 'read':
                 ff_path = os.path.join(self.direc, cols[2])
                 if cols[2] == 'mm3.fld':
                     self.ff = datatypes.MM3(ff_path)
                 if '.prm' in cols[2]:
                     self.ff = datatypes.TinkerFF(ff_path)
                 self.ff.import_ff()
                 self.ff.method = 'READ'
                 with open(ff_path, 'r') as f:
                     self.ff.lines = f.readlines()
             # Export FF data.
             if cols[1] == 'write':
                 self.ff.export_ff(os.path.join(self.direc, cols[2]))
         # Trim parameters.
         elif command == 'PARM':
             logger.log(20, '~~ SELECTING PARAMETERS ~~'.rjust(79, '~'))
             self.ff.params = parameters.trim_params_by_file(
                 self.ff.params, os.path.join(self.direc, cols[1]))
         elif command == 'LOOP':
             # Read lines that will be looped over. This advances the same
             # iterator, so the outer loop resumes after the END line.
             inner_loop_lines = []
             line = next(lines_iterator)
             while line.split()[0] != 'END':
                 inner_loop_lines.append(line)
                 line = next(lines_iterator)
             # Make loop object and populate attributes.
             loop = Loop()
             loop.convergence = float(cols[1])
             loop.direc = self.direc
             loop.ff = self.ff
             loop.args_ff = self.args_ff
             loop.args_ref = self.args_ref
             loop.ref_data = self.ref_data
             loop.loop_lines = inner_loop_lines
             # Log commands.
             pretty_loop_input(
                 inner_loop_lines, name='OPTIMIZATION LOOP',
                 score=self.ff.score)
             # Run inner loop.
             self.ff = loop.opt_loop()
         # Note: Probably want to update this to append the directory given
         #       by the new DIR command.
         elif command == 'RDAT':
             logger.log(
                 20, '~~ CALCULATING REFERENCE DATA ~~'.rjust(79, '~'))
             if len(cols) > 1:
                 self.args_ref = ' '.join(cols[1:]).split()
             self.ref_data = opt.return_ref_data(self.args_ref)
         elif command == 'CDAT':
             logger.log(
                 20, '~~ CALCULATING FF DATA ~~'.rjust(79, '~'))
             if len(cols) > 1:
                 self.args_ff = ' '.join(cols[1:]).split()
             self.ff.data = calculate.main(self.args_ff)
         elif command == 'COMP':
             output = False
             doprint = False
             r_dict = compare.data_by_type(self.ref_data)
             c_dict = compare.data_by_type(self.ff.data)
             r_dict, c_dict = compare.trim_data(r_dict, c_dict)
             if '-o' in cols:
                 output = os.path.join(
                     self.direc, cols[cols.index('-o') + 1])
             if '-p' in cols:
                 doprint = True
             self.ff.score = compare.compare_data(
                 r_dict, c_dict, output=output, doprint=doprint)
         elif command == 'GRAD':
             grad = gradient.Gradient(
                 direc=self.direc,
                 ff=self.ff,
                 ff_lines=self.ff.lines,
                 args_ff=self.args_ff)
             for col in cols[1:]:
                 configure_gradient(grad, col)
             self.ff = grad.run(ref_data=self.ref_data)
         elif command == 'SIMP':
             simp = simplex.Simplex(
                 direc=self.direc,
                 ff=self.ff,
                 ff_lines=self.ff.lines,
                 args_ff=self.args_ff)
             for col in cols[1:]:
                 if "max_params" in col:
                     # Stored as an integer because the simplex compares it
                     # against the number of parameters.
                     simp.max_params = int(col.split('=')[1])
                 else:
                     raise Exception("'{}' : Not Recognized".format(col))
             self.ff = simp.run(r_data=self.ref_data)
         elif command == 'WGHT':
             data_type = cols[1]
             co.WEIGHTS[data_type] = float(cols[2])
         elif command == 'STEP':
             param_type = cols[1]
             co.STEPS[param_type] = float(cols[2])
     return self.ff
Example #9
0
    def run(self, r_data=None):
        """
        Once all attributes are setup as you so desire, run this method to
        optimize the parameters.

        Returns
        -------
        `datatypes.FF` (or subclass)
            Contains the best parameters.
        """
        if r_data is None:
            r_data = opt.return_ref_data(self.args_ref)

        if self.ff.score is None:
            logger.log(20, '~~ CALCULATING INITIAL FF SCORE ~~'.rjust(79, '~'))
            self.ff.export_ff()
            # Could store data on self.ff.data if we wanted. Not necessary for
            # simplex. If simplex yielded no improvements, it would return this
            # FF, and then we might want the data such taht we don't have to
            # recalculate it in gradient. Let's hope simplex generally yields
            # improvements.
            data = calculate.main(self.args_ff)
            #deprecated
            #self.ff.score = compare.compare_data(r_data, data)
            r_dict = compare.data_by_type(r_data)
            c_dict = compare.data_by_type(data)
            r_dict, c_dict = compare.trim_data(r_dict, c_dict)
            self.ff.score = compare.compare_data(r_dict, c_dict)
        else:
            logger.log(20,
                       '  -- Reused existing score and data for initial FF.')

        logger.log(20, '~~ SIMPLEX OPTIMIZATION ~~'.rjust(79, '~'))
        logger.log(20, 'INIT FF SCORE: {}'.format(self.ff.score))
        opt.pretty_ff_results(self.ff, level=20)

        # Here's what we do if there are too many parameters.
        if self.max_params and len(self.ff.params) > self.max_params:
            logger.log(20, '  -- More parameters than the maximum allowed.')
            logger.log(5, 'CURRENT PARAMS: {}'.format(len(self.ff.params)))
            logger.log(5, 'MAX PARAMS: {}'.format(self.max_params))
            # Here we select the parameters that have the lowest 2nd
            # derivatives.

            # Could fail when simplex finds improvements but restores other
            # parameters.
            # if self.ff.params[0].d1:

            if None in [x.d1 for x in self.ff.params]:
                logger.log(15, '  -- Calculating new parameter derivatives.')
                # Do central differentiation so we can calculate derivatives.
                # Another option would be to write code to determine
                # derivatives only from forward differentiation.
                ffs = opt.differentiate_ff(self.ff, central=True)
                # We have to score to get the derivatives.
                for ff in ffs:
                    ff.export_ff(path=self.ff.path, lines=self.ff_lines)
                    logger.log(20, '  -- Calculating {}.'.format(ff))
                    data = calculate.main(self.args_ff)
                    #deprecated
                    #ff.score = compare.compare_data(r_data, data)
                    r_dict = compare.data_by_type(r_data)
                    c_dict = compare.data_by_type(data)
                    r_dict, c_dict = compare.trim_data(r_dict, c_dict)
                    ff.score = compare.compare_data(r_dict, c_dict)
                    opt.pretty_ff_results(ff)
                # Add the derivatives to your original FF.
                opt.param_derivs(self.ff, ffs)
                # Only keep the forward differentiated FFs.
                ffs = opt.extract_forward(ffs)
                logger.log(
                    5, '  -- Keeping {} forward differentiated '
                    'FFs.'.format(len(ffs)))
            else:
                logger.log(15, '  -- Reusing existing parameter derivatives.')
                # Differentiate all parameters forward. Yes, I know this is
                # counter-intuitive because we are going to only use subset of
                # the forward differentiated FFs. However, this is very
                # computationally inexpensive because we're not scoring them
                # now. We will remove the forward differentiated FFs we don't
                # want before scoring.
                ffs = opt.differentiate_ff(self.ff, central=False)

            # This sorts the parameters based upon their 2nd derivative.
            # It keeps the ones with lowest 2nd derivatives.

            # SCHEDULED FOR CHANGES. NOT A GOOD SORTING CRITERION.
            params = select_simp_params_on_derivs(self.ff.params,
                                                  max_params=self.max_params)
            # From the entire list of forward differentiated FFs, pick
            # out the ones that have the lowest 2nd derivatives.
            self.new_ffs = opt.extract_ff_by_params(ffs, params)
            logger.log(1,
                       '>>> len(self.new_ffs): {}'.format(len(self.new_ffs)))

            # Reduce number of parameters.
            # Will need an option that's not MM3* specific in the future.
            ff_rows = [x.mm3_row for x in params]
            ff_cols = [x.mm3_col for x in params]
            for ff in self.new_ffs:
                new_params = []
                for param in ff.params:
                    if param.mm3_row in ff_rows and param.mm3_col in ff_cols:
                        new_params.append(param)
                ff.params = new_params
            # Make a copy of your original FF that has less parameters.
            ff_copy = copy.deepcopy(self.ff)
            new_params = []
            for param in ff.params:
                if param.mm3_row in ff_rows and param.mm3_col in ff_cols:
                    new_params.append(param)
            ff_copy.params = new_params
        else:
            # In this case it's simple. Just forward differentiate each
            # parameter.
            self.new_ffs = opt.differentiate_ff(self.ff, central=False)
            logger.log(1,
                       '>>> len(self.new_ffs): {}'.format(len(self.new_ffs)))
            # Still make that FF copy.
            ff_copy = copy.deepcopy(self.ff)
        # Double check and make sure they're all scored.
        for ff in self.new_ffs:
            if ff.score is None:
                ff.export_ff(path=self.ff.path, lines=self.ff_lines)
                logger.log(20, '  -- Calculating {}.'.format(ff))
                data = calculate.main(self.args_ff)
                #deprecated
                #ff.score = compare.compare_data(r_data, data)
                r_dict = compare.data_by_type(r_data)
                c_dict = compare.data_by_type(data)
                r_dict, c_dict = compare.trim_data(r_dict, c_dict)
                ff.score = compare.compare_data(r_dict, c_dict)
                opt.pretty_ff_results(ff)

        # Add your copy of the orignal to FF to the forward differentiated FFs.
        self.new_ffs = sorted(self.new_ffs + [ff_copy], key=lambda x: x.score)
        # Allow 3 cycles w/o change for each parameter present. Remember that
        # the initial FF was added here, hence the minus one.
        self._max_cycles_wo_change = 3 * (len(self.new_ffs) - 1)
        wrapper = textwrap.TextWrapper(width=79)
        # Shows all FFs parameters.
        opt.pretty_ff_params(self.new_ffs)

        # Start the simplex cycles.
        current_cycle = 0
        cycles_wo_change = 0
        while current_cycle < self.max_cycles \
                and cycles_wo_change < self._max_cycles_wo_change:
            current_cycle += 1

            # Save the last best in case some accidental sort goes on.
            # Plus it makes reading the code a litle easier.
            last_best_ff = copy.deepcopy(self.new_ffs[0])
            logger.log(
                20, '~~ START SIMPLEX CYCLE {} ~~'.format(current_cycle).rjust(
                    79, '~'))
            logger.log(20, 'ORDERED FF SCORES:')
            logger.log(
                20,
                wrapper.fill('{}'.format(' '.join('{:15.4f}'.format(x.score)
                                                  for x in self.new_ffs))))

            inv_ff = self.ff.__class__()
            if self.do_weighted_reflection:
                inv_ff.method = 'WEIGHTED INVERSION'
            else:
                inv_ff.method = 'INVERSION'
            inv_ff.params = copy.deepcopy(last_best_ff.params)
            ref_ff = self.ff.__class__()
            ref_ff.method = 'REFLECTION'
            ref_ff.params = copy.deepcopy(last_best_ff.params)
            # Need score difference sum for weighted inversion.
            # Calculate this value before going into loop.
            if self.do_weighted_reflection:
                # If zero, should break.
                score_diff_sum = sum([
                    x.score - self.new_ffs[-1].score for x in self.new_ffs[:-1]
                ])
                if score_diff_sum == 0.:
                    logger.warning('No difference between force field scores. '
                                   'Exiting simplex.')
                    # We want to raise opt.OptError such that
                    # opt.catch_run_errors will write the best FF obtained thus
                    # far.
                    raise opt.OptError(
                        'No difference between force field scores. '
                        'Exiting simplex.')
            for i in range(0, len(last_best_ff.params)):
                if self.do_weighted_reflection:
                    inv_val = (sum([
                        x.params[i].value * (x.score - self.new_ffs[-1].score)
                        for x in self.new_ffs[:-1]
                    ]) / score_diff_sum)
                else:
                    inv_val = (
                        sum([x.params[i].value for x in self.new_ffs[:-1]]) /
                        len(self.new_ffs[:-1]))
                inv_ff.params[i].value = inv_val
                ref_ff.params[i].value = (2 * inv_val -
                                          self.new_ffs[-1].params[i].value)
            # The inversion point does not need to be scored.
            # Calculate score for reflected parameters.
            ref_ff.export_ff(path=self.ff.path, lines=self.ff.lines)
            data = calculate.main(self.args_ff)
            #deprecated
            #ref_ff.score = compare.compare_data(r_data, data)
            r_dict = compare.data_by_type(r_data)
            c_dict = compare.data_by_type(data)
            r_dict, c_dict = compare.trim_data(r_dict, c_dict)
            ref_ff.score = compare.compare_data(r_dict, c_dict)
            opt.pretty_ff_results(ref_ff)
            if ref_ff.score < last_best_ff.score:
                logger.log(20, '~~ ATTEMPTING EXPANSION ~~'.rjust(79, '~'))
                exp_ff = self.ff.__class__()
                exp_ff.method = 'EXPANSION'
                exp_ff.params = copy.deepcopy(last_best_ff.params)
                for i in range(0, len(last_best_ff.params)):
                    exp_ff.params[i].value = (
                        3 * inv_ff.params[i].value -
                        2 * self.new_ffs[-1].params[i].value)
                exp_ff.export_ff(path=self.ff.path, lines=self.ff.lines)
                data = calculate.main(self.args_ff)
                #deprecated
                #exp_ff.score = compare.compare_data(r_data, data)
                r_dict = compare.data_by_type(r_data)
                c_dict = compare.data_by_type(data)
                r_dict, c_dict = compare.trim_data(r_dict, c_dict)
                exp_ff.score = compare.compare_data(r_dict, c_dict)
                opt.pretty_ff_results(exp_ff)
                if exp_ff.score < ref_ff.score:
                    self.new_ffs[-1] = exp_ff
                    logger.log(
                        20, '  -- Expansion succeeded. Keeping expanded '
                        'parameters.')
                else:
                    self.new_ffs[-1] = ref_ff
                    logger.log(
                        20,
                        '  -- Expansion failed. Keeping reflected parameters.')
            elif ref_ff.score < self.new_ffs[-2].score:
                logger.log(20, '  -- Keeping reflected parameters.')
                self.new_ffs[-1] = ref_ff
            else:
                logger.log(20, '~~ ATTEMPTING CONTRACTION ~~'.rjust(79, '~'))
                con_ff = self.ff.__class__()
                con_ff.method = 'CONTRACTION'
                con_ff.params = copy.deepcopy(last_best_ff.params)
                for i in range(0, len(last_best_ff.params)):
                    if ref_ff.score > self.new_ffs[-1].score:
                        con_val = ((inv_ff.params[i].value +
                                    self.new_ffs[-1].params[i].value) / 2)
                    else:
                        con_val = ((3 * inv_ff.params[i].value -
                                    self.new_ffs[-1].params[i].value) / 2)
                    con_ff.params[i].value = con_val
                self.ff.export_ff(params=con_ff.params)
                data = calculate.main(self.args_ff)
                #deprecated
                #con_ff.score = compare.compare_data(r_data, data)
                r_dict = compare.data_by_type(r_data)
                c_dict = compare.data_by_type(data)
                r_dict, c_dict = compare.trim_data(r_dict, c_dict)
                con_ff.score = compare.compare_data(r_dict, c_dict)
                opt.pretty_ff_results(con_ff)
                # This change was made to reflect the 1998 Q2MM publication.
                # if con_ff.score < self.new_ffs[-1].score:
                if con_ff.score < self.new_ffs[-2].score:
                    logger.log(20, '  -- Contraction succeeded.')
                    self.new_ffs[-1] = con_ff
                elif self.do_massive_contraction:
                    logger.log(
                        20, '~~ DOING MASSIVE CONTRACTION ~~'.rjust(79, '~'))
                    for ff_num, ff in enumerate(self.new_ffs[1:]):
                        for i in range(0, len(last_best_ff.params)):
                            ff.params[i].value = (
                                (ff.params[i].value +
                                 self.new_ffs[0].params[i].value) / 2)
                        self.ff.export_ff(params=ff.params)
                        data = calculate.main(self.args_ff)
                        #deprecated
                        #ff.score = compare.compare_data(r_data, data)
                        r_dict = compare.data_by_type(r_data)
                        c_dict = compare.data_by_type(data)
                        r_dict, c_dict = compare.trim_data(r_dict, c_dict)
                        ff.score = compare.compare_data(r_dict, c_dict)
                        ff.method += ' MC'
                        opt.pretty_ff_results(ff)
                else:
                    logger.log(
                        20, '  -- Contraction failed. Keeping parmaeters '
                        'anyway.')
                    self.new_ffs[-1] = con_ff
            self.new_ffs = sorted(self.new_ffs, key=lambda x: x.score)
            # Keep track of the number of cycles without change. If there's
            # improvement, reset the counter.
            if self.new_ffs[0].score < last_best_ff.score:
                cycles_wo_change = 0
            else:
                cycles_wo_change += 1
                logger.log(
                    20, '  -- {} cycles without improvement out of {} '
                    'allowed.'.format(cycles_wo_change,
                                      self._max_cycles_wo_change))
            logger.log(20, 'BEST:')
            opt.pretty_ff_results(self.new_ffs[0], level=20)
            logger.log(
                20, '~~ END SIMPLEX CYCLE {} ~~'.format(current_cycle).rjust(
                    79, '~'))

        # This sort is likely unnecessary because it should be done at the end
        # of the last loop cycle, but I put it here just in case.
        self.new_ffs = sorted(self.new_ffs, key=lambda x: x.score)
        best_ff = self.new_ffs[0]
        if best_ff.score < self.ff.score:
            logger.log(
                20, '~~ SIMPLEX FINISHED WITH IMPROVEMENTS ~~'.rjust(79, '~'))
            best_ff = restore_simp_ff(best_ff, self.ff)
        else:
            logger.log(
                20,
                '~~ SIMPLEX FINISHED WITHOUT IMPROVEMENTS ~~'.rjust(79, '~'))
            # This restores the inital parameters, so no need to use
            # restore_simp_ff here.
            best_ff = self.ff
        opt.pretty_ff_results(self.ff, level=20)
        opt.pretty_ff_results(best_ff, level=20)
        logger.log(20, '  -- Writing best force field from simplex.')
        best_ff.export_ff(best_ff.path)
        return best_ff
Example #10
0
    def run(self, ref_data=None, restart=None):
        """
        Runs the gradient optimization.

        Ensure that the attributes in __init__ are set as you desire before
        using this function.

        Parameters
        ----------
        ref_data : optional
            Reference data. When None, it is obtained from
            `opt.return_ref_data(self.args_ref)`.
        restart : str, optional
            Name of an existing central differentiation file. It is joined
            onto `self.direc` when the Jacobian is read. When given, no new
            differentiation file is written, the parameter derivatives are
            not recalculated and the Newton-Raphson step is skipped.

        Returns
        -------
        `datatypes.FF` (or subclass)
            The best scoring trial FF if it scores lower than `self.ff`,
            otherwise `self.ff` itself.
        """
        # We need reference data if you didn't provide it.
        if ref_data is None:
            ref_data = opt.return_ref_data(self.args_ref)

        # We need the initial FF data.
        if self.ff.data is None:
            logger.log(20, '~~ GATHERING INITIAL FF DATA ~~'.rjust(79, '~'))
            # Is opt.Optimizer.ff_lines used anymore?
            self.ff.export_ff()
            self.ff.data = calculate.main(self.args_ff)
            # Not 100% sure if this is necessary, but it certainly doesn't hurt.
            compare.correlate_energies(ref_data, self.ff.data)
        r_dict = compare.data_by_type(ref_data)
        c_dict = compare.data_by_type(self.ff.data)
        r_dict, c_dict = compare.trim_data(r_dict, c_dict)
        if self.ff.score is None:
            # Already zeroed reference and correlated the energies.
            self.ff.score = compare.compare_data(r_dict, c_dict)
        # A fixed, sorted ordering of the data types keeps the columns of the
        # differentiation file and the rows of the residual vector aligned.
        data_types = sorted(r_dict)
        logger.log(20, '~~ GRADIENT OPTIMIZATION ~~'.rjust(79, '~'))
        logger.log(20, 'INIT FF SCORE: {}'.format(self.ff.score))
        opt.pretty_ff_results(self.ff, level=20)

        logger.log(20, '~~ CENTRAL DIFFERENTIATION ~~'.rjust(79, '~'))
        if restart:
            par_file = restart
            logger.log(20, '  -- Restarting gradient from central '
                       'differentiation file {}.'.format(par_file))
        else:
            # We need a file to hold the differentiated parameter data.
            par_files = glob.glob(os.path.join(self.direc, 'par_diff_???.txt'))
            if par_files:
                par_files.sort()
                # Use basename rather than splitting on '/' so the numeric
                # slice below still lines up when the path separator differs.
                most_recent_par_file = os.path.basename(par_files[-1])
                # Characters 9-11 of 'par_diff_NNN.txt' are the counter.
                most_recent_num = most_recent_par_file[9:12]
                num = int(most_recent_num) + 1
                par_file = 'par_diff_{:03d}.txt'.format(num)
            else:
                par_file = 'par_diff_001.txt'
            logger.log(20, '  -- Generating central differentiation '
                       'file {}.'.format(par_file))
            # The context manager guarantees the file is closed even when
            # exporting or scoring one of the differentiated FFs raises.
            with open(os.path.join(self.direc, par_file), 'w') as f:
                csv_writer = csv.writer(f)
                # Row 1 - Labels
                # Row 2 - Weights
                # Row 3 - Reference data values
                # Row 4 - Initial FF data values
                writerows = [[], [], [], []]
                for data_type in data_types:
                    writerows[0].extend([x.lbl for x in r_dict[data_type]])
                    writerows[1].extend([x.wht for x in r_dict[data_type]])
                    writerows[2].extend([x.val for x in r_dict[data_type]])
                    writerows[3].extend([x.val for x in c_dict[data_type]])
                csv_writer.writerows(writerows)
                logger.log(
                    20, '~~ DIFFERENTIATING PARAMETERS ~~'.rjust(79, '~'))
                # Save many FFs, each with their own parameter sets.
                ffs = opt.differentiate_ff(self.ff)
                logger.log(
                    20,
                    '~~ SCORING DIFFERENTIATED PARAMETERS ~~'.rjust(79, '~'))
                for ff in ffs:
                    ff.export_ff(lines=self.ff.lines)
                    logger.log(20, '  -- Calculating {}.'.format(ff))
                    data = calculate.main(self.args_ff)
                    c_data = compare.data_by_type(data)
                    r_dict, c_data = compare.trim_data(r_dict, c_data)
                    ff.score = compare.compare_data(r_dict, c_data)
                    opt.pretty_ff_results(ff)
                    # Write the data rather than storing it in memory. For
                    # large parameter sets, this could consume GBs of memory
                    # otherwise!
                    row = []
                    for data_type in data_types:
                        row.extend([x.val for x in c_data[data_type]])
                    csv_writer.writerow(row)

            # Make sure we have derivative information. Used for NR.
            #
            # The derivatives are useful for checking up on the progress of the
            # optimization and for deciding which parameters to use in a
            # subsequent simplex optimization.
            #
            # Still need a way to do this with the restart file.
            opt.param_derivs(self.ff, ffs)

        # Calculate the Jacobian, residual vector, matrix A and vector b.
        # These aren't needed if you're only doing Newton-Raphson.
        if self.do_lstsq or self.do_lagrange or self.do_levenberg or \
                self.do_svd:
            logger.log(20, '~~ JACOBIAN AND RESIDUAL VECTOR ~~'.rjust(79, '~'))
            # Setup the residual vector: one row per reference data point.
            num_d = sum(len(r_dict[datatype]) for datatype in r_dict)
            resid = np.empty((num_d, 1), dtype=float)
            count = 0
            for data_type in data_types:
                for r, c in zip(r_dict[data_type], c_dict[data_type]):
                    resid[count, 0] = r.wht * (r.val - c.val)
                    count += 1
            # logger.log(5, 'RESIDUAL VECTOR:\n{}'.format(resid))
            logger.log(
                20, '  -- Formed {} residual vector.'.format(resid.shape))
            # Setup the Jacobian.
            num_p = len(self.ff.params)
            # Maybe should be a part of the Jacobian function.
            jacob = np.empty((num_d, num_p), dtype=float)
            jacob = return_jacobian(jacob, os.path.join(self.direc, par_file))
            # logger.log(5, 'JACOBIAN:\n{}'.format(jacob))
            logger.log(20, '  -- Formed {} Jacobian.'.format(jacob.shape))
            ma = jacob.T.dot(jacob)
            vb = jacob.T.dot(resid)
            # We need these for most optimization methods.
            logger.log(5, ' MATRIX A AND VECTOR B '.center(79, '-'))
            # logger.log(5, 'A:\n{}'.format(ma))
            # logger.log(5, 'b:\n{}'.format(vb))
        # Start coming up with new parameter sets.
        # Newton-Raphson needs the parameter derivatives, which are only
        # calculated above when this is not a restart.
        if self.do_newton and not restart:
            logger.log(20, '~~ NEWTON-RAPHSON ~~'.rjust(79, '~'))
            # Moved the derivative section outside of here.
            changes = do_newton(self.ff.params,
                                radii=self.newton_radii,
                                cutoffs=self.newton_cutoffs)
            cleanup(self.new_ffs, self.ff, changes)
        if self.do_lstsq:
            logger.log(20, '~~ LEAST SQUARES ~~'.rjust(79, '~'))
            changes = do_lstsq(ma, vb,
                               radii=self.lstsq_radii,
                               cutoffs=self.lstsq_cutoffs)
            cleanup(self.new_ffs, self.ff, changes)
        if self.do_lagrange:
            logger.log(20, '~~ LAGRANGE ~~'.rjust(79, '~'))
            for factor in sorted(self.lagrange_factors):
                changes = do_lagrange(ma, vb, factor,
                                      radii=self.lagrange_radii,
                                      cutoffs=self.lagrange_cutoffs)
                cleanup(self.new_ffs, self.ff, changes)
        if self.do_levenberg:
            logger.log(20, '~~ LEVENBERG ~~'.rjust(79, '~'))
            for factor in sorted(self.levenberg_factors):
                changes = do_levenberg(ma, vb, factor,
                                       radii=self.levenberg_radii,
                                       cutoffs=self.levenberg_cutoffs)
                cleanup(self.new_ffs, self.ff, changes)
        if self.do_svd:
            logger.log(20, '~~ SINGULAR VALUE DECOMPOSITION ~~'.rjust(79, '~'))
            # J = U . s . VT
            mu, vs, mvt = return_svd(jacob)
            logger.log(1, '>>> mu.shape: {}'.format(mu.shape))
            logger.log(1, '>>> vs.shape: {}'.format(vs.shape))
            logger.log(1, '>>> mvt.shape: {}'.format(mvt.shape))
            logger.log(1, '>>> vb.shape: {}'.format(vb.shape))
            if self.svd_factors:
                changes = do_svd_w_thresholds(mu, vs, mvt, resid,
                                              self.svd_factors,
                                              radii=self.svd_radii,
                                              cutoffs=self.svd_cutoffs)
            else:
                changes = do_svd_wo_thresholds(mu, vs, mvt, resid,
                                               radii=self.svd_radii,
                                               cutoffs=self.svd_cutoffs)
            cleanup(self.new_ffs, self.ff, changes)
        # Report how many trial FFs were generated.
        logger.log(20, '  -- Generated {} trial force field(s).'.format(
                len(self.new_ffs)))
        # Without any trials there is nothing to test; keep the original FF.
        if not self.new_ffs:
            return self.ff
        logger.log(20, '~~ EVALUATING TRIAL FF(S) ~~'.rjust(79, '~'))
        for ff in self.new_ffs:
            data = opt.cal_ff(ff, self.args_ff, parent_ff=self.ff)
            # Shouldn't need to zero anymore.
            c_data = compare.data_by_type(data)
            r_dict, c_data = compare.trim_data(r_dict, c_data)
            ff.score = compare.compare_data(r_dict, c_data)
            opt.pretty_ff_results(ff)
        self.new_ffs = sorted(
            self.new_ffs, key=lambda x: x.score)
        # Check for improvement.
        if self.new_ffs[0].score < self.ff.score:
            ff = self.new_ffs[0]
            logger.log(
                20, '~~ GRADIENT FINISHED WITH IMPROVEMENTS ~~'.rjust(
                    79, '~'))
            opt.pretty_ff_results(self.ff, level=20)
            opt.pretty_ff_results(ff, level=20)
            # Copy parameter derivatives from original FF to save time in
            # case we move onto simplex immediately after this.
            copy_derivs(self.ff, ff)
        else:
            ff = self.ff
        return ff
Example #11
0
 def run(self, ref_data=None):
     """
     Runs the gradient optimization.

     Parameters
     ----------
     ref_data : optional
         Reference data. When None, it is obtained from
         `opt.return_ref_data(self.args_ref)`.

     Returns
     -------
     The best scoring trial FF if it scores lower than `self.ff`,
     otherwise `self.ff` itself.
     """
     # We need reference data if you didn't provide it.
     if ref_data is None:
         ref_data = opt.return_ref_data(self.args_ref)
     # We need the initial FF data.
     if self.ff.data is None:
         logger.log(20, '~~ GATHERING INITIAL FF DATA ~~'.rjust(79, '~'))
         # Check whether this is efficient with the ff_lines.
         self.ff.export_ff()
         self.ff.data = calculate.main(self.args_ff)
         # We could call compare.compare_data here, but the zeroing of
         # energies has already been done, so only correlate them.
         compare.correlate_energies(ref_data, self.ff.data)
     if self.ff.score is None:
         # Already zeroed reference and correlated the energies.
         self.ff.score = compare.calculate_score(ref_data, self.ff.data)
         # NOTE(review): this logs `fscore` while the value was just stored
         # on `score` -- confirm that the FF class really has `fscore`.
         logger.log(20, 'INITIAL FF SCORE: {}'.format(self.ff.fscore))
     logger.log(20, '~~ GRADIENT OPTIMIZATION ~~'.rjust(79, '~'))
     # We need a file to hold the differentiated parameter data.
     par_files = glob.glob(os.path.join(self.direc, 'par_diff_???.txt'))
     if par_files:
         par_files.sort()
         # Use basename rather than splitting on '/' so the numeric slice
         # below still lines up when the path separator differs.
         most_recent_par_file = os.path.basename(par_files[-1])
         # Characters 9-11 of 'par_diff_NNN.txt' are the counter.
         most_recent_num = most_recent_par_file[9:12]
         num = int(most_recent_num) + 1
         par_file = 'par_diff_{:03d}.txt'.format(num)
     else:
         par_file = 'par_diff_001.txt'
     # The context manager guarantees the file is closed even when exporting
     # or scoring one of the differentiated FFs raises.
     with open(os.path.join(self.direc, par_file), 'w') as f:
         csv_writer = csv.writer(f)
         # Row 1 - Labels
         # Row 2 - Weights
         # Row 3 - Reference data values
         # Row 4 - Initial FF data values
         csv_writer.writerow([x.lbl for x in ref_data])
         csv_writer.writerow([x.wht for x in ref_data])
         csv_writer.writerow([x.val for x in ref_data])
         csv_writer.writerow([x.val for x in self.ff.data])
         logger.log(20, '~~ DIFFERENTIATING PARAMETERS ~~'.rjust(79, '~'))
         # Setup the residual vector.
         # Perhaps move this closer to the Jacobian section.
         num_d = len(ref_data)
         resid = np.empty((num_d, 1), dtype=float)
         # `range` (not `xrange`) so this runs on both Python 2 and 3.
         for i in range(0, num_d):
             resid[i, 0] = ref_data[i].wht * (
                 ref_data[i].val - self.ff.data[i].val)
         logger.log(5, 'RESIDUAL VECTOR:\n{}'.format(resid))
         logger.log(
             20, '  -- Formed {} residual vector.'.format(resid.shape))
         # Save many FFs, each with their own parameter sets.
         ffs = opt.differentiate_ff(self.ff)
         logger.log(
             20, '~~ SCORING DIFFERENTIATED PARAMETERS ~~'.rjust(79, '~'))
         for ff in ffs:
             ff.export_ff(lines=self.ff.lines)
             logger.log(20, '  -- Calculating {}.'.format(ff))
             data = calculate.main(self.args_ff)
             compare.correlate_energies(ref_data, data)
             ff.score = compare.calculate_score(ref_data, data)
             opt.pretty_ff_results(ff)
             # Write the data rather than storing it in memory. For large
             # parameter sets, this could consume GBs of memory otherwise!
             csv_writer.writerow([x.val for x in data])
     # Calculate the Jacobian, residual vector, matrix A and vector b.
     # These aren't needed if you're only doing Newton-Raphson.
     if self.do_lstsq or self.do_lagrange or self.do_levenberg or \
             self.do_svd:
         logger.log(20, '~~ JACOBIAN AND RESIDUAL VECTOR ~~'.rjust(79, '~'))
         # Setup the Jacobian.
         num_p = len(self.ff.params)
         # Maybe should be a part of the Jacobian function.
         jacob = np.empty((num_d, num_p), dtype=float)
         jacob = return_jacobian(jacob, os.path.join(self.direc, par_file))
         ma = jacob.T.dot(jacob)
         vb = jacob.T.dot(resid)
         # We need these for most optimization methods.
         logger.log(5, ' MATRIX A AND VECTOR B '.center(79, '-'))
         logger.log(5, 'A:\n{}'.format(ma))
         logger.log(5, 'b:\n{}'.format(vb))
     # Start coming up with new parameter sets.
     if self.do_newton:
         logger.log(20, '~~ NEWTON-RAPHSON ~~'.rjust(79, '~'))
         # Make sure we have derivative information.
         if self.ff.params[0].d1 is None:
             opt.param_derivs(self.ff, ffs)
         changes = do_newton(self.ff.params,
                             radii=self.newton_radii,
                             cutoffs=self.newton_cutoffs)
         cleanup(self.new_ffs, self.ff, changes)
     if self.do_lstsq:
         logger.log(20, '~~ LEAST SQUARES ~~'.rjust(79, '~'))
         changes = do_lstsq(ma, vb,
                            radii=self.lstsq_radii,
                            cutoffs=self.lstsq_cutoffs)
         cleanup(self.new_ffs, self.ff, changes)
     if self.do_lagrange:
         logger.log(20, '~~ LAGRANGE ~~'.rjust(79, '~'))
         for factor in sorted(self.lagrange_factors):
             changes = do_lagrange(ma, vb, factor,
                                   radii=self.lagrange_radii,
                                   cutoffs=self.lagrange_cutoffs)
             cleanup(self.new_ffs, self.ff, changes)
     if self.do_levenberg:
         logger.log(20, '~~ LEVENBERG ~~'.rjust(79, '~'))
         for factor in sorted(self.levenberg_factors):
             changes = do_levenberg(ma, vb, factor,
                                    radii=self.levenberg_radii,
                                    cutoffs=self.levenberg_cutoffs)
             cleanup(self.new_ffs, self.ff, changes)
     if self.do_svd:
         logger.log(20, '~~ SINGULAR VALUE DECOMPOSITION ~~'.rjust(79, '~'))
         mu, vs, mv = return_svd(ma)
         if self.svd_factors:
             changes = do_svd_w_thresholds(mu, vs, mv, vb, self.svd_factors,
                                           radii=self.svd_radii,
                                           cutoffs=self.svd_cutoffs)
         else:
             changes = do_svd_wo_thresholds(mu, vs, mv, vb,
                                            radii=self.svd_radii,
                                            cutoffs=self.svd_cutoffs)
         cleanup(self.new_ffs, self.ff, changes)
     # Report how many trial FFs were generated.
     logger.log(20, '  -- Generated {} trial force field(s).'.format(
             len(self.new_ffs)))
     # Without any trials there is nothing to test; keep the original FF.
     if not self.new_ffs:
         return self.ff
     logger.log(20, '~~ EVALUATING TRIAL FF(S) ~~'.rjust(79, '~'))
     for ff in self.new_ffs:
         data = opt.cal_ff(ff, self.args_ff, parent_ff=self.ff)
         ff.score = compare.compare_data(ref_data, data, zero=False)
         opt.pretty_ff_results(ff)
     self.new_ffs = sorted(
         self.new_ffs, key=lambda x: x.score)
     # Check for improvement.
     if self.new_ffs[0].score < self.ff.score:
         ff = self.new_ffs[0]
         logger.log(
             20, '~~ GRADIENT FINISHED WITH IMPROVEMENTS ~~'.rjust(
                 79, '~'))
         opt.pretty_ff_results(self.ff, level=20)
         opt.pretty_ff_results(ff, level=20)
     else:
         ff = self.ff
     return ff
Example #12
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#author:pyphrb
import os
import datetime
import time
import sys
from compare import compare_data
# Directory the script was launched from; restored after every sub-directory.
now_cwd = os.getcwd()
# Date stamps that make up the per-day file names ("<date>.txt").
today = datetime.date.today()
oneday = datetime.timedelta(days=1)
yesterday = today - oneday
str_today = '{}.txt'.format(today)
str_yesterday = '{}.txt'.format(yesterday)
# Keep only the entries of the working directory that are directories.
dir_list = os.listdir(os.getcwd())
dir_array = [entry for entry in dir_list if os.path.isdir(entry)]
for dirPath in dir_array:
    # Work from inside the sub-directory while handling it.
    os.chdir(os.getcwd() + os.sep + dirPath)
    # Both files live in the sub-directory and are named "<dir>_<date>.txt".
    prefix = os.getcwd() + os.sep + dirPath + '_'
    newdata = prefix + str_today
    olddata = prefix + str_yesterday
    # Only compare when yesterday's file is actually there.
    if os.path.exists(olddata):
        compare_data(newdata, olddata, dirPath)
    os.chdir(now_cwd)
Example #13
0
 def test_compare_bonds(self):
     """Print the score `compare.compare_data` gives for r_conn vs. f_conn."""
     # presumably r_conn/f_conn are the reference and force-field data sets
     # prepared by the test fixture -- confirm against setUp.
     bond_score = compare.compare_data(self.r_conn, self.f_conn)
     message = 'COMPARE BONDS SCORE: {}'.format(bond_score)
     print(message)
Example #14
0
    def run(self, r_data=None):
        """
        Once all attributes are setup as you so desire, run this method to
        optimize the parameters.

        The initial force field (`self.ff`) is scored if needed, a set of
        forward-differentiated trial force fields is generated (optionally
        restricted to `self.max_params` parameters), and simplex cycles
        (inversion, reflection, expansion, contraction and, if enabled,
        massive contraction) are run until `self.max_cycles` is reached or
        `self.max_cycles_wo_change` cycles pass without a better score.

        Parameters
        ----------
        r_data : optional
            Reference data passed to `compare.compare_data`. When None it is
            obtained from `opt.return_ref_data(self.args_ref)`.

        Returns
        -------
        `datatypes.FF` (or subclass)
            Contains the best parameters.
        """
        if r_data is None:
            r_data = opt.return_ref_data(self.args_ref)
        logger.log(20, '~~ SIMPLEX OPTIMIZATION ~~'.rjust(79, '~'))
        # Here we don't actually need the database connection/force field data.
        # We only need the score.
        if self.ff.score is None:
            logger.log(20, '~~ CALCULATING INITIAL FF SCORE ~~'.rjust(79, '~'))
            self.ff.export_ff()
            # I could store this object to prevent on self.ff to prevent garbage
            # collection. Would be nice if simplex was followed by gradient,
            # which needs that information, and if simplex yielded no
            # improvements. At most points in the optimization, this is probably
            # too infrequent for it to be worth the memory, but it might be nice
            # once the parameters are close to convergence.
            data = calculate.main(self.args_ff)
            self.ff.score = compare.compare_data(r_data, data)
            logger.log(20, 'INITIAL FF SCORE: {}'.format(self.ff.score))
        else:
            logger.log(15,
                       '  -- Reused existing score and data for initial FF.')
        logger.log(15, 'INIT FF SCORE: {}'.format(self.ff.score))
        # Rows/columns of the parameters kept for the simplex. They stay None
        # when no parameter reduction takes place, so the copy of the initial
        # FF made further down knows whether it has to be trimmed as well.
        selected_rows = None
        selected_cols = None
        if self.max_params and len(self.ff.params) > self.max_params:
            if self.ff.params[0].d1:
                logger.log(15, '  -- Reusing existing parameter derivatives.')
                # Don't score so this really doesn't take much time.
                ffs = opt.differentiate_ff(self.ff, central=False)
            else:
                logger.log(15, '  -- Calculating new parameter derivatives.')
                ffs = opt.differentiate_ff(self.ff, central=True)
                # We have to score to get the derivatives.
                for ff in ffs:
                    ff.export_ff(lines=self.ff_lines)
                    logger.log(20, '  -- Calculating {}.'.format(ff))
                    data = calculate.main(self.args_ff)
                    ff.score = compare.compare_data(r_data, data)
                    opt.pretty_ff_results(ff)
                opt.param_derivs(self.ff, ffs)
                # Only keep the forward differentiated FFs.
                ffs = opt.extract_forward(ffs)
            params = select_simp_params_on_derivs(self.ff.params,
                                                  max_params=self.max_params)
            self.new_ffs = opt.extract_ff_by_params(ffs, params)
            # Reduce number of parameters.
            # Will need an option that's not MM3* specific.
            selected_rows = [x.mm3_row for x in params]
            selected_cols = [x.mm3_col for x in params]
            for ff in self.new_ffs:
                ff.params = [
                    param for param in ff.params
                    if param.mm3_row in selected_rows
                    and param.mm3_col in selected_cols]
        else:
            self.new_ffs = opt.differentiate_ff(self.ff, central=False)
        # Double check and make sure they're all scored.
        for ff in self.new_ffs:
            if ff.score is None:
                ff.export_ff(lines=self.ff_lines)
                logger.log(20, '  -- Calculating {}.'.format(ff))
                data = calculate.main(self.args_ff)
                ff.score = compare.compare_data(r_data, data)
                opt.pretty_ff_results(ff)
        # Add a copy of the initial FF to the trial FFs. Its parameters are
        # taken from the copy itself (not from whichever trial FF the loop
        # above ended on) so it keeps the initial values and shares no
        # parameter objects with a trial FF. It is trimmed only when the trial
        # FFs were trimmed too.
        ff_copy = copy.deepcopy(self.ff)
        if selected_rows is not None:
            ff_copy.params = [
                param for param in ff_copy.params
                if param.mm3_row in selected_rows
                and param.mm3_col in selected_cols]
        self.new_ffs = sorted(self.new_ffs + [ff_copy], key=lambda x: x.score)
        wrapper = textwrap.TextWrapper(width=79)
        logger.log(20, 'ORDERED FF SCORES:')
        logger.log(
            20,
            wrapper.fill('{}'.format(' '.join('{:15.4f}'.format(x.score)
                                              for x in self.new_ffs))))
        # Shows all FFs parameters.
        opt.pretty_ff_params(self.new_ffs)
        # Start the simplex cycles.
        current_cycle = 0
        cycles_wo_change = 0
        while current_cycle < self.max_cycles \
                and cycles_wo_change < self.max_cycles_wo_change:
            current_cycle += 1
            last_best = self.new_ffs[0].score
            best_ff = self.new_ffs[0]
            logger.log(
                20, '~~ START SIMPLEX CYCLE {} ~~'.format(current_cycle).rjust(
                    79, '~'))
            inv_ff = self.ff.__class__()
            if self.do_weighted_reflection:
                inv_ff.method = 'WEIGHTED INVERSION'
            else:
                inv_ff.method = 'INVERSION'
            inv_ff.params = copy.deepcopy(best_ff.params)
            ref_ff = self.ff.__class__()
            ref_ff.method = 'REFLECTION'
            ref_ff.params = copy.deepcopy(best_ff.params)
            # Set when the weighted inversion point cannot be computed; a
            # plain `break` in the parameter loop would only leave that loop.
            scores_degenerate = False
            for i in range(len(best_ff.params)):
                if self.do_weighted_reflection:
                    try:
                        inv_val = (sum([
                            x.params[i].value *
                            (x.score - self.new_ffs[-1].score)
                            for x in self.new_ffs[:-1]
                        ]) / sum([
                            x.score - self.new_ffs[-1].score
                            for x in self.new_ffs[:-1]
                        ]))
                    except ZeroDivisionError:
                        logger.warning(
                            'Attempted to divide by zero while calculating the '
                            'weighted simplex inversion point. All penalty '
                            'function scores for the trial force fields are '
                            'numerically equivalent.')
                        scores_degenerate = True
                        break
                else:
                    inv_val = (
                        sum([x.params[i].value for x in self.new_ffs[:-1]]) /
                        len(self.new_ffs[:-1]))
                inv_ff.params[i].value = inv_val
                ref_ff.params[i].value = (2 * inv_val -
                                          self.new_ffs[-1].params[i].value)
            if scores_degenerate:
                # Exit the while loop. Still gives you the best force field
                # determined thus far.
                break
            # Calculate score for inverted parameters.
            self.ff.export_ff(self.ff.path, params=inv_ff.params)
            data = calculate.main(self.args_ff)
            inv_ff.score = compare.compare_data(r_data, data)
            opt.pretty_ff_results(inv_ff)
            # Calculate score for reflected parameters.
            self.ff.export_ff(self.ff.path, params=ref_ff.params)
            data = calculate.main(self.args_ff)
            ref_ff.score = compare.compare_data(r_data, data)
            opt.pretty_ff_results(ref_ff)
            if ref_ff.score < self.new_ffs[0].score:
                # Reflection beat the current best: try to go even further.
                logger.log(20, '~~ ATTEMPTING EXPANSION ~~'.rjust(79, '~'))
                exp_ff = self.ff.__class__()
                exp_ff.method = 'EXPANSION'
                exp_ff.params = copy.deepcopy(best_ff.params)
                for i in range(len(self.new_ffs[0].params)):
                    exp_ff.params[i].value = (
                        3 * inv_ff.params[i].value -
                        2 * self.new_ffs[-1].params[i].value)
                self.ff.export_ff(self.ff.path, exp_ff.params)
                data = calculate.main(self.args_ff)
                exp_ff.score = compare.compare_data(r_data, data)
                opt.pretty_ff_results(exp_ff)
                if exp_ff.score < ref_ff.score:
                    self.new_ffs[-1] = exp_ff
                    logger.log(
                        20, '  -- Expansion succeeded. Keeping expanded '
                        'parameters.')
                else:
                    self.new_ffs[-1] = ref_ff
                    logger.log(
                        20,
                        '  -- Expansion failed. Keeping reflected parameters.')
            elif ref_ff.score < self.new_ffs[-2].score:
                # Reflection beat the second worst trial: replace the worst.
                logger.log(20, '  -- Keeping reflected parameters.')
                self.new_ffs[-1] = ref_ff
            else:
                logger.log(20, '~~ ATTEMPTING CONTRACTION ~~'.rjust(79, '~'))
                con_ff = self.ff.__class__()
                con_ff.method = 'CONTRACTION'
                con_ff.params = copy.deepcopy(best_ff.params)
                for i in range(len(best_ff.params)):
                    if ref_ff.score > self.new_ffs[-1].score:
                        con_val = ((inv_ff.params[i].value +
                                    self.new_ffs[-1].params[i].value) / 2)
                    else:
                        con_val = ((3 * inv_ff.params[i].value -
                                    self.new_ffs[-1].params[i].value) / 2)
                    con_ff.params[i].value = con_val
                self.ff.export_ff(self.ff.path, params=con_ff.params)
                data = calculate.main(self.args_ff)
                con_ff.score = compare.compare_data(r_data, data)
                opt.pretty_ff_results(con_ff)
                if con_ff.score < self.new_ffs[-2].score:
                    self.new_ffs[-1] = con_ff
                elif self.do_massive_contraction:
                    logger.log(
                        20, '~~ DOING MASSIVE CONTRACTION ~~'.rjust(79, '~'))
                    # Move every trial except the best halfway towards the
                    # best one and rescore it.
                    for ff in self.new_ffs[1:]:
                        for i in range(len(best_ff.params)):
                            ff.params[i].value = (
                                (ff.params[i].value +
                                 self.new_ffs[0].params[i].value) / 2)
                        self.ff.export_ff(self.ff.path, params=ff.params)
                        data = calculate.main(self.args_ff)
                        ff.score = compare.compare_data(r_data, data)
                        ff.method += ' MC'
                        opt.pretty_ff_results(ff)
                else:
                    logger.log(20, '  -- Contraction failed.')
            self.new_ffs = sorted(self.new_ffs, key=lambda x: x.score)
            if self.new_ffs[0].score < last_best:
                cycles_wo_change = 0
            else:
                cycles_wo_change += 1
                logger.log(
                    20,
                    '  -- {} cycles without change.'.format(cycles_wo_change))
            best_ff = self.new_ffs[0]
            logger.log(20, 'BEST:')
            opt.pretty_ff_results(self.new_ffs[0], level=20)
            logger.log(
                20, '~~ END SIMPLEX CYCLE {} ~~'.format(current_cycle).rjust(
                    79, '~'))
        if best_ff.score < self.ff.score:
            logger.log(
                20, '~~ SIMPLEX FINISHED WITH IMPROVEMENTS ~~'.rjust(79, '~'))
            best_ff = restore_simp_ff(best_ff, self.ff)
        else:
            logger.log(
                20,
                '~~ SIMPLEX FINISHED WITHOUT IMPROVEMENTS ~~'.rjust(79, '~'))
        opt.pretty_ff_results(self.ff, level=20)
        opt.pretty_ff_results(best_ff, level=20)
        logger.log(20, '  -- Writing best force field from simplex.')
        best_ff.export_ff(best_ff.path)
        return best_ff
Example #15
0
 def run_loop_input(self, lines, score=None):
     """
     Run the commands of a loop input file in order.

     Every non-blank line starts with a command keyword (DIR, FFLD, PARM,
     LOOP ... END, RDAT, CDAT, COMP, GRAD, SIMP or WGHT) followed by
     whitespace-separated arguments. Blank lines are skipped and lines
     starting with any other word are ignored.

     Parameters
     ----------
     lines : iterable of str
         The command lines to execute.
     score : optional
         Not used by this method.

     Returns
     -------
     The value of `self.ff` once every line has been processed.

     Raises
     ------
     StopIteration
         If a LOOP command is not closed by an END line.
     """
     lines_iterator = iter(lines)
     for line in lines_iterator:
         cols = line.split()
         # A blank line has no command word; skip it rather than fail on
         # cols[0].
         if not cols:
             continue
         command = cols[0]
         if command == 'DIR':
             self.direc = cols[1]
         elif command == 'FFLD':
             # Import FF data.
             if cols[1] == 'read':
                 self.ff = datatypes.MM3(os.path.join(self.direc, cols[2]))
                 self.ff.import_ff()
                 self.ff.method = 'READ'
                 with open(os.path.join(self.direc, cols[2]), 'r') as f:
                     self.ff.lines = f.readlines()
             # Export FF data.
             elif cols[1] == 'write':
                 self.ff.export_ff(os.path.join(self.direc, cols[2]))
         # Trim parameters.
         elif command == 'PARM':
             logger.log(20, '~~ SELECTING PARAMETERS ~~'.rjust(79, '~'))
             self.ff.params = parameters.trim_params_by_file(
                 self.ff.params, os.path.join(self.direc, cols[1]))
         elif command == 'LOOP':
             # Read lines that will be looped over. They are pulled from the
             # same iterator, so the outer loop resumes after END. The
             # builtin next() works on both Python 2 and 3.
             inner_loop_lines = []
             line = next(lines_iterator)
             # Comparing a slice keeps blank lines inside the loop body from
             # raising IndexError.
             while line.split()[:1] != ['END']:
                 inner_loop_lines.append(line)
                 line = next(lines_iterator)
             # Make loop object and populate attributes.
             loop = Loop()
             loop.convergence = float(cols[1])
             loop.direc = self.direc
             loop.ff = self.ff
             loop.args_ff = self.args_ff
             loop.args_ref = self.args_ref
             loop.ref_data = self.ref_data
             loop.loop_lines = inner_loop_lines
             # Log commands.
             pretty_loop_input(
                 inner_loop_lines, name='OPTIMIZATION LOOP',
                 score=self.ff.score)
             # Run inner loop.
             self.ff = loop.opt_loop()
         # Note: Probably want to update this to append the directory given
         #       by the new DIR command.
         elif command == 'RDAT':
             logger.log(
                 20, '~~ CALCULATING REFERENCE DATA ~~'.rjust(79, '~'))
             if len(cols) > 1:
                 self.args_ref = ' '.join(cols[1:]).split()
             self.ref_data = opt.return_ref_data(self.args_ref)
         elif command == 'CDAT':
             logger.log(
                 20, '~~ CALCULATING FF DATA ~~'.rjust(79, '~'))
             if len(cols) > 1:
                 self.args_ff = ' '.join(cols[1:]).split()
             self.ff.data = calculate.main(self.args_ff)
         elif command == 'COMP':
             self.ff.score = compare.compare_data(
                 self.ref_data, self.ff.data)
             # "-o <file>" passes an output path, "-p" calls the same
             # helper without one.
             if '-o' in cols:
                 compare.pretty_data_comp(
                     self.ref_data,
                     self.ff.data,
                     os.path.join(self.direc, cols[cols.index('-o') + 1]))
             if '-p' in cols:
                 compare.pretty_data_comp(
                     self.ref_data,
                     self.ff.data)
         elif command == 'GRAD':
             grad = gradient.Gradient(
                 direc=self.direc,
                 ff=self.ff,
                 ff_lines=self.ff.lines,
                 args_ff=self.args_ff)
             self.ff = grad.run(ref_data=self.ref_data)
         elif command == 'SIMP':
             simp = simplex.Simplex(
                 direc=self.direc,
                 ff=self.ff,
                 ff_lines=self.ff.lines,
                 args_ff=self.args_ff)
             self.ff = simp.run(r_data=self.ref_data)
         elif command == 'WGHT':
             data_type = cols[1]
             co.WEIGHTS[data_type] = float(cols[2])
     return self.ff
Example #16
0
File: loop.py Project: v3op01/q2mm
    def run_loop_input(self, lines, score=None):
        """
        Run the commands of a loop input file in order.

        Every non-blank line starts with a command keyword (DIR, FFLD, PARM,
        LOOP ... END, RDAT, CDAT, COMP, GRAD, SIMP, WGHT or STEP) followed by
        whitespace-separated arguments. Blank lines are skipped and lines
        starting with any other word are ignored.

        Parameters
        ----------
        lines : iterable of str
            The command lines to execute.
        score : optional
            Not used by this method.

        Returns
        -------
        The value of `self.ff` once every line has been processed.

        Raises
        ------
        StopIteration
            If a LOOP command is not closed by an END line.
        Exception
            If a GRAD or SIMP option is not recognized, or a GRAD cutoff does
            not hold exactly two values.
        """
        lines_iterator = iter(lines)
        for line in lines_iterator:
            cols = line.split()
            # A blank line has no command word; skip it rather than fail on
            # cols[0].
            if not cols:
                continue
            command = cols[0]
            if command == 'DIR':
                self.direc = cols[1]
            elif command == 'FFLD':
                # Import FF data.
                if cols[1] == 'read':
                    ff_path = os.path.join(self.direc, cols[2])
                    # The checks are independent on purpose: a later match
                    # overrides an earlier one, exactly as before.
                    if cols[2] == 'mm3.fld':
                        self.ff = datatypes.MM3(ff_path)
                    if 'prm' in line:
                        self.ff = datatypes.TinkerFF(ff_path)
                    if 'frcmod' in line:
                        self.ff = datatypes.AmberFF(ff_path)
                    self.ff.import_ff()
                    self.ff.method = 'READ'
                    with open(ff_path, 'r') as f:
                        self.ff.lines = f.readlines()
                # Export FF data.
                elif cols[1] == 'write':
                    self.ff.export_ff(os.path.join(self.direc, cols[2]))
            # Trim parameters.
            elif command == 'PARM':
                logger.log(20, '~~ SELECTING PARAMETERS ~~'.rjust(79, '~'))
                self.ff.params = parameters.trim_params_by_file(
                    self.ff.params, os.path.join(self.direc, cols[1]))
            elif command == 'LOOP':
                # Read lines that will be looped over. They are pulled from
                # the same iterator, so the outer loop resumes after END.
                inner_loop_lines = []
                line = next(lines_iterator)
                # Comparing a slice keeps blank lines inside the loop body
                # from raising IndexError.
                while line.split()[:1] != ['END']:
                    inner_loop_lines.append(line)
                    line = next(lines_iterator)
                # Make loop object and populate attributes.
                loop = Loop()
                loop.convergence = float(cols[1])
                loop.direc = self.direc
                loop.ff = self.ff
                loop.args_ff = self.args_ff
                loop.args_ref = self.args_ref
                loop.ref_data = self.ref_data
                loop.loop_lines = inner_loop_lines
                # Log commands.
                pretty_loop_input(inner_loop_lines,
                                  name='OPTIMIZATION LOOP',
                                  score=self.ff.score)
                # Run inner loop.
                self.ff = loop.opt_loop()
            # Note: Probably want to update this to append the directory given
            #       by the new DIR command.
            elif command == 'RDAT':
                logger.log(20,
                           '~~ CALCULATING REFERENCE DATA ~~'.rjust(79, '~'))
                if len(cols) > 1:
                    self.args_ref = ' '.join(cols[1:]).split()
                self.ref_data = opt.return_ref_data(self.args_ref)
            elif command == 'CDAT':
                logger.log(20, '~~ CALCULATING FF DATA ~~'.rjust(79, '~'))
                if len(cols) > 1:
                    self.args_ff = ' '.join(cols[1:]).split()
                self.ff.data = calculate.main(self.args_ff)
            elif command == 'COMP':
                # "-o <file>" names an output file, "-p" asks for printing;
                # both are forwarded to compare.compare_data.
                output = False
                doprint = False
                r_dict = compare.data_by_type(self.ref_data)
                c_dict = compare.data_by_type(self.ff.data)
                r_dict, c_dict = compare.trim_data(r_dict, c_dict)
                if '-o' in cols:
                    output = os.path.join(self.direc,
                                          cols[cols.index('-o') + 1])
                if '-p' in cols:
                    doprint = True
                self.ff.score = compare.compare_data(r_dict,
                                                     c_dict,
                                                     output=output,
                                                     doprint=doprint)
            elif command == 'GRAD':
                grad = gradient.Gradient(direc=self.direc,
                                         ff=self.ff,
                                         ff_lines=self.ff.lines,
                                         args_ff=self.args_ff)
                self._apply_gradient_options(grad, cols[1:])
                self.ff = grad.run(ref_data=self.ref_data)
            elif command == 'SIMP':
                simp = simplex.Simplex(direc=self.direc,
                                       ff=self.ff,
                                       ff_lines=self.ff.lines,
                                       args_ff=self.args_ff)
                for col in cols[1:]:
                    if "max_params" in col:
                        # Stored as an int: the simplex compares it with
                        # len(params), which fails for a string.
                        simp.max_params = int(col.split('=')[1])
                    else:
                        raise Exception("'{}' : Not Recognized".format(col))
                self.ff = simp.run(r_data=self.ref_data)
            elif command == 'WGHT':
                data_type = cols[1]
                co.WEIGHTS[data_type] = float(cols[2])
            elif command == 'STEP':
                param_type = cols[1]
                co.STEPS[param_type] = float(cols[2])
        return self.ff

    def _apply_gradient_options(self, grad, options):
        """
        Copy the options of a GRAD command onto the gradient object `grad`.

        Each option names one gradient method (lstsq, newton, levenberg,
        lagrange or svd); the text between its first and second '=' is a
        comma-separated list of settings:

        * ``True`` / ``False`` set ``grad.do_<method>``.
        * a setting containing ``radii`` sets ``grad.<method>_radii``.
        * a setting containing ``cutoff`` sets ``grad.<method>_cutoff`` and
          must hold exactly two values.
        * a setting containing ``factor`` (levenberg, lagrange and svd only)
          sets the matching factor attribute.

        Values are written between square brackets and separated by '/';
        ``[None]`` stores None instead of a list of floats.

        Raises
        ------
        Exception
            If no gradient method name occurs in an option, or a cutoff does
            not hold exactly two values.
        """
        # Method keyword -> attribute receiving 'factor' values (None when
        # the method takes no factors). Checked in this order.
        # NOTE(review): the attribute names ('<method>_cutoff',
        # 'levenberg_factor', 'svd_factor', 'lagrange_factors') are exactly
        # the ones this parser has always written -- confirm they match what
        # gradient.Gradient reads.
        methods = (
            ('lstsq', None),
            ('newton', None),
            ('levenberg', 'levenberg_factor'),
            ('lagrange', 'lagrange_factors'),
            ('svd', 'svd_factor'),
        )

        def bracket_values(arg):
            # Text between '[' and ']' split on '/'; "[None]" means unset.
            raw = re.search(r"\[(.+)\]", arg).group(1).split('/')
            if raw == ['None']:
                return None
            return raw

        for col in options:
            for method, factor_attr in methods:
                if method in col:
                    break
            else:
                raise Exception("'{}' : Not Recognized".format(col))
            for arg in col.split('=')[1].split(','):
                if arg == "True":
                    setattr(grad, 'do_' + method, True)
                elif arg == "False":
                    setattr(grad, 'do_' + method, False)
                if 'radii' in arg:
                    vals = bracket_values(arg)
                    if vals is not None:
                        vals = [float(val) for val in vals]
                    setattr(grad, method + '_radii', vals)
                if 'cutoff' in arg:
                    vals = bracket_values(arg)
                    if vals is not None:
                        if len(vals) != 2:
                            raise Exception("Cutoff values must "
                                            "be between two numbers.")
                        vals = [float(val) for val in vals]
                    setattr(grad, method + '_cutoff', vals)
                if factor_attr is not None and 'factor' in arg:
                    vals = bracket_values(arg)
                    if vals is not None:
                        vals = [float(val) for val in vals]
                    # Written to the factor attribute itself; the cutoff
                    # parsed above is left untouched.
                    setattr(grad, factor_attr, vals)
Example #17
0
 def run(self, ref_data=None, restart=None):
     """
     Run one gradient-based optimization pass and return the best FF found.

     The steps are:

     1. Obtain reference data, initial FF data and the initial FF score if
        they are not already available.
     2. Unless `restart` is given, differentiate the parameters and write
        a central differentiation file (par_diff_NNN.txt) into self.direc.
     3. If least squares, Lagrange, Levenberg or SVD is enabled, build the
        residual vector, the Jacobian, matrix A and vector b.
     4. Produce parameter changes with every enabled method and hand them
        to cleanup() together with self.new_ffs.
     5. Score the trial FFs in self.new_ffs and pick the lowest score.

     Arguments
     ---------
     ref_data : reference data points (objects with lbl, wht and val
                attributes). If None, they are obtained from
                opt.return_ref_data(self.args_ref).
     restart : name of an existing central differentiation file, relative
               to self.direc. When given, no new differentiation is done
               and the Newton-Raphson step is skipped.

     Returns
     -------
     The best trial FF if its score is lower than self.ff.score, otherwise
     self.ff.
     """
     # We need reference data if you didn't provide it.
     if ref_data is None:
         ref_data = opt.return_ref_data(self.args_ref)
     # We need the initial FF data.
     if self.ff.data is None:
         logger.log(20, '~~ GATHERING INITIAL FF DATA ~~'.rjust(79, '~'))
         # Check whether this is efficient with the ff_lines.
         self.ff.export_ff()
         self.ff.data = calculate.main(self.args_ff)
         # We could do this, but the zeroing of energies has already been
         # done.
         # self.ff.score = compare.compare_data(ref_data, self.ff.data)
         # So instead we do this.
         compare.correlate_energies(ref_data, self.ff.data)
     if self.ff.score is None:
         # Already zeroed reference and correlated the energies.
         self.ff.score = compare.calculate_score(ref_data, self.ff.data)
         logger.log(20, 'INITIAL FF SCORE: {}'.format(self.ff.score))
     logger.log(20, '~~ GRADIENT OPTIMIZATION ~~'.rjust(79, '~'))
     # We need a file to hold the differentiated parameter data.
     logger.log(20, '~~ CENTRAL DIFFERENTIATION ~~'.rjust(79, '~'))
     if restart:
         par_file = restart
         logger.log(
             20, '  -- Restarting gradient from central '
             'differentiation file {}.'.format(par_file))
     else:
         par_files = glob.glob(os.path.join(self.direc, 'par_diff_???.txt'))
         if par_files:
             par_files.sort()
             # basename() strips the directory whatever the path separator
             # is, so the fixed-width slice below stays aligned.
             most_recent_par_file = os.path.basename(par_files[-1])
             # 'par_diff_' is 9 characters long; [9:12] is the 3-digit index.
             most_recent_num = most_recent_par_file[9:12]
             num = int(most_recent_num) + 1
             par_file = 'par_diff_{:03d}.txt'.format(num)
         else:
             par_file = 'par_diff_001.txt'
         logger.log(
             20, '  -- Generating central differentiation '
             'file {}.'.format(par_file))
         # The context manager guarantees the file is flushed and closed
         # even if one of the calculations below raises.
         with open(os.path.join(self.direc, par_file), 'w') as f:
             csv_writer = csv.writer(f)
             # Row 1 - Labels
             # Row 2 - Weights
             # Row 3 - Reference data values
             # Row 4 - Initial FF data values
             csv_writer.writerow([x.lbl for x in ref_data])
             csv_writer.writerow([x.wht for x in ref_data])
             csv_writer.writerow([x.val for x in ref_data])
             csv_writer.writerow([x.val for x in self.ff.data])
             logger.log(
                 20, '~~ DIFFERENTIATING PARAMETERS ~~'.rjust(79, '~'))
             # Save many FFs, each with their own parameter sets.
             ffs = opt.differentiate_ff(self.ff)
             logger.log(
                 20,
                 '~~ SCORING DIFFERENTIATED PARAMETERS ~~'.rjust(79, '~'))
             for ff in ffs:
                 ff.export_ff(lines=self.ff.lines)
                 logger.log(20, '  -- Calculating {}.'.format(ff))
                 data = calculate.main(self.args_ff)
                 compare.correlate_energies(ref_data, data)
                 ff.score = compare.calculate_score(ref_data, data)
                 opt.pretty_ff_results(ff)
                 # Write the data rather than storing it in memory. For
                 # large parameter sets, this could consume GBs of memory
                 # otherwise!
                 csv_writer.writerow([x.val for x in data])
     # Calculate the Jacobian, residual vector, matrix A and vector b.
     # These aren't needed if you're only doing Newton-Raphson.
     if self.do_lstsq or self.do_lagrange or self.do_levenberg or \
             self.do_svd:
         logger.log(20, '~~ JACOBIAN AND RESIDUAL VECTOR ~~'.rjust(79, '~'))
         # Setup the residual vector: weighted (reference - FF) per point.
         num_d = len(ref_data)
         resid = np.empty((num_d, 1), dtype=float)
         for i in range(num_d):
             resid[i, 0] = ref_data[i].wht * (ref_data[i].val -
                                              self.ff.data[i].val)
         # logger.log(5, 'RESIDUAL VECTOR:\n{}'.format(resid))
         logger.log(20,
                    '  -- Formed {} residual vector.'.format(resid.shape))
         # Setup the Jacobian.
         num_p = len(self.ff.params)
         # Maybe should be a part of the Jacobian function.
         jacob = np.empty((num_d, num_p), dtype=float)
         jacob = return_jacobian(jacob, os.path.join(self.direc, par_file))
         # logger.log(5, 'JACOBIAN:\n{}'.format(jacob))
         logger.log(20, '  -- Formed {} Jacobian.'.format(jacob.shape))
         ma = jacob.T.dot(jacob)
         vb = jacob.T.dot(resid)
         # We need these for most optimization methods.
         logger.log(5, ' MATRIX A AND VECTOR B '.center(79, '-'))
         # logger.log(5, 'A:\n{}'.format(ma))
         # logger.log(5, 'b:\n{}'.format(vb))
     # Start coming up with new parameter sets.
     # Newton-Raphson needs `ffs`, which only exists when we differentiated
     # in this call (i.e. not on restart).
     if self.do_newton and not restart:
         logger.log(20, '~~ NEWTON-RAPHSON ~~'.rjust(79, '~'))
         # Make sure we have derivative information.
         if self.ff.params[0].d1 is None:
             opt.param_derivs(self.ff, ffs)
         changes = do_newton(self.ff.params,
                             radii=self.newton_radii,
                             cutoffs=self.newton_cutoffs)
         cleanup(self.new_ffs, self.ff, changes)
     if self.do_lstsq:
         logger.log(20, '~~ LEAST SQUARES ~~'.rjust(79, '~'))
         changes = do_lstsq(ma,
                            vb,
                            radii=self.lstsq_radii,
                            cutoffs=self.lstsq_cutoffs)
         cleanup(self.new_ffs, self.ff, changes)
     if self.do_lagrange:
         logger.log(20, '~~ LAGRANGE ~~'.rjust(79, '~'))
         for factor in sorted(self.lagrange_factors):
             changes = do_lagrange(ma,
                                   vb,
                                   factor,
                                   radii=self.lagrange_radii,
                                   cutoffs=self.lagrange_cutoffs)
             cleanup(self.new_ffs, self.ff, changes)
     if self.do_levenberg:
         logger.log(20, '~~ LEVENBERG ~~'.rjust(79, '~'))
         for factor in sorted(self.levenberg_factors):
             changes = do_levenberg(ma,
                                    vb,
                                    factor,
                                    radii=self.levenberg_radii,
                                    cutoffs=self.levenberg_cutoffs)
             cleanup(self.new_ffs, self.ff, changes)
     if self.do_svd:
         logger.log(20, '~~ SINGULAR VALUE DECOMPOSITION ~~'.rjust(79, '~'))
         # J = U . s . VT
         mu, vs, mvt = return_svd(jacob)
         logger.log(1, '>>> mu.shape: {}'.format(mu.shape))
         logger.log(1, '>>> vs.shape: {}'.format(vs.shape))
         logger.log(1, '>>> mvt.shape: {}'.format(mvt.shape))
         logger.log(1, '>>> vb.shape: {}'.format(vb.shape))
         if self.svd_factors:
             changes = do_svd_w_thresholds(mu,
                                           vs,
                                           mvt,
                                           resid,
                                           self.svd_factors,
                                           radii=self.svd_radii,
                                           cutoffs=self.svd_cutoffs)
         else:
             changes = do_svd_wo_thresholds(mu,
                                            vs,
                                            mvt,
                                            resid,
                                            radii=self.svd_radii,
                                            cutoffs=self.svd_cutoffs)
         cleanup(self.new_ffs, self.ff, changes)
     # Report how many trial FFs were generated.
     logger.log(
         20, '  -- Generated {} trial force field(s).'.format(
             len(self.new_ffs)))
     # If there are any trials, test them.
     if self.new_ffs:
         logger.log(20, '~~ EVALUATING TRIAL FF(S) ~~'.rjust(79, '~'))
         for ff in self.new_ffs:
             data = opt.cal_ff(ff, self.args_ff, parent_ff=self.ff)
             # Shouldn't need to zero anymore.
             ff.score = compare.compare_data(ref_data, data)
             opt.pretty_ff_results(ff)
         # Lowest score first.
         self.new_ffs = sorted(self.new_ffs, key=lambda x: x.score)
         # Check for improvement.
         if self.new_ffs[0].score < self.ff.score:
             ff = self.new_ffs[0]
             logger.log(
                 20,
                 '~~ GRADIENT FINISHED WITH IMPROVEMENTS ~~'.rjust(79, '~'))
             opt.pretty_ff_results(self.ff, level=20)
             opt.pretty_ff_results(ff, level=20)
             # Copy parameter derivatives from original FF to save time in
             # case we move onto simplex immediately after this.
             copy_derivs(self.ff, ff)
         else:
             ff = self.ff
     else:
         ff = self.ff
     return ff