예제 #1
0
def garipov_curve_search(model_a,
                         model_b,
                         curve_type='polygon_chain') -> np.ndarray:
    """
    Search for a low-loss path between two minima (Garipov et al., 2018).

    The method defines a parametric curve in the model's parameter space that
    connects one minimum to the other, and then minimizes the expected loss
    along that curve by adjusting the curve's parameterization. For details,
    see https://arxiv.org/abs/1802.10026

    This is an alternative to the auto_neb algorithm.

    NOTE: the search itself is not implemented yet; after wrapping both models
    and reading their parameter tensors, every call currently ends in an
    exception.

    :param model_a: model located at the first minimum
    :param model_b: model located at the second minimum
    :param curve_type: either 'polygon_chain' or 'bezier_curve'
    :raises NotImplementedError: if curve_type is one of the two known types
    :raises AttributeError: if curve_type is anything else
    """
    wrapper_a = wrap_model(model_a)
    wrapper_b = wrap_model(model_b)

    # Endpoints of the future curve; not used until the search is implemented.
    start_point = wrapper_a.get_parameter_tensor()
    end_point = wrapper_b.get_parameter_tensor()

    # todo: implement both curve parameterizations
    if curve_type in ('polygon_chain', 'bezier_curve'):
        raise NotImplementedError('Not implemented yet.')

    raise AttributeError(
        'Curve type is not polygon_chain or bezier_curve.')
예제 #2
0
    def __init__(self, model_start, model_end, order=2):
        """
        Define a Bezier curve between a start point and an end point.

        The order of the curve is the number of control points, excluding the
        start point. Only order 2 is currently accepted, which places a single
        intermediate control point between the two endpoints. Both models are
        deep-copied before being wrapped, so the caller's models are not the
        ones stored on the curve.

        Intermediate control point i (for i in 1..order-1) is initialised to
        start + i * (end - start) / order.

        :param model_start: point defining start of curve
        :param model_end: point defining end of curve
        :param order: number of control points, excluding start point
        :raises NotImplementedError: if order is not 2
        """
        super().__init__()
        if order != 2:
            raise NotImplementedError(
                'Currently only order 2 bezier curves are supported.')

        start_copy = copy.deepcopy(model_start)
        self.model_start_wrapper = wrap_model(start_copy)
        end_copy = copy.deepcopy(model_end)
        self.model_end_wrapper = wrap_model(end_copy)
        self.order = order
        self.control_points = []

        # populate the intermediate control points between the two endpoints
        if order > 1:
            origin = self.model_start_wrapper.get_parameter_tensor()
            target = self.model_end_wrapper.get_parameter_tensor()
            step = (target - origin) / order

            for k in range(1, order):
                # reuse the start wrapper as a template for each control point
                waypoint = copy.deepcopy(self.model_start_wrapper)
                waypoint.set_parameter_tensor(origin + (step * k))
                self.control_points.append(waypoint)
예제 #3
0
    def __init__(self, model, agent_interface=None, n_bases=2):
        """
        Set up an empty trajectory and a random basis matrix.

        The matrix ``self.A`` has one column per basis vector. Each column is
        an independent draw from ``np.random.normal`` whose length is the
        element count reported by the model's parameter tensor.

        :param model: model whose parameter count fixes the basis length
        :param agent_interface: forwarded to wrap_model; stored for later use
        :param n_bases: number of random basis vectors (columns of ``self.A``)
        """
        super().__init__()
        self.trajectory = []
        self.agent_interface = agent_interface

        wrapped = wrap_model(model, agent_interface)
        n_params = wrapped.get_parameter_tensor().numel()

        # draw the basis vectors one at a time, then stack them as columns
        basis_columns = []
        for _ in range(n_bases):
            basis_columns.append(np.random.normal(size=n_params))
        self.A = np.column_stack(basis_columns)
예제 #4
0
 def save_position(self, model):
     """
     Save the model's current parameters to the next numbered .npy file.

     The file name is ``self.dir`` followed by the running index and the
     '.npy' suffix; the index is incremented after the file is written.

     :param model: model whose parameters are stored
     """
     target_path = self.dir + str(self.next_idx) + '.npy'
     wrapper = wrap_model(model, self.agent_interface)
     snapshot = wrapper.get_parameter_tensor(deepcopy=True).as_numpy()
     np.save(target_path, snapshot)
     self.next_idx += 1
예제 #5
0
 def save_position(self, model):
     """
     Append the model's position, expressed in the basis ``self.A``.

     The coordinates are the least-squares solution x of A x = b, where b is
     the model's parameter tensor converted to a numpy array.

     :param model: model whose parameters are projected and recorded
     """
     wrapper = wrap_model(model, self.agent_interface)
     b = wrapper.get_parameter_tensor().as_numpy()
     # lstsq returns a tuple; its first entry is the solution vector
     solution = np.linalg.lstsq(self.A, b, rcond=None)
     self.trajectory.append(solution[0])