def update(self, x, y_true, params, averager=None):
    """Run one training step using sign-symmetric feedback weights.

    The output error is propagated backwards through a matrix that takes
    its magnitudes from ``self.V2`` and its signs from ``self.W2.T``,
    rather than through ``self.W2.T`` itself.

    Parameters
    ----------
    x
        Network input; passed to ``self.forward`` and used in the
        ``W1`` gradient.
    y_true
        Target that is subtracted from the output activation.
    params : dict
        Passed to ``self.forward``; ``params['lr']`` is the step size
        applied to all four parameter updates.
    averager : optional
        Object with an ``add(name, value)`` method. When given, the angle
        (in degrees) between the feedback matrix and ``self.W2.T`` is
        recorded under ``'backward_angle'``. When ``None`` (the default)
        no metric is recorded.

    Returns
    -------
    The output activation ``h2`` from the forward pass, i.e. computed
    before the parameters were updated.
    """
    # Run forward pass.
    z1, h1, _, h2 = self.forward(x, params, return_activations=True)

    # Compute errors for each layer (= gradient of cost w.r.t. layer input).
    e2 = h2 - y_true  # gradient through cross entropy loss
    feedback = np.abs(self.V2) * np.sign(self.W2.T)
    e1 = d_sigmoid(z1) * (e2 @ feedback)  # gradient backpropagation

    # Using these errors, compute gradients of cost w.r.t. parameters.
    grad_b1 = e1
    grad_b2 = e2
    grad_W1 = np.outer(x, e1)  # np.outer creates a matrix from two vectors
    grad_W2 = np.outer(h1, e2)

    # Update parameters.
    lr = params['lr']
    self.b1 -= lr * grad_b1
    self.b2 -= lr * grad_b2
    self.W1 -= lr * grad_W1
    self.W2 -= lr * grad_W2

    # The averager is optional: only record metrics when one was supplied,
    # otherwise the default ``averager=None`` would raise AttributeError.
    if averager is not None:
        # Recompute the feedback matrix because W2 (and hence its signs)
        # has just been updated.
        feedback = np.abs(self.V2) * np.sign(self.W2.T)
        averager.add(
            'backward_angle',
            np.rad2deg(
                utils.angle_between(feedback.flatten(),
                                    self.W2.T.flatten())))

    return h2
def update(self, x, y_true, params, averager=None):
    """Run one training step, then adapt the backward weights.

    First the forward parameters (``W1``, ``b1``, ``W2``, ``b2``) are
    updated from an error that is propagated backwards through
    ``self.V2``. Afterwards ``self.update_weight_mirror(params)`` is
    called to adapt the backward weights.

    Parameters
    ----------
    x
        Network input; passed to ``self.forward`` and used in the
        ``W1`` gradient.
    y_true
        Target that is subtracted from the output activation.
    params : dict
        Passed to ``self.forward`` and ``self.update_weight_mirror``;
        ``params['lr_forward']`` is the step size of the forward updates.
    averager : optional
        Object with an ``add(name, value)`` method. When given, the angle
        (in degrees) between ``self.V2`` and ``self.W2.T`` is recorded
        under ``'backward_angle'`` and the mean of ``self.V2`` under
        ``'backward_mean'``. When ``None`` (the default) no metrics are
        recorded.

    Returns
    -------
    The output activation ``h2`` from the forward pass, i.e. computed
    before the parameters were updated.
    """
    # "Engaged mode": Forward pass on input image, backward pass to adapt
    # forward weights.
    # Run forward pass.
    z1, h1, _, h2 = self.forward(x, params, return_activations=True)

    # Compute errors for each layer (= gradient of cost w.r.t. layer input).
    e2 = h2 - y_true  # gradient through cross entropy loss
    e1 = d_sigmoid(z1) * (e2 @ self.V2)  # gradient backpropagation

    # Compute gradients of cost w.r.t. parameters.
    grad_b1 = e1
    grad_b2 = e2
    grad_W1 = np.outer(x, e1)  # np.outer creates a matrix from two vectors
    grad_W2 = np.outer(h1, e2)

    # Update parameters.
    lr_forward = params['lr_forward']
    self.b1 -= lr_forward * grad_b1
    self.b2 -= lr_forward * grad_b2
    self.W1 -= lr_forward * grad_W1
    self.W2 -= lr_forward * grad_W2

    # "Mirroring mode": Compute activities for random inputs, adapt
    # backward weights.
    self.update_weight_mirror(params)

    # The averager is optional: only record metrics when one was supplied,
    # otherwise the default ``averager=None`` would raise AttributeError.
    if averager is not None:
        averager.add(
            'backward_angle',
            np.rad2deg(
                utils.angle_between(self.V2.flatten(),
                                    self.W2.T.flatten())))
        averager.add('backward_mean', np.mean(self.V2.flatten()))

    return h2
def update(self, x, y_true, params, averager=None):
    """Run one two-phase training step with layer-local targets.

    Phase 1 computes a target for each layer and moves the forward
    parameters (``W1``, ``b1``, ``W2``, ``b2``) so that the activations
    approximate those targets. Phase 2 reconstructs the hidden activation
    from the output activation through the feedback connection and moves
    the backward parameters (``V2``, ``c2``) to reduce the reconstruction
    error. Both phases use the activations of the single forward pass run
    at the start of the call.

    Parameters
    ----------
    x
        Network input; passed to ``self.forward`` and used in the
        ``W1`` gradient.
    y_true
        Desired output, used to build the final-layer target.
    params : dict
        Passed to ``self.forward``. Reads ``'lr_final'`` (step used to
        build the output target), ``'lr_forward'`` and ``'lr_backward'``
        (step sizes of the forward / backward parameter updates).
    averager : optional
        Object with an ``add(name, value)`` method. When given, the
        losses ``'L1'``, ``'L2'``, ``'L_rec1'`` and the diagnostics
        ``'backward_angle'`` (degrees) and ``'backward_mean'`` are
        recorded. When ``None`` (the default) nothing is recorded and
        the losses are not evaluated.

    Returns
    -------
    The output activation ``h2`` from the forward pass, i.e. computed
    before the parameters were updated.
    """
    # The averager is optional; without this guard the default
    # ``averager=None`` would raise AttributeError on the first ``add``.
    record = averager is not None

    # Run forward pass.
    z1, h1, z2, h2 = self.forward(x, params, return_activations=True)

    # ---------- Phase 1: Compute targets and change feedforward weights. ----------
    # --------------------- (-> activations approximate targets) -------------------

    # Compute final layer target and backpropagate it.
    # Use the activation given by normal (local!) gradient descent as the
    # last layer target. This is a smoother version than using y_true
    # directly as the target.
    h2_target = h2 - params['lr_final'] * (h2 - y_true)
    z1_target = h2_target @ self.V2 + self.c2  # Backpropagate the targets.
    h1_target = sigmoid(z1_target)

    # Compute (local) forward losses; they are only needed for logging.
    if record:
        averager.add('L1', mean_squared_error(h1, h1_target))
        averager.add('L2', mean_squared_error(h2, h2_target))

    # Compute gradients of forward losses w.r.t. forward parameters.
    # The weight gradient is the outer product of the layer input with the
    # bias gradient, so the bias gradient is reused.
    dL1_db1 = 2 * (h1 - h1_target) * d_sigmoid(z1)
    dL1_dW1 = np.outer(x, dL1_db1)
    dL2_db2 = 2 * (h2 - h2_target) * d_sigmoid(z2)
    dL2_dW2 = np.outer(h1, dL2_db2)

    # Update forward parameters.
    lr_forward = params['lr_forward']
    self.b1 -= lr_forward * dL1_db1
    self.W1 -= lr_forward * dL1_dW1
    self.b2 -= lr_forward * dL2_db2
    self.W2 -= lr_forward * dL2_dW2

    # ---------- Phase 2: Compute reconstructed activations and change feedback weights. ----------
    # ------------- (-> backward function approximates inverse of forward function) ---------------

    # Compute reconstructed activations (here we only have one feedback
    # connection).
    z1_reconstructed = h2 @ self.V2 + self.c2
    h1_reconstructed = sigmoid(z1_reconstructed)

    # Compute reconstruction loss (only needed for logging).
    if record:
        averager.add('L_rec1', mean_squared_error(h1, h1_reconstructed))

    # Compute gradients of reconstruction loss w.r.t. backward parameters.
    dL_rec1_dc2 = 2 * (h1_reconstructed - h1) * d_sigmoid(z1_reconstructed)
    dL_rec1_dV2 = np.outer(h2, dL_rec1_dc2)

    # Update backward parameters.
    lr_backward = params['lr_backward']
    self.c2 -= lr_backward * dL_rec1_dc2
    self.V2 -= lr_backward * dL_rec1_dV2

    if record:
        averager.add(
            'backward_angle',
            np.rad2deg(
                utils.angle_between(self.V2.flatten(),
                                    self.W2.T.flatten())))
        averager.add('backward_mean', np.mean(self.V2.flatten()))

    return h2
def rad2deg(x):
    """Convert angles from radians to degrees.

    A ``JaxArray`` argument is unwrapped to its ``.value`` before being
    handed to ``jnp.rad2deg``; any other argument is passed through
    unchanged. The result is always wrapped in a ``JaxArray``.
    """
    radians = x.value if isinstance(x, JaxArray) else x
    degrees = jnp.rad2deg(radians)
    return JaxArray(degrees)
def _get_optical_system(self,
                        fft_oversample=2,
                        detector_oversample=None,
                        fov_arcsec=2,
                        fov_pixels=None,
                        options=None):
    """ Return an OpticalSystem instance corresponding to the instrument as currently configured.

    When creating such an OpticalSystem, you must specify the parameters needed to define the
    desired sampling, specifically the oversampling and field of view.

    Parameters
    ----------
    fft_oversample : int
        Oversampling factor for intermediate plane calculations. Default is 2
    detector_oversample: int, optional
        By default the detector oversampling is equal to the intermediate calculation oversampling.
        If you wish to use a different value for the detector, set this parameter.
        Note that if you just want images at detector pixel resolution you will achieve higher fidelity
        by still using some oversampling (i.e. *not* setting `detector_oversample=1`) and instead rebinning
        down the oversampled data.
    fov_pixels : float
        Field of view in pixels. Overrides fov_arcsec if both set.
    fov_arcsec : float
        Field of view, in arcseconds. Default is 2
    options : dict
        Other arbitrary options for optical system creation. The keys read
        here are ``source_offset_x``, ``source_offset_y``,
        ``source_offset_r`` and ``source_offset_theta``.

    Returns
    -------
    osys : morphine.OpticalSystem
        an optical system instance representing the desired configuration.

    Raises
    ------
    ValueError
        If ``source_offset_r`` is given together with ``source_offset_x``
        or ``source_offset_y``.
    IOError
        If ``self.pupil`` is a string that does not name an existing file.
    TypeError
        If ``self.pupil`` or ``self.pupilopd`` is of an unsupported type.
    """
    morphine_core._log.info("Creating optical system model:")

    if detector_oversample is None:
        detector_oversample = fft_oversample
    if options is None:
        options = dict()

    morphine_core._log.debug("Oversample: %d %d " %
                             (fft_oversample, detector_oversample))
    optsys = morphine_core.OpticalSystem(name=self.name,
                                         oversample=fft_oversample)

    # ---- set source offset, given either as Cartesian (x, y) or polar (r, theta)
    if 'source_offset_x' in options or 'source_offset_y' in options:
        if 'source_offset_r' in options:
            raise ValueError(
                "Cannot set source offset using source_offset_x and source_offset_y"
                + " at the same time as source_offset_r")
        offx = options.get('source_offset_x', 0)
        offy = options.get('source_offset_y', 0)
        # Convert the Cartesian offset to the (r, theta) form stored on
        # the optical system.
        optsys.source_offset_r = np.sqrt(offx**2 + offy**2)
        optsys.source_offset_theta = np.rad2deg(np.arctan2(-offx, offy))
        _log.debug(
            "Source offset from X,Y = ({}, {}) is (r,theta) = {},{}".
            format(offx, offy, optsys.source_offset_r,
                   optsys.source_offset_theta))
    else:
        if 'source_offset_r' in options:
            optsys.source_offset_r = options['source_offset_r']
        if 'source_offset_theta' in options:
            optsys.source_offset_theta = options['source_offset_theta']
        _log.debug("Source offset is (r,theta) = {},{}".format(
            optsys.source_offset_r, optsys.source_offset_theta))

    # ---- set pupil intensity
    pupil_optic = None  # no optic yet defined
    if isinstance(self.pupil, morphine_core.OpticalElement):
        # do we already have an object?
        pupil_optic = self.pupil
        full_pupil_path = None
    elif isinstance(self.pupil, str):  # simple filename
        if os.path.exists(self.pupil):
            full_pupil_path = self.pupil
        else:
            # Report the requested name; ``full_pupil_path`` is not
            # assigned on this branch, so using it here would raise
            # UnboundLocalError instead of the intended IOError.
            raise IOError("File not found: " + self.pupil)
    elif isinstance(self.pupil, fits.HDUList):
        # pupil supplied as FITS HDUList object
        full_pupil_path = self.pupil
    else:
        raise TypeError("Not sure what to do with a pupil of that type:" +
                        str(type(self.pupil)))

    # ---- set pupil OPD
    if isinstance(self.pupilopd, str):  # simple filename
        # Use the name as given if it exists, otherwise look for it in
        # the instrument's "OPD" data directory.
        full_opd_path = self.pupilopd if os.path.exists(
            self.pupilopd) else os.path.join(self._datapath, "OPD",
                                             self.pupilopd)
    elif hasattr(self.pupilopd, '__getitem__') and isinstance(
            self.pupilopd[0], str):
        # tuple with filename and slice
        full_opd_path = (self.pupilopd[0] if os.path.exists(
            self.pupilopd[0]) else os.path.join(
                self._datapath, "OPD", self.pupilopd[0]), self.pupilopd[1])
    elif isinstance(self.pupilopd, fits.HDUList):
        # OPD supplied as FITS HDUList object
        full_opd_path = self.pupilopd  # not a path per se but this works correctly to pass it to morphine
    elif self.pupilopd is None:
        full_opd_path = None
    else:
        raise TypeError(
            "Not sure what to do with a pupilopd of that type:" +
            str(type(self.pupilopd)))

    # ---- apply pupil intensity and OPD to the optical model
    optsys.add_pupil(name='Entrance Pupil',
                     optic=pupil_optic,
                     transmission=full_pupil_path,
                     opd=full_opd_path,
                     rotation=self._rotation)

    # Allow instrument subclass to add field-dependent aberrations
    aberration_optic = self._get_aberrations()
    if aberration_optic is not None:
        optsys.add_pupil(aberration_optic)

    # --- add the detector element.
    if fov_pixels is None:
        # Derive the pixel count from the angular field of view, then
        # bump it by one if needed to honour a requested odd/even parity.
        fov_pixels = np.round(fov_arcsec / self.pixelscale)
        if 'parity' in self.options:
            if self.options['parity'].lower() == 'odd' and np.remainder(
                    fov_pixels, 2) == 0:
                fov_pixels += 1
            if self.options['parity'].lower() == 'even' and np.remainder(
                    fov_pixels, 2) == 1:
                fov_pixels += 1

    optsys.add_detector(self.pixelscale,
                        fov_pixels=fov_pixels,
                        oversample=detector_oversample,
                        name=self.name + " detector")

    return optsys