def init_optimization(self, train_x, init_y):
    """Initialize the filter and run the initial (optionally joint
    filter + projection matrix) optimization.

    args:
        train_x: Initial training samples, one entry per feature layer.
        init_y: Corresponding label functions.
    """
    # Initialize filter
    filter_init_method = getattr(self.params, 'filter_init_method', 'zeros')
    self.filter = TensorList([
        x.new_zeros(1, cdim, sz[0], sz[1]) for x, cdim, sz in zip(
            train_x, self.compressed_dim, self.kernel_size)
    ])
    if filter_init_method == 'zeros':
        pass
    elif filter_init_method == 'randn':
        for f in self.filter:
            f.normal_(0, 1 / f.numel())
    else:
        raise ValueError('Unknown "filter_init_method"')

    # Get parameters
    self.params.update_projection_matrix = getattr(
        self.params, 'update_projection_matrix',
        True) and self.params.use_projection_matrix
    optimizer = getattr(self.params, 'optimizer', 'GaussNewtonCG')

    # Setup factorized joint optimization
    if self.params.update_projection_matrix:
        self.joint_problem = FactorizedConvProblem(
            self.init_training_samples, init_y, self.filter_reg,
            self.fparams.attribute('projection_reg'), self.params,
            self.init_sample_weights, self.projection_activation,
            self.response_activation)

        # Variable containing both filter and projection matrix
        joint_var = self.filter.concat(self.projection_matrix)

        # Initialize optimizer
        analyze_convergence = getattr(self.params, 'analyze_convergence',
                                      False)
        if optimizer == 'GaussNewtonCG':
            self.joint_optimizer = GaussNewtonCG(
                self.joint_problem,
                joint_var,
                debug=(self.params.debug >= 1),
                plotting=(self.params.debug >= 3),
                analyze=analyze_convergence,
                visdom=self.visdom)
        elif optimizer == 'GradientDescentL2':
            self.joint_optimizer = GradientDescentL2(
                self.joint_problem,
                joint_var,
                self.params.optimizer_step_length,
                self.params.optimizer_momentum,
                plotting=(self.params.debug >= 3),
                debug=(self.params.debug >= 1),
                visdom=self.visdom)

        # Do joint optimization
        if isinstance(self.params.init_CG_iter, (list, tuple)):
            self.joint_optimizer.run(self.params.init_CG_iter)
        else:
            self.joint_optimizer.run(
                self.params.init_CG_iter // self.params.init_GN_iter,
                self.params.init_GN_iter)

        if analyze_convergence:
            # Dump loss and gradient magnitudes to text files, then abort.
            opt_name = 'CG' if getattr(self.params, 'CG_optimizer',
                                       True) else 'GD'
            for val_name, values in zip(['loss', 'gradient'], [
                    self.joint_optimizer.losses,
                    self.joint_optimizer.gradient_mags
            ]):
                val_str = ' '.join(
                    ['{:.8e}'.format(v.item()) for v in values])
                file_name = '{}_{}.txt'.format(opt_name, val_name)
                with open(file_name, 'a') as f:
                    f.write(val_str + '\n')
            raise RuntimeError('Exiting')

    # Re-project samples with the new projection matrix
    compressed_samples = self.project_sample(self.init_training_samples,
                                             self.projection_matrix)
    for train_samp, init_samp in zip(self.training_samples,
                                     compressed_samples):
        train_samp[:init_samp.shape[0], ...] = init_samp

    self.hinge_mask = None

    # Initialize filter optimizer
    self.conv_problem = ConvProblem(self.training_samples, self.y,
                                    self.filter_reg, self.sample_weights,
                                    self.response_activation)

    if optimizer == 'GaussNewtonCG':
        self.filter_optimizer = ConjugateGradient(
            self.conv_problem,
            self.filter,
            fletcher_reeves=self.params.fletcher_reeves,
            direction_forget_factor=self.params.direction_forget_factor,
            debug=(self.params.debug >= 1),
            plotting=(self.params.debug >= 3),
            visdom=self.visdom)
    elif optimizer == 'GradientDescentL2':
        self.filter_optimizer = GradientDescentL2(
            self.conv_problem,
            self.filter,
            self.params.optimizer_step_length,
            self.params.optimizer_momentum,
            debug=(self.params.debug >= 1),
            plotting=(self.params.debug >= 3),
            visdom=self.visdom)

    # Transfer losses from previous optimization
    if self.params.update_projection_matrix:
        self.filter_optimizer.residuals = self.joint_optimizer.residuals
        self.filter_optimizer.losses = self.joint_optimizer.losses

    if not self.params.update_projection_matrix:
        self.filter_optimizer.run(self.params.init_CG_iter)

    # Post optimization
    self.filter_optimizer.run(self.params.post_init_CG_iter)

    # Free memory
    del self.init_training_samples
    if self.params.use_projection_matrix:
        del self.joint_problem, self.joint_optimizer
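
# Usage sketch (an assumption for illustration; `im` and the two helper calls
# below are not part of this excerpt): in a pytracking-style tracker,
# `init_optimization` is typically invoked once at the end of `initialize()`,
# after the initial samples and label functions have been constructed:
#
#     train_x = self.generate_init_samples(im)    # hypothetical in this excerpt
#     init_y = self.init_label_function(train_x)  # hypothetical in this excerpt
#     self.init_optimization(train_x, init_y)
#
# The variant below is a NumPy/PaddlePaddle port of the same routine.
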
def init_optimization(self, train_x, init_y):
    """NumPy/PaddlePaddle variant of the filter initialization and the
    initial (optionally joint filter + projection matrix) optimization.

    args:
        train_x: Initial training samples, one entry per feature layer.
        init_y: Corresponding label functions.
    """
    # Initialize filter
    filter_init_method = getattr(self.params, 'filter_init_method', 'zeros')
    self.filter = TensorList([
        np.zeros([1, cdim, sz[0], sz[1]], 'float32')
        for x, cdim, sz in zip(train_x, self.compressed_dim,
                               self.kernel_size)
    ])
    if filter_init_method == 'zeros':
        pass
    elif filter_init_method == 'ones':
        for idx, f in enumerate(self.filter):
            self.filter[idx] = np.ones(f.shape,
                                       'float32') / np.prod(f.shape)
    elif filter_init_method == 'np_randn':
        rng = np.random.RandomState(0)
        for idx, f in enumerate(self.filter):
            self.filter[idx] = rng.normal(
                size=f.shape, loc=0,
                scale=1 / np.prod(f.shape)).astype('float32')
    elif filter_init_method == 'randn':
        # Paddle-based random init (assumes module-level imports of
        # `fluid` and `layers` from paddle.fluid).
        for idx, f in enumerate(self.filter):
            with fluid.dygraph.guard():
                self.filter[idx] = layers.gaussian_random(
                    f.shape, std=1 / np.prod(f.shape)).numpy()
    else:
        raise ValueError('Unknown "filter_init_method"')

    # Get parameters
    self.params.update_projection_matrix = getattr(
        self.params, 'update_projection_matrix',
        True) and self.params.use_projection_matrix
    optimizer = getattr(self.params, 'optimizer', 'GaussNewtonCG')

    # Setup factorized joint optimization
    if self.params.update_projection_matrix:
        self.joint_problem = FactorizedConvProblem(
            self.init_training_samples, init_y, self.filter_reg,
            self.fparams.attribute('projection_reg'), self.params,
            self.init_sample_weights, self.projection_activation,
            self.response_activation)

        # Variable containing both filter and projection matrix
        joint_var = self.filter.concat(self.projection_matrix)

        # Initialize optimizer
        analyze_convergence = getattr(self.params, 'analyze_convergence',
                                      False)
        if optimizer == 'GaussNewtonCG':
            self.joint_optimizer = GaussNewtonCG(
                self.joint_problem,
                joint_var,
                plotting=(self.params.debug >= 3),
                analyze=True,
                fig_num=(12, 13, 14))
        elif optimizer == 'GradientDescentL2':
            self.joint_optimizer = GradientDescentL2(
                self.joint_problem,
                joint_var,
                self.params.optimizer_step_length,
                self.params.optimizer_momentum,
                plotting=(self.params.debug >= 3),
                debug=analyze_convergence,
                fig_num=(12, 13))

        # Do joint optimization
        if isinstance(self.params.init_CG_iter, (list, tuple)):
            self.joint_optimizer.run(self.params.init_CG_iter)
        else:
            self.joint_optimizer.run(
                self.params.init_CG_iter // self.params.init_GN_iter,
                self.params.init_GN_iter)

        # Get back filter and projection matrix from the joint variable
        len_x = len(self.joint_optimizer.x)
        self.filter = self.joint_optimizer.x[:len_x // 2]  # w2 in paper
        self.projection_matrix = self.joint_optimizer.x[
            len_x // 2:]  # w1 in paper

        if analyze_convergence:
            # Dump loss and gradient magnitudes to text files, then abort.
            opt_name = 'CG' if getattr(self.params, 'CG_optimizer',
                                       True) else 'GD'
            for val_name, values in zip(['loss', 'gradient'], [
                    self.joint_optimizer.losses,
                    self.joint_optimizer.gradient_mags
            ]):
                val_str = ' '.join(
                    ['{:.8e}'.format(v.item()) for v in values])
                file_name = '{}_{}.txt'.format(opt_name, val_name)
                with open(file_name, 'a') as f:
                    f.write(val_str + '\n')
            raise RuntimeError('Exiting')

    # Re-project samples with the new projection matrix
    compressed_samples = self.project_sample(self.init_training_samples,
                                             self.projection_matrix)
    for train_samp, init_samp in zip(self.training_samples,
                                     compressed_samples):
        for idx in range(init_samp.shape[0]):
            train_samp[idx] = init_samp[idx]

    self.hinge_mask = None

    # Initialize filter optimizer
    self.conv_problem = ConvProblem(self.training_samples, self.y,
                                    self.filter_reg, self.sample_weights,
                                    self.response_activation)

    if optimizer == 'GaussNewtonCG':
        self.filter_optimizer = ConjugateGradient(
            self.conv_problem,
            self.filter,
            fletcher_reeves=self.params.fletcher_reeves,
            direction_forget_factor=self.params.direction_forget_factor,
            debug=(self.params.debug >= 3),
            fig_num=(12, 13))
    elif optimizer == 'GradientDescentL2':
        self.filter_optimizer = GradientDescentL2(
            self.conv_problem,
            self.filter,
            self.params.optimizer_step_length,
            self.params.optimizer_momentum,
            debug=(self.params.debug >= 3),
            fig_num=12)

    # Transfer losses from previous optimization
    if self.params.update_projection_matrix:
        self.filter_optimizer.residuals = self.joint_optimizer.residuals
        self.filter_optimizer.losses = self.joint_optimizer.losses

    if not self.params.update_projection_matrix:
        self.filter_optimizer.run(self.params.init_CG_iter)

    # Post optimization
    self.filter_optimizer.run(self.params.post_init_CG_iter)
    self.filter = self.filter_optimizer.x

    # Free memory
    del self.init_training_samples
    if self.params.use_projection_matrix:
        del self.joint_problem, self.joint_optimizer
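
# A minimal sketch (assumption: standalone illustration, not part of the
# tracker) of how `run()` interprets the optimization budget above: a
# list/tuple supplies a per-Gauss-Newton-step CG iteration count, while an
# int is split evenly across `init_GN_iter` GN steps.
if __name__ == '__main__':
    init_CG_iter = 60  # hypothetical total CG budget
    init_GN_iter = 6   # hypothetical number of GN steps
    if isinstance(init_CG_iter, (list, tuple)):
        cg_schedule = list(init_CG_iter)
    else:
        cg_schedule = [init_CG_iter // init_GN_iter] * init_GN_iter
    print(cg_schedule)  # -> [10, 10, 10, 10, 10, 10]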