def solve(self, model, app_args, amc_cfg, services, steps_per_episode):
    msglogger.info("AMC: Using coach")

    # When we import the graph_manager from the ADC_DDPG preset, we implicitly instruct
    # Coach to create and use our DistillerWrapperEnvironment environment.
    # So Distiller calls Coach, which creates the environment, trains the agent, and ends.
    if amc_cfg.agent_algo == "DDPG":
        from examples.auto_compression.amc.rl_libs.coach.presets.ADC_DDPG import (
            graph_manager, agent_params)
        graph_manager.agent_params.exploration.noise_schedule = ExponentialSchedule(
            amc_cfg.ddpg_cfg.initial_training_noise, 0, amc_cfg.ddpg_cfg.training_noise_decay)
        # Number of iterations to train
        graph_manager.agent_params.algorithm.num_consecutive_training_steps = steps_per_episode
        #graph_manager.agent_params.algorithm.num_steps_between_copying_online_weights_to_target = TrainingSteps(1)
        graph_manager.agent_params.algorithm.num_steps_between_copying_online_weights_to_target = \
            TrainingSteps(steps_per_episode)
        # Heatup
        graph_manager.heatup_steps = EnvironmentEpisodes(amc_cfg.ddpg_cfg.num_heatup_episodes)
        # Replay buffer size
        graph_manager.agent_params.memory.max_size = (MemoryGranularity.Transitions,
                                                      amc_cfg.ddpg_cfg.replay_buffer_size)
        # Convert the per-episode noise decay to a per-step decay
        amc_cfg.ddpg_cfg.training_noise_decay = amc_cfg.ddpg_cfg.training_noise_decay ** (1. / steps_per_episode)
    elif "ClippedPPO" in amc_cfg.agent_algo:
        from examples.auto_compression.amc.rl_libs.coach.presets.ADC_ClippedPPO import graph_manager, agent_params
    elif "TD3" in amc_cfg.agent_algo:
        from examples.auto_compression.amc.rl_libs.coach.presets.ADC_TD3 import graph_manager, agent_params
    else:
        raise ValueError("The agent algorithm you are trying to use (%s) is not supported" %
                         amc_cfg.agent_algo)

    # Number of training episodes
    n_training_episodes = amc_cfg.ddpg_cfg.num_training_episodes
    graph_manager.improve_steps = EnvironmentEpisodes(n_training_episodes)
    # Don't evaluate until the end
    graph_manager.steps_between_evaluation_periods = EnvironmentEpisodes(n_training_episodes)

    # These parameters are passed to the Distiller environment
    env_cfg = {'model': model,
               'app_args': app_args,
               'amc_cfg': amc_cfg,
               'services': services}
    graph_manager.env_params.additional_simulator_parameters = env_cfg

    coach_logs_dir = os.path.join(msglogger.logdir, 'coach')
    os.mkdir(coach_logs_dir)
    task_parameters = TaskParameters(experiment_path=coach_logs_dir)
    # Set Coach's PRNG seed
    if app_args.seed is not None:
        task_parameters.seed = app_args.seed
    graph_manager.create_graph(task_parameters)
    graph_manager.improve()
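# A minimal, standalone sketch (not part of the Distiller source) of the arithmetic behind
# the last assignment in the DDPG branch above: a decay factor specified per episode is
# converted to a per-step factor by raising it to 1/steps_per_episode, so that applying it
# once per step reproduces the per-episode decay over a full episode. The 13-step episode
# and 0.996 decay below are illustrative values only.
steps_per_episode = 13
decay_per_episode = 0.996          # e.g. amc_cfg.ddpg_cfg.training_noise_decay
decay_per_step = decay_per_episode ** (1. / steps_per_episode)

noise = 0.5                        # initial exploration noise
for _ in range(steps_per_episode):
    noise *= decay_per_step
assert abs(noise - 0.5 * decay_per_episode) < 1e-9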
def coach_adc(model, dataset, arch, data_loader, validate_fn, save_checkpoint_fn):
    task_parameters = TaskParameters(framework_type="tensorflow",
                                     experiment_path="./experiments/test")
    extra_params = {'save_checkpoint_secs': None,
                    'render': True}
    task_parameters.__dict__.update(extra_params)

    # Create a dictionary of parameters that Coach will hand over to CNNEnvironment
    # once it creates it.
    if True:
        exploration_noise = 0.5
        #exploration_noise = 0.25
        exploitation_decay = 0.996
        graph_manager.env_params.additional_simulator_parameters = {
            'model': model,
            'dataset': dataset,
            'arch': arch,
            'data_loader': data_loader,
            'validate_fn': validate_fn,
            'save_checkpoint_fn': save_checkpoint_fn,
            #'action_range': (0.10, 0.95),
            'action_range': (0.70, 0.95),
            'onehot_encoding': False,
            'normalize_obs': True,
            'desired_reduction': None,
            'reward_fn': lambda top1, top5, vloss, total_macs: -1 * (1 - top5/100) * math.log(total_macs)
            #'reward_fn': lambda top1, total_macs: -1 * (1-top1/100) * math.log(total_macs)
            #'reward_fn': lambda top1, total_macs: -1 * max(1-top1/100, 0.25) * math.log(total_macs)
            #'reward_fn': lambda top1, total_macs: -1 * (1-top1/100) * math.log(total_macs/100000)
            #'reward_fn': lambda top1, total_macs: top1/100 * total_macs/self.dense_model_macs
        }
    else:
        exploration_noise = 0.5
        #exploration_noise = 0.25
        exploitation_decay = 0.996
        graph_manager.env_params.additional_simulator_parameters = {
            'model': model,
            'dataset': dataset,
            'arch': arch,
            'data_loader': data_loader,
            'validate_fn': validate_fn,
            'save_checkpoint_fn': save_checkpoint_fn,
            'action_range': (0.10, 0.95),
            'onehot_encoding': False,
            'normalize_obs': True,
            'desired_reduction': 1.5e8,
            'reward_fn': lambda top1, total_macs: top1/100
            #'reward_fn': lambda top1, total_macs: min(top1/100, 0.75)
        }

    #msglogger.debug('Experiment configuration:\n' + json.dumps(graph_manager.env_params.additional_simulator_parameters, indent=2))
    steps_per_episode = 13
    agent_params.exploration.noise_percentage_schedule = PieceWiseSchedule([
        (ConstantSchedule(exploration_noise), EnvironmentSteps(100 * steps_per_episode)),
        (ExponentialSchedule(exploration_noise, 0, exploitation_decay), EnvironmentSteps(300 * steps_per_episode))])
    graph_manager.create_graph(task_parameters)
    graph_manager.improve()
def coach_adc(model, dataset, arch, optimizer_data, validate_fn, save_checkpoint_fn, train_fn):
    # task_parameters = TaskParameters(framework_type="tensorflow",
    #                                  experiment_path="./experiments/test")
    # extra_params = {'save_checkpoint_secs': None,
    #                 'render': True}
    # task_parameters.__dict__.update(extra_params)
    task_parameters = TaskParameters(experiment_path=logger.get_experiment_path('adc'))
    conv_cnt = count_conv_layer(model)

    # Create a dictionary of parameters that Coach will hand over to CNNEnvironment
    # once it creates it.
    services = distiller.utils.MutableNamedTuple({
        'validate_fn': validate_fn,
        'save_checkpoint_fn': save_checkpoint_fn,
        'train_fn': train_fn})

    app_args = distiller.utils.MutableNamedTuple({
        'dataset': dataset,
        'arch': arch,
        'optimizer_data': optimizer_data})

    if True:
        amc_cfg = distiller.utils.MutableNamedTuple({
            #'action_range': (0.20, 0.95),
            'action_range': (0.20, 0.80),
            'onehot_encoding': False,
            'normalize_obs': True,
            'desired_reduction': None,
            'reward_fn': lambda top1, top5, vloss, total_macs: -1 * (1 - top1/100) * math.log(total_macs),
            'conv_cnt': conv_cnt,
            'max_reward': -1000})
    else:
        amc_cfg = distiller.utils.MutableNamedTuple({
            'action_range': (0.10, 0.95),
            'onehot_encoding': False,
            'normalize_obs': True,
            'desired_reduction': 1.5e8,
            'reward_fn': lambda top1, top5, vloss, total_macs: top1/100,
            #'reward_fn': lambda top1, total_macs: min(top1/100, 0.75),
            'conv_cnt': conv_cnt,
            'max_reward': -1000})

    # These parameters are passed to the Distiller environment
    graph_manager.env_params.additional_simulator_parameters = {'model': model,
                                                                'app_args': app_args,
                                                                'amc_cfg': amc_cfg,
                                                                'services': services}

    exploration_noise = 0.5
    exploitation_decay = 0.996
    steps_per_episode = conv_cnt
    agent_params.exploration.noise_percentage_schedule = PieceWiseSchedule([
        (ConstantSchedule(exploration_noise), EnvironmentSteps(100 * steps_per_episode)),
        (ExponentialSchedule(exploration_noise, 0, exploitation_decay), EnvironmentSteps(300 * steps_per_episode))])
    graph_manager.create_graph(task_parameters)
    graph_manager.improve()
def test_exponential_schedule():
    # Decreasing schedule: starts at 10, decays by 0.99 per step, bottoms out at 3
    schedule = ExponentialSchedule(10, 3, 0.99)

    current_power = 1
    for i in range(100):
        assert round(schedule.current_value, 6) == round(10 * current_power, 6)
        current_power *= 0.99
        schedule.step()

    # After enough additional steps the value is clipped at the final value
    for i in range(100):
        schedule.step()
    assert schedule.current_value == 3
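# For context, a minimal schedule class consistent with the behaviour this test checks
# might look as follows. This is a sketch, not the class under test: it assumes a
# constructor of the form (initial_value, final_value, decay) and a value that decays
# geometrically per step until it is clipped at final_value.
class ExponentialScheduleSketch:
    """Decreasing schedule: current_value = initial * decay**t, floored at final_value."""
    def __init__(self, initial_value, final_value, decay):
        self.initial_value = initial_value
        self.final_value = final_value
        self.decay = decay
        self.t = 0

    @property
    def current_value(self):
        return max(self.initial_value * self.decay ** self.t, self.final_value)

    def step(self):
        self.t += 1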
def test_piece_wise_schedule():
    # Schedule composed of linear, constant and exponential pieces
    schedule = PieceWiseSchedule([
        (LinearSchedule(1, 3, 10), EnvironmentSteps(5)),
        (ConstantSchedule(4), EnvironmentSteps(10)),
        (ExponentialSchedule(3, 1, 0.99), EnvironmentSteps(10))
    ])

    target_values = np.append(np.linspace(1, 2, 6), np.ones(11) * 4)
    for i in range(16):
        assert round(schedule.current_value, 4) == round(target_values[i], 4)
        schedule.step()

    current_power = 1
    for i in range(10):
        assert round(schedule.current_value, 4) == round(3 * current_power, 4)
        current_power *= 0.99
        schedule.step()
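# Similarly, sketches of the constant and linear pieces consistent with the target values
# asserted above. These are assumptions for illustration, not the classes under test; in
# particular, the linear piece is taken to move from initial_value to final_value in equal
# increments over num_steps and to hold its final value afterwards.
class ConstantScheduleSketch:
    """Always returns the same value."""
    def __init__(self, value):
        self.current_value = value

    def step(self):
        pass


class LinearScheduleSketch:
    """Moves from initial_value to final_value in num_steps equal increments."""
    def __init__(self, initial_value, final_value, num_steps):
        self.initial_value = initial_value
        self.final_value = final_value
        self.num_steps = num_steps
        self.t = 0

    @property
    def current_value(self):
        frac = min(self.t, self.num_steps) / self.num_steps
        return self.initial_value + (self.final_value - self.initial_value) * frac

    def step(self):
        self.t += 1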
def do_adc_internal(model, args, optimizer_data, validate_fn, save_checkpoint_fn, train_fn):
    dataset = args.dataset
    arch = args.arch
    perform_thinning = True  # args.amc_thinning
    num_ft_epochs = args.amc_ft_epochs
    action_range = args.amc_action_range
    np.random.seed()
    conv_cnt = count_conv_layer(model)

    msglogger.info("Executing AMC: RL agent - %s   RL library - %s", args.amc_agent_algo, RLLIB)

    # Create a dictionary of parameters that Coach will hand over to DistillerWrapperEnvironment
    # once it creates it.
    services = distiller.utils.MutableNamedTuple({
        'validate_fn': validate_fn,
        'save_checkpoint_fn': save_checkpoint_fn,
        'train_fn': train_fn})

    app_args = distiller.utils.MutableNamedTuple({
        'dataset': dataset,
        'arch': arch,
        'optimizer_data': optimizer_data})

    amc_cfg = distiller.utils.MutableNamedTuple({
        'protocol': args.amc_protocol,
        'agent_algo': args.amc_agent_algo,
        'perform_thinning': perform_thinning,
        'num_ft_epochs': num_ft_epochs,
        'action_range': action_range,
        'conv_cnt': conv_cnt,
        'reward_frequency': args.amc_reward_frequency})

    #net_wrapper = NetworkWrapper(model, app_args, services)
    #return sample_networks(net_wrapper, services)

    if args.amc_protocol == "accuracy-guaranteed":
        amc_cfg.target_density = None
        amc_cfg.reward_fn = lambda env, top1, top5, vloss, total_macs: -(1 - top1/100) * math.log(total_macs)
        amc_cfg.action_constrain_fn = None
    elif args.amc_protocol == "mac-constrained":
        amc_cfg.target_density = args.amc_target_density
        amc_cfg.reward_fn = lambda env, top1, top5, vloss, total_macs: top1/100  #(90.5 - top1) / 10
        amc_cfg.action_constrain_fn = DistillerWrapperEnvironment.get_action
    elif args.amc_protocol == "mac-constrained-experimental":
        amc_cfg.target_density = args.amc_target_density
        amc_cfg.reward_fn = experimental_reward_fn
        amc_cfg.action_constrain_fn = None
    else:
        raise ValueError("{} is not supported currently".format(args.amc_protocol))

    steps_per_episode = conv_cnt
    if args.amc_agent_algo == "DDPG":
        amc_cfg.heatup_noise = 0.5
        amc_cfg.initial_training_noise = 0.5
        amc_cfg.training_noise_decay = 0.996  # 0.998
        amc_cfg.num_heatup_epochs = args.amc_heatup_epochs
        amc_cfg.num_training_epochs = args.amc_training_epochs
        training_noise_duration = amc_cfg.num_training_epochs * steps_per_episode
        heatup_duration = amc_cfg.num_heatup_epochs * steps_per_episode

    if amc_cfg.agent_algo == "Random-policy":
        return random_agent(DistillerWrapperEnvironment(model, app_args, amc_cfg, services))

    if RLLIB == "spinup":
        msglogger.info("AMC: Using spinup")
        env1 = DistillerWrapperEnvironment(model, app_args, amc_cfg, services)
        env2 = DistillerWrapperEnvironment(model, app_args, amc_cfg, services)
        ddpg_spinup(env1, env2)
    else:
        msglogger.info("AMC: Using coach")

        # When we import the graph_manager from the ADC_DDPG preset, we implicitly instruct
        # Coach to create and use our DistillerWrapperEnvironment environment.
        # So Distiller calls Coach, which creates the environment, trains the agent, and ends.
if args.amc_agent_algo == "DDPG": from examples.automated_deep_compression.presets.ADC_DDPG import graph_manager, agent_params agent_params.exploration.noise_percentage_schedule = PieceWiseSchedule( [(ConstantSchedule(amc_cfg.heatup_noise), EnvironmentSteps(heatup_duration)), (ExponentialSchedule(amc_cfg.initial_training_noise, 0, amc_cfg.training_noise_decay), EnvironmentSteps(training_noise_duration))]) # agent_params.exploration.noise_percentage_schedule = ConstantSchedule(0) elif "ClippedPPO" in args.amc_agent_algo: from examples.automated_deep_compression.presets.ADC_ClippedPPO import graph_manager, agent_params # These parameters are passed to the Distiller environment graph_manager.env_params.additional_simulator_parameters = { 'model': model, 'app_args': app_args, 'amc_cfg': amc_cfg, 'services': services } coach_logs_dir = os.path.join(msglogger.logdir, 'coach') os.mkdir(coach_logs_dir) task_parameters = TaskParameters(experiment_path=coach_logs_dir) graph_manager.create_graph(task_parameters) graph_manager.improve()
agent_params.network_wrappers['actor'].heads_parameters[0].activation_function = 'sigmoid'
#agent_params.network_wrappers['critic'].clip_gradients = 100
#agent_params.network_wrappers['actor'].clip_gradients = 100

agent_params.algorithm.rate_for_copying_weights_to_target = 0.01  # Tau pg. 11
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(1)
agent_params.algorithm.discount = 1
agent_params.memory.max_size = (MemoryGranularity.Transitions, 2000)
agent_params.exploration = TruncatedNormalParameters()  # AdditiveNoiseParameters()
steps_per_episode = 13
agent_params.exploration.noise_percentage_schedule = PieceWiseSchedule([
    (ConstantSchedule(0.5), EnvironmentSteps(100 * steps_per_episode)),
    (ExponentialSchedule(0.5, 0, 0.996), EnvironmentSteps(300 * steps_per_episode))])
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentSteps(1)

agent_params.input_filter = MujocoInputFilter()
agent_params.output_filter = MujocoOutputFilter()
agent_params.network_wrappers['actor'].learning_rate = 0.0001
agent_params.network_wrappers['critic'].learning_rate = 0.001

##############################
#            Gym             #
##############################
env_params = GymEnvironmentParameters()
env_params.level = '../automated_deep_compression/ADC.py:CNNEnvironment'

vis_params = VisualizationParameters()
vis_params.dump_parameters_documentation = False
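# Rough sketch (an assumption, not the verbatim preset): Coach presets typically finish by
# wrapping the agent, environment, schedule and visualization parameters in a
# BasicRLGraphManager; that object is the `graph_manager` imported by the callers above.
# The schedule values here are placeholders, not taken from the original preset.
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
from rl_coach.graph_managers.graph_manager import ScheduleParameters
from rl_coach.core_types import EnvironmentEpisodes, EnvironmentSteps

schedule_params = ScheduleParameters()
schedule_params.improve_steps = EnvironmentEpisodes(400)                  # placeholder
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(400)
schedule_params.evaluation_steps = EnvironmentEpisodes(0)
schedule_params.heatup_steps = EnvironmentSteps(100 * steps_per_episode)  # placeholder

graph_manager = BasicRLGraphManager(agent_params=agent_params,
                                    env_params=env_params,
                                    schedule_params=schedule_params,
                                    vis_params=vis_params)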